mirror of https://github.com/inclusionAI/AReaL
1105 lines
98 KiB
HTML
Executable File
1105 lines
98 KiB
HTML
Executable File
|
||
<!DOCTYPE html>
|
||
|
||
|
||
<html lang="en" data-content_root="../" >
|
||
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<title>Tutorial — AReaL Documentation</title>
|
||
|
||
|
||
|
||
<script data-cfasync="false">
|
||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||
</script>
|
||
|
||
<!-- Loaded before other Sphinx assets -->
|
||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
|
||
|
||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||
|
||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=fa44fd50" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=eba8b062" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/togglebutton.css?v=13237357" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css?v=be8a1c11" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/sphinx-thebe.css?v=4fa983c6" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" />
|
||
|
||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<script src="../_static/documentation_options.js?v=9eb32ce0"></script>
|
||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||
<script>let toggleHintShow = 'Click to show';</script>
|
||
<script>let toggleHintHide = 'Click to hide';</script>
|
||
<script>let toggleOpenOnPrint = 'true';</script>
|
||
<script src="../_static/togglebutton.js?v=4a39c7ea"></script>
|
||
<script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
|
||
<script src="../_static/design-tabs.js?v=f930bc37"></script>
|
||
<script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"; const thebe_selector = ".thebe,.cell"; const thebe_selector_input = "pre"; const thebe_selector_output = ".output, .cell_output"</script>
|
||
<script async="async" src="../_static/sphinx-thebe.js?v=c100c467"></script>
|
||
<script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
|
||
<script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"; const thebe_selector = ".thebe,.cell"; const thebe_selector_input = "pre"; const thebe_selector_output = ".output, .cell_output"</script>
|
||
<script>DOCUMENTATION_OPTIONS.pagename = 'tutorial/tutorial_v0_2_0';</script>
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||
<meta name="docsearch:language" content="en"/>
|
||
</head>
|
||
|
||
|
||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||
|
||
|
||
|
||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||
|
||
<div id="pst-scroll-pixel-helper"></div>
|
||
|
||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-primary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-secondary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||
|
||
<div class="search-button__wrapper">
|
||
<div class="search-button__overlay"></div>
|
||
<div class="search-button__search-container">
|
||
<form class="bd-search d-flex align-items-center"
|
||
action="../search.html"
|
||
method="get">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<input type="search"
|
||
class="form-control"
|
||
name="q"
|
||
id="search-input"
|
||
placeholder="Search this book..."
|
||
aria-label="Search this book..."
|
||
autocomplete="off"
|
||
autocorrect="off"
|
||
autocapitalize="off"
|
||
spellcheck="false"/>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||
</form></div>
|
||
</div>
|
||
|
||
<div class="pst-async-banner-revealer d-none">
|
||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||
</div>
|
||
|
||
|
||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||
</header>
|
||
|
||
|
||
<div class="bd-container">
|
||
<div class="bd-container__inner bd-page-width">
|
||
|
||
|
||
|
||
|
||
|
||
<div class="bd-sidebar-primary bd-sidebar">
|
||
|
||
|
||
|
||
<div class="sidebar-header-items sidebar-primary__section">
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||
<div class="sidebar-primary-item">
|
||
|
||
|
||
|
||
|
||
|
||
<a class="navbar-brand logo" href="../intro.html">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<img src="../_static/logo.png" class="logo__image only-light" alt="AReaL Documentation - Home"/>
|
||
<script>document.write(`<img src="../_static/logo.png" class="logo__image only-dark" alt="AReaL Documentation - Home"/>`);</script>
|
||
|
||
|
||
</a></div>
|
||
<div class="sidebar-primary-item">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<span class="search-button__default-text">Search</span>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||
</button>
|
||
`);
|
||
</script></div>
|
||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||
<div class="bd-toc-item navbar-nav active">
|
||
|
||
<ul class="nav bd-sidenav bd-sidenav__home-link">
|
||
<li class="toctree-l1">
|
||
<a class="reference internal" href="../intro.html">
|
||
Overview
|
||
</a>
|
||
</li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Tutorial</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../training.html">RL Training</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../eval.html">Evaluation</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../troubleshooting.html">Troubleshooting</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Developer Manual</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../developer/exp_launch.html">Launching Procedure</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../developer/master_worker.html">Master Worker</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../developer/model_worker.html">Model Worker</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../developer/algo_interface.html">Algorithm, Interface & Backends</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../developer/allocation_parallel.html">Allocation & Parallelism</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Contributing</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../contrib.html">Contribution Guide</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</nav></div>
|
||
</div>
|
||
|
||
|
||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||
</div>
|
||
|
||
<div id="rtd-footer-container"></div>
|
||
|
||
|
||
</div>
|
||
|
||
<main id="main-content" class="bd-main" role="main">
|
||
|
||
|
||
|
||
<div class="sbt-scroll-pixel-helper"></div>
|
||
|
||
<div class="bd-content">
|
||
<div class="bd-article-container">
|
||
|
||
<div class="bd-header-article d-print-none">
|
||
<div class="header-article-items header-article__inner">
|
||
|
||
<div class="header-article-items__start">
|
||
|
||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="fa-solid fa-bars"></span>
|
||
</button></div>
|
||
|
||
</div>
|
||
|
||
|
||
<div class="header-article-items__end">
|
||
|
||
<div class="header-article-item">
|
||
|
||
<div class="article-header-buttons">
|
||
|
||
|
||
|
||
|
||
|
||
<div class="dropdown dropdown-source-buttons">
|
||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
|
||
<i class="fab fa-github"></i>
|
||
</button>
|
||
<ul class="dropdown-menu">
|
||
|
||
|
||
|
||
<li><a href="https://github.com/inclusionAI/AReaL" target="_blank"
|
||
class="btn btn-sm btn-source-repository-button dropdown-item"
|
||
title="Source repository"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fab fa-github"></i>
|
||
</span>
|
||
<span class="btn__text-container">Repository</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li><a href="https://github.com/inclusionAI/AReaL/issues/new?title=Issue%20on%20page%20%2Ftutorial/tutorial_v0_2_0.html&body=Your%20issue%20content%20here." target="_blank"
|
||
class="btn btn-sm btn-source-issues-button dropdown-item"
|
||
title="Open an issue"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-lightbulb"></i>
|
||
</span>
|
||
<span class="btn__text-container">Open issue</span>
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<div class="dropdown dropdown-download-buttons">
|
||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||
<i class="fas fa-download"></i>
|
||
</button>
|
||
<ul class="dropdown-menu">
|
||
|
||
|
||
|
||
<li><a href="../_sources/tutorial/tutorial_v0_2_0.md" target="_blank"
|
||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||
title="Download source file"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file"></i>
|
||
</span>
|
||
<span class="btn__text-container">.md</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li>
|
||
<button onclick="window.print()"
|
||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||
title="Print to PDF"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file-pdf"></i>
|
||
</span>
|
||
<span class="btn__text-container">.pdf</span>
|
||
</button>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<button onclick="toggleFullScreen()"
|
||
class="btn btn-sm btn-fullscreen-button"
|
||
title="Fullscreen mode"
|
||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-expand"></i>
|
||
</span>
|
||
|
||
</button>
|
||
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="fa-solid fa-list"></span>
|
||
</button>
|
||
</div></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="jb-print-docs-body" class="onlyprint">
|
||
<h1>Tutorial</h1>
|
||
<!-- Table of contents -->
|
||
<div id="print-main-content">
|
||
<div id="jb-print-toc">
|
||
|
||
<div>
|
||
<h2> Contents </h2>
|
||
</div>
|
||
<nav aria-label="Page">
|
||
<ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hardware-requirements">Hardware Requirements</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#software-requirements">Software Requirements</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#one-click-environment-setup-and-training-launch">One-Click Environment Setup and Training Launch</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#environment-setup">Environment Setup</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#code">Code</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">Dataset</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model">Model</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#launch-the-ray-cluster">Launch the Ray Cluster</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rl-training">RL Training</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#commandline-options">Commandline Options</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#monitoring-the-training-process">Monitoring the Training Process</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training-progress">Training Progress</a></li>
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#model-performance">Model Performance</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-process">Evaluation Process</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-results">Evaluation Results</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-notes">Additional Notes</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#key-parameters">Key Parameters</a></li>
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#runtime">Runtime</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#troubleshooting">Troubleshooting</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-recover">Automatic Recover</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#series-of-outofmemory-errors">Series of OutOfMemory Errors</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#torch-cuda-cudaoutofmemoryerror">torch.cuda.CudaOutOfMemoryError</a></li>
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cuda-error-out-of-memory">CUDA error: out of memory</a></li>
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#runtimeerror-aborted-due-to-the-lack-of-cpu-swap-space">RuntimeError: Aborted due to the lack of CPU swap space.</a></li>
|
||
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cuda-error-an-illegal-memory-access-was-encountered">CUDA error: an illegal memory access was encountered</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="searchbox"></div>
|
||
<article class="bd-article">
|
||
|
||
<section class="tex2jax_ignore mathjax_ignore" id="tutorial">
|
||
<h1>Tutorial<a class="headerlink" href="#tutorial" title="Link to this heading">#</a></h1>
|
||
<section id="prerequisites">
|
||
<h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Link to this heading">#</a></h2>
|
||
<section id="hardware-requirements">
|
||
<h3>Hardware Requirements<a class="headerlink" href="#hardware-requirements" title="Link to this heading">#</a></h3>
|
||
<p>Check if your hardware meets these minimum requirements:</p>
|
||
<div class="pst-scrollable-table-container"><table class="table">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p><strong>Model Size</strong></p></th>
|
||
<th class="head text-center"><p><strong>1.5B</strong></p></th>
|
||
<th class="head text-center"><p><strong>1.5B</strong></p></th>
|
||
<th class="head text-center"><p><strong>1.5B</strong></p></th>
|
||
<th class="head text-center"><p><strong>7B</strong></p></th>
|
||
<th class="head text-center"><p><strong>7B</strong></p></th>
|
||
<th class="head text-center"><p><strong>32B</strong></p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><strong>Nodes</strong></p></td>
|
||
<td class="text-center"><p><strong>1</strong></p></td>
|
||
<td class="text-center"><p><strong>4</strong></p></td>
|
||
<td class="text-center"><p><strong>16</strong></p></td>
|
||
<td class="text-center"><p><strong>4</strong></p></td>
|
||
<td class="text-center"><p><strong>16</strong></p></td>
|
||
<td class="text-center"><p><strong>16</strong></p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>GPU</p></td>
|
||
<td class="text-center"><p>8x H800</p></td>
|
||
<td class="text-center"><p>8x H800 per node</p></td>
|
||
<td class="text-center"><p>8x H800 per node</p></td>
|
||
<td class="text-center"><p>8x H800 per node</p></td>
|
||
<td class="text-center"><p>8x H800 per node</p></td>
|
||
<td class="text-center"><p>8x H800 per node</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>CPU</p></td>
|
||
<td class="text-center"><p>48 cores</p></td>
|
||
<td class="text-center"><p>48 cores per node</p></td>
|
||
<td class="text-center"><p>48 cores per node</p></td>
|
||
<td class="text-center"><p>48 cores per node</p></td>
|
||
<td class="text-center"><p>48 cores per node</p></td>
|
||
<td class="text-center"><p>48 cores per node</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>Memory</p></td>
|
||
<td class="text-center"><p>1 TB</p></td>
|
||
<td class="text-center"><p>1 TB per node</p></td>
|
||
<td class="text-center"><p>1 TB per node</p></td>
|
||
<td class="text-center"><p>1 TB per node</p></td>
|
||
<td class="text-center"><p>1 TB per node</p></td>
|
||
<td class="text-center"><p>1 TB per node</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>Network</p></td>
|
||
<td class="text-center"><p>NVSwitch</p></td>
|
||
<td class="text-center"><p>NVSwitch + RoCE 3.2 Tbps</p></td>
|
||
<td class="text-center"><p>NVSwitch + RoCE 3.2 Tbps</p></td>
|
||
<td class="text-center"><p>NVSwitch + RoCE 3.2 Tbps</p></td>
|
||
<td class="text-center"><p>NVSwitch + RoCE 3.2 Tbps</p></td>
|
||
<td class="text-center"><p>NVSwitch + RoCE 3.2 Tbps</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>Storage</p></td>
|
||
<td class="text-center"><p>1TB</p></td>
|
||
<td class="text-center"><p>Shared storage (NAS) 10TB</p></td>
|
||
<td class="text-center"><p>Shared storage (NAS) 10TB</p></td>
|
||
<td class="text-center"><p>Shared storage (NAS) 10TB</p></td>
|
||
<td class="text-center"><p>Shared storage (NAS) 10TB</p></td>
|
||
<td class="text-center"><p>Shared storage (NAS) 10TB</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>BatchSize x GroupSize</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
<td class="text-center"><p>512x16</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><strong>Single-step Time (seconds)</strong></p></td>
|
||
<td class="text-center"><p><strong>3461</strong></p></td>
|
||
<td class="text-center"><p><strong>997</strong></p></td>
|
||
<td class="text-center"><p><strong>391</strong></p></td>
|
||
<td class="text-center"><p><strong>2275</strong></p></td>
|
||
<td class="text-center"><p><strong>815</strong></p></td>
|
||
<td class="text-center"><p><strong>6707</strong></p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><strong>#Steps Until Convergence</strong></p></td>
|
||
<td class="text-center"><p><strong>~250</strong></p></td>
|
||
<td class="text-center"><p><strong>~250</strong></p></td>
|
||
<td class="text-center"><p><strong>~250</strong></p></td>
|
||
<td class="text-center"><p><strong>~400</strong></p></td>
|
||
<td class="text-center"><p><strong>~400</strong></p></td>
|
||
<td class="text-center"><p>-</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><strong>Total Time (Hours)</strong></p></td>
|
||
<td class="text-center"><p><strong>~240</strong></p></td>
|
||
<td class="text-center"><p><strong>~69</strong></p></td>
|
||
<td class="text-center"><p><strong>~27</strong></p></td>
|
||
<td class="text-center"><p><strong>~252</strong></p></td>
|
||
<td class="text-center"><p><strong>~90</strong></p></td>
|
||
<td class="text-center"><p>-</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
<p>Notes:</p>
|
||
<ul class="simple">
|
||
<li><p>GPUs need to have 80GB memory. Other GPU models with similar specs are acceptable.</p></li>
|
||
<li><p>Single-node training can use local storage, but multi-node training requires shared storage.</p></li>
|
||
<li><p>We haven’t successfully trained a powerful 32B model yet, so we cannot estimate the required steps and time.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="software-requirements">
|
||
<h3>Software Requirements<a class="headerlink" href="#software-requirements" title="Link to this heading">#</a></h3>
|
||
<p>This tutorial provides a Docker image. Below are the tested software versions:</p>
|
||
<div class="pst-scrollable-table-container"><table class="table">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p></p></th>
|
||
<th class="head text-center"><p>Version</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p>OS</p></td>
|
||
<td class="text-center"><p>CentOS 7 / Ubuntu 22.04 or any other system that meets the software requirements below</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>NVIDIA Driver</p></td>
|
||
<td class="text-center"><p>550.127.08</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>CUDA</p></td>
|
||
<td class="text-center"><p>12.5</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>Git LFS</p></td>
|
||
<td class="text-center"><p>Refer to: <a class="reference external" href="https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage">https://docs.github.com/en/repositories/working-with-files/managing-large-files/installing-git-large-file-storage</a>. Mainly used for downloading models, datasets, and AReaL project code.</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>Docker</p></td>
|
||
<td class="text-center"><p>27.5.1</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>NVIDIA Container Toolkit</p></td>
|
||
<td class="text-center"><p><a class="reference external" href="https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html">Installing the NVIDIA Container Toolkit</a></p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>AReaL Image</p></td>
|
||
<td class="text-center"><p><code class="docutils literal notranslate"><span class="pre">ghcr.io/inclusionai/areal-runtime:v0.2.0</span></code>. This image includes AReaL’s runtime dependencies and Ray components.</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
<p>Since the installation of NVIDIA Drivers and CUDA, as well as the mounting of shared storage, depends on node configurations and system versions, please complete these installations independently. This tutorial does not cover their setup.</p>
|
||
<p>For multi-node training, ensure that the shared storage is mounted to the <code class="docutils literal notranslate"><span class="pre">/storage</span></code> directory on every node. All subsequent downloads and resources will be stored in this directory. The AReaL container will also mount this directory to <code class="docutils literal notranslate"><span class="pre">/storage</span></code> within the container, enabling seamless access during training.</p>
|
||
</section>
|
||
</section>
|
||
<section id="one-click-environment-setup-and-training-launch">
|
||
<h2>One-Click Environment Setup and Training Launch<a class="headerlink" href="#one-click-environment-setup-and-training-launch" title="Link to this heading">#</a></h2>
|
||
<p>This section provides a one-click setup script to automatically configure the node environment:</p>
|
||
<ol class="arabic simple">
|
||
<li><p>Install Docker, Git LFS, and NVIDIA Container Toolkit</p></li>
|
||
<li><p>Pull the AReaL image on each node</p></li>
|
||
<li><p>Download AReaL code, models, and datasets</p></li>
|
||
<li><p>Set up a Ray cluster</p></li>
|
||
<li><p>[Optional] Launch a training task within the Ray cluster</p></li>
|
||
</ol>
|
||
<p>Please perform the following operations on any chosen node:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>mkdir<span class="w"> </span>-p<span class="w"> </span>/storage/codes
|
||
<span class="nb">cd</span><span class="w"> </span>/storage/codes/
|
||
git<span class="w"> </span>clone<span class="w"> </span>https://github.com/inclusionAI/AReaL.git
|
||
<span class="nb">cd</span><span class="w"> </span>/storage/codes/AReaL
|
||
|
||
python<span class="w"> </span>./examples/env/setup_env_and_start_train.py<span class="w"> </span>setup<span class="w"> </span>--private_key_file<span class="w"> </span>/path/to/ssh_key<span class="w"> </span>--ssh_port<span class="w"> </span><span class="m">22</span><span class="w"> </span>--username<span class="w"> </span>root<span class="w"> </span>--hostnames<span class="w"> </span>NODE_IP_1<span class="w"> </span>NODE_IP_2<span class="w"> </span>NODE_IP_3<span class="w"> </span>NODE_IP_4<span class="w"> </span>--train_param<span class="w"> </span><span class="m">1</span>.5B_n1
|
||
</pre></div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">setup_env_and_start_train.py</span> <span class="pre">setup</span></code> arguments:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">private_key_file</span></code>: SSH private key file, used to connect to the nodes.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">ssh_port</span></code>: SSH port</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">username</span></code>: SSH username</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">hostnames</span></code>: List of node IP addresses, separated by spaces. Can contain 1, 4, or 16 node IPs.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">train_param</span></code>: [Optional] Training parameters used to launch a training task immediately after environment setup. Valid options are: <code class="docutils literal notranslate"><span class="pre">1.5B_n1</span></code>, <code class="docutils literal notranslate"><span class="pre">1.5B_n4</span></code>, <code class="docutils literal notranslate"><span class="pre">1.5B_n16</span></code>, <code class="docutils literal notranslate"><span class="pre">7B_n4</span></code>, <code class="docutils literal notranslate"><span class="pre">7B_n16</span></code></p></li>
|
||
</ul>
|
||
<p>If the script in this section fails to execute or encounters errors due to environmental discrepancies, you may manually configure the environment and launch training by following the instructions in the subsequent sections of this tutorial.</p>
|
||
</section>
|
||
<section id="environment-setup">
|
||
<h2>Environment Setup<a class="headerlink" href="#environment-setup" title="Link to this heading">#</a></h2>
|
||
<p>Since shared storage is used, downloading only needs to be done on one node.</p>
|
||
<section id="code">
|
||
<h3>Code<a class="headerlink" href="#code" title="Link to this heading">#</a></h3>
|
||
<p>Clone the AReaL project code to <code class="docutils literal notranslate"><span class="pre">/storage/codes</span></code>:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>mkdir<span class="w"> </span>-p<span class="w"> </span>/storage/codes
|
||
<span class="nb">cd</span><span class="w"> </span>/storage/codes/
|
||
git<span class="w"> </span>clone<span class="w"> </span>https://github.com/inclusionAI/AReaL
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="dataset">
|
||
<h3>Dataset<a class="headerlink" href="#dataset" title="Link to this heading">#</a></h3>
|
||
<p>We provide a dataset for training. Download the dataset and place it in <code class="docutils literal notranslate"><span class="pre">/storage/datasets/</span></code>:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>mkdir<span class="w"> </span>-p<span class="w"> </span>/storage/datasets/
|
||
<span class="nb">cd</span><span class="w"> </span>/storage/datasets/
|
||
wget<span class="w"> </span>https://huggingface.co/datasets/inclusionAI/AReaL-RL-Data/resolve/main/data/boba_106k_0319.jsonl?download<span class="o">=</span><span class="nb">true</span>
|
||
wget<span class="w"> </span>https://huggingface.co/datasets/inclusionAI/AReaL-RL-Data/resolve/main/data/orz-zero_56k_0319.jsonl?download<span class="o">=</span><span class="nb">true</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="model">
|
||
<h3>Model<a class="headerlink" href="#model" title="Link to this heading">#</a></h3>
|
||
<p>We train based on open-source models, which can be downloaded directly from the Hugging Face Hub (please ensure that Git LFS is installed):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mkdir</span> <span class="o">-</span><span class="n">p</span> <span class="o">/</span><span class="n">storage</span><span class="o">/</span><span class="n">models</span>
|
||
<span class="n">cd</span> <span class="o">/</span><span class="n">storage</span><span class="o">/</span><span class="n">models</span>
|
||
<span class="n">GIT_LFS_SKIP_SMUDGE</span><span class="o">=</span><span class="mi">1</span> <span class="n">git</span> <span class="n">clone</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">huggingface</span><span class="o">.</span><span class="n">co</span><span class="o">/</span><span class="n">deepseek</span><span class="o">-</span><span class="n">ai</span><span class="o">/</span><span class="n">DeepSeek</span><span class="o">-</span><span class="n">R1</span><span class="o">-</span><span class="n">Distill</span><span class="o">-</span><span class="n">Qwen</span><span class="o">-</span><span class="mi">7</span><span class="n">B</span>
|
||
<span class="n">cd</span> <span class="n">DeepSeek</span><span class="o">-</span><span class="n">R1</span><span class="o">-</span><span class="n">Distill</span><span class="o">-</span><span class="n">Qwen</span><span class="o">-</span><span class="mi">7</span><span class="n">B</span>
|
||
<span class="n">git</span> <span class="n">lfs</span> <span class="n">pull</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>You can also use the HuggingFace CLI to download the model after installing pip and huggingface_hub. Refer to the <a class="reference external" href="https://huggingface.co/docs/huggingface_hub/guides/cli">official documentation</a> for details.</p>
|
||
</section>
|
||
<section id="launch-the-ray-cluster">
|
||
<h3>Launch the Ray Cluster<a class="headerlink" href="#launch-the-ray-cluster" title="Link to this heading">#</a></h3>
|
||
<p>Before proceeding, pull the AReaL environment image, which already includes Ray components.</p>
|
||
<p>On the first node, start the Ray Head with the following command:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span>--name<span class="w"> </span>r1-ray-head<span class="w"> </span>--privileged<span class="w"> </span>--gpus<span class="w"> </span>all<span class="w"> </span>--network<span class="w"> </span>host<span class="w"> </span>--shm-size<span class="w"> </span>700g<span class="w"> </span>-v<span class="w"> </span>/storage:/storage<span class="w"> </span>ghcr.io/inclusionai/areal-runtime:v0.2.0<span class="w"> </span>/bin/bash<span class="w"> </span>-c<span class="w"> </span><span class="s2">"ray start --head --port=6379 && tail -f /dev/null"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>On all other nodes, start the Ray Worker with the following command (skip this step if you only have one node):</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># RAY_HEAD_IP is the IP of the first node</span>
|
||
<span class="nv">RAY_HEAD_IP</span><span class="o">=</span>xxx.xxx.xxx.xxx
|
||
docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span>--name<span class="w"> </span>r1-ray-worker<span class="w"> </span>--privileged<span class="w"> </span>--gpus<span class="w"> </span>all<span class="w"> </span>--network<span class="w"> </span>host<span class="w"> </span>--shm-size<span class="w"> </span>700g<span class="w"> </span>-v<span class="w"> </span>/storage:/storage<span class="w"> </span>ghcr.io/inclusionai/areal-runtime:v0.2.0<span class="w"> </span>/bin/bash<span class="w"> </span>-c<span class="w"> </span><span class="s2">"ray start --address=</span><span class="nv">$RAY_HEAD_IP</span><span class="s2">:6379 && tail -f /dev/null"</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Once all nodes are up, check the Ray cluster status by entering the container on the first node:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>docker<span class="w"> </span><span class="nb">exec</span><span class="w"> </span>-it<span class="w"> </span>r1-ray-head<span class="w"> </span>bash
|
||
ray<span class="w"> </span>status
|
||
</pre></div>
|
||
</div>
|
||
<p>You should see the Ray resource status. The output will vary depending on your node count (e.g., a 16-node, 128-GPU cluster will show the following results).</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">========</span> <span class="n">Autoscaler</span> <span class="n">status</span><span class="p">:</span> <span class="mi">2025</span><span class="o">-</span><span class="mi">02</span><span class="o">-</span><span class="mi">22</span> <span class="mi">14</span><span class="p">:</span><span class="mi">08</span><span class="p">:</span><span class="mf">51.061250</span> <span class="o">========</span>
|
||
<span class="n">Node</span> <span class="n">status</span>
|
||
<span class="o">---------------------------------------------------------------</span>
|
||
<span class="n">Active</span><span class="p">:</span>
|
||
<span class="mi">1</span> <span class="n">node_d5634ae61bfe6732d957811bed65c8a39f13ece07e0326f941acbc4e</span>
|
||
<span class="mi">1</span> <span class="n">node_23b0c08045c9a39bc4c454cae298ee531d9a474215ac5e77a5b01e74</span>
|
||
<span class="mi">1</span> <span class="n">node_bc1016320658e92645f29cecb8aaf51c0b7e01a44e8ac9c814dfee59</span>
|
||
<span class="mi">1</span> <span class="n">node_4e7d15e9cee9ee0da5d65e45f1e346228c52bc0c557511c6eeab40dc</span>
|
||
<span class="mi">1</span> <span class="n">node_c5bcf15e28a00515be5d2a7e8e33d71f0f57cdfaf1003db9e0c74788</span>
|
||
<span class="mi">1</span> <span class="n">node_ec3f6ee8f6fdf3a5392bb4dac244668da75d094e084dcbb520ce2525</span>
|
||
<span class="mi">1</span> <span class="n">node_dc2f1eef88126ae4ac7902574714af9ab74b78ba037217e73e063639</span>
|
||
<span class="mi">1</span> <span class="n">node_a4728608c1fda187dc33bb24e831c42fe5c8a582ad428b6e595933bc</span>
|
||
<span class="mi">1</span> <span class="n">node_970379a3ba750ee3b13e31612b6a6b758d50bd4943555b2a13d1bd61</span>
|
||
<span class="mi">1</span> <span class="n">node_bf6b658bea9e437fcb642a2d881425662a689d668c92fe1545899b36</span>
|
||
<span class="mi">1</span> <span class="n">node_2c69511f410d9360f1d05893fde2c97dd32240e0315afea9b2d286a3</span>
|
||
<span class="mi">1</span> <span class="n">node_e4c90c17cc48ad469d123041d3302dcff1f7a82a4805279300812b19</span>
|
||
<span class="mi">1</span> <span class="n">node_3f772cbffb206c30b6ccedade83789d78397804bab874ee59563cb96</span>
|
||
<span class="mi">1</span> <span class="n">node_429bd5115b5590b612590bb455f2d3ed4f77055d746a184baf807655</span>
|
||
<span class="mi">1</span> <span class="n">node_75071820f2c16dc51fa271316b72cd45335ec877c06450d292ab7d54</span>
|
||
<span class="mi">1</span> <span class="n">node_6f4323f9038248d82b91321e2c4ca5fa99e65efa2d976c0b896a8964</span>
|
||
<span class="n">Pending</span><span class="p">:</span>
|
||
<span class="p">(</span><span class="n">no</span> <span class="n">pending</span> <span class="n">nodes</span><span class="p">)</span>
|
||
<span class="n">Recent</span> <span class="n">failures</span><span class="p">:</span>
|
||
<span class="p">(</span><span class="n">no</span> <span class="n">failures</span><span class="p">)</span>
|
||
|
||
<span class="n">Resources</span>
|
||
<span class="o">---------------------------------------------------------------</span>
|
||
<span class="n">Usage</span><span class="p">:</span>
|
||
<span class="mf">0.0</span><span class="o">/</span><span class="mf">2128.0</span> <span class="n">CPU</span>
|
||
<span class="mf">0.0</span><span class="o">/</span><span class="mf">128.0</span> <span class="n">GPU</span>
|
||
<span class="mi">0</span><span class="n">B</span><span class="o">/</span><span class="mf">21.08</span><span class="n">TiB</span> <span class="n">memory</span>
|
||
<span class="mi">0</span><span class="n">B</span><span class="o">/</span><span class="mf">2.91</span><span class="n">TiB</span> <span class="n">object_store_memory</span>
|
||
|
||
<span class="n">Demands</span><span class="p">:</span>
|
||
<span class="p">(</span><span class="n">no</span> <span class="n">resource</span> <span class="n">demands</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="rl-training">
|
||
<h2>RL Training<a class="headerlink" href="#rl-training" title="Link to this heading">#</a></h2>
|
||
<p>Before starting distributed training, ensure the Ray cluster is up and running properly.
|
||
Then, on the first node (where the Ray Head is located), enter the container:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">docker</span> <span class="n">exec</span> <span class="o">-</span><span class="n">it</span> <span class="n">r1</span><span class="o">-</span><span class="n">ray</span><span class="o">-</span><span class="n">head</span> <span class="n">bash</span>
|
||
<span class="n">cd</span> <span class="o">/</span><span class="n">storage</span><span class="o">/</span><span class="n">codes</span><span class="o">/</span><span class="n">AReaL</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Choose a config file that matches your hardware environment and run it:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python3<span class="w"> </span>-m<span class="w"> </span>realhf.apps.quickstart<span class="w"> </span>ppo-math<span class="w"> </span>--config<span class="w"> </span>./examples/configs/7B-distill/ppo-7B-distill-gpus-128.yaml
|
||
</pre></div>
|
||
</div>
|
||
<p>After starting, check the training launch information:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span> ╭─────────────────────────────────────────────────╮
|
||
│ Setting PPOMATHConfig with the Following Values │
|
||
╰─────────────────────────────────────────────────╯
|
||
|
||
───────────────────────── Current Configuration Begin ──────────────────────────
|
||
actor (ModelTrainEvalConfig)
|
||
actor.type (ModelFamily)
|
||
actor.type._class (str) - qwen2
|
||
actor.type.size (int) - 7
|
||
actor.type.is_critic (bool) - False
|
||
...
|
||
────────────────────────── Current Configuration End ───────────────────────────
|
||
|
||
20250222-10:26:34.877 quickstart INFO: Running ppo-math experiment.
|
||
20250222-10:44:15.581 quickstart INFO: Logs will be dumped to /storage/ray/experiments/logs/root/ppo-7B-distill-gpus-128/512x16
|
||
20250222-10:44:15.581 quickstart INFO: Model checkpoints will be saved to /storage/ray/experiments/checkpoints/root/ppo-7B-distill-gpus-128/512x16
|
||
20250222-10:26:36.408 quickstart INFO: Launching experiments with RAY...
|
||
</pre></div>
|
||
</div>
|
||
<p>If errors occur during execution (e.g., keywords like “Error” appear), refer to the troubleshooting section.</p>
|
||
<section id="commandline-options">
|
||
<h3>Commandline Options<a class="headerlink" href="#commandline-options" title="Link to this heading">#</a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python3<span class="w"> </span>-m<span class="w"> </span>realhf.apps.quickstart<span class="w"> </span>ppo-math<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>The descriptions of the important parameters are as follows:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">mode</span></code>: Always <code class="docutils literal notranslate"><span class="pre">ray</span></code>. Do not change it to any other value when following this tutorial for training.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">{actor|critic|ref}.path</span></code>: The path of the model.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">dataset.path</span></code>: The path of the dataset jsonl file</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">external_configs.cluster_config</span></code>: The cluster configuration. For example, fileroot is the root path for saving training outputs.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">n_nodes</span></code>: The number of nodes</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">n_gpus_per_node</span></code>: The number of GPUs per node</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">allocation_mode</span></code>: The GPU allocation and 3D parallel strategy of the model in the experiment, mainly in the following form:</p>
|
||
<ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">sglang.d${DP1}m${TP1}p${PP1}+d${DP2}m${TP2}p${PP2}</span></code>: Configures the parallel strategies for SGLang generation and training, respectively. Generation and training use disjoint sets of GPUs, and the sum of the number of GPUs used by the two should be equal to the total number of GPUs, i.e., DP1xTP1xPP1+DP2xTP2xPP2=#GPUs.</p></li>
|
||
</ul>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp_ctrl.total_train_epochs</span></code>: The number of training epochs (i.e., the number of times to iterate over the entire dataset)</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp_ctrl.save_freq_{epochs|steps|secs}</span></code>: The frequency of saving the model parameters in persistent storage. If it is set to null, the model will not be saved.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp_ctrl.ckpt_freq_{epochs|steps|secs}</span></code>: The frequency of saving temporary parameters for restart</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">dataset.train_bs_n_seqs</span></code>: The training batch size, that is, the number of prompts to be sampled each time during training</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">group_size</span></code>: The number of answers to be sampled for each prompt</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">{actor_train|ref_inf}.mb_spec.max_tokens_per_mb</span></code>: The maximum number of tokens in the data for each forward/backward pass during the inference of the reference model and the training of the actor model. It can be reduced to avoid OOM errors. Gradients are accumulated across these passes for a single parameter update.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">ppo.ppo_n_minibatches</span></code>: The number of parts into which all the data will be divided for each PPO update to calculate the loss and update the parameters.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">ppo.gen.max_new_tokens</span></code>: The maximum number of tokens to be generated for a single prompt; defaults to 16k.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">ppo.gen.min_new_tokens</span></code>: The minimum number of tokens to be generated for a single prompt; defaults to 0.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="monitoring-the-training-process">
|
||
<h3>Monitoring the Training Process<a class="headerlink" href="#monitoring-the-training-process" title="Link to this heading">#</a></h3>
|
||
<p>Here, we use the logs from a 16-node run (the same applies to 1-node and 4-node setups) to explain several methods for observing training progress and results.</p>
|
||
<section id="training-progress">
|
||
<h4>Training Progress<a class="headerlink" href="#training-progress" title="Link to this heading">#</a></h4>
|
||
<p>Search for the keyword <code class="docutils literal notranslate"><span class="pre">Epoch</span></code> in the logs to see the total number of Epochs and Steps:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-11:11:56.997<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">1</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">1</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2124.429*s. Total time consumption: 2283.862s. </span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-11:52:02.719<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">2</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">2</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2405.716*s. Total time consumption: 4689.584s. </span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-12:27:25.084<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">3</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">3</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2122.318*s. Total time consumption: 6811.949s. Estimated remaining time: 33957.093s. </span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:05:58.246<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">4</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">4</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2313.134*s. Total time consumption: 9125.111s. Estimated remaining time: 33265.891s. </span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:44:14.349<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">5</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">5</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2296.076*s. Total time consumption: 11421.214s. Estimated remaining time: 31413.800s. </span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-14:22:33.864<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Epoch<span class="w"> </span><span class="m">1</span>/1<span class="w"> </span>step<span class="w"> </span><span class="m">6</span>/19<span class="w"> </span><span class="o">(</span>global<span class="w"> </span>step<span class="w"> </span><span class="m">6</span><span class="o">)</span><span class="w"> </span>finishes.<span class="w"> </span>Average<span class="w"> </span><span class="c1">#tokens per batch is 111847. #End to end# execution time: *2299.448*s. Total time consumption: 13720.729s. Estimated remaining time: 29350.673s.</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Six log entries are found. We explain the meaning of each field based on the last entry:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Epoch</span> <span class="pre">1/1</span></code>: Indicates that training runs for a total of 1 Epoch, and the first Epoch is currently in progress. This example only trains for 1 Epoch. Normally, training should run for 10 Epochs or more.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">step</span> <span class="pre">6/19</span></code>: Indicates that the current Epoch has 19 Steps, and the 6th Step has just finished.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">global</span> <span class="pre">step</span> <span class="pre">6</span></code>: Represents the step count across all Epochs.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">#End</span> <span class="pre">to</span> <span class="pre">end#</span> <span class="pre">execution</span> <span class="pre">time:</span> <span class="pre">*2299.448*s</span></code>: Indicates that the current Step took 2299.448 seconds to complete.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Total</span> <span class="pre">time</span> <span class="pre">consumption:</span> <span class="pre">13720.729s</span></code>: The total time elapsed since training started is 13720.729 seconds.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">Estimated</span> <span class="pre">remaining</span> <span class="pre">time:</span> <span class="pre">29350.673s</span></code>: The estimated time remaining to complete training is 29350.673 seconds.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="model-performance">
|
||
<h4>Model Performance<a class="headerlink" href="#model-performance" title="Link to this heading">#</a></h4>
|
||
<p>Search for the keyword <code class="docutils literal notranslate"><span class="pre">task_reward</span></code> in the logs.</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-11:11:56.991<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.2640759198111482e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span><span class="m">1</span>.1128166761409375e-06,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">2</span>.1122002635820536e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">1</span>.0000014305114746,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.2996826171875,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-2.27004832709099e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.30145370960235596,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.003593671601265669,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">7907</span>.8955078125,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">105</span>.845703125,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">127828786</span>.0,<span class="w"> </span><span 
class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span class="m">127828786</span>.0,<span class="w"> </span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.122802734375,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-11:52:02.712<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.493159263394773e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span>-3.846728588996484e-07,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">3</span>.16789424914532e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">0</span>.9999996423721313,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.6793212890625,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-2.536311853873485e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.6813737154006958,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.004844569601118565,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">8203</span>.9453125,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">111</span>.892578125,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">132580185</span>.0,<span class="w"> </span><span class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span class="m">132580185</span>.0,<span class="w"> 
</span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.13812255859375,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-12:27:25.077<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.572356243035756e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span>-5.036404786551429e-07,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">1</span>.8960582792715286e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">0</span>.9999992251396179,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.6280517578125,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-2.988609537624143e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.6303607225418091,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.004505862481892109,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">7834</span>.6328125,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">108</span>.900390625,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">126578395</span>.0,<span class="w"> </span><span class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span class="m">126578395</span>.0,<span class="w"> 
</span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.11761474609375,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:05:58.239<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.4861981728463434e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span><span class="m">1</span>.3935685672095133e-07,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">3</span>.02603467616791e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">0</span>.9999998807907104,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.78857421875,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-3.672174671009998e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.791388750076294,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.005053278990089893,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">7773</span>.39404296875,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">108</span>.7890625,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">125576883</span>.0,<span class="w"> </span><span class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span 
class="m">125576883</span>.0,<span class="w"> </span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.117919921875,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:44:14.342<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.516058702894952e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span>-7.665488510610885e-07,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">1</span>.9505058901359007e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">0</span>.9999997615814209,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.6158447265625,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-4.6867208425283025e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.6195111274719238,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.004475570283830166,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">7928</span>.50830078125,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">105</span>.517578125,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">128171874</span>.0,<span class="w"> </span><span class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span class="m">128171874</span>.0,<span 
class="w"> </span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.12353515625,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-14:22:33.857<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>RPC<span class="w"> </span>name<span class="w"> </span>actor_train<span class="w"> </span>returns<span class="w"> </span><span class="o">{</span><span class="s1">'ppo_approx_kl'</span>:<span class="w"> </span>-2.4821250917739235e-05,<span class="w"> </span><span class="s1">'actor_loss'</span>:<span class="w"> </span>-3.922649227661168e-07,<span class="w"> </span><span class="s1">'actor_clip_ratio'</span>:<span class="w"> </span><span class="m">3</span>.323623900541861e-07,<span class="w"> </span><span class="s1">'importance_weight'</span>:<span class="w"> </span><span class="m">1</span>.0000001192092896,<span class="w"> </span><span class="s1">'task_reward'</span>:<span class="w"> </span>-0.7025146484375,<span class="w"> </span><span class="s1">'kl_reward'</span>:<span class="w"> </span>-5.863367960046162e-07,<span class="w"> </span><span class="s1">'final_reward'</span>:<span class="w"> </span>-0.7071446776390076,<span class="w"> </span><span class="s1">'advantage'</span>:<span class="w"> </span><span class="m">0</span>.004277692176401615,<span class="w"> </span><span class="s1">'avg_seq_len'</span>:<span class="w"> </span><span class="m">8002</span>.4873046875,<span class="w"> </span><span class="s1">'avg_prompt_len'</span>:<span class="w"> </span><span class="m">105</span>.951171875,<span class="w"> </span><span class="s1">'n_tokens'</span>:<span class="w"> </span><span class="m">129376851</span>.0,<span class="w"> </span><span class="s1">'n_valid_tokens'</span>:<span class="w"> </span><span class="m">129376851</span>.0,<span 
class="w"> </span><span class="s1">'n_seqs'</span>:<span class="w"> </span><span class="m">16384</span>.0,<span class="w"> </span><span class="s1">'no_eos_ratio'</span>:<span class="w"> </span><span class="m">0</span>.12286376953125,<span class="w"> </span><span class="s1">'disable_value'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'mask_no_eos_with_zero'</span>:<span class="w"> </span><span class="m">0</span>.0<span class="o">}</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Using the last entry as an example, the key fields have the following meanings:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">task_reward</span></code>: The average reward value of all sampled answers in this step. This value should steadily increase during training and eventually stabilize.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">importance_weight</span></code>: The average importance sampling ratio across all tokens in the PPO loss. This value is typically close to 1.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">actor_clip_ratio</span></code>: The ratio of tokens clipped in the PPO loss to the total number of tokens. This is usually less than 0.1.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">actor_loss</span></code>: The PPO loss. <strong>It does not show a clear upward or downward trend during training</strong> and should not be used as a reference for model performance.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">avg_seq_len</span></code>: The average length of all sequences (i.e., prompts with sampled answers) in this step. In a full multi-stage training process, this value will first decrease and then increase.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">no_eos_ratio</span></code>: The ratio of sampled answers truncated due to exceeding the maximum generation length. An increase in this value indicates that the average length of answers is increasing.</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="evaluation">
|
||
<h2>Evaluation<a class="headerlink" href="#evaluation" title="Link to this heading">#</a></h2>
|
||
<section id="evaluation-process">
|
||
<h3>Evaluation Process<a class="headerlink" href="#evaluation-process" title="Link to this heading">#</a></h3>
|
||
<p>The evaluation code is located in the <code class="docutils literal notranslate"><span class="pre">evaluation</span></code> folder of the repository. As per the previous tutorial, the trained checkpoints will be saved under the path <code class="docutils literal notranslate"><span class="pre">/storage/ray/experiments/checkpoints/root/</span></code>, for example, <code class="docutils literal notranslate"><span class="pre">/storage/ray/experiments/checkpoints/root/ppo-zero-distill-7B-n16/1024x16-n16/actor/epoch1epochstep20globalstep20/</span></code>.</p>
|
||
<p>Start a new container to execute the evaluation script (note: evaluation requires updates to certain Python libraries; avoid using the training container for this task):</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">docker</span> <span class="n">run</span> <span class="o">-</span><span class="n">d</span> <span class="o">--</span><span class="n">name</span> <span class="n">r1</span><span class="o">-</span><span class="nb">eval</span> <span class="o">--</span><span class="n">privileged</span> <span class="o">--</span><span class="n">gpus</span> <span class="nb">all</span> <span class="o">--</span><span class="n">network</span> <span class="n">host</span> <span class="o">--</span><span class="n">shm</span><span class="o">-</span><span class="n">size</span> <span class="mi">700</span><span class="n">g</span> <span class="o">-</span><span class="n">v</span> <span class="o">/</span><span class="n">storage</span><span class="p">:</span><span class="o">/</span><span class="n">storage</span> <span class="n">ghcr</span><span class="o">.</span><span class="n">io</span><span class="o">/</span><span class="n">inclusionai</span><span class="o">/</span><span class="n">areal</span><span class="o">-</span><span class="n">runtime</span><span class="p">:</span><span class="n">v0</span><span class="mf">.2.0</span> <span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">bash</span> <span class="o">-</span><span class="n">c</span> <span class="s2">"tail -f /dev/null"</span>
|
||
<span class="n">docker</span> <span class="n">exec</span> <span class="o">-</span><span class="n">it</span> <span class="n">r1</span><span class="o">-</span><span class="nb">eval</span> <span class="n">bash</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Run the following script inside the Docker container to evaluate:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span><span class="w"> </span>/storage/codes/AReaL/evaluation
|
||
<span class="nb">cd</span><span class="w"> </span>latex2sympy
|
||
pip<span class="w"> </span>install<span class="w"> </span>-e<span class="w"> </span>.
|
||
<span class="nb">cd</span><span class="w"> </span>..
|
||
pip<span class="w"> </span>install<span class="w"> </span>-r<span class="w"> </span>requirements.txt<span class="w"> </span>
|
||
pip<span class="w"> </span>install<span class="w"> </span>vllm<span class="w"> </span>--no-build-isolation
|
||
pip<span class="w"> </span>install<span class="w"> </span><span class="nv">transformers</span><span class="o">==</span><span class="m">4</span>.47.0
|
||
pip<span class="w"> </span>install<span class="w"> </span>prettytable<span class="w"> </span>timeout_decorator
|
||
mkdir<span class="w"> </span>/storage/ray/eval_output/
|
||
nohup<span class="w"> </span>python<span class="w"> </span>eval_and_aggregate.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--model_path<span class="w"> </span>/storage/ray/experiments/checkpoints/root/ppo-zero-distill-7B-n16/1024x16-n16/actor/epoch1epochstep20globalstep20/<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--output_path<span class="w"> </span>/storage/ray/eval_output/<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--data_names<span class="w"> </span><span class="s2">"math_500,aime24,amc23"</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--max_gen_tokens<span class="w"> </span><span class="m">32768</span><span class="w"> </span><span class="p">&</span>><span class="w"> </span>/storage/ray/eval_output/eval_and_aggregate_parallel.log<span class="w"> </span><span class="p">&</span>
|
||
</pre></div>
|
||
</div>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--model_path</span></code>: Path to the saved model parameters.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--output_path</span></code>: Path to store the generated answers and log files during evaluation.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--data_names</span></code>: Specify the dataset(s) to evaluate. Multiple datasets can be separated by commas. Default is <code class="docutils literal notranslate"><span class="pre">math_500,</span> <span class="pre">math,</span> <span class="pre">gsm8k,</span> <span class="pre">train_amc_aime,</span> <span class="pre">aime24,</span> <span class="pre">amc23</span></code>.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--max_gen_tokens</span></code>: Maximum length of generated answers. Default is <code class="docutils literal notranslate"><span class="pre">32768</span></code>.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="evaluation-results">
|
||
<h3>Evaluation Results<a class="headerlink" href="#evaluation-results" title="Link to this heading">#</a></h3>
|
||
<p>The evaluation script will output a table in the terminal, for example:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">+----------+---------------+---------------+---------------+------------+---------------+--------+---------+</span>
|
||
<span class="o">|</span> <span class="n">dataset</span> <span class="o">|</span> <span class="n">num_questions</span> <span class="o">|</span> <span class="n">greedy_length</span> <span class="o">|</span> <span class="n">sample_length</span> <span class="o">|</span> <span class="n">greedy_acc</span> <span class="o">|</span> <span class="n">sample_pass</span><span class="o">@</span><span class="mi">1</span> <span class="o">|</span> <span class="k">pass</span><span class="o">@</span><span class="mi">8</span> <span class="o">|</span> <span class="k">pass</span><span class="o">@</span><span class="mi">16</span> <span class="o">|</span>
|
||
<span class="o">+----------+---------------+---------------+---------------+------------+---------------+--------+---------+</span>
|
||
<span class="o">|</span> <span class="n">math_500</span> <span class="o">|</span> <span class="mi">500</span> <span class="o">|</span> <span class="mf">6757.4</span> <span class="o">|</span> <span class="mf">4139.5</span> <span class="o">|</span> <span class="mf">84.4</span> <span class="o">|</span> <span class="mf">92.7</span> <span class="o">|</span> <span class="mf">97.3</span> <span class="o">|</span> <span class="mf">97.7</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">aime24</span> <span class="o">|</span> <span class="mi">30</span> <span class="o">|</span> <span class="mf">19328.0</span> <span class="o">|</span> <span class="mf">13663.5</span> <span class="o">|</span> <span class="mf">50.0</span> <span class="o">|</span> <span class="mf">50.4</span> <span class="o">|</span> <span class="mf">77.3</span> <span class="o">|</span> <span class="mf">80.0</span> <span class="o">|</span>
|
||
<span class="o">|</span> <span class="n">amc23</span> <span class="o">|</span> <span class="mi">40</span> <span class="o">|</span> <span class="mf">8850.0</span> <span class="o">|</span> <span class="mf">6526.2</span> <span class="o">|</span> <span class="mf">80.0</span> <span class="o">|</span> <span class="mf">90.5</span> <span class="o">|</span> <span class="mf">96.8</span> <span class="o">|</span> <span class="mf">98.8</span> <span class="o">|</span>
|
||
<span class="o">+----------+---------------+---------------+---------------+------------+---------------+--------+---------+</span>
|
||
</pre></div>
|
||
</div>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">{greedy|sample}_length</span></code>: Average answer length under greedy or random sampling strategy.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">greedy_acc</span></code>: Average accuracy under greedy sampling.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">sample_pass@{k}</span></code>: Probability of generating at least one correct answer within <code class="docutils literal notranslate"><span class="pre">k</span></code> attempts under random sampling, averaged over all questions.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="additional-notes">
|
||
<h3>Additional Notes<a class="headerlink" href="#additional-notes" title="Link to this heading">#</a></h3>
|
||
<section id="key-parameters">
|
||
<h4>Key Parameters<a class="headerlink" href="#key-parameters" title="Link to this heading">#</a></h4>
|
||
<ul class="simple">
|
||
<li><p>The evaluation script defaults to taking the average of 32 samples with temperature 0.6.</p></li>
|
||
<li><p>We observed that the <code class="docutils literal notranslate"><span class="pre">enforce_eager</span></code> parameter in vLLM significantly affects evaluation results. When <code class="docutils literal notranslate"><span class="pre">enforce_eager=True</span></code>, we can reproduce the model performance reported in previous work. Otherwise, the evaluation results may fall below the reported performance. Therefore, we always set <code class="docutils literal notranslate"><span class="pre">enforce_eager=True</span></code> during evaluation.</p></li>
|
||
</ul>
|
||
<p>Due to the above reasons, the evaluation process typically takes a considerable amount of time.</p>
|
||
</section>
|
||
<section id="runtime">
|
||
<h4>Runtime<a class="headerlink" href="#runtime" title="Link to this heading">#</a></h4>
|
||
<p>The runtime of the evaluation depends on factors such as the maximum generation length, the number of questions in the dataset, and the model size. On a machine with 8x H100 GPUs, evaluating <code class="docutils literal notranslate"><span class="pre">aime24</span></code> and <code class="docutils literal notranslate"><span class="pre">math_500</span></code> takes approximately 80 minutes and 160 minutes, respectively.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="troubleshooting">
|
||
<h2>Troubleshooting<a class="headerlink" href="#troubleshooting" title="Link to this heading">#</a></h2>
|
||
<p>If the following content does not address your issue, feel free to raise a GitHub Issue.</p>
|
||
<section id="automatic-recover">
|
||
<h3>Automatic Recover<a class="headerlink" href="#automatic-recover" title="Link to this heading">#</a></h3>
|
||
<p>When <code class="docutils literal notranslate"><span class="pre">recover_mode=auto</span></code> is set and the experiment config remains the same, AReaL will try to discover previous checkpoints and recover the experiment from them.</p>
|
||
<p>If automatic recovery fails, please check the following possibilities:</p>
|
||
<ul class="simple">
|
||
<li><p>The <code class="docutils literal notranslate"><span class="pre">experiment_name</span></code> and <code class="docutils literal notranslate"><span class="pre">trial_name</span></code> in the training script differ from the previous run.</p></li>
|
||
<li><p>Changes in Batch Size (<code class="docutils literal notranslate"><span class="pre">dataset.train_bs_n_seqs</span></code> in the parameters), Group Size (<code class="docutils literal notranslate"><span class="pre">group_size</span></code> in the parameters), or the number of nodes (<code class="docutils literal notranslate"><span class="pre">n_nodes</span></code> in the parameters).</p></li>
|
||
<li><p>No recover checkpoint was created in the previous run. By default, recover checkpoints are generated under two conditions:</p>
|
||
<ul>
|
||
<li><p>After the completion of the second Step.</p></li>
|
||
<li><p>When a Step completes and more than 600 seconds have passed since the last recover checkpoint. This interval is controlled by <code class="docutils literal notranslate"><span class="pre">exp_ctrl.ckpt_freq_secs=600</span></code> in <code class="docutils literal notranslate"><span class="pre">./examples/configs/*/*.yaml</span></code>.</p></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
<p>You can confirm if a recover checkpoint was generated by searching in the log:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-11:52:02.760<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-12:27:25.105<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:05:58.264<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-13:44:14.411<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-14:22:33.883<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
<span class="o">(</span>master_worker/0<span class="w"> </span><span class="nv">pid</span><span class="o">=</span><span class="m">96390</span>,<span class="w"> </span><span class="nv">ip</span><span class="o">=</span>xxx.xxx.xxx.xxx<span class="o">)</span><span class="w"> </span><span class="m">20250222</span>-14:59:44.925<span class="w"> </span>master<span class="w"> </span>worker<span class="w"> </span>INFO:<span class="w"> </span>Dumped<span class="w"> </span>recover<span class="w"> </span>info<span class="w"> </span>to<span class="w"> </span>file.
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="series-of-outofmemory-errors">
|
||
<h3>Series of OutOfMemory Errors<a class="headerlink" href="#series-of-outofmemory-errors" title="Link to this heading">#</a></h3>
|
||
<p>While our scripts are designed to minimize OOM (Out of Memory) errors, they can still occasionally occur, especially due to memory fragmentation and increasing sequence lengths. Although these issues are often resolved by automatic restarts, users may require the following targeted solutions.</p>
|
||
<section id="torch-cuda-cudaoutofmemoryerror">
|
||
<h4>torch.cuda.CudaOutOfMemoryError<a class="headerlink" href="#torch-cuda-cudaoutofmemoryerror" title="Link to this heading">#</a></h4>
|
||
<p>The key to resolving this issue is identifying the phase in which the error occurs.</p>
|
||
<ul class="simple">
|
||
<li><p><strong>If it occurs during initialization (before <code class="docutils literal notranslate"><span class="pre">actor_gen</span></code>):</strong></p>
|
||
<ul>
|
||
<li><p>Check if there are any idle processes on the GPU. In distributed scenarios, restart the Ray cluster. In single-machine scenarios, use <code class="docutils literal notranslate"><span class="pre">pkill</span></code> to terminate them.</p></li>
|
||
</ul>
|
||
</li>
|
||
<li><p><strong>This error typically does not occur during the <code class="docutils literal notranslate"><span class="pre">actor_gen</span></code> phase.</strong></p></li>
|
||
<li><p><strong>If it occurs during <code class="docutils literal notranslate"><span class="pre">ref_inf</span></code> or <code class="docutils literal notranslate"><span class="pre">actor_train</span></code>:</strong></p>
|
||
<ul>
|
||
<li><p>Adjust the microbatch size for the corresponding computation task. For example, set <code class="docutils literal notranslate"><span class="pre">actor_train.mb_spec.max_tokens_per_mb=20480</span></code>. This parameter limits the number of tokens per forward/backward pass and can be set as low as the maximum sequence length (including the prompt).</p></li>
|
||
<li><p>Modify the parallelism strategy (<code class="docutils literal notranslate"><span class="pre">allocation_mode</span></code>) for the 7B model. Try reducing data parallelism and increasing tensor or pipeline parallelism.</p></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</section>
|
||
<section id="cuda-error-out-of-memory">
|
||
<h4>CUDA error: out of memory<a class="headerlink" href="#cuda-error-out-of-memory" title="Link to this heading">#</a></h4>
|
||
<p>This issue may occur during vLLM’s initialization of the CPU KV cache, indicating insufficient memory on the machine. To resolve this, reduce the value of <code class="docutils literal notranslate"><span class="pre">actor.vllm.swap_space</span></code>.</p>
|
||
</section>
|
||
<section id="runtimeerror-aborted-due-to-the-lack-of-cpu-swap-space">
|
||
<h4>RuntimeError: Aborted due to the lack of CPU swap space.<a class="headerlink" href="#runtimeerror-aborted-due-to-the-lack-of-cpu-swap-space" title="Link to this heading">#</a></h4>
|
||
<p>This issue arises when the sequence length and KV cache demand exceed GPU memory, and the CPU swap space is insufficient. It is closely related to <a class="reference external" href="https://docs.vllm.ai/en/latest/performance/optimization.html">Preemption errors</a>. To resolve this, increase <code class="docutils literal notranslate"><span class="pre">actor.vllm.swap_space</span></code>. If the error persists, reduce <code class="docutils literal notranslate"><span class="pre">actor.vllm.max_num_seqs</span></code> and refer to the <a class="reference external" href="https://docs.vllm.ai/en/latest/performance/optimization.html">vLLM documentation</a>.</p>
|
||
</section>
|
||
<section id="cuda-error-an-illegal-memory-access-was-encountered">
|
||
<h4>CUDA error: an illegal memory access was encountered<a class="headerlink" href="#cuda-error-an-illegal-memory-access-was-encountered" title="Link to this heading">#</a></h4>
|
||
<p>This error typically occurs during the vLLM generation phase and is another symptom of insufficient GPU memory. Solutions include:</p>
|
||
<ul class="simple">
|
||
<li><p>Reduce the training batch size or the number of answers generated per prompt. Note that this may lower sample efficiency and extend training time.</p></li>
|
||
<li><p><a class="reference external" href="https://github.com/vllm-project/vllm/issues/5376">Switch vLLM’s attention backend to xformers</a>.</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
|
||
<script type="text/x-thebe-config">
|
||
{
|
||
requestKernel: true,
|
||
binderOptions: {
|
||
repo: "binder-examples/jupyter-stacks-datascience",
|
||
ref: "master",
|
||
},
|
||
codeMirrorConfig: {
|
||
theme: "abcdef",
|
||
mode: "python"
|
||
},
|
||
kernelOptions: {
|
||
name: "python3",
|
||
path: "./tutorial"
|
||
},
|
||
predefinedOutput: true
|
||
}
|
||
</script>
|
||
<script>kernelName = 'python3'</script>
|
||
|
||
</article>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<footer class="prev-next-footer d-print-none">
|
||
|
||
<div class="prev-next-area">
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||
|
||
|
||
<div class="sidebar-secondary-item">
|
||
<div class="page-toc tocsection onthispage">
|
||
<i class="fa-solid fa-list"></i> Contents
|
||
</div>
|
||
<nav class="bd-toc-nav page-toc">
|
||
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#hardware-requirements">Hardware Requirements</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#software-requirements">Software Requirements</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#one-click-environment-setup-and-training-launch">One-Click Environment Setup and Training Launch</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#environment-setup">Environment Setup</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#code">Code</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#dataset">Dataset</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model">Model</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#launch-the-ray-cluster">Launch the Ray Cluster</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#rl-training">RL Training</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#commandline-options">Commandline Options</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#monitoring-the-training-process">Monitoring the Training Process</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#training-progress">Training Progress</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#model-performance">Model Performance</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation">Evaluation</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-process">Evaluation Process</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#evaluation-results">Evaluation Results</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#additional-notes">Additional Notes</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#key-parameters">Key Parameters</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#runtime">Runtime</a></li>
</ul>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#troubleshooting">Troubleshooting</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#automatic-recover">Automatic Recover</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#series-of-outofmemory-errors">Series of OutOfMemory Errors</a><ul class="nav section-nav flex-column">
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#torch-cuda-cudaoutofmemoryerror">torch.cuda.CudaOutOfMemoryError</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cuda-error-out-of-memory">CUDA error: out of memory</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#runtimeerror-aborted-due-to-the-lack-of-cpu-swap-space">RuntimeError: Aborted due to the lack of CPU swap space.</a></li>
<li class="toc-h4 nav-item toc-entry"><a class="reference internal nav-link" href="#cuda-error-an-illegal-memory-access-was-encountered">CUDA error: an illegal memory access was encountered</a></li>
</ul>
</li>
</ul>
</li>
</ul>
|
||
</nav></div>
|
||
|
||
</div></div>
|
||
|
||
|
||
</div>
|
||
<footer class="bd-footer-content">
|
||
|
||
<div class="bd-footer-content__inner container">
|
||
|
||
<div class="footer-item">
|
||
|
||
<p class="component-author">
By Wei Fu
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
|
||
<p class="copyright">
© Copyright 2023.
<br/>
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</footer>
|
||
|
||
|
||
</main>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<footer class="bd-footer">
|
||
</footer>
|
||
</body>
|
||
</html> |