[Fix] Merge error fixes. (#152)

* PullRequest: 293 fix get_param_realloc_path

Merge branch xss/debug of git@code.alipay.com:inclusionAI/AReaL.git into gh
https://code.alipay.com/inclusionAI/AReaL/pull_requests/293

Reviewed-by: 博惟 <bowei.fw@antgroup.com>


* fix get_param_realloc_path

* PullRequest: 297 bugfix: reward is always -5

Merge branch xss/debug of git@code.alipay.com:inclusionAI/AReaL.git into gh
https://code.alipay.com/inclusionAI/AReaL/pull_requests/297

Reviewed-by: 博惟 <bowei.fw@antgroup.com>


* bugfix: reward is always -5

* PullRequest: 321 fix checkpoint save dir

Merge branch xss/debug of git@code.alipay.com:inclusionAI/AReaL.git into gh
https://code.alipay.com/inclusionAI/AReaL/pull_requests/321

Reviewed-by: 博惟 <bowei.fw@antgroup.com>


* fix checkpoint save dir

* PullRequest: 328 [Doc] update installation

Merge branch sxj/doc of git@code.alipay.com:inclusionAI/AReaL.git into gh
https://code.alipay.com/inclusionAI/AReaL/pull_requests/328

Reviewed-by: 博惟 <bowei.fw@antgroup.com>


* [Doc] update installation

---------

Co-authored-by: 温差 <xushusheng.xss@antgroup.com>
Co-authored-by: 冰临 <shenxujie.sxj@antgroup.com>
Wei Fu authored 2025-07-07 10:30:27 +08:00; committed by GitHub
commit 0ff8c59435 (parent e0aee03109)
4 changed files with 18 additions and 26 deletions

````diff
@@ -67,6 +67,10 @@ cd AReaL
 bash examples/env/scripts/setup-pip-deps.sh
 ```
+
+::::{note}
+The SGLang patch is applied via `examples/env/scripts/setup-container-deps.sh` or `examples/env/scripts/setup-pip-deps.sh`. To confirm whether it has been applied, run `git status` in the `/sglang` directory (for Docker) or `AReaL/sglang` (for custom setups).
+::::
 
 ## (Optional) Launch Ray Cluster for Distributed Training
 
 On the first node, start the Ray Head:
````
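For reference, a minimal Python sketch of the `git status` check described in the note above. The `sglang_patch_applied` helper, the heuristic (a patched checkout shows local modifications), and the default `/sglang` path are illustrative assumptions, not part of the repository:

```python
import subprocess
import sys


def sglang_patch_applied(sglang_dir: str) -> bool:
    # Heuristic: a patched checkout shows modified files in `git status --porcelain`.
    out = subprocess.run(
        ["git", "status", "--porcelain"],
        cwd=sglang_dir,
        capture_output=True,
        text=True,
        check=True,
    )
    return bool(out.stdout.strip())


if __name__ == "__main__":
    # Path assumptions from the note: /sglang inside the Docker image,
    # AReaL/sglang for custom (pip) setups.
    target = sys.argv[1] if len(sys.argv) > 1 else "/sglang"
    print(f"SGLang patch applied in {target}: {sglang_patch_applied(target)}")
```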

````diff
@@ -110,7 +110,7 @@ def get_save_path(args: "BaseExperimentConfig") -> str:
 
 
 def get_param_realloc_path(args: "BaseExperimentConfig"):
-    path = f"{args.cluster.fileroot}/.cache/{getpass.getuser()}/param_realloc"
+    path = f"{args.cluster.fileroot}/.cache/{getpass.getuser()}/param_realloc/{args.experiment_name}/{args.trial_name}"
     os.makedirs(path, exist_ok=True)
     return path
````
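A minimal sketch of why this change matters, using hypothetical config values: with the old layout, every experiment and trial shared one `param_realloc` cache directory, so concurrent trials could clobber each other's reallocated parameter files. The helper below only mirrors the fixed path layout and is not the repository's actual function:

```python
import getpass
import os


def param_realloc_path(fileroot: str, experiment_name: str, trial_name: str) -> str:
    # Same layout as the fixed line above:
    # <fileroot>/.cache/<user>/param_realloc/<experiment>/<trial>
    path = os.path.join(
        fileroot, ".cache", getpass.getuser(),
        "param_realloc", experiment_name, trial_name,
    )
    os.makedirs(path, exist_ok=True)
    return path


if __name__ == "__main__":
    a = param_realloc_path("/tmp/areal", "gsm8k-grpo", "trial0")
    b = param_realloc_path("/tmp/areal", "gsm8k-grpo", "trial1")
    # Before the fix both trials would have resolved to the same directory.
    assert a != b
    print(a)
    print(b)
```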

````diff
@@ -3,11 +3,11 @@
 import json
 import multiprocessing
 import re
-from concurrent.futures import ProcessPoolExecutor, as_completed
 from typing import List, Union
 
 import regex
 from latex2sympy2 import latex2sympy
+from pebble import ProcessPool
 from sympy import N, simplify
 from sympy.parsing.latex import parse_latex
 from sympy.parsing.sympy_parser import parse_expr
@@ -785,23 +785,6 @@ def process_results(answer, solution):
     return 0, ("None", "None")
 
 
-def process_results_process(a, b, output_queue):
-    result = process_results(a, b)
-    output_queue.put(result)
-
-
-def verify_math_solution(answer: str, solution: str):
-    # answer is generated by the model, solution is the ground truth
-    tmp = call_with_timeout(
-        process_results_process,
-        answer,
-        solution,
-    )
-    if isinstance(tmp, bool):
-        return 0
-    return tmp[0]
-
-
 def loadJson(dataDir):
     with open(dataDir, "r") as f:
         if dataDir.endswith(".jsonl"):
@@ -817,7 +800,7 @@ def parse_line(id2info, prompt_str, generated, query_id):
 
     label = 0
     for sol in info["solutions"]:
-        label = label or verify_math_solution(generated, sol)
+        label = label or process_results(generated, sol)
     return label
 
@@ -834,21 +817,26 @@ def parse_lines_in_parallel(
     )
 
     all_jobs = []
-    with ProcessPoolExecutor(max_workers=max_workers) as executor:
+    with ProcessPool(max_workers=max_workers) as executor:
         for qid, gen in zip(query_ids, generateds):
            info = id2info[qid.split("@idx:")[0]]
            jobs = []
            for sol in info["solutions"]:
-                job = executor.submit(verify_math_solution, gen, sol)
+                job = executor.schedule(process_results, args=[gen, sol], timeout=15)
                jobs.append(job)
            all_jobs.append(jobs)
 
     labels = []
     for jobs in all_jobs:
         label = 0
-        for job in as_completed(jobs):
-            x = job.result()
-            label = label or x
+        for job in jobs:
+            try:
+                x = job.result()
+            except TimeoutError:
+                # print("[debug: timeout]")
+                logger.warning(f"Timeout occurred while justifying the math answer.")
+                x = (0, "timeout", "timeout")
+            label = label or x[0]
         labels.append(label)
     return labels
 
````
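For context, a standalone sketch of the `pebble` pattern the new code relies on: `ProcessPool.schedule()` accepts a per-task `timeout`, and `future.result()` raises `TimeoutError` for tasks that exceed it, so a hung verification scores 0 instead of silently failing every answer (the "reward is always -5" symptom). `check_answer`, `grade`, and the sample inputs are placeholders, not AReaL's real verifier:

```python
from concurrent.futures import TimeoutError

from pebble import ProcessPool


def check_answer(answer: str, solution: str) -> int:
    # Stand-in for the slow symbolic equivalence check that can hang on some inputs.
    return int(answer.strip() == solution.strip())


def grade(pairs, max_workers: int = 4, timeout: float = 15.0):
    with ProcessPool(max_workers=max_workers) as pool:
        futures = [
            pool.schedule(check_answer, args=[ans, sol], timeout=timeout)
            for ans, sol in pairs
        ]
    labels = []
    for fut in futures:
        try:
            labels.append(fut.result())
        except TimeoutError:
            # A timed-out check scores 0 instead of poisoning the whole batch.
            labels.append(0)
    return labels


if __name__ == "__main__":
    print(grade([("42", "42"), ("41", "42")]))  # -> [1, 0]
```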

````diff
@@ -216,7 +216,7 @@ class ModelFunctionCall:
         for p in payloads.values():
             p.post_hooks.append("save")
         save_dir = os.path.join(
-            constants.get_log_path(self.args),
+            constants.get_save_path(self.args),
             rpc.model_name.role,
             f"epoch{ctrl.step_info.epoch + 1}"
             f"epochstep{ctrl.step_info.epoch_step + 1}"
````
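A rough illustration, with hypothetical paths, of what this one-line change does: the "save" post-hook now composes its checkpoint directory under the root returned by `constants.get_save_path` rather than the logging root from `constants.get_log_path`. The helper and values below are illustrative only, and the trailing name parts of the real path are omitted:

```python
import os


def checkpoint_dir(save_root: str, role: str, epoch: int, epoch_step: int) -> str:
    # Mirrors the os.path.join(...) composition above (later name parts omitted).
    return os.path.join(save_root, role, f"epoch{epoch}epochstep{epoch_step}")


# Hypothetical root: before the fix the first argument was the log directory,
# so checkpoints landed next to the logs; now it is the dedicated save directory.
print(checkpoint_dir("/storage/saves/my-exp/trial0", "actor", 1, 3))
```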