mirror of https://github.com/inclusionAI/AReaL
add a preprocessing script for code training data and update readme (#126)
* add a preprocessing script for code training data and update readme
* fix eval doc

Co-authored-by: hcy <hechuyi.hcy@antgroup.com>
parent 3642cce2fc
commit e3005d57f6
@@ -102,9 +102,16 @@ AReaL-boba² allows you to independently customize the [dataset](https://inclusi
In particular, we show a simple example of developing a multi-turn math agent for RL training. See the learning curve below, and refer to the [step-by-step guide](https://inclusionai.github.io/AReaL/customization/agent.html) if you want to implement your own agentic RL project.
## Getting Started
Obtain the training data:
- [Math](https://huggingface.co/datasets/inclusionAI/AReaL-boba-Data)
- [Code](https://huggingface.co/datasets/inclusionAI/AReaL-boba-2-RL-Code)

For code training data, a simple preprocessing script is provided in `examples/data_preprocess/preprocess_training_data.py`:
```bash
python3 preprocess_training_data.py --data_path $original_data_path --output_path $training_data_path
```
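The script expects one JSON object per line, with at least `query_id`, `question`, and a JSON-encoded `input_output` field (see the new file at the bottom of this commit). A minimal end-to-end sketch, with a made-up record and illustrative file names:

```bash
# Hypothetical one-record input; field names match what the script reads.
cat > sample_raw.jsonl <<'EOF'
{"query_id": "demo-1", "question": "Print the sum of two integers.", "input_output": "{\"inputs\": [\"1 2\\n\"], \"outputs\": [\"3\\n\"]}", "metadata": {"fn_name": ""}}
EOF

# Run from the repository root (paths are illustrative).
python3 examples/data_preprocess/preprocess_training_data.py \
    --data_path sample_raw.jsonl --output_path sample_train.jsonl
```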
Train Qwen3 1.7B locally (Remember to modify `dataset.path` in the script below):
```bash
bash examples/run_async_ppo.sh
```
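`dataset.path` lives inside the launcher script itself, so point it at the preprocessed JSONL before launching. One way to locate the line to edit (a sketch, assuming you run from the repository root):

```bash
# Find the dataset.path setting that should point at your preprocessed data
grep -n "dataset.path" examples/run_async_ppo.sh
```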
@@ -51,6 +51,8 @@ nohup python eval_and_aggregate.py \
    --max_gen_tokens 32768 \
    --data_names codeforces,lcb_v5 \
    --prompt_type qwen3-think-pure \
    --temperature 1.0 \
    --top_p 0.95 \
    --num_sample_nodes 8 \
    --samples_per_node 1 \
    --n_sampling $((num_sample_nodes * samples_per_node)) \
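Note that `--n_sampling` is computed from two shell variables, so they must be defined before the `nohup` command runs; a minimal sketch with the values implied by the flags above:

```bash
# With these values, --n_sampling resolves to 8 * 1 = 8.
num_sample_nodes=8
samples_per_node=1
echo $((num_sample_nodes * samples_per_node))
```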
examples/data_preprocess/preprocess_training_data.py
@@ -0,0 +1,78 @@
import json
import sys
from argparse import ArgumentParser
from typing import Dict, List

# An example prompt template. Remember to add the special tokens your model
# expects; this one is for the boba-2 coding dataset.
prompt_template = """
<|im_start|>user\n{question}\n/think<|im_end|>\n<|im_start|>assistant\n<think>
"""


def load_jsonl(file_path: str) -> List[Dict]:
    """Load a JSONL file with validation."""
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return [json.loads(line) for line in f]
    except FileNotFoundError:
        print(f"ERROR: JSONL file not found: {file_path}")
        raise
    except json.JSONDecodeError as e:
        print(f"ERROR: JSON parsing failed in {file_path}: {str(e)}")
        raise


def process_code_data(file_path: str) -> List[Dict]:
    """Process a code dataset from a JSONL file."""
    if not file_path:
        return []

    raw_data = load_jsonl(file_path)
    processed = []

    for item in raw_data:
        # Field extraction and transformation
        input_output = json.loads(item["input_output"])
        processed.append(
            {
                "task": "code",
                "query_id": item["query_id"],
                "prompt": prompt_template.format(question=item["question"]),
                "solutions": item.get("solutions", []),  # empty for the code dataset
                "input_output": json.dumps(
                    {
                        "inputs": input_output.get("inputs", []),
                        "outputs": input_output.get("outputs", []),
                        "fn_name": item.get("metadata", {}).get("fn_name", ""),
                        "remote": False,
                    }
                ),
                "language": item.get("language", "PYTHON"),  # default to Python
            }
        )

        # Guard against oversized inline test cases.
        case_size = sys.getsizeof(processed[-1]["input_output"])
        assert (
            case_size < 500 * 1024
        ), f"'input_output' exceeds 500KB ({case_size} bytes). Use remote test cases instead."

    return processed


def parse_args():
    parser = ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    return parser.parse_args()


def main():
    args = parse_args()
    processed_data = process_code_data(args.data_path)
    with open(args.output_path, "w") as f:
        for item in processed_data:
            f.write(json.dumps(item, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()
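Each output line is a self-contained JSON object, so the result can be sanity-checked by pretty-printing the first record (the file name follows the hypothetical example above):

```bash
# Verify the first processed record parses and has the expected fields
head -n 1 sample_train.jsonl | python3 -m json.tool
```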