fix(kag): update examples to work under branch 0.8.0 (#593)

* add graph

* fix bug for None

* add knowledge unit extra

* fix_prompt

* extract common function into benchmark component

* format code

* format code

* format code

* fix benchmark knowledge unit

* fix node

* add common component

* Revert "remove local bge model and dependency scikit-learn"

This reverts commit d1c20fc6a8.

* remove unused pkg

* add decompose

* change hybrid default config

* fix legacy commands in readme

* fix example csqa

* fix example baike

* fix example domain_kg

* fix example medicine

* fix example riskmining

* fix example supplychain

* fix example EastElectric

* fix example FinAlibaba

* update readme to use python 3.10

* fix readme format

* fix google_web_search_mcp

* fix example baidu_map_mcp

* format code with black

---------

Co-authored-by: peilong.zip <peilong.zpl@antgroup.com>
This commit is contained in:
xionghuaidong 2025-06-19 11:39:29 +08:00 committed by GitHub
parent ffc20ec5b7
commit fc98ad136e
81 changed files with 389 additions and 378 deletions

View File

@ -150,7 +150,7 @@ Refer to the 3.1 section to complete the installation of the engine & dependent
**Windows developers**
```text
# Install the official Python 3.8.10 or later, install Git.
# Install the official Python 3.10 or later, install Git.
# Create and activate Python venv: py -m venv kag-demo && kag-demo\Scripts\activate

View File

@ -140,7 +140,7 @@ Default password: openspg@kag
**Windows 开发者**
```
# 安装官方 Python 3.8.10 或更新版本,安装 Git。
# 安装官方 Python 3.10 或更新版本,安装 Git。
# 创建、激活 Python 虚拟环境:py -m venv kag-demo && kag-demo\Scripts\activate

View File

@ -172,7 +172,7 @@ docker compose -f docker-compose.yml up -d
**Windows開発者**
```text
# 公式のPython 3.8.10以降をインストールし、Gitをインストールします。
# 公式のPython 3.10以降をインストールし、Gitをインストールします。
# Python仮想環境の作成とアクティベート:py -m venv kag-demo && kag-demo\Scripts\activate

View File

@ -143,6 +143,7 @@ def run_benchmark(config, result_queue):
import_modules_from_path("./")
# import benchmark common component
import kag.open_benchmark.common_component
runner = KAGBenchmark.from_config(config)
result = runner.invoke()
result_queue.put((runner.job_name, result))

View File

@ -203,9 +203,13 @@ class DefaultExternalGraphLoader(ExternalGraphLoaderABC):
DefaultExternalGraphLoader: An instance of DefaultExternalGraphLoader initialized with the data from the JSON files.
"""
nodes = []
for item in json.load(open(node_file_path, "r")):
for item in json.load(
open(node_file_path, "r", encoding="utf-8", newline="\n")
):
nodes.append(Node.from_dict(item))
edges = []
for item in json.load(open(edge_file_path, "r")):
for item in json.load(
open(edge_file_path, "r", encoding="utf-8", newline="\n")
):
edges.append(Edge.from_dict(item))
return cls(nodes=nodes, edges=edges, match_config=match_config)

View File

@ -338,7 +338,7 @@ class SchemaFreeExtractor(ExtractorABC):
return None, None
for tri in triples:
if len(tri) != 3:
if tri is None or len(tri) != 3:
continue
s_category, s_name = get_category_and_name(entities, tri[0])
tri[0] = processing_phrases(tri[0])

View File

@ -46,9 +46,9 @@ class DocxNode:
self.content = content
self.node_type = node_type
self.children: List[DocxNode] = []
self.properties: Dict[str, str] = (
{}
) # Store additional properties like style, indent level, etc.
self.properties: Dict[
str, str
] = {} # Store additional properties like style, indent level, etc.
def __str__(self):
return f"{self.node_type}({self.level}): {self.display_title}"

View File

@ -86,9 +86,7 @@ class OutlineSplitter(SplitterABC):
# 如果栈为空,或者当前节点的级别高于栈顶节点的级别,说明当前节点是根节点或新的分支节点
if not stack or stack[-1][1] >= level:
if stack:
stack[-1][2]["children"].append(
node
) # 将新节点添加到最近的父节点的 children 列表中
stack[-1][2]["children"].append(node) # 将新节点添加到最近的父节点的 children 列表中
else:
catalog_tree.append(node) # 如果栈为空,说明这是一个根节点
else:
@ -990,9 +988,7 @@ class OutlineSplitter(SplitterABC):
# 递归为子节点生成chunk
for child in node.get("children", []):
generate_chunks(
child, chunks, full_title
) # 将当前完整title传递给子节点
generate_chunks(child, chunks, full_title) # 将当前完整title传递给子节点
return chunks

View File

@ -20,12 +20,8 @@ logger = logging.getLogger(__name__)
@PromptABC.register("analyze_table")
class AnalyzeTablePrompt(PromptABC):
template_zh: str = (
"""你是一个分析表格的专家, 从table中提取信息并分析最后返回表格有效信息"""
)
template_en: str = (
"""You are an expert in knowledge graph extraction. Based on the schema defined by the constraint, extract all entities and their attributes from the input. Return NAN for attributes not explicitly mentioned in the input. Output the results in standard JSON format, as a list."""
)
template_zh: str = """你是一个分析表格的专家, 从table中提取信息并分析最后返回表格有效信息"""
template_en: str = """You are an expert in knowledge graph extraction. Based on the schema defined by the constraint, extract all entities and their attributes from the input. Return NAN for attributes not explicitly mentioned in the input. Output the results in standard JSON format, as a list."""
def build_prompt(self, variables) -> str:
return json.dumps(

View File

@ -268,10 +268,7 @@ def check_data(line, data_type="knowIE", language="zh"):
check_data.append(ner)
if language == "zh" and isinstance(ner, dict):
if (
len(
set(ner.keys())
& set(["名称", "类型", "领域本体", "解释", "标准名", "同义词"])
)
len(set(ner.keys()) & set(["名称", "类型", "领域本体", "解释", "标准名", "同义词"]))
== 6
):
check_data.append(ner)

View File

@ -88,9 +88,7 @@ class OutlinePrompt(PromptABC):
def parse_response(self, response: str, **kwargs):
# 如果返回结果是字符串,先去除 Markdown 语法,再使用 ast.literal_eval 转换成列表
if isinstance(response, str):
cleaned_data = response.strip(
"`python\n[] \n"
) # 去除 Markdown 语法和多余的空格
cleaned_data = response.strip("`python\n[] \n") # 去除 Markdown 语法和多余的空格
cleaned_data = "[" + cleaned_data + "]" # 恢复为列表格式
try:
parsed_data = ast.literal_eval(cleaned_data)

View File

@ -34,7 +34,9 @@ class TxtCheckPointer(CheckPointer):
"""
ckpt = {}
if os.path.exists(self._ckpt_file_path):
with open(self._ckpt_file_path, "r") as reader:
with open(
self._ckpt_file_path, "r", encoding="utf-8", newline="\n"
) as reader:
for line in reader:
data = json.loads(line)
ckpt[data["id"]] = data["value"]

View File

@ -122,10 +122,10 @@ class GraphApi(object):
) # noqa: E501
# HTTP header `Content-Type`
header_params["Content-Type"] = (
self.api_client.select_header_content_type( # noqa: E501
["application/json"]
)
header_params[
"Content-Type"
] = self.api_client.select_header_content_type( # noqa: E501
["application/json"]
) # noqa: E501
# Authentication setting
@ -240,10 +240,10 @@ class GraphApi(object):
) # noqa: E501
# HTTP header `Content-Type`
header_params["Content-Type"] = (
self.api_client.select_header_content_type( # noqa: E501
["application/json"]
)
header_params[
"Content-Type"
] = self.api_client.select_header_content_type( # noqa: E501
["application/json"]
) # noqa: E501
# Authentication setting
@ -358,10 +358,10 @@ class GraphApi(object):
) # noqa: E501
# HTTP header `Content-Type`
header_params["Content-Type"] = (
self.api_client.select_header_content_type( # noqa: E501
["application/json"]
)
header_params[
"Content-Type"
] = self.api_client.select_header_content_type( # noqa: E501
["application/json"]
) # noqa: E501
# Authentication setting
@ -476,10 +476,10 @@ class GraphApi(object):
) # noqa: E501
# HTTP header `Content-Type`
header_params["Content-Type"] = (
self.api_client.select_header_content_type( # noqa: E501
["application/json"]
)
header_params[
"Content-Type"
] = self.api_client.select_header_content_type( # noqa: E501
["application/json"]
) # noqa: E501
# Authentication setting

View File

@ -198,6 +198,8 @@ class AtomicQueryChunkRetriever(RetrieverABC):
res_chunk_list = []
chunk_id_set = set()
for chunk in chunks:
if chunk is None:
continue
if chunk.chunk_id not in chunk_id_set:
chunk_id_set.add(chunk.chunk_id)
res_chunk_list.append(chunk)

View File

@ -137,7 +137,7 @@ class PprChunkRetriever(RetrieverABC):
)
node_dict = dict(node.items())
return doc_id, ChunkData(
content=node_dict["content"].replace("_split_0", ""),
content=node_dict.get("content", "").replace("_split_0", ""),
title=node_dict["name"].replace("_split_0", ""),
chunk_id=doc_id,
score=doc_score,
@ -177,7 +177,7 @@ class PprChunkRetriever(RetrieverABC):
logger.warning(f"{query} matched docs is empty")
matched_docs.append(
ChunkData(
content=item["node"]["content"],
content=item["node"].get("content", ""),
title=item["node"]["name"],
chunk_id=item["node"]["id"],
score=item["score"],

View File

@ -25,7 +25,7 @@ class KgConstrainRetrieverWithOpenSPGRetriever(RetrieverABC):
entity_linking: EntityLinking = None,
llm: LLMClient = None,
std_schema: StdSchema = None,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
self.name = kwargs.get("name", "kg_cs")
@ -71,7 +71,7 @@ class KgConstrainRetrieverWithOpenSPGRetriever(RetrieverABC):
graph_data=context.variables_graph,
is_exact_match=True,
name=self.name,
**kwargs
**kwargs,
)
return RetrieverOutput(
retriever_method=self.schema().get("name", ""), graphs=[kg_graph]

View File

@ -108,7 +108,9 @@ class FuzzyOneHopSelect(PathSelect):
with_json_parse=True,
with_except=True,
)
if not isinstance(response, list) or not all(isinstance(i, str) for i in response):
if not isinstance(response, list) or not all(
isinstance(i, str) for i in response
):
logger.warning("LLM returned invalid index format: %s", response)
return []
try:

View File

@ -8,5 +8,8 @@ checkpointer: CheckPointer = CheckpointerManager.get_checkpointer(
}
)
tmp_key = checkpointer.keys()[-1]
print(checkpointer.read_from_ckpt(tmp_key))
if checkpointer.size() > 0:
tmp_key = checkpointer.keys()[-1]
print(checkpointer.read_from_ckpt(tmp_key))
else:
print("checkpoint is empty")

View File

@ -1,31 +1,31 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
max_tokens: 8092
max_tokens: 8192
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
max_tokens: 8092
max_tokens: 8192
ner_llm: &ner_llm
ner_llm:
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
max_tokens: 8092
max_tokens: 8192
vectorize_model: &vectorize_model
api_key: sk-
base_url: https://api.siliconflow.cn/v1/
api_key: key
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024

View File

@ -66,7 +66,9 @@ class LLMGeneratorWithThought(GeneratorABC):
2. 如果您认为所提供的文件无法回答问题请回答未知
"""
prompt = f"{system_instruction}\n\n召回文档:\n{refer_data}\n思考:\n{thoughts}问题: {query}"
prompt = (
f"{system_instruction}\n\n召回文档:\n{refer_data}\n思考:\n{thoughts}问题: {query}"
)
response = self.llm_client(prompt)
if "答案:" not in response:
raise ValueError(f"no answer found in response: {response}")

1
kag/examples/FinAlibaba/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
ckpt/

View File

@ -1,8 +1,8 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct
enable_check: false
max_tokens: 8092
@ -10,23 +10,23 @@ openie_llm: &openie_llm
ner_llm: &ner_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct
enable_check: false
max_tokens: 1024
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key: sk-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
max_tokens: 1024
vectorize_model: &vectorize_model
api_key: sk-
base_url: https://api.siliconflow.cn/v1/
api_key: key
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024

View File

@ -3,7 +3,7 @@ from kag.interface.solver.retriever_abc import RetrieverABC
if __name__ == "__main__":
llm_config = {
"api_key": "sk-",
"api_key": "key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen-max-latest",
"type": "maas",
@ -16,8 +16,8 @@ if __name__ == "__main__":
"query_rewrite_prompt": {"type": "atomic_query_rewrite_prompt"},
"vectorize_model": {
"type": "openai",
"base_url": "https://api.siliconflow.cn/v1/",
"api_key": "sk-",
"base_url": "https://api.siliconflow.cn/v1",
"api_key": "key",
"model": "BAAI/bge-m3",
"vector_dimensions": 1024,
},

View File

@ -7,7 +7,7 @@
Please refer to [Quick Start](https://openspg.yuque.com/ndx6g9/cwh47i/rs7gr8g4s538b1n7) to install KAG and its dependency OpenSPG server, and learn about using KAG in developer mode.
Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform](https://lbsyun.baidu.com/apiconsole/key). Be sure to enable “MCP (SSE)” service for best performance.
Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform](https://lbsyun.baidu.com/apiconsole/key). Be sure to enable "MCP (SSE)" service for best performance.
## 2. Steps to reproduce
@ -17,14 +17,13 @@ Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform
cd kag/examples/baidu_map_mcp
```
### Step 2Configure models
### Step 2: Configure models
Update the generative model configurations ``chat_llm`` in [kag_config.yaml](./kag_config.yaml).
You need to fill in correct ``api_key`` and ``BAIDU_MAPS_API_KEY``If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
You need to fill in correct ``api_key`` and ``BAIDU_MAPS_API_KEY``. If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
### Step 6Execute tasks
### Step 3: Execute the QA tasks
In the directory, execute [baidu_map_mcp_client.py](./baidu_map_mcp_client.py).
@ -33,10 +32,10 @@ python baidu_map_mcp_client.py
```
Example problems:
1. What will the weather be like tomorrow in the West Lake District of Hangzhou?
2. What is the self-driving route from Ant A space in Hangzhou to Ant S space in Shanghai?
3. What is the latitude and longitude of Shanghai Hongqiao Railway Station?
After launch, enter the question you want to ask. We will retrieve the relevant information through Baidu Maps and then return the results to you.

View File

@ -1,11 +1,10 @@
# KAG 示例: Baidu Map MCP
这里是一个Baidu Map MCP将用户输入的搜索问题作为输入返回百度地图相关的查询结果。在此处只是展示MCP的执行效果如果需要链接到KAG上进行检索请访问 OPENSPG 创建对应的知识库和应用将MCP链接到KAG中并使用。
[English](./README.md) |
[简体中文](./README_cn.md)
这里是一个 Baidu Map MCP,将用户输入的搜索问题作为输入,返回百度地图相关的查询结果。在此处只是展示 MCP 的执行效果,如果需要链接到 KAG 上进行检索,请访问 OpenSPG 创建对应的知识库和应用,将 MCP 链接到 KAG 中并使用。
## 1. 前置条件
参考文档 [快速开始](https://openspg.yuque.com/ndx6g9/0.6/quzq24g4esal7q17) 安装 KAG 及其依赖的 OpenSPG server了解开发者模式 KAG 的使用流程。
@ -26,7 +25,7 @@ cd kag/examples/baidu_map_mcp
您需要设置正确的 ``api_key`` 以及 ``BAIDU_MAPS_API_KEY`` 。如果使用的模型供应商和模型名与默认值不同,您还需要更新 ``base_url`` 和 ``model``。
### Step 6:执行 QA 任务
### Step 3:执行 QA 任务
在目录中执行 [baidu_map_mcp_client.py](./baidu_map_mcp_client.py)。
@ -34,8 +33,10 @@ cd kag/examples/baidu_map_mcp
python baidu_map_mcp_client.py
```
问题示例:
1、杭州西湖区的明天的天气怎么样
2、从杭州市蚂蚁A空间回上海市蚂蚁S空间自驾的路线是什么
3、上海虹桥火车站的位置经纬度是什么
启动后请您输入想要询问的问题,我们会通过百度地图检索到相关的信息,然后将结果返还给您。
1. 杭州西湖区的明天的天气怎么样?
2. 从杭州市蚂蚁A空间回上海市蚂蚁S空间自驾的路线是什么?
3. 上海虹桥火车站的位置经纬度是什么?
启动后请您输入想要询问的问题,我们会通过百度地图检索到相关的信息,然后将结果返还给您。

View File

@ -30,7 +30,7 @@ async def chat_loop(client):
async def cleanup(client):
"""Clean up resources"""
await client.exit_stack.aclose()
await client.mcp_client.exit_stack.aclose()
async def main():

View File

@ -2,7 +2,7 @@
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct

View File

@ -61,9 +61,3 @@ To delete the checkpoints, execute the following command.
rm -rf ./builder/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -61,9 +61,3 @@ cd solver && python eval.py && cd ..
rm -rf ./builder/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -88,14 +88,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -112,7 +112,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -143,7 +143,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf
type: kag_output_executor

View File

@ -0,0 +1,130 @@
namespace BaiKe
Chunk(文本块): EntityType
properties:
content(内容): Text
index: TextAndVector
ArtificialObject(人造物体): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Astronomy(天文学): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Building(建筑): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Creature(生物): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Concept(概念): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Date(日期): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
GeographicLocation(地理位置): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Keyword(关键词): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Medicine(药物): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
NaturalScience(自然科学): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Organization(组织机构): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Person(人物): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Transport(运输): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Works(作品): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Others(其他): EntityType
properties:
desc(描述): Text
index: TextAndVector
semanticType(语义类型): Text
index: Text
Event(事件): EventType
properties:
subject(主体): Person
participants(参与者): Person
constraint: MultiValue
time(时间): Date
location(地点): GeographicLocation
abstract(摘要): Text
index: TextAndVector
type(事件类型): Text
index: Text
SemanticConcept(语义概念): EntityType
properties:
desc(内容): Text
index: Text

View File

@ -0,0 +1,18 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
"""
{{namespace}}.schema:
The MarkLang file for the schema of this project.
You can execute `kag schema commit` to commit your schema to SPG server.
"""

View File

@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("resp_simple")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n只输出答案,不需要输出额外的信息。"
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
)
template_en = (
"Answer the question based on the given reference."

View File

@ -1,5 +1,9 @@
ckpt/
csqa_ckpt/
/cs.jsonl
/solver/data/csqa_kag_answers.json
/solver/csqa_qfs_res_*.json
/solver/csqa_qfs_res_*.md
/solver/csqa_res_*.json
/solver/csqa_metrics_*.json
/solver/benchmark.txt

View File

@ -59,7 +59,7 @@ Execute [indexer.py](./builder/indexer.py) in the [builder](./builder) directory
cd builder && python indexer.py && cd ..
```
### Step 7: Generate the answers
### Step 7: Execute the QA tasks
Execute [eval.py](./solver/eval.py) in the [solver](./solver) directory to generate the answers.
@ -67,33 +67,12 @@ Execute [eval.py](./solver/eval.py) in the [solver](./solver) directory to gener
cd solver && python eval.py && cd ..
```
The results are saved to ``./solver/data/csqa_kag_answers.json``.
### Step 8: (Optional) Get the answers generated by other systems
Follow the LightRAG [Reproduce](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#reproduce) steps to generate answers to the questions and save the results to [./solver/data/csqa_lightrag_answers.json](./solver/data/csqa_lightrag_answers.json). Since a copy was committed, this step is optional.
### Step 9: Calculate the metrics
Update the LLM configurations in [summarization_metrics.py](./solver/summarization_metrics.py) and [factual_correctness.py](./solver/factual_correctness.py) and execute them to calculate the metrics.
```bash
python ./solver/summarization_metrics.py
python ./solver/factual_correctness.py
```
### Step 10: (Optional) Cleanup
### Step 8: (Optional) Cleanup
To delete the checkpoints, execute the following command.
```bash
rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
rm -rf ./solver/csqa_ckpt
```

View File

@ -59,7 +59,7 @@ knext schema commit
cd builder && python indexer.py && cd ..
```
### Step 7生成答案
### Step 7执行 QA 任务
在 [solver](./solver) 目录执行 [eval.py](./solver/eval.py) 生成答案。
@ -67,33 +67,12 @@ cd builder && python indexer.py && cd ..
cd solver && python eval.py && cd ..
```
生成的结果被保存至 ``./solver/data/csqa_kag_answers.json``.
### Step 8可选获取其他系统生成的答案
按 LightRAG [Reproduce](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#reproduce) 所述复现步骤生成问题的答案,将结果保存至 [./solver/data/csqa_lightrag_answers.json](./solver/data/csqa_lightrag_answers.json)。由于我们提交了一份 LightRAG 生成的答案,因此本步骤是可选的。
### Step 9计算指标
更新 [summarization_metrics.py](./solver/summarization_metrics.py) 和 [factual_correctness.py](./solver/factual_correctness.py) 中的大模型配置并执行它们以计算指标。
```bash
python ./solver/summarization_metrics.py
python ./solver/factual_correctness.py
```
### Step 10可选清理
### Step 8可选清理
若要删除 checkpoint可执行以下命令。
```bash
rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
rm -rf ./solver/csqa_ckpt
```

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -89,14 +89,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -113,7 +113,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -144,7 +144,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf
type: kag_output_executor

View File

@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
class CsQaEvaluator(EvalQa):
def __init__(self, solver_pipeline_name="solver_pipeline"):
self.task_name = "musique"
self.task_name = "csqa"
super().__init__(self.task_name, solver_pipeline_name)
self.solver_pipeline_name = solver_pipeline_name

1
kag/examples/domain_kg/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
ckpt/

View File

@ -3,7 +3,7 @@
[English](./README.md) |
[简体中文](./README_cn.md)
This example provides a case of knowledge injection in the medical domain, where the nodes of the domain knowledge graph are medical terms, and the relationships are defined as "isA." The document contains an introduction to a selection of medical terms.
This example provides a case of knowledge injection in the medical domain, where the nodes of the domain knowledge graph are medical terms, and the relationships are defined as "isA". The document contains an introduction to a selection of medical terms.
## 1. Precondition
@ -33,13 +33,14 @@ knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
### Step 4: Commit the schema
Execute the following command to commit the schema [TwoWiki.schema](./schema/TwoWiki.schema).
Execute the following command to commit the schema [DomainKG.schema](./schema/DomainKG.schema).
```bash
knext schema commit
```
### Step 5: Build the knowledge graph
We first need to inject the domain knowledge graph into the graph database. This allows the PostProcessor component to link the extracted nodes with the nodes of the domain knowledge graph, thereby standardizing them during the construction of the graph from unstructured documents.
Execute [injection.py](./builder/injection.py) in the [builder](./builder) directory to inject the domain KG.
@ -50,14 +51,12 @@ cd builder && python injection.py && cd ..
Note that KAG provides a special implementation of the ``KAGBuilderChain`` for domain knowledge graph injection, known as the ``DomainKnowledgeInjectChain``, which is registered under the name ``domain_kg_inject_chain``. Since domain knowledge injection does not involve scanning files or directories, you can directly call the ``invoke`` interface of the chain to initiate the task.
Next, execute [indexer.py](./builder/indexer.py) in the [builder](./builder) directory to build the KG from the unstructured documents.
```bash
cd builder && python indexer.py && cd ..
```
### Step 6: Execute the QA tasks
Execute [qa.py](./solver/qa.py) in the [solver](./solver) directory to generate the answer to the question.
@ -75,8 +74,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -3,7 +3,7 @@
[English](./README.md) |
[简体中文](./README_cn.md)
本示例提供了一个医疗领域知识注入的案例其中领域知识图谱的节点为医学名词关系为isA。文档内容为部分医学名词的介绍。
本示例提供了一个医疗领域知识注入的案例,其中领域知识图谱的节点为医学名词,关系为 isA。文档内容为部分医学名词的介绍。
## 1. 前置条件
@ -34,7 +34,7 @@ knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
### Step 4提交 schema
执行以下命令提交 schema [TwoWiki.schema](./schema/TwoWiki.schema)。
执行以下命令提交 schema [DomainKG.schema](./schema/DomainKG.schema)。
```bash
knext schema commit
@ -42,15 +42,14 @@ knext schema commit
### Step 5构建知识图谱
我们首先需要将领域知识图谱注入到图数据库中这样在对非结构化文档进行图谱构建的时候PostProcessor组件可以将抽取出的节点与领域知识图谱节点进行链指标准化
我们首先需要将领域知识图谱注入到图数据库中,这样在对非结构化文档进行图谱构建的时候,PostProcessor 组件可以将抽取出的节点与领域知识图谱节点进行链指(标准化)。
在 [builder](./builder) 目录执行 [injection.py](./builder/injection.py) ,注入图数据。
```bash
cd builder && python injection.py && cd ..
```
注意KAG为领域知识图谱注入提供了一个特殊的KAGBuilderChain实现即DomainKnowledgeInjectChain其注册名为domain_kg_inject_chain。由于领域知识注入不涉及到扫描文件或目录可以直接调用builder chain 的invoke接口启动任务。
注意:KAG 为领域知识图谱注入提供了一个特殊的 ``KAGBuilderChain`` 实现,即 ``DomainKnowledgeInjectChain``,其注册名为 ``domain_kg_inject_chain``。由于领域知识注入不涉及到扫描文件或目录,可以直接调用 builder chain 的 ``invoke`` 接口启动任务。
接下来,在 [builder](./builder) 目录执行 [indexer.py](./builder/indexer.py) 构建知识图谱。
@ -75,8 +74,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -110,14 +110,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -134,7 +134,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -165,7 +165,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf
type: kag_output_executor

View File

@ -1,10 +1,9 @@
# KAG Example: Google Web Search MCP
Here is a Google Web Search MCP that takes the search question entered by the user as input and returns the relevant web page. Here, we only show the execution effect of MCP. If you need to link to KAG for retrieval, please visit OPENSPG to create the corresponding knowledge base and application, and link MCP to KAG and use it.
[English](./README.md) |
[简体中文](./README_cn.md)
Here is a Google Web Search MCP that takes the search question entered by the user as input and returns the relevant web page. Here, we only show the execution effect of MCP. If you need to link to KAG for retrieval, please visit OpenSPG to create the corresponding knowledge base and application, and link MCP to KAG and use it.
## 1. Precondition
@ -18,14 +17,13 @@ Please refer to [Quick Start](https://openspg.yuque.com/ndx6g9/cwh47i/rs7gr8g4s5
cd kag/examples/google_web_search_mcp
```
### Step 2Configure models
### Step 2: Configure models
Update the generative model configurations ``chat_llm`` in [kag_config.yaml](./kag_config.yaml).
You need to fill in correct ``api_key``If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
You need to fill in correct ``api_key``. If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
### Step 6Execute tasks
### Step 3: Execute the QA tasks
In the directory, execute [google_web_search_client.py](./google_web_search_client.py).
@ -34,7 +32,9 @@ python google_web_search_client.py
```
Example problems:
1. Why is the sky blue?
2. What is the Tyndall effect?
After launch, please input the questions you want to ask, we will retrieve the relevant web page through google, and then return the results to you.
After launch, please input the questions you want to ask. We will retrieve the relevant web pages through Google and then return the results to you.

View File

@ -1,10 +1,9 @@
# KAG 示例: Google Web Search MCP
这里是一个Google Web Search MCP将用户输入的搜索问题作为输入返回相关的网页。在此处只是展示MCP的执行效果如果需要链接到KAG上进行检索请访问 OPENSPG 创建对应的知识库和应用将MCP链接到KAG中并使用。
[English](./README.md) |
[简体中文](./README_cn.md)
这里是一个 Google Web Search MCP,将用户输入的搜索问题作为输入,返回相关的网页。在此处只是展示 MCP 的执行效果,如果需要链接到 KAG 上进行检索,请访问 OpenSPG 创建对应的知识库和应用,将 MCP 链接到 KAG 中并使用。
## 1. 前置条件
@ -24,7 +23,7 @@ cd kag/examples/google_web_search_mcp
您需要设置正确的 ``api_key``。如果使用的模型供应商和模型名与默认值不同,您还需要更新 ``base_url`` 和 ``model``。
### Step 6:执行 QA 任务
### Step 3:执行 QA 任务
在目录中执行 [google_web_search_client.py](./google_web_search_client.py)。
@ -33,7 +32,9 @@ python google_web_search_client.py
```
问题示例:
1、天空为什么是蓝色的
2、什么是丁达尔效应
启动后请您输入想要询问的问题我们会通过google检索到相关的网页然后将结果返还给您。
1. 天空为什么是蓝色的?
2. 什么是丁达尔效应?
启动后请您输入想要询问的问题,我们会通过 Google 检索到相关的网页,然后将结果返还给您。

View File

@ -30,7 +30,7 @@ async def chat_loop(client):
async def cleanup(client):
"""Clean up resources"""
await client.exit_stack.aclose()
await client.mcp_client.exit_stack.aclose()
async def main():

View File

@ -1,8 +1,8 @@
#------------project configuration start----------------#
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
api_key:
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen-max-latest
mcp_executor:

View File

@ -100,9 +100,3 @@ To delete the checkpoint, execute the following command.
rm -rf ./builder/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -98,9 +98,3 @@ cd solver && python evaForMedicine.py && cd ..
rm -rf ./builder/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -115,14 +115,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -139,7 +139,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -170,7 +170,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf

View File

@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("example_resp_generator")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n输出答案,并且给出理由。"
"\n给定的引用信息:'$content'\n问题:'$query'"
)
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
template_en = (
"Answer the question based on the given reference."
"\nGive me the answer and why."

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -58,14 +58,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -82,7 +82,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -113,7 +113,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf

View File

@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("resp_riskmining")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n输出答案,并且给出理由。"
"\n给定的引用信息:'$content'\n问题:'$query'"
)
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
template_en = (
"Answer the question based on the given reference."
"\nGive me the answer and why."

View File

@ -1,21 +1,21 @@
#------------project configuration start----------------#
openie_llm: &openie_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-7b-instruct-1m
enable_check: false
chat_llm: &chat_llm
type: maas
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: key
model: qwen2.5-72b-instruct
enable_check: false
vectorize_model: &vectorize_model
api_key: key
base_url: https://api.siliconflow.cn/v1/
base_url: https://api.siliconflow.cn/v1
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
@ -57,14 +57,14 @@ kg_cs: &kg_cs
search_api: *search_api
recognition_threshold: 0.9
exclude_types:
- "Chunk"
- Chunk
kg_fr: &kg_fr
type: kg_fr_open_spg
top_k: 20
graph_api: *graph_api
search_api: *search_api
vectorize_model: *vectorize_model
vectorize_model: *vectorize_model
path_select:
type: fuzzy_one_hop_select
llm_client: *openie_llm
@ -81,7 +81,7 @@ kg_fr: &kg_fr
search_api: *search_api
recognition_threshold: 0.8
exclude_types:
- "Chunk"
- Chunk
rc: &rc
type: rc_open_spg
@ -112,7 +112,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
- *rc
merger:
type: kag_merger
enable_summary: true
enable_summary: true
kag_output_executor: &kag_output_executor_conf

View File

@ -13,7 +13,7 @@ def read_dsl_files(directory):
for filename in os.listdir(directory):
if filename.endswith(".dsl"):
file_path = os.path.join(directory, filename)
with open(file_path, "r", encoding="utf-8") as file:
with open(file_path, "r", encoding="utf-8", newline="\n") as file:
content = file.read()
dsl_contents.append(content)

View File

@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("resp_supplychain")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n输出答案,并且给出理由。"
"\n给定的引用信息:'$content'\n问题:'$query'"
)
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
template_en = (
"Answer the question based on the given reference."
"\nGive me the answer and why."

View File

@ -69,12 +69,6 @@ To delete the checkpoints, execute the following command.
rm -rf ./src/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8: (Optional) Try the larger datasets
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.

View File

@ -69,12 +69,6 @@ cd src && python eval.py --qa_file ./data/qa_sub.json && cd ..
rm -rf ./src/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8:(可选)尝试更大的数据集
从 Step 1 重新开始,修改 [indexer.py](./src/indexer.py) 和 [eval.py](./src/eval.py) 以尝试更大的数据集。

View File

@ -73,8 +73,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
To delete the KAG project and associated knowledge graph, execute a command similar to the following, replacing the OpenSPG server address and KAG project id with actual values:
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -94,9 +94,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -35,9 +35,7 @@ def extract_admin_types(input_file, output_file):
if len(parts) >= 3 and parts[1] == "type" and "行政区" in parts[2]:
admin_name = parts[0]
admin_type = parts[2]
admin_id = admin_id_map.get(
admin_name, ""
) # 获取对应的ID如果没有则为空字符串
admin_id = admin_id_map.get(admin_name, "") # 获取对应的ID如果没有则为空字符串
admin_types.append(
{

View File

@ -118,9 +118,7 @@ if __name__ == "__main__":
output_file = os.path.join(dir_path, "图书馆.csv")
parser = argparse.ArgumentParser(description="处理图书馆数据")
parser.add_argument("--input_file", default=input_file, help="输入文件路径")
parser.add_argument(
"-o", "--output", default=output_file, help="输出文件路径,默认覆盖原文件"
)
parser.add_argument("-o", "--output", default=output_file, help="输出文件路径,默认覆盖原文件")
parser.add_argument(
"-f",
"--format",

View File

@ -1,11 +1,15 @@
from kag.open_benchmark.common_component.llm_genereator_with_thought import LLMGeneratorWithThought
from kag.open_benchmark.common_component.llm_genereator_with_thought import (
LLMGeneratorWithThought,
)
from kag.open_benchmark.common_component.planner_prompt import StaticPlanningPrompt
from kag.open_benchmark.common_component.resp_generator import RespGenerator
from kag.open_benchmark.common_component.evidence_based_reasoner import EvidenceBasedReasoner
from kag.open_benchmark.common_component.evidence_based_reasoner import (
EvidenceBasedReasoner,
)
__all__ = [
"EvidenceBasedReasoner",
"LLMGeneratorWithThought",
"StaticPlanningPrompt",
"RespGenerator"
"RespGenerator",
]

View File

@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("resp_simple")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n只输出答案,不需要输出额外的信息。"
"\n给定的引用信息:'$content'\n问题:'$query'"
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$content'\n问题:'$query'"
)
# template_en = (
# "Answer the question based on the given reference."

View File

@ -69,12 +69,6 @@ To delete the checkpoints, execute the following command.
rm -rf ./src/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8: (Optional) Try the larger datasets
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.

View File

@ -69,12 +69,6 @@ cd src && python eva.py --qa_file ./data/qa_sub.json && cd ..
rm -rf ./src/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8:(可选)尝试更大的数据集
从 Step 1 重新开始,修改 [indexer.py](./src/indexer.py) 和 [eval.py](./src/eval.py) 以尝试更大的数据集。

View File

@ -70,12 +70,6 @@ To delete the checkpoints, execute the following command.
rm -rf ./src/ckpt
```
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8: (Optional) Try the larger datasets
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.

View File

@ -67,13 +67,6 @@ cd src && python eval.py --qa_file ./data/qa_sub.json && cd ..
```bash
rm -rf ./src/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```
### Step 8:(可选)尝试更大的数据集

View File

@ -32,7 +32,11 @@ class EvaForMusique(EvalQa):
gold_list = []
question_decomposition = sample["question_decomposition"]
for qd in question_decomposition:
gold_list.append(processing_phrases(paragraphs[qd["paragraph_support_idx"]]["title"]).replace(" ", ""))
gold_list.append(
processing_phrases(
paragraphs[qd["paragraph_support_idx"]]["title"]
).replace(" ", "")
)
predictionlist = []
for ref in references:
predictionlist.append(

View File

@ -77,8 +77,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
To delete the KAG project and associated knowledge graph, execute a command similar to the following, replacing the OpenSPG server address and KAG project id with actual values:
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -78,9 +78,3 @@ rm -rf ./builder/ckpt
rm -rf ./solver/ckpt
```
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
```bash
curl http://127.0.0.1:8887/project/api/delete?projectId=1
```

View File

@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("resp_simple")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n只输出答案,不需要输出额外的信息。"
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
)
template_en = (
"Answer the question based on the given reference."

View File

@ -100,9 +100,7 @@ class PrqaExecutor(ExecutorABC):
completion_1 = self.send_cypher_messages(message_list)
if not completion_1.tool_calls:
raise ValueError(
f"{question} 查询失败此时tool_calls 为空或为 None无法继续处理"
)
raise ValueError(f"{question} 查询失败此时tool_calls 为空或为 None无法继续处理")
tool = completion_1.tool_calls[0]
args = json.loads(tool.function.arguments)
cypher_query = args.get("cypher_query")

View File

@ -47,8 +47,7 @@ class PrqaPlanner(PlannerABC):
instruct_content = message["content"]
if "上一次的判断错误,请重新思考" not in instruct_content:
updated_instruct = (
instruct_content.strip()
+ "\n上一次的判断错误,请重新思考。"
instruct_content.strip() + "\n上一次的判断错误,请重新思考。"
)
message["content"] = updated_instruct
break # 修改完成后直接退出循环

View File

@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("prqa_generator")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n输出答案,并且给出理由。"
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
"基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
)
template_en = (
"Answer the question based on the given reference."

View File

@ -86,7 +86,9 @@ class KagOutputExecutor(ExecutorABC):
result = []
for alias in logic_node.alias_name_set:
if context.variables_graph.has_alias(alias.alias_name):
alias_answer = context.variables_graph.get_answered_alias(alias.alias_name)
alias_answer = context.variables_graph.get_answered_alias(
alias.alias_name
)
if alias_answer:
result.append(alias_answer)
if not result:

View File

@ -12,6 +12,7 @@
import json
import sys
from contextlib import AsyncExitStack
from typing import Optional, Dict
import asyncio
@ -55,7 +56,7 @@ class MCPClient:
if not (is_python or is_js):
raise ValueError("Server script must be a .py or .js file")
command = "python3" if is_python else "node"
command = sys.executable if is_python else "node"
server_params = StdioServerParameters(
command=command, args=[server_script_path], env=env
)

View File

@ -175,16 +175,22 @@ async def do_index_pipeline(query, qa_config, reporter):
return await pipeline.ainvoke(query, reporter=reporter)
async def do_qa_pipeline(use_pipeline, query, qa_config, reporter, task_id, kb_project_ids):
async def do_qa_pipeline(
use_pipeline, query, qa_config, reporter, task_id, kb_project_ids
):
retriever_configs = []
kb_configs = qa_config.get("kb", [])
for kb_project_id in kb_project_ids:
kb_task_project_id = f"{task_id}_{kb_project_id}"
try:
kag_config = KAGConfigAccessor.get_config(kb_task_project_id)
matched_kb = next((kb for kb in kb_configs if kb.get("id") == kb_project_id), None)
matched_kb = next(
(kb for kb in kb_configs if kb.get("id") == kb_project_id), None
)
if not matched_kb:
reporter.warning(f"Knowledge base with id {kb_project_id} not found in qa_config['kb']")
reporter.warning(
f"Knowledge base with id {kb_project_id} not found in qa_config['kb']"
)
continue
for index_name in matched_kb.get("index_list", []):
@ -192,13 +198,16 @@ async def do_qa_pipeline(use_pipeline, query, qa_config, reporter, task_id, kb_p
{
"type": index_name,
"llm_config": qa_config.get("llm", {}),
"vectorize_model_config": kag_config.all_config.get("vectorize_model", {}),
"vectorize_model_config": kag_config.all_config.get(
"vectorize_model", {}
),
}
)
retriever_configs.extend(
index_manager.build_retriever_config(
qa_config.get("llm", {}), kag_config.all_config.get("vectorize_model", {}),
kag_qa_task_config_key=kb_task_project_id
qa_config.get("llm", {}),
kag_config.all_config.get("vectorize_model", {}),
kag_qa_task_config_key=kb_task_project_id,
)
)
except Exception as e:
@ -260,7 +269,9 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
global_config = kb.get(KAGConstants.PROJECT_CONFIG_KEY, {})
kb_conf.global_config.initialize(**global_config)
project_client = ProjectClient(host_addr=host_addr, project_id=kb_project_id)
project_client = ProjectClient(
host_addr=host_addr, project_id=kb_project_id
)
project = project_client.get_by_id(kb_project_id)
kb_conf.global_config.project_id = kb_project_id
@ -292,14 +303,15 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
try:
await reporter.start()
if use_pipeline == "index_pipeline":
answer = await do_index_pipeline(
query, main_config, reporter
)
answer = await do_index_pipeline(query, main_config, reporter)
else:
answer = await do_qa_pipeline(
use_pipeline, query, main_config, reporter,
use_pipeline,
query,
main_config,
reporter,
task_id=task_id,
kb_project_ids=kb_project_ids
kb_project_ids=kb_project_ids,
)
reporter.add_report_line("answer", "Final Answer", answer, "FINISH")
@ -324,15 +336,15 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
class SolverMain:
def invoke(
self,
project_id: int,
task_id: int,
query: str,
session_id: str = "0",
is_report=True,
host_addr="http://127.0.0.1:8887",
params=None,
app_id="",
self,
project_id: int,
task_id: int,
query: str,
session_id: str = "0",
is_report=True,
host_addr="http://127.0.0.1:8887",
params=None,
app_id="",
):
answer = None
if params is None:
@ -365,9 +377,7 @@ if __name__ == "__main__":
# "4200052", "https://spg-pre.alipay.com"
# )
config = {}
params = {
"config": config
}
params = {"config": config}
res = SolverMain().invoke(
2100007,
11200009,

View File

@ -39,9 +39,7 @@ class QueryRewritePrompt(PromptABC):
\nexample字段中给出了一个简单的示例供参考请直接返回改写后的问题字符串正如example的output字段一样
""",
"example": {
"input": {
"query": "{{0.output}}获得的奖项中,有哪些是{{1.output}}没有获得过的"
},
"input": {"query": "{{0.output}}获得的奖项中,有哪些是{{1.output}}没有获得过的"},
"context": {
"0": {
"output": [

View File

@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
@PromptABC.register("default_resp_generator")
class RespGenerator(PromptABC):
template_zh = (
"基于给定的引用信息回答问题。"
"\n输出答案,并且给出理由。"
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
"基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
)
template_en = (
"Answer the question based on the given reference."

View File

@ -29,7 +29,7 @@ def execute_reasoner_job(file, dsl, output=None, proj_path="./"):
"""
client = ReasonerClient(host_addr=env.host_addr, project_id=int(env.project_id))
if file and not dsl:
with open(file, "r") as f:
with open(file, "r", encoding="utf-8", newline="\n") as f:
dsl_content = f.read()
elif not file and dsl:
dsl_content = dsl

View File

@ -436,7 +436,7 @@ class SPGConceptRuleMarkLang:
Load and then parse the script file
"""
file = open(filename, "r", encoding="utf-8")
file = open(filename, "r", encoding="utf-8", newline="\n")
lines = file.read().splitlines()
last_indent_level = 0