mirror of https://github.com/OpenSPG/KAG
fix(kag): update examples to work under branch 0.8.0 (#593)
* add graph
* fix bug for None
* add knowledge unit extra
* fix_prompt
* extract common function into benchmark component
* format code
* format code
* format code
* fix benchmark knowledge unit
* fix node
* add common component
* Revert "remove local bge model and dependency scikit-learn"
This reverts commit d1c20fc6a8
.
* remove unused pkg
* add decompose
* change hybrid default config
* fix legacy commands in readme
* fix example csqa
* fix example baike
* fix example domain_kg
* fix example medicine
* fix example riskmining
* fix example supplychain
* fix example EastElectric
* fix example FinAlibaba
* update readme to use python 3.10
* fix readme format
* fix google_web_search_mcp
* fix example baidu_map_mcp
* format code with black
---------
Co-authored-by: peilong.zip <peilong.zpl@antgroup.com>
This commit is contained in:
parent
ffc20ec5b7
commit
fc98ad136e
|
@ -150,7 +150,7 @@ Refer to the 3.1 section to complete the installation of the engine & dependent
|
|||
**Windows developers**
|
||||
|
||||
```text
|
||||
# Install the official Python 3.8.10 or later, install Git.
|
||||
# Install the official Python 3.10 or later, install Git.
|
||||
|
||||
# Create and activate Python venv: py -m venv kag-demo && kag-demo\Scripts\activate
|
||||
|
||||
|
|
|
@ -140,7 +140,7 @@ Default password: openspg@kag
|
|||
**Windows 开发者**
|
||||
|
||||
```
|
||||
# 安装官方 Python 3.8.10 或更新版本,安装 Git。
|
||||
# 安装官方 Python 3.10 或更新版本,安装 Git。
|
||||
|
||||
# 创建、激活 Python 虚拟环境:py -m venv kag-demo && kag-demo\Scripts\activate
|
||||
|
||||
|
|
|
@ -172,7 +172,7 @@ docker compose -f docker-compose.yml up -d
|
|||
**Windows開発者**
|
||||
|
||||
```text
|
||||
# 公式のPython 3.8.10以降をインストールし、Gitをインストールします。
|
||||
# 公式のPython 3.10以降をインストールし、Gitをインストールします。
|
||||
|
||||
# Python仮想環境の作成とアクティベート:py -m venv kag-demo && kag-demo\Scripts\activate
|
||||
|
||||
|
|
|
@ -143,6 +143,7 @@ def run_benchmark(config, result_queue):
|
|||
import_modules_from_path("./")
|
||||
# import benchmark common component
|
||||
import kag.open_benchmark.common_component
|
||||
|
||||
runner = KAGBenchmark.from_config(config)
|
||||
result = runner.invoke()
|
||||
result_queue.put((runner.job_name, result))
|
||||
|
|
|
@ -203,9 +203,13 @@ class DefaultExternalGraphLoader(ExternalGraphLoaderABC):
|
|||
DefaultExternalGraphLoader: An instance of DefaultExternalGraphLoader initialized with the data from the JSON files.
|
||||
"""
|
||||
nodes = []
|
||||
for item in json.load(open(node_file_path, "r")):
|
||||
for item in json.load(
|
||||
open(node_file_path, "r", encoding="utf-8", newline="\n")
|
||||
):
|
||||
nodes.append(Node.from_dict(item))
|
||||
edges = []
|
||||
for item in json.load(open(edge_file_path, "r")):
|
||||
for item in json.load(
|
||||
open(edge_file_path, "r", encoding="utf-8", newline="\n")
|
||||
):
|
||||
edges.append(Edge.from_dict(item))
|
||||
return cls(nodes=nodes, edges=edges, match_config=match_config)
|
||||
|
|
|
@ -338,7 +338,7 @@ class SchemaFreeExtractor(ExtractorABC):
|
|||
return None, None
|
||||
|
||||
for tri in triples:
|
||||
if len(tri) != 3:
|
||||
if tri is None or len(tri) != 3:
|
||||
continue
|
||||
s_category, s_name = get_category_and_name(entities, tri[0])
|
||||
tri[0] = processing_phrases(tri[0])
|
||||
|
|
|
@ -46,9 +46,9 @@ class DocxNode:
|
|||
self.content = content
|
||||
self.node_type = node_type
|
||||
self.children: List[DocxNode] = []
|
||||
self.properties: Dict[str, str] = (
|
||||
{}
|
||||
) # Store additional properties like style, indent level, etc.
|
||||
self.properties: Dict[
|
||||
str, str
|
||||
] = {} # Store additional properties like style, indent level, etc.
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.node_type}({self.level}): {self.display_title}"
|
||||
|
|
|
@ -86,9 +86,7 @@ class OutlineSplitter(SplitterABC):
|
|||
# 如果栈为空,或者当前节点的级别高于栈顶节点的级别,说明当前节点是根节点或新的分支节点
|
||||
if not stack or stack[-1][1] >= level:
|
||||
if stack:
|
||||
stack[-1][2]["children"].append(
|
||||
node
|
||||
) # 将新节点添加到最近的父节点的 children 列表中
|
||||
stack[-1][2]["children"].append(node) # 将新节点添加到最近的父节点的 children 列表中
|
||||
else:
|
||||
catalog_tree.append(node) # 如果栈为空,说明这是一个根节点
|
||||
else:
|
||||
|
@ -990,9 +988,7 @@ class OutlineSplitter(SplitterABC):
|
|||
|
||||
# 递归为子节点生成chunk
|
||||
for child in node.get("children", []):
|
||||
generate_chunks(
|
||||
child, chunks, full_title
|
||||
) # 将当前完整title传递给子节点
|
||||
generate_chunks(child, chunks, full_title) # 将当前完整title传递给子节点
|
||||
|
||||
return chunks
|
||||
|
||||
|
|
|
@ -20,12 +20,8 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
@PromptABC.register("analyze_table")
|
||||
class AnalyzeTablePrompt(PromptABC):
|
||||
template_zh: str = (
|
||||
"""你是一个分析表格的专家, 从table中提取信息并分析,最后返回表格有效信息"""
|
||||
)
|
||||
template_en: str = (
|
||||
"""You are an expert in knowledge graph extraction. Based on the schema defined by the constraint, extract all entities and their attributes from the input. Return NAN for attributes not explicitly mentioned in the input. Output the results in standard JSON format, as a list."""
|
||||
)
|
||||
template_zh: str = """你是一个分析表格的专家, 从table中提取信息并分析,最后返回表格有效信息"""
|
||||
template_en: str = """You are an expert in knowledge graph extraction. Based on the schema defined by the constraint, extract all entities and their attributes from the input. Return NAN for attributes not explicitly mentioned in the input. Output the results in standard JSON format, as a list."""
|
||||
|
||||
def build_prompt(self, variables) -> str:
|
||||
return json.dumps(
|
||||
|
|
|
@ -268,10 +268,7 @@ def check_data(line, data_type="knowIE", language="zh"):
|
|||
check_data.append(ner)
|
||||
if language == "zh" and isinstance(ner, dict):
|
||||
if (
|
||||
len(
|
||||
set(ner.keys())
|
||||
& set(["名称", "类型", "领域本体", "解释", "标准名", "同义词"])
|
||||
)
|
||||
len(set(ner.keys()) & set(["名称", "类型", "领域本体", "解释", "标准名", "同义词"]))
|
||||
== 6
|
||||
):
|
||||
check_data.append(ner)
|
||||
|
|
|
@ -88,9 +88,7 @@ class OutlinePrompt(PromptABC):
|
|||
def parse_response(self, response: str, **kwargs):
|
||||
# 如果返回结果是字符串,先去除 Markdown 语法,再使用 ast.literal_eval 转换成列表
|
||||
if isinstance(response, str):
|
||||
cleaned_data = response.strip(
|
||||
"`python\n[] \n"
|
||||
) # 去除 Markdown 语法和多余的空格
|
||||
cleaned_data = response.strip("`python\n[] \n") # 去除 Markdown 语法和多余的空格
|
||||
cleaned_data = "[" + cleaned_data + "]" # 恢复为列表格式
|
||||
try:
|
||||
parsed_data = ast.literal_eval(cleaned_data)
|
||||
|
|
|
@ -34,7 +34,9 @@ class TxtCheckPointer(CheckPointer):
|
|||
"""
|
||||
ckpt = {}
|
||||
if os.path.exists(self._ckpt_file_path):
|
||||
with open(self._ckpt_file_path, "r") as reader:
|
||||
with open(
|
||||
self._ckpt_file_path, "r", encoding="utf-8", newline="\n"
|
||||
) as reader:
|
||||
for line in reader:
|
||||
data = json.loads(line)
|
||||
ckpt[data["id"]] = data["value"]
|
||||
|
|
|
@ -122,10 +122,10 @@ class GraphApi(object):
|
|||
) # noqa: E501
|
||||
|
||||
# HTTP header `Content-Type`
|
||||
header_params["Content-Type"] = (
|
||||
self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
)
|
||||
header_params[
|
||||
"Content-Type"
|
||||
] = self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
) # noqa: E501
|
||||
|
||||
# Authentication setting
|
||||
|
@ -240,10 +240,10 @@ class GraphApi(object):
|
|||
) # noqa: E501
|
||||
|
||||
# HTTP header `Content-Type`
|
||||
header_params["Content-Type"] = (
|
||||
self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
)
|
||||
header_params[
|
||||
"Content-Type"
|
||||
] = self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
) # noqa: E501
|
||||
|
||||
# Authentication setting
|
||||
|
@ -358,10 +358,10 @@ class GraphApi(object):
|
|||
) # noqa: E501
|
||||
|
||||
# HTTP header `Content-Type`
|
||||
header_params["Content-Type"] = (
|
||||
self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
)
|
||||
header_params[
|
||||
"Content-Type"
|
||||
] = self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
) # noqa: E501
|
||||
|
||||
# Authentication setting
|
||||
|
@ -476,10 +476,10 @@ class GraphApi(object):
|
|||
) # noqa: E501
|
||||
|
||||
# HTTP header `Content-Type`
|
||||
header_params["Content-Type"] = (
|
||||
self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
)
|
||||
header_params[
|
||||
"Content-Type"
|
||||
] = self.api_client.select_header_content_type( # noqa: E501
|
||||
["application/json"]
|
||||
) # noqa: E501
|
||||
|
||||
# Authentication setting
|
||||
|
|
|
@ -198,6 +198,8 @@ class AtomicQueryChunkRetriever(RetrieverABC):
|
|||
res_chunk_list = []
|
||||
chunk_id_set = set()
|
||||
for chunk in chunks:
|
||||
if chunk is None:
|
||||
continue
|
||||
if chunk.chunk_id not in chunk_id_set:
|
||||
chunk_id_set.add(chunk.chunk_id)
|
||||
res_chunk_list.append(chunk)
|
||||
|
|
|
@ -137,7 +137,7 @@ class PprChunkRetriever(RetrieverABC):
|
|||
)
|
||||
node_dict = dict(node.items())
|
||||
return doc_id, ChunkData(
|
||||
content=node_dict["content"].replace("_split_0", ""),
|
||||
content=node_dict.get("content", "").replace("_split_0", ""),
|
||||
title=node_dict["name"].replace("_split_0", ""),
|
||||
chunk_id=doc_id,
|
||||
score=doc_score,
|
||||
|
@ -177,7 +177,7 @@ class PprChunkRetriever(RetrieverABC):
|
|||
logger.warning(f"{query} matched docs is empty")
|
||||
matched_docs.append(
|
||||
ChunkData(
|
||||
content=item["node"]["content"],
|
||||
content=item["node"].get("content", ""),
|
||||
title=item["node"]["name"],
|
||||
chunk_id=item["node"]["id"],
|
||||
score=item["score"],
|
||||
|
|
|
@ -25,7 +25,7 @@ class KgConstrainRetrieverWithOpenSPGRetriever(RetrieverABC):
|
|||
entity_linking: EntityLinking = None,
|
||||
llm: LLMClient = None,
|
||||
std_schema: StdSchema = None,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
self.name = kwargs.get("name", "kg_cs")
|
||||
|
@ -71,7 +71,7 @@ class KgConstrainRetrieverWithOpenSPGRetriever(RetrieverABC):
|
|||
graph_data=context.variables_graph,
|
||||
is_exact_match=True,
|
||||
name=self.name,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
)
|
||||
return RetrieverOutput(
|
||||
retriever_method=self.schema().get("name", ""), graphs=[kg_graph]
|
||||
|
|
|
@ -108,7 +108,9 @@ class FuzzyOneHopSelect(PathSelect):
|
|||
with_json_parse=True,
|
||||
with_except=True,
|
||||
)
|
||||
if not isinstance(response, list) or not all(isinstance(i, str) for i in response):
|
||||
if not isinstance(response, list) or not all(
|
||||
isinstance(i, str) for i in response
|
||||
):
|
||||
logger.warning("LLM returned invalid index format: %s", response)
|
||||
return []
|
||||
try:
|
||||
|
|
|
@ -8,5 +8,8 @@ checkpointer: CheckPointer = CheckpointerManager.get_checkpointer(
|
|||
}
|
||||
)
|
||||
|
||||
tmp_key = checkpointer.keys()[-1]
|
||||
print(checkpointer.read_from_ckpt(tmp_key))
|
||||
if checkpointer.size() > 0:
|
||||
tmp_key = checkpointer.keys()[-1]
|
||||
print(checkpointer.read_from_ckpt(tmp_key))
|
||||
else:
|
||||
print("checkpoint is empty")
|
||||
|
|
|
@ -1,31 +1,31 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
max_tokens: 8092
|
||||
max_tokens: 8192
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
max_tokens: 8092
|
||||
max_tokens: 8192
|
||||
|
||||
ner_llm: &ner_llm
|
||||
ner_llm:
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
max_tokens: 8092
|
||||
max_tokens: 8192
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: sk-
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
|
|
@ -66,7 +66,9 @@ class LLMGeneratorWithThought(GeneratorABC):
|
|||
2. 如果您认为所提供的文件无法回答问题,请回答“未知”。
|
||||
"""
|
||||
|
||||
prompt = f"{system_instruction}\n\n召回文档:\n{refer_data}\n思考:\n{thoughts}问题: {query}"
|
||||
prompt = (
|
||||
f"{system_instruction}\n\n召回文档:\n{refer_data}\n思考:\n{thoughts}问题: {query}"
|
||||
)
|
||||
response = self.llm_client(prompt)
|
||||
if "答案:" not in response:
|
||||
raise ValueError(f"no answer found in response: {response}")
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
ckpt/
|
|
@ -1,8 +1,8 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct
|
||||
enable_check: false
|
||||
max_tokens: 8092
|
||||
|
@ -10,23 +10,23 @@ openie_llm: &openie_llm
|
|||
|
||||
ner_llm: &ner_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct
|
||||
enable_check: false
|
||||
max_tokens: 1024
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key: sk-
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
max_tokens: 1024
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: sk-
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
|
|
@ -3,7 +3,7 @@ from kag.interface.solver.retriever_abc import RetrieverABC
|
|||
|
||||
if __name__ == "__main__":
|
||||
llm_config = {
|
||||
"api_key": "sk-",
|
||||
"api_key": "key",
|
||||
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
"model": "qwen-max-latest",
|
||||
"type": "maas",
|
||||
|
@ -16,8 +16,8 @@ if __name__ == "__main__":
|
|||
"query_rewrite_prompt": {"type": "atomic_query_rewrite_prompt"},
|
||||
"vectorize_model": {
|
||||
"type": "openai",
|
||||
"base_url": "https://api.siliconflow.cn/v1/",
|
||||
"api_key": "sk-",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "key",
|
||||
"model": "BAAI/bge-m3",
|
||||
"vector_dimensions": 1024,
|
||||
},
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
Please refer to [Quick Start](https://openspg.yuque.com/ndx6g9/cwh47i/rs7gr8g4s538b1n7) to install KAG and its dependency OpenSPG server, and learn about using KAG in developer mode.
|
||||
|
||||
Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform](https://lbsyun.baidu.com/apiconsole/key). Be sure to enable “MCP (SSE)” service for best performance.
|
||||
Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform](https://lbsyun.baidu.com/apiconsole/key). Be sure to enable "MCP (SSE)" service for best performance.
|
||||
|
||||
## 2. Steps to reproduce
|
||||
|
||||
|
@ -17,14 +17,13 @@ Then register and create a server-side API Key (AK) at [Baidu Maps Open Platform
|
|||
cd kag/examples/baidu_map_mcp
|
||||
```
|
||||
|
||||
### Step 2:Configure models
|
||||
### Step 2: Configure models
|
||||
|
||||
Update the generative model configurations ``chat_llm`` in [kag_config.yaml](./kag_config.yaml).
|
||||
|
||||
You need to fill in correct ``api_key`` and ``BAIDU_MAPS_API_KEY``。If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
|
||||
You need to fill in correct ``api_key`` and ``BAIDU_MAPS_API_KEY``. If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
|
||||
|
||||
|
||||
### Step 6:Execute tasks
|
||||
### Step 3: Execute the QA tasks
|
||||
|
||||
In the directory, execute [baidu_map_mcp_client.py](./baidu_map_mcp_client.py).
|
||||
|
||||
|
@ -33,10 +32,10 @@ python baidu_map_mcp_client.py
|
|||
```
|
||||
|
||||
Example problems:
|
||||
|
||||
1. What will the weather be like tomorrow in the West Lake District of Hangzhou?
|
||||
2. What is the self-driving route from Ant A space in Hangzhou to Ant S space in Shanghai?
|
||||
3. What is the latitude and longitude of Shanghai Hongqiao Railway Station?
|
||||
|
||||
After launch, enter the question you want to ask. We will retrieve the relevant information through Baidu Maps and return the results to you.
|
||||
|
||||
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
# KAG 示例: Baidu Map MCP
|
||||
|
||||
这里是一个Baidu Map MCP,将用户输入的搜索问题作为输入,返回百度地图相关的查询结果。在此处只是展示MCP的执行效果,如果需要链接到KAG上进行检索,请访问 OPENSPG 创建对应的知识库和应用,将MCP链接到KAG中并使用。
|
||||
|
||||
|
||||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
这里是一个 Baidu Map MCP,将用户输入的搜索问题作为输入,返回百度地图相关的查询结果。在此处只是展示 MCP 的执行效果,如果需要链接到 KAG 上进行检索,请访问 OpenSPG 创建对应的知识库和应用,将 MCP 链接到 KAG 中并使用。
|
||||
|
||||
## 1. 前置条件
|
||||
|
||||
参考文档 [快速开始](https://openspg.yuque.com/ndx6g9/0.6/quzq24g4esal7q17) 安装 KAG 及其依赖的 OpenSPG server,了解开发者模式 KAG 的使用流程。
|
||||
|
@ -26,7 +25,7 @@ cd kag/examples/baidu_map_mcp
|
|||
|
||||
您需要设置正确的 ``api_key`` 以及 ``BAIDU_MAPS_API_KEY`` 。如果使用的模型供应商和模型名与默认值不同,您还需要更新 ``base_url`` 和 ``model``。
|
||||
|
||||
### Step 6:执行 QA 任务
|
||||
### Step 3:执行 QA 任务
|
||||
|
||||
在目录中执行 [baidu_map_mcp_client.py](./baidu_map_mcp_client.py)。
|
||||
|
||||
|
@ -34,8 +33,10 @@ cd kag/examples/baidu_map_mcp
|
|||
python baidu_map_mcp_client.py
|
||||
```
|
||||
问题示例:
|
||||
1、杭州西湖区的明天的天气怎么样?
|
||||
2、从杭州市蚂蚁A空间回上海市蚂蚁S空间,自驾的路线是什么?
|
||||
3、上海虹桥火车站的位置经纬度是什么?
|
||||
|
||||
启动后请您输入想要询问的问题,我们会通过百度地图检索到相关的信息,然后将结果返还给您。
|
||||
1. 杭州西湖区的明天的天气怎么样?
|
||||
2. 从杭州市蚂蚁A空间回上海市蚂蚁S空间,自驾的路线是什么?
|
||||
3. 上海虹桥火车站的位置经纬度是什么?
|
||||
|
||||
启动后请您输入想要询问的问题,我们会通过百度地图检索到相关的信息,然后将结果返还给您。
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ async def chat_loop(client):
|
|||
|
||||
async def cleanup(client):
|
||||
"""Clean up resources"""
|
||||
await client.exit_stack.aclose()
|
||||
await client.mcp_client.exit_stack.aclose()
|
||||
|
||||
|
||||
async def main():
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
|
||||
|
|
|
@ -61,9 +61,3 @@ To delete the checkpoints, execute the following command.
|
|||
rm -rf ./builder/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -61,9 +61,3 @@ cd solver && python eval.py && cd ..
|
|||
rm -rf ./builder/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -88,14 +88,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -112,7 +112,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -143,7 +143,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
type: kag_output_executor
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
namespace BaiKe
|
||||
|
||||
Chunk(文本块): EntityType
|
||||
properties:
|
||||
content(内容): Text
|
||||
index: TextAndVector
|
||||
|
||||
ArtificialObject(人造物体): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Astronomy(天文学): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Building(建筑): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Creature(生物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Concept(概念): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Date(日期): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
GeographicLocation(地理位置): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Keyword(关键词): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Medicine(药物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
|
||||
NaturalScience(自然科学): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Organization(组织机构): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Person(人物): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Transport(运输): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Works(作品): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Others(其他): EntityType
|
||||
properties:
|
||||
desc(描述): Text
|
||||
index: TextAndVector
|
||||
semanticType(语义类型): Text
|
||||
index: Text
|
||||
|
||||
Event(事件): EventType
|
||||
properties:
|
||||
subject(主体): Person
|
||||
participants(参与者): Person
|
||||
constraint: MultiValue
|
||||
time(时间): Date
|
||||
location(地点): GeographicLocation
|
||||
abstract(摘要): Text
|
||||
index: TextAndVector
|
||||
type(事件类型): Text
|
||||
index: Text
|
||||
|
||||
|
||||
SemanticConcept(语义概念): EntityType
|
||||
properties:
|
||||
desc(内容): Text
|
||||
index: Text
|
|
@ -0,0 +1,18 @@
|
|||
# Copyright 2023 OpenSPG Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
# in compliance with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
# or implied.
|
||||
|
||||
"""
|
||||
{{namespace}}.schema:
|
||||
The MarkLang file for the schema of this project.
|
||||
You can execute `knext schema commit` to commit your schema to the SPG server.
|
||||
|
||||
|
||||
"""
|
|
@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
|
|||
@PromptABC.register("resp_simple")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n只输出答案,不需要输出额外的信息。"
|
||||
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
)
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
ckpt/
|
||||
csqa_ckpt/
|
||||
/cs.jsonl
|
||||
/solver/data/csqa_kag_answers.json
|
||||
/solver/csqa_qfs_res_*.json
|
||||
/solver/csqa_qfs_res_*.md
|
||||
/solver/csqa_res_*.json
|
||||
/solver/csqa_metrics_*.json
|
||||
/solver/benchmark.txt
|
||||
|
|
|
@ -59,7 +59,7 @@ Execute [indexer.py](./builder/indexer.py) in the [builder](./builder) directory
|
|||
cd builder && python indexer.py && cd ..
|
||||
```
|
||||
|
||||
### Step 7: Generate the answers
|
||||
### Step 7: Execute the QA tasks
|
||||
|
||||
Execute [eval.py](./solver/eval.py) in the [solver](./solver) directory to generate the answers.
|
||||
|
||||
|
@ -67,33 +67,12 @@ Execute [eval.py](./solver/eval.py) in the [solver](./solver) directory to gener
|
|||
cd solver && python eval.py && cd ..
|
||||
```
|
||||
|
||||
The results are saved to ``./solver/data/csqa_kag_answers.json``.
|
||||
|
||||
### Step 8: (Optional) Get the answers generated by other systems
|
||||
|
||||
Follow the LightRAG [Reproduce](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#reproduce) steps to generate answers to the questions and save the results to [./solver/data/csqa_lightrag_answers.json](./solver/data/csqa_lightrag_answers.json). Since a copy was committed, this step is optional.
|
||||
|
||||
### Step 9: Calculate the metrics
|
||||
|
||||
Update the LLM configurations in [summarization_metrics.py](./solver/summarization_metrics.py) and [factual_correctness.py](./solver/factual_correctness.py) and execute them to calculate the metrics.
|
||||
|
||||
```bash
|
||||
python ./solver/summarization_metrics.py
|
||||
python ./solver/factual_correctness.py
|
||||
```
|
||||
|
||||
### Step 10: (Optional) Cleanup
|
||||
### Step 8: (Optional) Cleanup
|
||||
|
||||
To delete the checkpoints, execute the following command.
|
||||
|
||||
```bash
|
||||
rm -rf ./builder/ckpt
|
||||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
rm -rf ./solver/csqa_ckpt
|
||||
```
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ knext schema commit
|
|||
cd builder && python indexer.py && cd ..
|
||||
```
|
||||
|
||||
### Step 7:生成答案
|
||||
### Step 7:执行 QA 任务
|
||||
|
||||
在 [solver](./solver) 目录执行 [eval.py](./solver/eval.py) 生成答案。
|
||||
|
||||
|
@ -67,33 +67,12 @@ cd builder && python indexer.py && cd ..
|
|||
cd solver && python eval.py && cd ..
|
||||
```
|
||||
|
||||
生成的结果被保存至 ``./solver/data/csqa_kag_answers.json``.
|
||||
|
||||
### Step 8:(可选)获取其他系统生成的答案
|
||||
|
||||
按 LightRAG [Reproduce](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#reproduce) 所述复现步骤生成问题的答案,将结果保存至 [./solver/data/csqa_lightrag_answers.json](./solver/data/csqa_lightrag_answers.json)。由于我们提交了一份 LightRAG 生成的答案,因此本步骤是可选的。
|
||||
|
||||
### Step 9:计算指标
|
||||
|
||||
更新 [summarization_metrics.py](./solver/summarization_metrics.py) 和 [factual_correctness.py](./solver/factual_correctness.py) 中的大模型配置并执行它们以计算指标。
|
||||
|
||||
```bash
|
||||
python ./solver/summarization_metrics.py
|
||||
python ./solver/factual_correctness.py
|
||||
```
|
||||
|
||||
### Step 10:(可选)清理
|
||||
### Step 8:(可选)清理
|
||||
|
||||
若要删除 checkpoint,可执行以下命令。
|
||||
|
||||
```bash
|
||||
rm -rf ./builder/ckpt
|
||||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
rm -rf ./solver/csqa_ckpt
|
||||
```
|
||||
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -89,14 +89,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -113,7 +113,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -144,7 +144,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
type: kag_output_executor
|
||||
|
|
|
@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
class CsQaEvaluator(EvalQa):
|
||||
def __init__(self, solver_pipeline_name="solver_pipeline"):
|
||||
self.task_name = "musique"
|
||||
self.task_name = "csqa"
|
||||
super().__init__(self.task_name, solver_pipeline_name)
|
||||
self.solver_pipeline_name = solver_pipeline_name
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
ckpt/
|
|
@ -3,7 +3,7 @@
|
|||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
This example provides a case of knowledge injection in the medical domain, where the nodes of the domain knowledge graph are medical terms, and the relationships are defined as "isA." The document contains an introduction to a selection of medical terms.
|
||||
This example provides a case of knowledge injection in the medical domain, where the nodes of the domain knowledge graph are medical terms, and the relationships are defined as "isA". The document contains an introduction to a selection of medical terms.
|
||||
|
||||
## 1. Precondition
|
||||
|
||||
|
@ -33,13 +33,14 @@ knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
|
|||
|
||||
### Step 4: Commit the schema
|
||||
|
||||
Execute the following command to commit the schema [TwoWiki.schema](./schema/TwoWiki.schema).
|
||||
Execute the following command to commit the schema [DomainKG.schema](./schema/DomainKG.schema).
|
||||
|
||||
```bash
|
||||
knext schema commit
|
||||
```
|
||||
|
||||
### Step 5: Build the knowledge graph
|
||||
|
||||
We first need to inject the domain knowledge graph into the graph database. This allows the PostProcessor component to link the extracted nodes with the nodes of the domain knowledge graph, thereby standardizing them during the construction of the graph from unstructured documents.
|
||||
|
||||
Execute [injection.py](./builder/injection.py) in the [builder](./builder) directory to inject the domain KG.
|
||||
|
@ -50,14 +51,12 @@ cd builder && python injection.py && cd ..
|
|||
|
||||
Note that KAG provides a special implementation of the ``KAGBuilderChain`` for domain knowledge graph injection, known as the ``DomainKnowledgeInjectChain``, which is registered under the name ``domain_kg_inject_chain``. Since domain knowledge injection does not involve scanning files or directories, you can directly call the ``invoke`` interface of the chain to initiate the task.
|
||||
|
||||
|
||||
Next, execute [indexer.py](./builder/indexer.py) in the [builder](./builder) directory to build the KG from the unstructured document.
|
||||
|
||||
```bash
|
||||
cd builder && python indexer.py && cd ..
|
||||
```
|
||||
|
||||
|
||||
### Step 6: Execute the QA tasks
|
||||
|
||||
Execute [qa.py](./solver/qa.py) in the [solver](./solver) directory to generate the answer to the question.
|
||||
|
@ -75,8 +74,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
本示例提供了一个医疗领域知识注入的案例,其中领域知识图谱的节点为医学名词,关系为isA。文档内容为部分医学名词的介绍。
|
||||
本示例提供了一个医疗领域知识注入的案例,其中领域知识图谱的节点为医学名词,关系为 isA。文档内容为部分医学名词的介绍。
|
||||
|
||||
|
||||
## 1. 前置条件
|
||||
|
@ -34,7 +34,7 @@ knext project restore --host_addr http://127.0.0.1:8887 --proj_path .
|
|||
|
||||
### Step 4:提交 schema
|
||||
|
||||
执行以下命令提交 schema [TwoWiki.schema](./schema/TwoWiki.schema)。
|
||||
执行以下命令提交 schema [DomainKG.schema](./schema/DomainKG.schema)。
|
||||
|
||||
```bash
|
||||
knext schema commit
|
||||
|
@ -42,15 +42,14 @@ knext schema commit
|
|||
|
||||
### Step 5:构建知识图谱
|
||||
|
||||
|
||||
我们首先需要将领域知识图谱注入到图数据库中,这样在对非结构化文档进行图谱构建的时候,PostProcessor组件可以将抽取出的节点与领域知识图谱节点进行链指(标准化)。
|
||||
我们首先需要将领域知识图谱注入到图数据库中,这样在对非结构化文档进行图谱构建的时候,PostProcessor 组件可以将抽取出的节点与领域知识图谱节点进行链指(标准化)。
|
||||
在 [builder](./builder) 目录执行 [injection.py](./builder/injection.py) ,注入图数据。
|
||||
|
||||
```bash
|
||||
cd builder && python injection.py && cd ..
|
||||
```
|
||||
|
||||
注意,KAG为领域知识图谱注入提供了一个特殊的KAGBuilderChain实现,即DomainKnowledgeInjectChain,其注册名为domain_kg_inject_chain。由于领域知识注入不涉及到扫描文件或目录,可以直接调用builder chain 的invoke接口启动任务。
|
||||
注意,KAG为领域知识图谱注入提供了一个特殊的 ``KAGBuilderChain`` 实现,即 ``DomainKnowledgeInjectChain``,其注册名为 ``domain_kg_inject_chain``。由于领域知识注入不涉及到扫描文件或目录,可以直接调用 builder chain 的 ``invoke`` 接口启动任务。
|
||||
|
||||
接下来,在 [builder](./builder) 目录执行 [indexer.py](./builder/indexer.py) 构建知识图谱。
|
||||
|
||||
|
@ -75,8 +74,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -110,14 +110,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -134,7 +134,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -165,7 +165,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
type: kag_output_executor
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
# KAG Example: Google Web Search MCP
|
||||
|
||||
Here is a Google Web Search MCP that takes the search question entered by the user as input and returns the relevant web page. Here, we only show the execution effect of MCP. If you need to link to KAG for retrieval, please visit OPENSPG to create the corresponding knowledge base and application, and link MCP to KAG and use it.
|
||||
|
||||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
This example provides a Google Web Search MCP that takes the search question entered by the user as input and returns the relevant web pages. It only demonstrates how the MCP runs on its own. If you need to use it for retrieval in KAG, please visit OpenSPG to create the corresponding knowledge base and application, and then link the MCP to KAG.
|
||||
|
||||
## 1. Precondition
|
||||
|
||||
|
@ -18,14 +17,13 @@ Please refer to [Quick Start](https://openspg.yuque.com/ndx6g9/cwh47i/rs7gr8g4s5
|
|||
cd kag/examples/google_web_search_mcp
|
||||
```
|
||||
|
||||
### Step 2:Configure models
|
||||
### Step 2: Configure models
|
||||
|
||||
Update the generative model configurations ``chat_llm`` in [kag_config.yaml](./kag_config.yaml).
|
||||
|
||||
You need to fill in correct ``api_key``。If your model providers and model names are different from the default values, you also need to update ``base_url`` and ``model``.
|
||||
You need to fill in the correct ``api_key``. If your model provider and model name are different from the default values, you also need to update ``base_url`` and ``model``.
|
||||
|
||||
|
||||
### Step 6:Execute tasks
|
||||
### Step 3: Execute the QA tasks
|
||||
|
||||
In the example directory, execute [google_web_search_client.py](./google_web_search_client.py).
|
||||
|
||||
|
@ -34,7 +32,9 @@ python google_web_search_client.py
|
|||
```
|
||||
|
||||
Example problems:
|
||||
|
||||
1. Why is the sky blue?
|
||||
2. What is the Tyndall effect?
|
||||
|
||||
After launch, please input the questions you want to ask, we will retrieve the relevant web page through google, and then return the results to you.
|
||||
After launch, enter the question you want to ask. The relevant web pages are retrieved through Google, and the results are returned to you.
|
||||
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
# KAG 示例: Google Web Search MCP
|
||||
|
||||
这里是一个Google Web Search MCP,将用户输入的搜索问题作为输入,返回相关的网页。在此处只是展示MCP的执行效果,如果需要链接到KAG上进行检索,请访问 OPENSPG 创建对应的知识库和应用,将MCP链接到KAG中并使用。
|
||||
|
||||
[English](./README.md) |
|
||||
[简体中文](./README_cn.md)
|
||||
|
||||
这里是一个 Google Web Search MCP,将用户输入的搜索问题作为输入,返回相关的网页。在此处只是展示 MCP 的执行效果,如果需要链接到 KAG 上进行检索,请访问 OpenSPG 创建对应的知识库和应用,将 MCP 链接到 KAG 中并使用。
|
||||
|
||||
## 1. 前置条件
|
||||
|
||||
|
@ -24,7 +23,7 @@ cd kag/examples/google_web_search_mcp
|
|||
|
||||
您需要设置正确的 ``api_key``。如果使用的模型供应商和模型名与默认值不同,您还需要更新 ``base_url`` 和 ``model``。
|
||||
|
||||
### Step 6:执行 QA 任务
|
||||
### Step 3:执行 QA 任务
|
||||
|
||||
在目录中执行 [google_web_search_client.py](./google_web_search_client.py)。
|
||||
|
||||
|
@ -33,7 +32,9 @@ python google_web_search_client.py
|
|||
```
|
||||
|
||||
问题示例:
|
||||
1、天空为什么是蓝色的?
|
||||
2、什么是丁达尔效应?
|
||||
|
||||
启动后请您输入想要询问的问题,我们会通过google检索到相关的网页,然后将结果返还给您。
|
||||
1. 天空为什么是蓝色的?
|
||||
2. 什么是丁达尔效应?
|
||||
|
||||
启动后请您输入想要询问的问题,我们会通过 Google 检索到相关的网页,然后将结果返还给您。
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ async def chat_loop(client):
|
|||
|
||||
async def cleanup(client):
|
||||
"""Clean up resources"""
|
||||
await client.exit_stack.aclose()
|
||||
await client.mcp_client.exit_stack.aclose()
|
||||
|
||||
|
||||
async def main():
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
#------------project configuration start----------------#
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
api_key:
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen-max-latest
|
||||
|
||||
mcp_executor:
|
||||
|
|
|
@ -100,9 +100,3 @@ To delete the checkpoint, execute the following command.
|
|||
rm -rf ./builder/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -98,9 +98,3 @@ cd solver && python evaForMedicine.py && cd ..
|
|||
rm -rf ./builder/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -115,14 +115,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -139,7 +139,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -170,7 +170,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
|
|
|
@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
@PromptABC.register("example_resp_generator")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n输出答案,并且给出理由。"
|
||||
"\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
)
|
||||
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
"\nGive me the answer and why."
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -58,14 +58,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -82,7 +82,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -113,7 +113,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
|
|
|
@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
@PromptABC.register("resp_riskmining")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n输出答案,并且给出理由。"
|
||||
"\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
)
|
||||
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
"\nGive me the answer and why."
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
#------------project configuration start----------------#
|
||||
openie_llm: &openie_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-7b-instruct-1m
|
||||
enable_check: false
|
||||
|
||||
chat_llm: &chat_llm
|
||||
type: maas
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1/
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
api_key: key
|
||||
model: qwen2.5-72b-instruct
|
||||
enable_check: false
|
||||
|
||||
vectorize_model: &vectorize_model
|
||||
api_key: key
|
||||
base_url: https://api.siliconflow.cn/v1/
|
||||
base_url: https://api.siliconflow.cn/v1
|
||||
model: BAAI/bge-m3
|
||||
type: openai
|
||||
vector_dimensions: 1024
|
||||
|
@ -57,14 +57,14 @@ kg_cs: &kg_cs
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.9
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
kg_fr: &kg_fr
|
||||
type: kg_fr_open_spg
|
||||
top_k: 20
|
||||
graph_api: *graph_api
|
||||
search_api: *search_api
|
||||
vectorize_model: *vectorize_model
|
||||
vectorize_model: *vectorize_model
|
||||
path_select:
|
||||
type: fuzzy_one_hop_select
|
||||
llm_client: *openie_llm
|
||||
|
@ -81,7 +81,7 @@ kg_fr: &kg_fr
|
|||
search_api: *search_api
|
||||
recognition_threshold: 0.8
|
||||
exclude_types:
|
||||
- "Chunk"
|
||||
- Chunk
|
||||
|
||||
rc: &rc
|
||||
type: rc_open_spg
|
||||
|
@ -112,7 +112,7 @@ kag_hybrid_executor: &kag_hybrid_executor_conf
|
|||
- *rc
|
||||
merger:
|
||||
type: kag_merger
|
||||
enable_summary: true
|
||||
enable_summary: true
|
||||
|
||||
|
||||
kag_output_executor: &kag_output_executor_conf
|
||||
|
|
|
@ -13,7 +13,7 @@ def read_dsl_files(directory):
|
|||
for filename in os.listdir(directory):
|
||||
if filename.endswith(".dsl"):
|
||||
file_path = os.path.join(directory, filename)
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
with open(file_path, "r", encoding="utf-8", newline="\n") as file:
|
||||
content = file.read()
|
||||
dsl_contents.append(content)
|
||||
|
||||
|
|
|
@ -8,11 +8,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
@PromptABC.register("resp_supplychain")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n输出答案,并且给出理由。"
|
||||
"\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
)
|
||||
template_zh = "基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
"\nGive me the answer and why."
|
||||
|
|
|
@ -69,12 +69,6 @@ To delete the checkpoints, execute the following command.
|
|||
rm -rf ./src/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8: (Optional) Try the larger datasets
|
||||
|
||||
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.
|
||||
|
|
|
@ -69,12 +69,6 @@ cd src && python eval.py --qa_file ./data/qa_sub.json && cd ..
|
|||
rm -rf ./src/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8:(可选)尝试更大的数据集
|
||||
|
||||
从 Step 1 重新开始,修改 [indexer.py](./src/indexer.py) 和 [eval.py](./src/eval.py) 以尝试更大的数据集。
|
||||
|
|
|
@ -73,8 +73,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and associated knowledge graph, execute a command similar to the following, replacing the OpenSPG server address and KAG project id with actual values:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
|
@ -94,9 +94,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -35,9 +35,7 @@ def extract_admin_types(input_file, output_file):
|
|||
if len(parts) >= 3 and parts[1] == "type" and "行政区" in parts[2]:
|
||||
admin_name = parts[0]
|
||||
admin_type = parts[2]
|
||||
admin_id = admin_id_map.get(
|
||||
admin_name, ""
|
||||
) # 获取对应的ID,如果没有则为空字符串
|
||||
admin_id = admin_id_map.get(admin_name, "") # 获取对应的ID,如果没有则为空字符串
|
||||
|
||||
admin_types.append(
|
||||
{
|
||||
|
|
|
@ -118,9 +118,7 @@ if __name__ == "__main__":
|
|||
output_file = os.path.join(dir_path, "图书馆.csv")
|
||||
parser = argparse.ArgumentParser(description="处理图书馆数据")
|
||||
parser.add_argument("--input_file", default=input_file, help="输入文件路径")
|
||||
parser.add_argument(
|
||||
"-o", "--output", default=output_file, help="输出文件路径,默认覆盖原文件"
|
||||
)
|
||||
parser.add_argument("-o", "--output", default=output_file, help="输出文件路径,默认覆盖原文件")
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--format",
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
from kag.open_benchmark.common_component.llm_genereator_with_thought import LLMGeneratorWithThought
|
||||
from kag.open_benchmark.common_component.llm_genereator_with_thought import (
|
||||
LLMGeneratorWithThought,
|
||||
)
|
||||
from kag.open_benchmark.common_component.planner_prompt import StaticPlanningPrompt
|
||||
from kag.open_benchmark.common_component.resp_generator import RespGenerator
|
||||
from kag.open_benchmark.common_component.evidence_based_reasoner import EvidenceBasedReasoner
|
||||
from kag.open_benchmark.common_component.evidence_based_reasoner import (
|
||||
EvidenceBasedReasoner,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"EvidenceBasedReasoner",
|
||||
"LLMGeneratorWithThought",
|
||||
"StaticPlanningPrompt",
|
||||
"RespGenerator"
|
||||
"RespGenerator",
|
||||
]
|
||||
|
|
|
@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
|
|||
@PromptABC.register("resp_simple")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n只输出答案,不需要输出额外的信息。"
|
||||
"\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$content'\n问题:'$query'"
|
||||
)
|
||||
# template_en = (
|
||||
# "Answer the question based on the given reference."
|
||||
|
|
|
@ -69,12 +69,6 @@ To delete the checkpoints, execute the following command.
|
|||
rm -rf ./src/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8: (Optional) Try the larger datasets
|
||||
|
||||
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.
|
||||
|
|
|
@ -69,12 +69,6 @@ cd src && python eva.py --qa_file ./data/qa_sub.json && cd ..
|
|||
rm -rf ./src/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8:(可选)尝试更大的数据集
|
||||
|
||||
从 Step 1 重新开始,修改 [indexer.py](./src/indexer.py) 和 [eval.py](./src/eval.py) 以尝试更大的数据集。
|
||||
|
|
|
@ -70,12 +70,6 @@ To delete the checkpoints, execute the following command.
|
|||
rm -rf ./src/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and related knowledge graph, execute the following similar command. Replace the OpenSPG server address and KAG project id with actual values.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8: (Optional) Try the larger datasets
|
||||
|
||||
Restart from Step 1 and modify [indexer.py](./src/indexer.py) and [eval.py](./src/eval.py) to try the larger datasets.
|
||||
|
|
|
@ -67,13 +67,6 @@ cd src && python eval.py --qa_file ./data/qa_sub.json && cd ..
|
|||
|
||||
```bash
|
||||
rm -rf ./src/ckpt
|
||||
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
### Step 8:(可选)尝试更大的数据集
|
||||
|
|
|
@ -32,7 +32,11 @@ class EvaForMusique(EvalQa):
|
|||
gold_list = []
|
||||
question_decomposition = sample["question_decomposition"]
|
||||
for qd in question_decomposition:
|
||||
gold_list.append(processing_phrases(paragraphs[qd["paragraph_support_idx"]]["title"]).replace(" ", ""))
|
||||
gold_list.append(
|
||||
processing_phrases(
|
||||
paragraphs[qd["paragraph_support_idx"]]["title"]
|
||||
).replace(" ", "")
|
||||
)
|
||||
predictionlist = []
|
||||
for ref in references:
|
||||
predictionlist.append(
|
||||
|
|
|
@ -77,8 +77,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
To delete the KAG project and associated knowledge graph, execute a command similar to the following, replacing the OpenSPG server address and KAG project id with actual values:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
|
@ -78,9 +78,3 @@ rm -rf ./builder/ckpt
|
|||
rm -rf ./solver/ckpt
|
||||
```
|
||||
|
||||
若要删除 KAG 项目及关联的知识图谱,可执行以下类似命令,将 OpenSPG server 地址和 KAG 项目 id 换为实际的值。
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8887/project/api/delete?projectId=1
|
||||
```
|
||||
|
||||
|
|
|
@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
|
|||
@PromptABC.register("resp_simple")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n只输出答案,不需要输出额外的信息。"
|
||||
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
"基于给定的引用信息回答问题。" "\n只输出答案,不需要输出额外的信息。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
)
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
|
|
|
@ -100,9 +100,7 @@ class PrqaExecutor(ExecutorABC):
|
|||
completion_1 = self.send_cypher_messages(message_list)
|
||||
|
||||
if not completion_1.tool_calls:
|
||||
raise ValueError(
|
||||
f"{question} 查询失败,此时tool_calls 为空或为 None,无法继续处理"
|
||||
)
|
||||
raise ValueError(f"{question} 查询失败,此时tool_calls 为空或为 None,无法继续处理")
|
||||
tool = completion_1.tool_calls[0]
|
||||
args = json.loads(tool.function.arguments)
|
||||
cypher_query = args.get("cypher_query")
|
||||
|
|
|
@ -47,8 +47,7 @@ class PrqaPlanner(PlannerABC):
|
|||
instruct_content = message["content"]
|
||||
if "上一次的判断错误,请重新思考" not in instruct_content:
|
||||
updated_instruct = (
|
||||
instruct_content.strip()
|
||||
+ "\n上一次的判断错误,请重新思考。"
|
||||
instruct_content.strip() + "\n上一次的判断错误,请重新思考。"
|
||||
)
|
||||
message["content"] = updated_instruct
|
||||
break # 修改完成后直接退出循环
|
||||
|
|
|
@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
|
|||
@PromptABC.register("prqa_generator")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n输出答案,并且给出理由。"
|
||||
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
"基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
)
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
|
|
|
@ -86,7 +86,9 @@ class KagOutputExecutor(ExecutorABC):
|
|||
result = []
|
||||
for alias in logic_node.alias_name_set:
|
||||
if context.variables_graph.has_alias(alias.alias_name):
|
||||
alias_answer = context.variables_graph.get_answered_alias(alias.alias_name)
|
||||
alias_answer = context.variables_graph.get_answered_alias(
|
||||
alias.alias_name
|
||||
)
|
||||
if alias_answer:
|
||||
result.append(alias_answer)
|
||||
if not result:
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
|
||||
import json
|
||||
import sys
|
||||
from contextlib import AsyncExitStack
|
||||
from typing import Optional, Dict
|
||||
import asyncio
|
||||
|
@ -55,7 +56,7 @@ class MCPClient:
|
|||
if not (is_python or is_js):
|
||||
raise ValueError("Server script must be a .py or .js file")
|
||||
|
||||
command = "python3" if is_python else "node"
|
||||
command = sys.executable if is_python else "node"
|
||||
server_params = StdioServerParameters(
|
||||
command=command, args=[server_script_path], env=env
|
||||
)
|
||||
|
|
|
@ -175,16 +175,22 @@ async def do_index_pipeline(query, qa_config, reporter):
|
|||
return await pipeline.ainvoke(query, reporter=reporter)
|
||||
|
||||
|
||||
async def do_qa_pipeline(use_pipeline, query, qa_config, reporter, task_id, kb_project_ids):
|
||||
async def do_qa_pipeline(
|
||||
use_pipeline, query, qa_config, reporter, task_id, kb_project_ids
|
||||
):
|
||||
retriever_configs = []
|
||||
kb_configs = qa_config.get("kb", [])
|
||||
for kb_project_id in kb_project_ids:
|
||||
kb_task_project_id = f"{task_id}_{kb_project_id}"
|
||||
try:
|
||||
kag_config = KAGConfigAccessor.get_config(kb_task_project_id)
|
||||
matched_kb = next((kb for kb in kb_configs if kb.get("id") == kb_project_id), None)
|
||||
matched_kb = next(
|
||||
(kb for kb in kb_configs if kb.get("id") == kb_project_id), None
|
||||
)
|
||||
if not matched_kb:
|
||||
reporter.warning(f"Knowledge base with id {kb_project_id} not found in qa_config['kb']")
|
||||
reporter.warning(
|
||||
f"Knowledge base with id {kb_project_id} not found in qa_config['kb']"
|
||||
)
|
||||
continue
|
||||
|
||||
for index_name in matched_kb.get("index_list", []):
|
||||
|
@ -192,13 +198,16 @@ async def do_qa_pipeline(use_pipeline, query, qa_config, reporter, task_id, kb_p
|
|||
{
|
||||
"type": index_name,
|
||||
"llm_config": qa_config.get("llm", {}),
|
||||
"vectorize_model_config": kag_config.all_config.get("vectorize_model", {}),
|
||||
"vectorize_model_config": kag_config.all_config.get(
|
||||
"vectorize_model", {}
|
||||
),
|
||||
}
|
||||
)
|
||||
retriever_configs.extend(
|
||||
index_manager.build_retriever_config(
|
||||
qa_config.get("llm", {}), kag_config.all_config.get("vectorize_model", {}),
|
||||
kag_qa_task_config_key=kb_task_project_id
|
||||
qa_config.get("llm", {}),
|
||||
kag_config.all_config.get("vectorize_model", {}),
|
||||
kag_qa_task_config_key=kb_task_project_id,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
|
@ -260,7 +269,9 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
|
|||
|
||||
global_config = kb.get(KAGConstants.PROJECT_CONFIG_KEY, {})
|
||||
kb_conf.global_config.initialize(**global_config)
|
||||
project_client = ProjectClient(host_addr=host_addr, project_id=kb_project_id)
|
||||
project_client = ProjectClient(
|
||||
host_addr=host_addr, project_id=kb_project_id
|
||||
)
|
||||
project = project_client.get_by_id(kb_project_id)
|
||||
|
||||
kb_conf.global_config.project_id = kb_project_id
|
||||
|
@ -292,14 +303,15 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
|
|||
try:
|
||||
await reporter.start()
|
||||
if use_pipeline == "index_pipeline":
|
||||
answer = await do_index_pipeline(
|
||||
query, main_config, reporter
|
||||
)
|
||||
answer = await do_index_pipeline(query, main_config, reporter)
|
||||
else:
|
||||
answer = await do_qa_pipeline(
|
||||
use_pipeline, query, main_config, reporter,
|
||||
use_pipeline,
|
||||
query,
|
||||
main_config,
|
||||
reporter,
|
||||
task_id=task_id,
|
||||
kb_project_ids=kb_project_ids
|
||||
kb_project_ids=kb_project_ids,
|
||||
)
|
||||
|
||||
reporter.add_report_line("answer", "Final Answer", answer, "FINISH")
|
||||
|
@ -324,15 +336,15 @@ async def qa(task_id, query, project_id, host_addr, app_id, params={}):
|
|||
|
||||
class SolverMain:
|
||||
def invoke(
|
||||
self,
|
||||
project_id: int,
|
||||
task_id: int,
|
||||
query: str,
|
||||
session_id: str = "0",
|
||||
is_report=True,
|
||||
host_addr="http://127.0.0.1:8887",
|
||||
params=None,
|
||||
app_id="",
|
||||
self,
|
||||
project_id: int,
|
||||
task_id: int,
|
||||
query: str,
|
||||
session_id: str = "0",
|
||||
is_report=True,
|
||||
host_addr="http://127.0.0.1:8887",
|
||||
params=None,
|
||||
app_id="",
|
||||
):
|
||||
answer = None
|
||||
if params is None:
|
||||
|
@ -365,9 +377,7 @@ if __name__ == "__main__":
|
|||
# "4200052", "https://spg-pre.alipay.com"
|
||||
# )
|
||||
config = {}
|
||||
params = {
|
||||
"config": config
|
||||
}
|
||||
params = {"config": config}
|
||||
res = SolverMain().invoke(
|
||||
2100007,
|
||||
11200009,
|
||||
|
|
|
@ -39,9 +39,7 @@ class QueryRewritePrompt(PromptABC):
|
|||
\nexample字段中给出了一个简单的示例供参考。请直接返回改写后的问题字符串,正如example的output字段一样。
|
||||
""",
|
||||
"example": {
|
||||
"input": {
|
||||
"query": "{{0.output}}获得的奖项中,有哪些是{{1.output}}没有获得过的"
|
||||
},
|
||||
"input": {"query": "{{0.output}}获得的奖项中,有哪些是{{1.output}}没有获得过的"},
|
||||
"context": {
|
||||
"0": {
|
||||
"output": [
|
||||
|
|
|
@ -9,9 +9,7 @@ logger = logging.getLogger(__name__)
|
|||
@PromptABC.register("default_resp_generator")
|
||||
class RespGenerator(PromptABC):
|
||||
template_zh = (
|
||||
"基于给定的引用信息回答问题。"
|
||||
"\n输出答案,并且给出理由。"
|
||||
"\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
"基于给定的引用信息回答问题。" "\n输出答案,并且给出理由。" "\n给定的引用信息:'$memory'\n问题:'$instruction'"
|
||||
)
|
||||
template_en = (
|
||||
"Answer the question based on the given reference."
|
||||
|
|
|
@ -29,7 +29,7 @@ def execute_reasoner_job(file, dsl, output=None, proj_path="./"):
|
|||
"""
|
||||
client = ReasonerClient(host_addr=env.host_addr, project_id=int(env.project_id))
|
||||
if file and not dsl:
|
||||
with open(file, "r") as f:
|
||||
with open(file, "r", encoding="utf-8", newline="\n") as f:
|
||||
dsl_content = f.read()
|
||||
elif not file and dsl:
|
||||
dsl_content = dsl
|
||||
|
|
|
@ -436,7 +436,7 @@ class SPGConceptRuleMarkLang:
|
|||
Load and then parse the script file
|
||||
"""
|
||||
|
||||
file = open(filename, "r", encoding="utf-8")
|
||||
file = open(filename, "r", encoding="utf-8", newline="\n")
|
||||
lines = file.read().splitlines()
|
||||
last_indent_level = 0
|
||||
|
||||
|
|
Loading…
Reference in New Issue