add hpcAppCluster

Signed-off-by: jagger <cossjie@foxmail.com>
This commit is contained in:
jagger 2025-06-05 17:02:33 +08:00
parent b61842d80b
commit 583e07bc08
7 changed files with 171 additions and 40 deletions

View File

@ -205,22 +205,17 @@ type (
CreateTime string `json:"create_time"`
UpdateTime string `json:"update_time"`
}
// App string `json:"app"`
//Backend string `json:"backend" binding:"required,oneof=slurm sugonac"` // 后端类型slurm/sugonac
//Partition string // 分区/队列名称
//TaskId string `json:"taskId"`
//ClusterId string `json:"clusterId"`
//JobName string `json:"jobName"`
//ScriptContent string `json:"scriptContent"`
//ScriptDir string `json:"scriptDir"`
//Parameters map[string]string `json:"parameters"`
//TimeLimit time.Duration // 作业时间限制
SubmitHpcTaskReq {
App string `json:"app"`
ClusterId string `json:"clusterId"`
JobName string `json:"jobName"`
ScriptContent string `json:"scriptContent"`
Parameters map[string]string `json:"parameters"`
Backend string `json:"backend"`
Backend string `json:"backend"`
}
)
)
// HpcAppClusterReq is the request for the "clusters supporting an HPC
// application" query; App is bound from the "app" form field.
type HpcAppClusterReq {
App string `form:"app"`
}

View File

@ -186,7 +186,7 @@ service pcm {
@doc "同步指定资源规格"
@handler syncResourceSpecHandler
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (CommonResp)
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (ListResult)
@doc "获取指定资源规格详情"
@handler detailResourceSpecHandler
@ -251,6 +251,10 @@ service pcm {
@doc "超算任务日志"
@handler getHpcTaskLogHandler
get /hpc/jobLogs/:taskId (HpcTaskLogReq) returns (HpcTaskLogResp)
@doc "查询超算应用支持的集群"
@handler getHpcAppClusterHandler
get /hpc/getHpcAppCluster (HpcAppClusterReq) returns (CommonResp)
}
//cloud二级接口

View File

@ -0,0 +1,24 @@
package hpc
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/hpc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
// GetHpcAppClusterHandler returns the HTTP handler that parses an
// HpcAppClusterReq from the incoming request, delegates to
// GetHpcAppClusterLogic, and writes the outcome through the shared result
// helpers. A request that fails to parse is answered with a parameter error
// and never reaches the logic layer.
func GetHpcAppClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var params types.HpcAppClusterReq
		parseErr := httpx.Parse(r, &params)
		if parseErr != nil {
			result.ParamErrorResult(r, w, parseErr)
			return
		}

		// A fresh logic object is bound to this request's context.
		logic := hpc.NewGetHpcAppClusterLogic(r.Context(), svcCtx)
		data, err := logic.GetHpcAppCluster(&params)
		result.HttpResult(r, w, data, err)
	})
}

View File

@ -724,6 +724,12 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/hpc/commitHpcTask",
Handler: hpc.CommitHpcTaskHandler(serverCtx),
},
{
// 查询超算应用支持的集群
Method: http.MethodGet,
Path: "/hpc/getHpcAppCluster",
Handler: hpc.GetHpcAppClusterHandler(serverCtx),
},
{
// 超算查询任务列表
Method: http.MethodGet,

View File

@ -13,6 +13,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"regexp"
"strconv"
"strings"
"sync"
@ -85,13 +86,67 @@ func (l *CommitHpcTaskLogic) getClusterInfo(clusterID string) (*types.ClusterInf
return &clusterInfo, &adapterInfo, nil
}
// createFuncMap returns the custom helper functions that are registered on a
// job-script template before it is parsed. The keys are the names by which
// the helpers are invoked from inside a template.
func createFuncMap() template.FuncMap {
	helpers := make(template.FuncMap, 4)
	helpers["regexMatch"] = regexMatch
	helpers["required"] = required
	helpers["error"] = errorHandler
	helpers["default"] = defaultHandler
	return helpers
}
// templateCallErrPattern matches the "error calling <func>: <message>" tail
// that the template engine appends when a template function returns an error.
// It is compiled once at package scope instead of on every call.
var templateCallErrPattern = regexp.MustCompile(`error calling \w+: (.*)$`)

// extractUserError strips the template engine's positional prefix from an
// execution error and returns only the message produced by the failing
// template function.
//
// If originalErr does not have the "error calling <func>: ..." shape it is
// returned unchanged. A nil originalErr yields nil rather than panicking.
func extractUserError(originalErr error) error {
	if originalErr == nil {
		return nil
	}
	// Try to match the error format returned by the template engine.
	if m := templateCallErrPattern.FindStringSubmatch(originalErr.Error()); len(m) > 1 {
		return errors.New(m[1])
	}
	return originalErr
}
// regexMatch compiles pattern and hands back the resulting regular
// expression. It relies on regexp.MustCompile, so an invalid pattern panics
// instead of returning an error.
func regexMatch(pattern string) *regexp.Regexp {
	compiled := regexp.MustCompile(pattern)
	return compiled
}
// required enforces that a mandatory value is present. A nil value or an
// empty string is rejected with an error carrying msg; any other value is
// passed through untouched.
func required(msg string, val interface{}) (interface{}, error) {
	missing := val == nil
	if !missing {
		// Only a plain empty string counts as "not provided".
		if s, isString := val.(string); isString && s == "" {
			missing = true
		}
	}
	if missing {
		return nil, errors.New(msg)
	}
	return val, nil
}
// errorHandler always fails: it returns an empty string together with an
// error whose text is msg, letting a template abort with a custom message.
func errorHandler(msg string) (string, error) {
	failure := errors.New(msg)
	return "", failure
}
// defaultHandler substitutes defaultVal when val is "unset": a nil value, an
// empty string, or an int equal to zero. Every other value, including zero
// values of other types, is returned as is.
func defaultHandler(defaultVal interface{}, val interface{}) interface{} {
	if val == nil {
		return defaultVal
	}
	if text, isString := val.(string); isString && text == "" {
		return defaultVal
	}
	if number, isInt := val.(int); isInt && number == 0 {
		return defaultVal
	}
	// Checks for further types can be added here as needed.
	return val
}
func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobRequest) (string, error) {
// 使用缓存模板
tmpl, ok := templateCache.Load(templateContent)
if !ok {
parsedTmpl, err := template.New("jobScript").Parse(templateContent)
parsedTmpl, err := template.New("slurmTemplate").Funcs(createFuncMap()).Parse(templateContent)
if err != nil {
return "", fmt.Errorf("template parse failed: %w", err)
return "", err
}
templateCache.Store(templateContent, parsedTmpl)
tmpl = parsedTmpl
@ -104,7 +159,8 @@ func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobReq
var buf strings.Builder
if err := tmpl.(*template.Template).Execute(&buf, params); err != nil {
return "", fmt.Errorf("template render failed: %w", err)
log.Error().Err(err).Msg("模板渲染失败")
return "", extractUserError(err)
}
return buf.String(), nil
}
@ -235,36 +291,39 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
if err != nil {
return nil, err
}
scriptContent := req.ScriptContent
if scriptContent == "" {
// 获取模板
var templateInfo types.HpcAppTemplateInfo
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
if req.OperateType != "" {
tx.Where("app_type = ?", req.OperateType)
}
if err := tx.First(&templateInfo).Error; err != nil {
return nil, fmt.Errorf("failed to get template: %w", err)
}
// 获取模板
var templateInfo types.HpcAppTemplateInfo
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
if req.OperateType != "" {
tx.Where("app_type = ?", req.OperateType)
}
if err := tx.First(&templateInfo).Error; err != nil {
return nil, fmt.Errorf("failed to get template: %w", err)
// 转换请求参数
jobRequest, err := ConvertToJobRequest(req)
if err != nil {
return nil, err
}
// 渲染脚本
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
if err != nil {
return nil, err
}
scriptContent = script
}
// 转换请求参数
jobRequest, err := ConvertToJobRequest(req)
if err != nil {
return nil, fmt.Errorf("invalid job request: %w", err)
}
// 渲染脚本
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
if err != nil {
return nil, fmt.Errorf("script rendering failed: %w", err)
}
q, _ := jsoniter.MarshalToString(script)
q, _ := jsoniter.MarshalToString(scriptContent)
submitQ := types.SubmitHpcTaskReq{
App: req.App,
ClusterId: req.ClusterId,
JobName: jobName,
ScriptContent: script,
ScriptContent: scriptContent,
Parameters: req.Parameters,
Backend: req.Backend,
}
@ -276,7 +335,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
jobID := resp.Data.JobInfo["jobId"]
workDir := resp.Data.JobInfo["jobDir"]
taskID, err := l.SaveHpcTaskToDB(req, script, jobID, workDir)
taskID, err := l.SaveHpcTaskToDB(req, scriptContent, jobID, workDir)
if err != nil {
log.Error().Msgf("Failed to save task to DB: %v", err)
return nil, fmt.Errorf("db save failed: %w", err)

View File

@ -0,0 +1,39 @@
package hpc
import (
"context"
"github.com/rs/zerolog/log"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// GetHpcAppClusterLogic holds the per-request state used to look up the
// clusters associated with an HPC application.
type GetHpcAppClusterLogic struct {
// Embedded logger; the constructor derives it from the request context.
logx.Logger
// ctx is the context the logic object was created with.
ctx context.Context
// svcCtx gives access to shared service dependencies such as DbEngin.
svcCtx *svc.ServiceContext
}
// NewGetHpcAppClusterLogic creates a GetHpcAppClusterLogic bound to ctx and
// svcCtx, with its embedded logger derived from ctx.
func NewGetHpcAppClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetHpcAppClusterLogic {
	logic := new(GetHpcAppClusterLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// GetHpcAppCluster returns the distinct cluster_id values found in the
// hpc_app_template table for the application named in req.App, restricted to
// rows with status = 1 that are not soft-deleted (deleted_at is null).
//
// On a query failure the error is logged and returned with a nil response.
func (l *GetHpcAppClusterLogic) GetHpcAppCluster(req *types.HpcAppClusterReq) (resp *types.ListResult, err error) {
	var ids []string

	query := l.svcCtx.DbEngin.Table("hpc_app_template").
		Distinct("cluster_id").
		Where(" app = ? and status = 1 and deleted_at is null", req.App)

	if err = query.Find(&ids).Error; err != nil {
		log.Error().Msgf("GetHpcAppCluster err:%v", err)
		return nil, err
	}

	return &types.ListResult{List: ids}, nil
}

View File

@ -2786,6 +2786,10 @@ type Hooks struct {
ContainerHooksResp ContainerHooksResp `json:"containerHooks,omitempty" copier:"ContainerHooksResp"` // *
}
// HpcAppClusterReq is the request payload for querying the clusters
// associated with an HPC application.
type HpcAppClusterReq struct {
App string `form:"app"` // application name, bound from the "app" form field
}
type HpcAppTemplateInfo struct {
Id int64 `json:"id"`
Name string `json:"name"`