forked from JointCloud/pcm-coordinator
add hpcAppCluster
Signed-off-by: jagger <cossjie@foxmail.com>
This commit is contained in:
parent
b61842d80b
commit
583e07bc08
|
@ -205,22 +205,17 @@ type (
|
|||
CreateTime string `json:"create_time"`
|
||||
UpdateTime string `json:"update_time"`
|
||||
}
|
||||
// App string `json:"app"`
|
||||
//Backend string `json:"backend" binding:"required,oneof=slurm sugonac"` // 后端类型:slurm/sugonac
|
||||
//Partition string // 分区/队列名称
|
||||
//TaskId string `json:"taskId"`
|
||||
//ClusterId string `json:"clusterId"`
|
||||
//JobName string `json:"jobName"`
|
||||
//ScriptContent string `json:"scriptContent"`
|
||||
//ScriptDir string `json:"scriptDir"`
|
||||
//Parameters map[string]string `json:"parameters"`
|
||||
//TimeLimit time.Duration // 作业时间限制
|
||||
|
||||
SubmitHpcTaskReq {
|
||||
App string `json:"app"`
|
||||
ClusterId string `json:"clusterId"`
|
||||
JobName string `json:"jobName"`
|
||||
ScriptContent string `json:"scriptContent"`
|
||||
Parameters map[string]string `json:"parameters"`
|
||||
Backend string `json:"backend"`
|
||||
Backend string `json:"backend"`
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
type HpcAppClusterReq {
|
||||
App string `form:"app"`
|
||||
}
|
|
@ -186,7 +186,7 @@ service pcm {
|
|||
|
||||
@doc "同步指定资源规格"
|
||||
@handler syncResourceSpecHandler
|
||||
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (CommonResp)
|
||||
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (ListResult)
|
||||
|
||||
@doc "获取指定资源规格详情"
|
||||
@handler detailResourceSpecHandler
|
||||
|
@ -251,6 +251,10 @@ service pcm {
|
|||
@doc "超算任务日志"
|
||||
@handler getHpcTaskLogHandler
|
||||
get /hpc/jobLogs/:taskId (HpcTaskLogReq) returns (HpcTaskLogResp)
|
||||
|
||||
@doc "查询超算应用支持的集群"
|
||||
@handler getHpcAppClusterHandler
|
||||
get /hpc/getHpcAppCluster (HpcAppClusterReq) returns (CommonResp)
|
||||
}
|
||||
|
||||
//cloud二级接口
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package hpc
|
||||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/hpc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func GetHpcAppClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.HpcAppClusterReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := hpc.NewGetHpcAppClusterLogic(r.Context(), svcCtx)
|
||||
resp, err := l.GetHpcAppCluster(&req)
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
|
@ -724,6 +724,12 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/hpc/commitHpcTask",
|
||||
Handler: hpc.CommitHpcTaskHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
// 查询超算应用支持的集群
|
||||
Method: http.MethodGet,
|
||||
Path: "/hpc/getHpcAppCluster",
|
||||
Handler: hpc.GetHpcAppClusterHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
// 超算查询任务列表
|
||||
Method: http.MethodGet,
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -85,13 +86,67 @@ func (l *CommitHpcTaskLogic) getClusterInfo(clusterID string) (*types.ClusterInf
|
|||
return &clusterInfo, &adapterInfo, nil
|
||||
}
|
||||
|
||||
// 自定义函数映射
|
||||
func createFuncMap() template.FuncMap {
|
||||
return template.FuncMap{
|
||||
"regexMatch": regexMatch,
|
||||
"required": required,
|
||||
"error": errorHandler,
|
||||
"default": defaultHandler,
|
||||
}
|
||||
}
|
||||
func extractUserError(originalErr error) error {
|
||||
// 尝试匹配模板引擎返回的错误格式
|
||||
re := regexp.MustCompile(`error calling \w+: (.*)$`)
|
||||
matches := re.FindStringSubmatch(originalErr.Error())
|
||||
if len(matches) > 1 {
|
||||
return errors.New(matches[1])
|
||||
}
|
||||
return originalErr
|
||||
}
|
||||
|
||||
// 正则匹配函数
|
||||
func regexMatch(pattern string) *regexp.Regexp {
|
||||
return regexp.MustCompile(pattern)
|
||||
}
|
||||
|
||||
// 必填字段检查
|
||||
func required(msg string, val interface{}) (interface{}, error) {
|
||||
if val == nil || val == "" {
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// 错误处理函数
|
||||
func errorHandler(msg string) (string, error) {
|
||||
return "", errors.New(msg)
|
||||
}
|
||||
|
||||
// 默认值处理函数
|
||||
func defaultHandler(defaultVal interface{}, val interface{}) interface{} {
|
||||
switch v := val.(type) {
|
||||
case nil:
|
||||
return defaultVal
|
||||
case string:
|
||||
if v == "" {
|
||||
return defaultVal
|
||||
}
|
||||
case int:
|
||||
if v == 0 {
|
||||
return defaultVal
|
||||
}
|
||||
// 可根据需要添加其他类型判断
|
||||
}
|
||||
return val
|
||||
}
|
||||
func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobRequest) (string, error) {
|
||||
// 使用缓存模板
|
||||
tmpl, ok := templateCache.Load(templateContent)
|
||||
if !ok {
|
||||
parsedTmpl, err := template.New("jobScript").Parse(templateContent)
|
||||
parsedTmpl, err := template.New("slurmTemplate").Funcs(createFuncMap()).Parse(templateContent)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("template parse failed: %w", err)
|
||||
return "", err
|
||||
}
|
||||
templateCache.Store(templateContent, parsedTmpl)
|
||||
tmpl = parsedTmpl
|
||||
|
@ -104,7 +159,8 @@ func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobReq
|
|||
|
||||
var buf strings.Builder
|
||||
if err := tmpl.(*template.Template).Execute(&buf, params); err != nil {
|
||||
return "", fmt.Errorf("template render failed: %w", err)
|
||||
log.Error().Err(err).Msg("模板渲染失败")
|
||||
return "", extractUserError(err)
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
@ -235,36 +291,39 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
scriptContent := req.ScriptContent
|
||||
if scriptContent == "" {
|
||||
// 获取模板
|
||||
var templateInfo types.HpcAppTemplateInfo
|
||||
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
|
||||
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
|
||||
if req.OperateType != "" {
|
||||
tx.Where("app_type = ?", req.OperateType)
|
||||
}
|
||||
if err := tx.First(&templateInfo).Error; err != nil {
|
||||
return nil, fmt.Errorf("failed to get template: %w", err)
|
||||
}
|
||||
|
||||
// 获取模板
|
||||
var templateInfo types.HpcAppTemplateInfo
|
||||
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
|
||||
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
|
||||
if req.OperateType != "" {
|
||||
tx.Where("app_type = ?", req.OperateType)
|
||||
}
|
||||
if err := tx.First(&templateInfo).Error; err != nil {
|
||||
return nil, fmt.Errorf("failed to get template: %w", err)
|
||||
// 转换请求参数
|
||||
jobRequest, err := ConvertToJobRequest(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// 渲染脚本
|
||||
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
scriptContent = script
|
||||
}
|
||||
|
||||
// 转换请求参数
|
||||
jobRequest, err := ConvertToJobRequest(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid job request: %w", err)
|
||||
}
|
||||
|
||||
// 渲染脚本
|
||||
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("script rendering failed: %w", err)
|
||||
}
|
||||
|
||||
q, _ := jsoniter.MarshalToString(script)
|
||||
q, _ := jsoniter.MarshalToString(scriptContent)
|
||||
submitQ := types.SubmitHpcTaskReq{
|
||||
App: req.App,
|
||||
ClusterId: req.ClusterId,
|
||||
JobName: jobName,
|
||||
ScriptContent: script,
|
||||
ScriptContent: scriptContent,
|
||||
Parameters: req.Parameters,
|
||||
Backend: req.Backend,
|
||||
}
|
||||
|
@ -276,7 +335,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
|||
|
||||
jobID := resp.Data.JobInfo["jobId"]
|
||||
workDir := resp.Data.JobInfo["jobDir"]
|
||||
taskID, err := l.SaveHpcTaskToDB(req, script, jobID, workDir)
|
||||
taskID, err := l.SaveHpcTaskToDB(req, scriptContent, jobID, workDir)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed to save task to DB: %v", err)
|
||||
return nil, fmt.Errorf("db save failed: %w", err)
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package hpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type GetHpcAppClusterLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewGetHpcAppClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetHpcAppClusterLogic {
|
||||
return &GetHpcAppClusterLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetHpcAppClusterLogic) GetHpcAppCluster(req *types.HpcAppClusterReq) (resp *types.ListResult, err error) {
|
||||
resp = &types.ListResult{}
|
||||
var clusterIds []string
|
||||
err = l.svcCtx.DbEngin.Table("hpc_app_template").Distinct("cluster_id").
|
||||
Where(" app = ? and status = 1 and deleted_at is null", req.App).
|
||||
Find(&clusterIds).Error
|
||||
if err != nil {
|
||||
log.Error().Msgf("GetHpcAppCluster err:%v", err)
|
||||
return nil, err
|
||||
}
|
||||
resp.List = clusterIds
|
||||
return
|
||||
}
|
|
@ -2786,6 +2786,10 @@ type Hooks struct {
|
|||
ContainerHooksResp ContainerHooksResp `json:"containerHooks,omitempty" copier:"ContainerHooksResp"` // *
|
||||
}
|
||||
|
||||
type HpcAppClusterReq struct {
|
||||
App string `form:"app"`
|
||||
}
|
||||
|
||||
type HpcAppTemplateInfo struct {
|
||||
Id int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
|
|
Loading…
Reference in New Issue