// pcm-coordinator/api/internal/storeLink/shuguangHpc.go
//
// 271 lines
// 6.9 KiB
// Go

package storeLink
import (
"context"
"errors"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"strconv"
"strings"
)
// ShuguangHpc bundles the request context, the service context and the
// storelink participant record used by the methods defined on it in this file.
type ShuguangHpc struct {
// ctx is passed to the ACRpc calls made by SubmitTask and QueryTask.
ctx context.Context
// svcCtx provides the ACRpc client used to submit and query jobs.
svcCtx *svc.ServiceContext
// participant supplies the Id and Name that QuerySpecs attaches to each spec.
participant *models.StorelinkCenter
}
// Constants used by SubmitTask to build the job submission request and by
// QueryTask to query job history.
const (
// GAP_WALL_TIME_24H is the value SubmitTask sends as GAP_WALL_TIME.
GAP_WALL_TIME_24H = "24:00:00"
// TASK_SHUGUANG_PREFIX is the leading part of every generated task name.
TASK_SHUGUANG_PREFIX = "ShuguangHPC"
// NEWLINE terminates each export line built from the envs argument.
NEWLINE = "\n"
// JOBNAME is the placeholder in the path templates below that SubmitTask
// replaces with the generated task name.
JOBNAME = "JOBNAME"
// GAP_CMD_FILE is the value SubmitTask sends as GAP_SUBMIT_TYPE.
GAP_CMD_FILE = "cmd"
// GAP_NNODE is the initial node count placed in the request; SubmitTask then
// calls updateRequestByResouceId, which overwrites it from RESOURCEMAP.
GAP_NNODE = "1" // number of nodes
// GAP_NODE_STRING is sent unchanged (empty) in the submit request.
GAP_NODE_STRING = ""
// GAP_APPNAME is sent both as Appname and as GAP_APPNAME in the submit request.
GAP_APPNAME = "BASE"
// GAP_QUEUE is the queue name sent in the submit request.
GAP_QUEUE = "wzhdtest"
// GAP_WORK_DIR, GAP_STD_OUT_FILE and GAP_STD_ERR_FILE are path templates; the
// JOBNAME segment is replaced with the task name before they are sent.
GAP_WORK_DIR = "/work/home/acgnnmfbwo/BASE/JOBNAME"
GAP_STD_OUT_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
GAP_STD_ERR_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"
// StrJobManagerID is sent in the submit request and, formatted as a decimal
// string, as JobmanagerId in the history job query.
StrJobManagerID = 1637920656
// Apptype is the Apptype field of the submit request.
Apptype = "BASIC"
// EXPORT is the keyword that starts each environment line built by SubmitTask.
EXPORT = "export"
// GAP_NPROC and GAP_NDCU are initial request values that
// updateRequestByResouceId overwrites from RESOURCEMAP.
GAP_NPROC = "1"
GAP_NDCU = "1"
// The remaining values are sent unchanged (empty) in the submit request.
GAP_EXCLUSIVE = ""
GAP_PPN = ""
GAP_NGPU = ""
GAP_MULTI_SUB = ""
)
// RESOURCEMAP maps a resource spec id to the node, process and DCU values that
// updateRequestByResouceId copies into a submit request. Its keys are the same
// ids that appear in RESOURCESPECS.
var RESOURCEMAP = map[string]ResourceSpec{
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": {
GAP_NNODE: "1",
GAP_NPROC: "2",
GAP_NDCU: "1",
},
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": {
GAP_NNODE: "1",
GAP_NPROC: "4",
GAP_NDCU: "2",
},
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": {
GAP_NNODE: "1",
GAP_NPROC: "8",
GAP_NDCU: "4",
},
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": {
GAP_NNODE: "1",
GAP_NPROC: "16",
GAP_NDCU: "4",
},
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": {
GAP_NNODE: "1",
GAP_NPROC: "32",
GAP_NDCU: "4",
},
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": {
GAP_NNODE: "2",
GAP_NPROC: "4",
GAP_NDCU: "2",
},
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": {
GAP_NNODE: "2",
GAP_NPROC: "8",
GAP_NDCU: "4",
},
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": {
GAP_NNODE: "2",
GAP_NPROC: "16",
GAP_NDCU: "4",
},
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": {
GAP_NNODE: "2",
GAP_NPROC: "32",
GAP_NDCU: "8",
},
}
// RESOURCESPECS maps a resource spec id to its display name. SubmitTask uses
// it to validate the requested resourceId, and QuerySpecs returns each entry
// as a SpecId/SpecName pair.
var RESOURCESPECS = map[string]string{
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
}
// AcStatus maps "stat*" status keys to the status labels "Pending",
// "Running", "Completed" and "Other".
// NOTE(review): nothing in the visible part of this file reads this map, and
// the meaning of each individual key is not shown here; confirm against the
// code that consumes it.
var AcStatus = map[string]string{
"statQ": "Pending",
"statR": "Running",
"statE": "Pending",
"statC": "Completed",
"statH": "Pending",
"statS": "Pending",
"statW": "Pending",
"statX": "Other",
}
// ResourceSpec holds the three string values that updateRequestByResouceId
// copies into the same-named fields of a submit request's MapAppJobInfo.
type ResourceSpec struct {
// GAP_NNODE is the node count, as a string.
GAP_NNODE string
// GAP_NPROC is copied to MapAppJobInfo.GAP_NPROC (listed as "CPU" in RESOURCESPECS).
GAP_NPROC string
// GAP_NDCU is copied to MapAppJobInfo.GAP_NDCU (listed as "DCU" in RESOURCESPECS).
GAP_NDCU string
}
// NewShuguangHpc returns a pointer to a ShuguangHpc that holds the given
// context, service context and participant.
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
	hpc := new(ShuguangHpc)
	hpc.ctx = ctx
	hpc.svcCtx = svcCtx
	hpc.participant = participant
	return hpc
}
// UploadImage is a no-op for ShuguangHpc: it ignores path and always returns
// (nil, nil).
func (ShuguangHpc) UploadImage(path string) (interface{}, error) {
	return nil, nil
}
// DeleteImage is a no-op for ShuguangHpc: it ignores imageId and always
// returns (nil, nil).
func (ShuguangHpc) DeleteImage(imageId string) (interface{}, error) {
	return nil, nil
}
// QueryImageList is a no-op for ShuguangHpc: it always returns (nil, nil).
func (ShuguangHpc) QueryImageList() (interface{}, error) {
	return nil, nil
}
// SubmitTask submits a job through ACRpc.SubmitJob and returns the response
// converted by ConvertType.
//
// resourceId must be a key of RESOURCESPECS; otherwise an error is returned
// before any request is made. Each entry of envs is split on COMMA and turned
// into one line of the form EXPORT SPACE <first> EQUAL <second> NEWLINE; an
// entry that does not contain COMMA yields an error rather than an
// index-out-of-range panic. cmd is sent as the job's GAP_CMD_FILE. imageId and
// params are accepted but not used.
func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
	// Reject resource ids that are not one of the predefined specs.
	if _, ok := RESOURCESPECS[resourceId]; !ok {
		return nil, errors.New("shuguangHpc资源Id不存在")
	}

	// Environment variables: one export line per entry.
	var env strings.Builder
	for _, e := range envs {
		kv := strings.Split(e, COMMA)
		if len(kv) < 2 {
			return nil, errors.New("shuguangHpc: malformed env entry " + strconv.Quote(e))
		}
		env.WriteString(EXPORT + SPACE + kv[0] + EQUAL + kv[1] + NEWLINE)
	}

	// The task name is random per submission and is substituted for the
	// JOBNAME placeholder in the working directory and output file templates.
	taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10)
	workDir := strings.ReplaceAll(GAP_WORK_DIR, JOBNAME, taskName)
	stdOutFile := strings.ReplaceAll(GAP_STD_OUT_FILE, JOBNAME, taskName)
	stdErrFile := strings.ReplaceAll(GAP_STD_ERR_FILE, JOBNAME, taskName)

	// Build the request with default node/proc/DCU values.
	req := &hpcAC.SubmitJobReq{
		Apptype:         Apptype,
		Appname:         GAP_APPNAME,
		StrJobManagerID: StrJobManagerID,
		MapAppJobInfo: &hpcAC.MapAppJobInfo{
			GAP_CMD_FILE:          cmd,
			GAP_NNODE:             GAP_NNODE,
			GAP_NODE_STRING:       GAP_NODE_STRING,
			GAP_SUBMIT_TYPE:       GAP_CMD_FILE,
			GAP_JOB_NAME:          taskName,
			GAP_WORK_DIR:          workDir,
			GAP_QUEUE:             GAP_QUEUE,
			GAP_NPROC:             GAP_NPROC,
			GAP_PPN:               GAP_PPN,
			GAP_NGPU:              GAP_NGPU,
			GAP_NDCU:              GAP_NDCU,
			GAP_WALL_TIME:         GAP_WALL_TIME_24H,
			GAP_EXCLUSIVE:         GAP_EXCLUSIVE,
			GAP_APPNAME:           GAP_APPNAME,
			GAP_MULTI_SUB:         GAP_MULTI_SUB,
			GAP_STD_OUT_FILE:      stdOutFile,
			GAP_STD_ERR_FILE:      stdErrFile,
			GAP_SCHEDULER_OPT_WEB: env.String(),
		},
	}
	// Replace the defaults with the values configured for this resource id.
	updateRequestByResouceId(resourceId, req)

	resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
	if err != nil {
		return nil, err
	}

	// Convert to the unified return type.
	submitResp, err := ConvertType[hpcAC.SubmitJobResp](resp, nil)
	if err != nil {
		return nil, err
	}
	return submitResp, nil
}
// QueryTask looks up the job identified by taskId.
//
// It first asks ACRpc.GetJobDetail for the live job. If the converted response
// carries a non-nil Task it is returned as is. Otherwise the job is looked up
// through ACRpc.HistoryJobDetail using StrJobManagerID, and that converted
// response is returned. Any RPC or conversion error is returned unchanged. An
// error is also returned if the converted live response is not a
// types.GetLinkTaskResp, where a bare type assertion would panic.
func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
	// Live job.
	liveResp, err := s.svcCtx.ACRpc.GetJobDetail(s.ctx, &hpcAC.JobDetailReq{
		JobId: taskId,
	})
	if err != nil {
		return nil, err
	}
	liveTask, err := ConvertType[hpcAC.GetJobDetailResp](liveResp, nil)
	if err != nil {
		return nil, err
	}

	// Check whether the live lookup actually found the job.
	detail, ok := liveTask.(types.GetLinkTaskResp)
	if !ok {
		return nil, errors.New("shuguangHpc: unexpected job detail response type")
	}
	if detail.Task != nil {
		return liveTask, nil
	}

	// Historical job.
	histResp, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, &hpcAC.HistoryJobDetailReq{
		JobId:        taskId,
		JobmanagerId: strconv.Itoa(StrJobManagerID),
	})
	if err != nil {
		return nil, err
	}
	histTask, err := ConvertType[hpcAC.HistoryJobDetailResp](histResp, nil)
	if err != nil {
		return nil, err
	}
	return histTask, nil
}
// QuerySpecs returns a types.GetResourceSpecsResp with one entry per
// RESOURCESPECS key. Each entry carries the spec id, its display name and this
// participant's Id and Name. Entries follow Go map iteration order, so their
// order is unspecified.
func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
	var resp types.GetResourceSpecsResp
	for specID, specName := range RESOURCESPECS {
		item := new(types.ResourceSpecSl)
		item.SpecId = specID
		item.SpecName = specName
		item.ParticipantId = s.participant.Id
		item.ParticipantName = s.participant.Name
		resp.ResourceSpecs = append(resp.ResourceSpecs, item)
	}
	return resp, nil
}
// DeleteTask is not implemented for ShuguangHpc. It ignores taskId and always
// returns a nil result with a non-nil error, so a caller sees an ordinary
// failure instead of a panic.
func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
	// TODO: implement job deletion (a request of type hpcAC.DeleteJobReq was
	// sketched here originally).
	return nil, errors.New("shuguangHpc: DeleteTask is not implemented")
}
// updateRequestByResouceId overwrites the GAP_NNODE, GAP_NPROC and GAP_NDCU
// fields of req.MapAppJobInfo with the values stored in RESOURCEMAP for
// resourceId. If resourceId is not a key of RESOURCEMAP the request is left
// untouched, so the values already in it are not replaced by empty strings.
// req and req.MapAppJobInfo must be non-nil.
func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) {
	spec, ok := RESOURCEMAP[resourceId]
	if !ok {
		return
	}
	info := req.MapAppJobInfo
	info.GAP_NNODE = spec.GAP_NNODE
	info.GAP_NPROC = spec.GAP_NPROC
	info.GAP_NDCU = spec.GAP_NDCU
}