forked from JointCloud/pcm-coordinator
271 lines
6.9 KiB
Go
271 lines
6.9 KiB
Go
package storeLink
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
type ShuguangHpc struct {
|
|
ctx context.Context
|
|
svcCtx *svc.ServiceContext
|
|
participant *models.StorelinkCenter
|
|
}
|
|
|
|
// Constants used to assemble Shuguang HPC job submissions.
const (
	// Task naming and script text. JOBNAME is the placeholder that SubmitTask
	// replaces with the generated task name inside the path templates below;
	// EXPORT and NEWLINE are used to build the environment export lines.
	TASK_SHUGUANG_PREFIX = "ShuguangHPC"
	JOBNAME              = "JOBNAME"
	NEWLINE              = "\n"
	EXPORT               = "export"

	// Application, job manager and queue identifiers sent with each request.
	// StrJobManagerID is also used when querying historical jobs.
	Apptype         = "BASIC"
	GAP_APPNAME     = "BASE"
	StrJobManagerID = 1637920656
	GAP_QUEUE       = "wzhdtest"

	// Path templates; every JOBNAME occurrence is replaced per task.
	GAP_WORK_DIR     = "/work/home/acgnnmfbwo/BASE/JOBNAME"
	GAP_STD_OUT_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
	GAP_STD_ERR_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"

	// GAP_CMD_FILE is sent as the GAP_SUBMIT_TYPE value and
	// GAP_WALL_TIME_24H as the GAP_WALL_TIME value.
	GAP_CMD_FILE      = "cmd"
	GAP_WALL_TIME_24H = "24:00:00"

	// Default node (GAP_NNODE: number of nodes), process and DCU counts.
	// SubmitTask overwrites them with the values of the selected resource spec.
	GAP_NNODE = "1"
	GAP_NPROC = "1"
	GAP_NDCU  = "1"

	// Request fields that are submitted empty.
	GAP_NODE_STRING = ""
	GAP_EXCLUSIVE   = ""
	GAP_PPN         = ""
	GAP_NGPU        = ""
	GAP_MULTI_SUB   = ""
)
|
|
|
|
var RESOURCEMAP = map[string]ResourceSpec{
|
|
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": {
|
|
GAP_NNODE: "1",
|
|
GAP_NPROC: "2",
|
|
GAP_NDCU: "1",
|
|
},
|
|
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": {
|
|
GAP_NNODE: "1",
|
|
GAP_NPROC: "4",
|
|
GAP_NDCU: "2",
|
|
},
|
|
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": {
|
|
GAP_NNODE: "1",
|
|
GAP_NPROC: "8",
|
|
GAP_NDCU: "4",
|
|
},
|
|
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": {
|
|
GAP_NNODE: "1",
|
|
GAP_NPROC: "16",
|
|
GAP_NDCU: "4",
|
|
},
|
|
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": {
|
|
GAP_NNODE: "1",
|
|
GAP_NPROC: "32",
|
|
GAP_NDCU: "4",
|
|
},
|
|
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": {
|
|
GAP_NNODE: "2",
|
|
GAP_NPROC: "4",
|
|
GAP_NDCU: "2",
|
|
},
|
|
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": {
|
|
GAP_NNODE: "2",
|
|
GAP_NPROC: "8",
|
|
GAP_NDCU: "4",
|
|
},
|
|
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": {
|
|
GAP_NNODE: "2",
|
|
GAP_NPROC: "16",
|
|
GAP_NDCU: "4",
|
|
},
|
|
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": {
|
|
GAP_NNODE: "2",
|
|
GAP_NPROC: "32",
|
|
GAP_NDCU: "8",
|
|
},
|
|
}
|
|
|
|
// RESOURCESPECS maps each supported resource id to its display name.
// SubmitTask uses it to validate resource ids and QuerySpecs reports its
// entries as the available specs.
var RESOURCESPECS = map[string]string{
	// Single-node specs.
	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",

	// Two-node specs.
	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
}
|
|
|
|
// AcStatus maps job state codes to status names.
// NOTE(review): presumably the codes are the job states reported by the AC
// service; this file does not read the map, so confirm against its users.
var AcStatus = map[string]string{
	// States reported as "Pending".
	"statQ": "Pending",
	"statE": "Pending",
	"statH": "Pending",
	"statS": "Pending",
	"statW": "Pending",

	// Remaining states.
	"statR": "Running",
	"statC": "Completed",
	"statX": "Other",
}
|
|
|
|
// ResourceSpec holds the resource counts of one predefined spec, kept as
// strings because they are copied verbatim into the submit request fields of
// the same names.
type ResourceSpec struct {
	// GAP_NNODE is the number of nodes.
	GAP_NNODE string

	// GAP_NPROC is the process count (shown as "CPU" in RESOURCESPECS names).
	GAP_NPROC string

	// GAP_NDCU is the number of DCUs.
	GAP_NDCU string
}
|
|
|
|
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
|
|
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant}
|
|
}
|
|
|
|
func (s ShuguangHpc) UploadImage(path string) (interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (s ShuguangHpc) DeleteImage(imageId string) (interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (s ShuguangHpc) QueryImageList() (interface{}, error) {
|
|
return nil, nil
|
|
}
|
|
|
|
func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
|
|
// shuguangHpc提交任务
|
|
|
|
//判断是否resourceId匹配自定义资源Id
|
|
_, isMapContainsKey := RESOURCESPECS[resourceId]
|
|
if !isMapContainsKey {
|
|
return nil, errors.New("shuguangHpc资源Id不存在")
|
|
}
|
|
|
|
//环境变量
|
|
var env string
|
|
for _, e := range envs {
|
|
s := strings.Split(e, COMMA)
|
|
env += EXPORT + SPACE + s[0] + EQUAL + s[1] + NEWLINE
|
|
}
|
|
|
|
//请求
|
|
taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10)
|
|
GAP_WORK_DIR := strings.Replace(GAP_WORK_DIR, JOBNAME, taskName, -1)
|
|
GAP_STD_OUT_FILE := strings.Replace(GAP_STD_OUT_FILE, JOBNAME, taskName, -1)
|
|
GAP_STD_ERR_FILE := strings.Replace(GAP_STD_ERR_FILE, JOBNAME, taskName, -1)
|
|
|
|
req := &hpcAC.SubmitJobReq{
|
|
Apptype: Apptype,
|
|
Appname: GAP_APPNAME,
|
|
StrJobManagerID: StrJobManagerID,
|
|
MapAppJobInfo: &hpcAC.MapAppJobInfo{
|
|
GAP_CMD_FILE: cmd,
|
|
GAP_NNODE: GAP_NNODE,
|
|
GAP_NODE_STRING: GAP_NODE_STRING,
|
|
GAP_SUBMIT_TYPE: GAP_CMD_FILE,
|
|
GAP_JOB_NAME: taskName,
|
|
GAP_WORK_DIR: GAP_WORK_DIR,
|
|
GAP_QUEUE: GAP_QUEUE,
|
|
GAP_NPROC: GAP_NPROC,
|
|
GAP_PPN: GAP_PPN,
|
|
GAP_NGPU: GAP_NGPU,
|
|
GAP_NDCU: GAP_NDCU,
|
|
GAP_WALL_TIME: GAP_WALL_TIME_24H,
|
|
GAP_EXCLUSIVE: GAP_EXCLUSIVE,
|
|
GAP_APPNAME: GAP_APPNAME,
|
|
GAP_MULTI_SUB: GAP_MULTI_SUB,
|
|
GAP_STD_OUT_FILE: GAP_STD_OUT_FILE,
|
|
GAP_STD_ERR_FILE: GAP_STD_ERR_FILE,
|
|
GAP_SCHEDULER_OPT_WEB: env,
|
|
},
|
|
}
|
|
|
|
updateRequestByResouceId(resourceId, req)
|
|
|
|
resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
//转换成统一返回类型
|
|
submitResp, err := ConvertType[hpcAC.SubmitJobResp](resp, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return submitResp, nil
|
|
|
|
}
|
|
|
|
func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
|
|
//实时作业
|
|
reqC := &hpcAC.JobDetailReq{
|
|
JobId: taskId,
|
|
}
|
|
respC, err := s.svcCtx.ACRpc.GetJobDetail(s.ctx, reqC)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
taskRespC, err := ConvertType[hpcAC.GetJobDetailResp](respC, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
//实时作业检查是否成功
|
|
taskResp := taskRespC.(types.GetLinkTaskResp)
|
|
if taskResp.Task != nil {
|
|
return taskRespC, nil
|
|
}
|
|
|
|
//历史作业
|
|
reqH := &hpcAC.HistoryJobDetailReq{
|
|
JobId: taskId,
|
|
JobmanagerId: strconv.Itoa(StrJobManagerID),
|
|
}
|
|
|
|
respH, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, reqH)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
taskRespH, err := ConvertType[hpcAC.HistoryJobDetailResp](respH, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return taskRespH, nil
|
|
}
|
|
|
|
func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
|
|
var resp types.GetResourceSpecsResp
|
|
|
|
for k, v := range RESOURCESPECS {
|
|
var respec types.ResourceSpecSl
|
|
respec.SpecId = k
|
|
respec.SpecName = v
|
|
respec.ParticipantId = s.participant.Id
|
|
respec.ParticipantName = s.participant.Name
|
|
resp.ResourceSpecs = append(resp.ResourceSpecs, &respec)
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
|
|
//req := hpcAC.DeleteJobReq{}
|
|
|
|
//TODO implement me
|
|
panic("implement me")
|
|
}
|
|
|
|
func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) {
|
|
spec := RESOURCEMAP[resourceId]
|
|
req.MapAppJobInfo.GAP_NNODE = spec.GAP_NNODE
|
|
req.MapAppJobInfo.GAP_NPROC = spec.GAP_NPROC
|
|
req.MapAppJobInfo.GAP_NDCU = spec.GAP_NDCU
|
|
}
|