forked from JointCloud/pcm-coordinator
存算联动调整2
This commit is contained in:
parent
eb90e99d47
commit
8bdfaa2320
|
@ -6,6 +6,7 @@ import (
|
|||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -16,9 +17,95 @@ type ShuguangHpc struct {
|
|||
}
|
||||
|
||||
// Fixed values and defaults used to build a ShuguangHpc job submission request.
const (
	// SHUGUANGHPC_CUSTOM_RESOURCE_ID is the resource ID reserved for a custom
	// resource.
	// NOTE(review): the "// 10G" text sits inside the string literal, so it is
	// part of the constant's value rather than a comment — confirm whether the
	// intended value is "10240".
	SHUGUANGHPC_CUSTOM_RESOURCE_ID = "10240 // 10G"

	// TASK_SHUGUANG_PREFIX is the prefix of every generated task name.
	TASK_SHUGUANG_PREFIX = "ShuguangHPC"

	// JOBNAME is the placeholder inside GAP_WORK_DIR, GAP_STD_OUT_FILE and
	// GAP_STD_ERR_FILE that SubmitTask replaces with the generated task name.
	JOBNAME = "JOBNAME"

	// NEWLINE and EXPORT are used to render each environment variable as one
	// "export NAME=VALUE" line.
	NEWLINE = "\n"
	EXPORT  = "export"

	// Values for the top-level fields of the submit request.
	Apptype         = "BASIC"
	StrJobManagerID = 1637920656

	// Path templates; each contains the JOBNAME placeholder.
	GAP_WORK_DIR     = "/work/home/acgnnmfbwo/BASE/JOBNAME"
	GAP_STD_OUT_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
	GAP_STD_ERR_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"

	// Default job parameters. GAP_CMD_FILE is the value sent as the request's
	// GAP_SUBMIT_TYPE, and GAP_WALL_TIME_24H the value sent as GAP_WALL_TIME.
	GAP_WALL_TIME_24H = "24:00:00"
	GAP_CMD_FILE      = "cmd"
	GAP_NNODE         = "1" // number of nodes
	GAP_NODE_STRING   = ""
	GAP_APPNAME       = "BASE"
	GAP_QUEUE         = "wzhdtest"
	GAP_NPROC         = "1"
	GAP_NDCU          = "1"
	GAP_EXCLUSIVE     = ""
	GAP_PPN           = ""
	GAP_NGPU          = ""
	GAP_MULTI_SUB     = ""
)
|
||||
|
||||
var RESOURCEMAP = map[string]ResourceSpec{
|
||||
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "2",
|
||||
GAP_NDCU: "1",
|
||||
},
|
||||
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "4",
|
||||
GAP_NDCU: "2",
|
||||
},
|
||||
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "8",
|
||||
GAP_NDCU: "4",
|
||||
},
|
||||
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "16",
|
||||
GAP_NDCU: "4",
|
||||
},
|
||||
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "32",
|
||||
GAP_NDCU: "4",
|
||||
},
|
||||
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": {
|
||||
GAP_NNODE: "2",
|
||||
GAP_NPROC: "4",
|
||||
GAP_NDCU: "2",
|
||||
},
|
||||
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": {
|
||||
GAP_NNODE: "2",
|
||||
GAP_NPROC: "8",
|
||||
GAP_NDCU: "4",
|
||||
},
|
||||
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": {
|
||||
GAP_NNODE: "2",
|
||||
GAP_NPROC: "16",
|
||||
GAP_NDCU: "4",
|
||||
},
|
||||
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": {
|
||||
GAP_NNODE: "2",
|
||||
GAP_NPROC: "32",
|
||||
GAP_NDCU: "8",
|
||||
},
|
||||
}
|
||||
|
||||
// RESOURCESPECS maps a resource ID to a human-readable summary of its node,
// CPU and DCU allocation. SubmitTask uses key presence in this map to decide
// whether a resource ID exists.
var RESOURCESPECS = map[string]string{
	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",
	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
}
|
||||
|
||||
// ResourceSpec holds the per-resource values that updateRequestByResouceId
// copies into the same-named fields of a job submission request. All values
// are kept as strings.
type ResourceSpec struct {
	GAP_NNODE string // node count
	GAP_NPROC string // process count (listed as CPU in RESOURCESPECS)
	GAP_NDCU  string // DCU count
}
|
||||
|
||||
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
|
||||
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant}
|
||||
}
|
||||
|
@ -39,7 +126,8 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
|
|||
// shuguangHpc提交任务
|
||||
|
||||
//判断是否resourceId匹配自定义资源Id
|
||||
if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID {
|
||||
_, isMapContainsKey := RESOURCESPECS[resourceId]
|
||||
if !isMapContainsKey {
|
||||
return nil, errors.New("shuguangHpc资源Id不存在")
|
||||
}
|
||||
|
||||
|
@ -47,37 +135,43 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
|
|||
var env string
|
||||
for _, e := range envs {
|
||||
s := strings.Split(e, COMMA)
|
||||
env += s[0] + "=" + s[1] + SPACE
|
||||
env += EXPORT + SPACE + s[0] + EQUAL + s[1] + NEWLINE
|
||||
}
|
||||
|
||||
//请求
|
||||
taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10)
|
||||
GAP_WORK_DIR := strings.Replace(GAP_WORK_DIR, JOBNAME, taskName, -1)
|
||||
GAP_STD_OUT_FILE := strings.Replace(GAP_STD_OUT_FILE, JOBNAME, taskName, -1)
|
||||
GAP_STD_ERR_FILE := strings.Replace(GAP_STD_ERR_FILE, JOBNAME, taskName, -1)
|
||||
|
||||
req := &hpcAC.SubmitJobReq{
|
||||
Apptype: "",
|
||||
Appname: "",
|
||||
StrJobManagerID: 0,
|
||||
Apptype: Apptype,
|
||||
Appname: GAP_APPNAME,
|
||||
StrJobManagerID: StrJobManagerID,
|
||||
MapAppJobInfo: &hpcAC.MapAppJobInfo{
|
||||
GAP_CMD_FILE: "echo $TESTDIR; echo $TESTENV; sleep 30",
|
||||
GAP_NNODE: "1",
|
||||
GAP_NODE_STRING: "",
|
||||
GAP_SUBMIT_TYPE: "cmd",
|
||||
GAP_JOB_NAME: "testSlurmjob1",
|
||||
GAP_WORK_DIR: "/work/home/acgnnmfbwo/BASE/testSlurmjob1",
|
||||
GAP_QUEUE: "wzhdtest",
|
||||
GAP_NPROC: "1",
|
||||
GAP_PPN: "",
|
||||
GAP_NGPU: "",
|
||||
GAP_NDCU: "1",
|
||||
GAP_WALL_TIME: "01:00:00",
|
||||
GAP_EXCLUSIVE: "",
|
||||
GAP_APPNAME: "BASE",
|
||||
GAP_MULTI_SUB: "",
|
||||
GAP_STD_OUT_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.out.%j",
|
||||
GAP_STD_ERR_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.err.%j",
|
||||
GAP_SCHEDULER_OPT_WEB: "export TESTDIR=/bin/emacs\nexport TESTENV=12345",
|
||||
GAP_CMD_FILE: cmd,
|
||||
GAP_NNODE: GAP_NNODE,
|
||||
GAP_NODE_STRING: GAP_NODE_STRING,
|
||||
GAP_SUBMIT_TYPE: GAP_CMD_FILE,
|
||||
GAP_JOB_NAME: taskName,
|
||||
GAP_WORK_DIR: GAP_WORK_DIR,
|
||||
GAP_QUEUE: GAP_QUEUE,
|
||||
GAP_NPROC: GAP_NPROC,
|
||||
GAP_PPN: GAP_PPN,
|
||||
GAP_NGPU: GAP_NGPU,
|
||||
GAP_NDCU: GAP_NDCU,
|
||||
GAP_WALL_TIME: GAP_WALL_TIME_24H,
|
||||
GAP_EXCLUSIVE: GAP_EXCLUSIVE,
|
||||
GAP_APPNAME: GAP_APPNAME,
|
||||
GAP_MULTI_SUB: GAP_MULTI_SUB,
|
||||
GAP_STD_OUT_FILE: GAP_STD_OUT_FILE,
|
||||
GAP_STD_ERR_FILE: GAP_STD_ERR_FILE,
|
||||
GAP_SCHEDULER_OPT_WEB: env,
|
||||
},
|
||||
}
|
||||
|
||||
updateRequestByResouceId(resourceId, req)
|
||||
|
||||
resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -107,3 +201,10 @@ func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
|
|||
//TODO implement me
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) {
|
||||
spec := RESOURCEMAP[resourceId]
|
||||
req.MapAppJobInfo.GAP_NNODE = spec.GAP_NNODE
|
||||
req.MapAppJobInfo.GAP_NPROC = spec.GAP_NPROC
|
||||
req.MapAppJobInfo.GAP_NDCU = spec.GAP_NDCU
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ const (
|
|||
PY_PARAM_PREFIX = "--"
|
||||
SPACE = " "
|
||||
UNDERSCORE = "_"
|
||||
EQUAL = "="
|
||||
COMMA = ","
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
|
|
Loading…
Reference in New Issue