forked from JointCloud/pcm-coordinator
存算联动提交接口修改
This commit is contained in:
parent
0ad85477e4
commit
6b5a30defe
|
@ -2,11 +2,13 @@ package storeLink
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-participant-ac/hpcAC"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
@ -16,6 +18,16 @@ type ShuguangAi struct {
|
|||
participant *models.ScParticipantPhyInfo
|
||||
}
|
||||
|
||||
// Default resource requests and argument-formatting tokens used when building
// a ShuguangAi task submission. The identifiers keep their existing spelling
// because other code in this package refers to them by these names.
const (
	// WORKER_RAM_SIZE is the RAM size requested per worker. The original
	// author's note says "10G"; the unit is presumably MB — confirm against
	// the hpcAC API.
	WORKER_RAM_SIZE = 10240
	// WORKER_NUMBER is the worker count placed in the submit request.
	WORKER_NUMBER = 1
	// WORKER_CPU_NUMBER is the per-worker CPU count placed in the submit request.
	WORKER_CPU_NUMBER = 5
	// WORKER_GPU_NUMBER is the per-worker GPU count placed in the submit request.
	WORKER_GPU_NUMBER = 1
	// PY_PARAM_PREFIX is prepended to every python argument name.
	PY_PARAM_PREFIX = "--"
	// SPACE separates consecutive python arguments.
	SPACE = " "
	// SHUGUANGAI_RESOURCE_ID is the custom resource ID that SubmitTask checks
	// the caller's resourceId against; it is also reported as the SpecId of
	// the resource spec.
	SHUGUANGAI_RESOURCE_ID = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi"
)
|
||||
|
||||
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.ScParticipantPhyInfo) *ShuguangAi {
|
||||
return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, participant: participant}
|
||||
}
|
||||
|
@ -51,6 +63,11 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) {
|
|||
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, resourceId string) (interface{}, error) {
|
||||
// shuguangAi提交任务
|
||||
|
||||
//判断是否resourceId匹配自定义资源Id
|
||||
if resourceId != SHUGUANGAI_RESOURCE_ID {
|
||||
return nil, errors.New("shuguangAi资源Id不存在")
|
||||
}
|
||||
|
||||
//根据imageId获取imagePath, version
|
||||
imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId}
|
||||
imageResp, err := s.svcCtx.ACRpc.GetImageAiById(s.ctx, imageReq)
|
||||
|
@ -60,6 +77,13 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res
|
|||
|
||||
dateStr := timeutils.UnixTimeToString(time.Now().Unix())
|
||||
|
||||
//python参数
|
||||
var pythonArg string
|
||||
for _, param := range params {
|
||||
s := strings.Split(param, COMMA)
|
||||
pythonArg += PY_PARAM_PREFIX + s[0] + "=" + s[1] + SPACE
|
||||
}
|
||||
|
||||
req := &hpcAC.SubmitPytorchTaskReq{
|
||||
Params: &hpcAC.SubmitPytorchTaskParams{
|
||||
TaskName: TASK_PYTORCH_PREFIX + "_" + utils.RandomString(7) + dateStr,
|
||||
|
@ -70,13 +94,14 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res
|
|||
AcceleratorType: DCU,
|
||||
Version: imageResp.Image.Version,
|
||||
ImagePath: imageResp.Image.Path,
|
||||
WorkerNumber: 1,
|
||||
WorkerCpuNumber: "1",
|
||||
WorkerGpuNumber: 1,
|
||||
WorkerRamSize: 1024,
|
||||
WorkerNumber: WORKER_NUMBER,
|
||||
WorkerCpuNumber: WORKER_CPU_NUMBER,
|
||||
WorkerGpuNumber: WORKER_GPU_NUMBER,
|
||||
WorkerRamSize: WORKER_RAM_SIZE,
|
||||
ResourceGroup: RESOURCE_GROUP,
|
||||
TimeoutLimit: TimeoutLimit,
|
||||
PythonCodePath: PythonCodePath,
|
||||
PythonArg: pythonArg,
|
||||
},
|
||||
}
|
||||
resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req)
|
||||
|
|
|
@ -315,6 +315,7 @@ func ConvertType[T any](in *T, participant *models.ScParticipantPhyInfo) (interf
|
|||
resp.Success = true
|
||||
spec.ParticipantName = participant.Name
|
||||
spec.ParticipantId = strconv.FormatInt(participant.Id, 10)
|
||||
spec.SpecId = SHUGUANGAI_RESOURCE_ID
|
||||
resp.ResourceSpecs = append(resp.ResourceSpecs, &spec)
|
||||
}
|
||||
return resp, nil
|
||||
|
|
2
go.mod
2
go.mod
|
@ -21,7 +21,7 @@ require (
|
|||
github.com/shopspring/decimal v1.3.1
|
||||
github.com/zeromicro/go-queue v1.1.8
|
||||
github.com/zeromicro/go-zero v1.5.5
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1
|
||||
|
|
4
go.sum
4
go.sum
|
@ -1033,8 +1033,8 @@ github.com/zeromicro/go-zero v1.4.3/go.mod h1:UmDjuW7LHd9j7+nnnPBcXF0HLNmjJw6OjH
|
|||
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
|
||||
github.com/zeromicro/go-zero v1.5.3 h1:9poyd+raeL7gSMUu6P19N7bssTppieR2j7Oos2j1yFQ=
|
||||
github.com/zeromicro/go-zero v1.5.3/go.mod h1:dmoBpgJTxt9KWmgrNGpv06XxZRPXMakrxUVgROFAR3g=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d h1:CY4pWM8JVRXBtD5CdVZC0fe4xUxjHmQegdwpHBaOBes=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31 h1:SppjTZvObJgqliPk1wSeuezQu1k/tMGcyVaMVEaDIUU=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835 h1:WDCPqD8IrepGJXankkpG14Ny6inh9AldB0RX9WWa+ck=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef h1:s7JfXjka2MhGaDjKMJ57fj0k3XuDB6w+UlYHFLyJlUY=
|
||||
|
|
Loading…
Reference in New Issue