forked from JointCloud/pcm-coordinator
存算联动修改
This commit is contained in:
parent
0c40e5f84c
commit
3e024424ac
|
@ -17,6 +17,10 @@ type GetAISpecsLogic struct {
|
|||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
// Wzhdtest holds the literal "wzhdtest".
// NOTE(review): presumably a resource-group name, since the same literal is
// assigned to RESOURCE_GROUP in the storeLink constants of this change; confirm
// against the caller before relying on that meaning.
const (
	Wzhdtest = "wzhdtest"
)
|
||||
|
||||
func NewGetAISpecsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAISpecsLogic {
|
||||
return &GetAISpecsLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
|
@ -27,32 +31,30 @@ func NewGetAISpecsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAIS
|
|||
|
||||
func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *types.GetResourceSpecsResp, err error) {
|
||||
var res types.GetResourceSpecsResp
|
||||
participants := storeLink.GetParticipants(l.svcCtx.DbEngin)
|
||||
|
||||
for _, participant := range participants {
|
||||
|
||||
switch participant.Type {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
req := &octopus.GetResourceSpecsReq{
|
||||
Platform: participant.Name,
|
||||
ResourcePool: "common-pool",
|
||||
}
|
||||
specs, err := l.svcCtx.OctopusRpc.GetResourceSpecs(l.ctx, req)
|
||||
if err != nil || !specs.Success {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, spec := range specs.TrainResourceSpecs {
|
||||
var respec types.ResourceSpecSl
|
||||
respec.SpecId = spec.Id
|
||||
respec.SpecName = spec.Name
|
||||
respec.ParticipantId = strconv.FormatInt(participant.Id, 10)
|
||||
respec.ParticipantName = participant.Name
|
||||
respec.SpecPrice = spec.Price
|
||||
res.ResourceSpecs = append(res.ResourceSpecs, respec)
|
||||
}
|
||||
participant := storeLink.GetParticipantById(req.PartId, l.svcCtx.DbEngin)
|
||||
|
||||
switch participant.Type {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
req := &octopus.GetResourceSpecsReq{
|
||||
Platform: participant.Name,
|
||||
ResourcePool: "common-pool",
|
||||
}
|
||||
specs, err := l.svcCtx.OctopusRpc.GetResourceSpecs(l.ctx, req)
|
||||
if err != nil || !specs.Success {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, spec := range specs.TrainResourceSpecs {
|
||||
var respec types.ResourceSpecSl
|
||||
respec.SpecId = spec.Id
|
||||
respec.SpecName = spec.Name
|
||||
respec.ParticipantId = strconv.FormatInt(participant.Id, 10)
|
||||
respec.ParticipantName = participant.Name
|
||||
respec.SpecPrice = spec.Price
|
||||
res.ResourceSpecs = append(res.ResourceSpecs, &respec)
|
||||
}
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
|
||||
}
|
||||
|
||||
if len(res.ResourceSpecs) == 0 {
|
||||
|
|
|
@ -38,7 +38,7 @@ func (l *GetParticipantsLogic) GetParticipants(req *types.GetParticipantsReq) (r
|
|||
p.ParticipantId = strconv.FormatInt(participant.Id, 10)
|
||||
p.ParticipantType = storeLink.AITYPE[participant.Type]
|
||||
p.ParticipantName = participant.Name
|
||||
res.Participants = append(res.Participants, p)
|
||||
res.Participants = append(res.Participants, &p)
|
||||
}
|
||||
|
||||
res.Success = true
|
||||
|
|
|
@ -3,7 +3,10 @@ package storeLink
|
|||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-participant-ac/hpcAC"
|
||||
"time"
|
||||
)
|
||||
|
||||
type ShuguangAi struct {
|
||||
|
@ -12,10 +15,14 @@ type ShuguangAi struct {
|
|||
}
|
||||
|
||||
const (
|
||||
DCU = "dcu"
|
||||
PYTORCH = "Pytorch"
|
||||
TENSORFLOW = "Tensorflow"
|
||||
Wzhdtest = "wzhdtest"
|
||||
DCU = "dcu"
|
||||
PYTORCH = "Pytorch"
|
||||
TASK_PYTORCH_PREFIX = "PytorchTask"
|
||||
TENSORFLOW = "Tensorflow"
|
||||
RESOURCE_GROUP = "wzhdtest"
|
||||
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
|
||||
TimeoutLimit = "10:00:00"
|
||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||
)
|
||||
|
||||
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext) *ShuguangAi {
|
||||
|
@ -53,7 +60,34 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) {
|
|||
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, resourceId string) (interface{}, error) {
|
||||
// shuguangAi提交任务
|
||||
|
||||
req := &hpcAC.SubmitPytorchTaskReq{}
|
||||
//根据imageId获取imagePath, version
|
||||
imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId}
|
||||
imageResp, err := s.svcCtx.ACRpc.GetImageAiById(s.ctx, imageReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dateStr := timeutils.UnixTimeToString(time.Now().Unix())
|
||||
|
||||
req := &hpcAC.SubmitPytorchTaskReq{
|
||||
Params: &hpcAC.SubmitPytorchTaskParams{
|
||||
TaskName: TASK_PYTORCH_PREFIX + "_" + utils.RandomString(7) + dateStr,
|
||||
WorkPath: WorkPath,
|
||||
IsDistributed: false,
|
||||
IsHvd: false,
|
||||
//Env:
|
||||
AcceleratorType: DCU,
|
||||
Version: imageResp.Image.Version,
|
||||
ImagePath: imageResp.Image.Path,
|
||||
WorkerNumber: 1,
|
||||
WorkerCpuNumber: "1",
|
||||
WorkerGpuNumber: 1,
|
||||
WorkerRamSize: 1024,
|
||||
ResourceGroup: RESOURCE_GROUP,
|
||||
TimeoutLimit: TimeoutLimit,
|
||||
PythonCodePath: PythonCodePath,
|
||||
},
|
||||
}
|
||||
resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -23,11 +23,12 @@ type Linkage interface {
|
|||
}
|
||||
|
||||
const (
|
||||
COMMA = ","
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
OCTOPUS = "Octopus"
|
||||
MODELARTS = "Modelarts"
|
||||
COMMA = ","
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
TYPE_SHUGUANGAI = "3"
|
||||
OCTOPUS = "Octopus"
|
||||
MODELARTS = "Modelarts"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -77,6 +78,17 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
}
|
||||
|
||||
return resp, nil
|
||||
case *octopus.DeleteImageResp:
|
||||
var resp types.DeleteLinkImageResp
|
||||
inresp := (interface{})(in).(*octopus.DeleteImageResp)
|
||||
resp.Success = inresp.Success
|
||||
if !resp.Success {
|
||||
resp.ErrorMsg = inresp.Error.Message
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
||||
case *octopus.GetUserImageListResp:
|
||||
var resp types.GetLinkImageListResp
|
||||
inresp := (interface{})(in).(*octopus.GetUserImageListResp)
|
||||
|
@ -92,7 +104,7 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
image.ImageId = v.Image.Id
|
||||
image.ImageName = v.Image.ImageName
|
||||
image.ImageStatus = OctImgStatus[v.Image.ImageStatus]
|
||||
resp.Images = append(resp.Images, image)
|
||||
resp.Images = append(resp.Images, &image)
|
||||
}
|
||||
return resp, nil
|
||||
case *modelarts.ListReposDetailsResp:
|
||||
|
@ -111,8 +123,8 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
var image types.ImageSl
|
||||
image.ImageId = v.Namespace + "/" + v.Name + ":" + r
|
||||
image.ImageName = v.Name
|
||||
image.ImageStatus = "succeed"
|
||||
resp.Images = append(resp.Images, image)
|
||||
image.ImageStatus = "created"
|
||||
resp.Images = append(resp.Images, &image)
|
||||
}
|
||||
}
|
||||
return resp, nil
|
||||
|
@ -122,20 +134,19 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
|
||||
if inresp.Code == "0" {
|
||||
resp.Success = true
|
||||
for _, img := range inresp.Data {
|
||||
var image types.ImageSl
|
||||
image.ImageId = img.ImageId
|
||||
image.ImageName = img.Name
|
||||
image.ImageStatus = "created"
|
||||
resp.Images = append(resp.Images, &image)
|
||||
}
|
||||
} else {
|
||||
resp.Success = false
|
||||
resp.ErrorMsg = inresp.Msg
|
||||
resp.Images = nil
|
||||
}
|
||||
return resp, nil
|
||||
case *octopus.DeleteImageResp:
|
||||
var resp types.DeleteLinkImageResp
|
||||
inresp := (interface{})(in).(*octopus.DeleteImageResp)
|
||||
resp.Success = inresp.Success
|
||||
if !resp.Success {
|
||||
resp.ErrorMsg = inresp.Error.Message
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
||||
case *octopus.CreateTrainJobResp:
|
||||
var resp types.SubmitLinkTaskResp
|
||||
|
@ -168,8 +179,10 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
|
||||
if inresp.Code == "0" {
|
||||
resp.Success = true
|
||||
resp.TaskId = inresp.Data
|
||||
} else {
|
||||
resp.Success = false
|
||||
resp.ErrorMsg = inresp.Msg
|
||||
}
|
||||
return resp, nil
|
||||
case *octopus.GetTrainJobResp:
|
||||
|
@ -217,6 +230,7 @@ func ConvertType[T any](in *T) (interface{}, error) {
|
|||
} else {
|
||||
resp.Success = false
|
||||
resp.ErrorMsg = inresp.Msg
|
||||
resp.Task = nil
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
|
2
go.mod
2
go.mod
|
@ -21,7 +21,7 @@ require (
|
|||
github.com/shopspring/decimal v1.3.1
|
||||
github.com/zeromicro/go-queue v1.1.8
|
||||
github.com/zeromicro/go-zero v1.5.5
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1
|
||||
|
|
4
go.sum
4
go.sum
|
@ -1033,8 +1033,8 @@ github.com/zeromicro/go-zero v1.4.3/go.mod h1:UmDjuW7LHd9j7+nnnPBcXF0HLNmjJw6OjH
|
|||
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
|
||||
github.com/zeromicro/go-zero v1.5.3 h1:9poyd+raeL7gSMUu6P19N7bssTppieR2j7Oos2j1yFQ=
|
||||
github.com/zeromicro/go-zero v1.5.3/go.mod h1:dmoBpgJTxt9KWmgrNGpv06XxZRPXMakrxUVgROFAR3g=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141 h1:AN+syg6txsOn5YIuY59mjGz93dPx5vpOLBo0eijO5oc=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d h1:CY4pWM8JVRXBtD5CdVZC0fe4xUxjHmQegdwpHBaOBes=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835 h1:WDCPqD8IrepGJXankkpG14Ny6inh9AldB0RX9WWa+ck=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef h1:s7JfXjka2MhGaDjKMJ57fj0k3XuDB6w+UlYHFLyJlUY=
|
||||
|
|
|
@ -55,3 +55,9 @@ func StringToUnixTime(str string) int64 {
|
|||
}
|
||||
return dt.Unix()
|
||||
}
|
||||
|
||||
// UnixTimeToString renders the Unix timestamp ut (whole seconds since the
// epoch) as text in the layout "2006-01-02 15:04:05", i.e.
// "YYYY-MM-DD HH:MM:SS".
//
// time.Unix yields a Time in the process's local time zone, so the wall-clock
// value in the result depends on that zone.
func UnixTimeToString(ut int64) string {
	return time.Unix(ut, 0).Format("2006-01-02 15:04:05")
}
|
||||
|
|
Loading…
Reference in New Issue