存算联动修改

This commit is contained in:
tzwang 2023-10-26 17:10:12 +08:00
parent 0c40e5f84c
commit 3e024424ac
7 changed files with 107 additions and 51 deletions

View File

@ -17,6 +17,10 @@ type GetAISpecsLogic struct {
svcCtx *svc.ServiceContext
}
// Wzhdtest is the literal name "wzhdtest".
// NOTE(review): no use of this constant is visible in this hunk — confirm
// what it identifies (presumably a resource group) and whether it is needed.
const Wzhdtest = "wzhdtest"
func NewGetAISpecsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAISpecsLogic {
return &GetAISpecsLogic{
Logger: logx.WithContext(ctx),
@ -27,32 +31,30 @@ func NewGetAISpecsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAIS
func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *types.GetResourceSpecsResp, err error) {
var res types.GetResourceSpecsResp
participants := storeLink.GetParticipants(l.svcCtx.DbEngin)
for _, participant := range participants {
switch participant.Type {
case storeLink.TYPE_OCTOPUS:
req := &octopus.GetResourceSpecsReq{
Platform: participant.Name,
ResourcePool: "common-pool",
}
specs, err := l.svcCtx.OctopusRpc.GetResourceSpecs(l.ctx, req)
if err != nil || !specs.Success {
continue
}
for _, spec := range specs.TrainResourceSpecs {
var respec types.ResourceSpecSl
respec.SpecId = spec.Id
respec.SpecName = spec.Name
respec.ParticipantId = strconv.FormatInt(participant.Id, 10)
respec.ParticipantName = participant.Name
respec.SpecPrice = spec.Price
res.ResourceSpecs = append(res.ResourceSpecs, respec)
}
participant := storeLink.GetParticipantById(req.PartId, l.svcCtx.DbEngin)
switch participant.Type {
case storeLink.TYPE_OCTOPUS:
req := &octopus.GetResourceSpecsReq{
Platform: participant.Name,
ResourcePool: "common-pool",
}
specs, err := l.svcCtx.OctopusRpc.GetResourceSpecs(l.ctx, req)
if err != nil || !specs.Success {
return nil, err
}
for _, spec := range specs.TrainResourceSpecs {
var respec types.ResourceSpecSl
respec.SpecId = spec.Id
respec.SpecName = spec.Name
respec.ParticipantId = strconv.FormatInt(participant.Id, 10)
respec.ParticipantName = participant.Name
respec.SpecPrice = spec.Price
res.ResourceSpecs = append(res.ResourceSpecs, &respec)
}
case storeLink.TYPE_SHUGUANGAI:
}
if len(res.ResourceSpecs) == 0 {

View File

@ -38,7 +38,7 @@ func (l *GetParticipantsLogic) GetParticipants(req *types.GetParticipantsReq) (r
p.ParticipantId = strconv.FormatInt(participant.Id, 10)
p.ParticipantType = storeLink.AITYPE[participant.Type]
p.ParticipantName = participant.Name
res.Participants = append(res.Participants, p)
res.Participants = append(res.Participants, &p)
}
res.Success = true

View File

@ -3,7 +3,10 @@ package storeLink
import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
"gitlink.org.cn/jcce-pcm/pcm-participant-ac/hpcAC"
"time"
)
type ShuguangAi struct {
@ -12,10 +15,14 @@ type ShuguangAi struct {
}
// Package-level string constants for the ShuguangAi link; several of them are
// passed to hpcAC.SubmitPytorchTaskParams in SubmitTask.
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped, so DCU, PYTORCH and TENSORFLOW appear twice (pre- and post-change
// lines interleaved) — confirm against the real file before editing.
const (
DCU = "dcu"
PYTORCH = "Pytorch"
TENSORFLOW = "Tensorflow"
// Wzhdtest has the same value as RESOURCE_GROUP below.
Wzhdtest = "wzhdtest"
// DCU is passed as AcceleratorType in SubmitTask.
DCU = "dcu"
PYTORCH = "Pytorch"
// TASK_PYTORCH_PREFIX is the leading part of the TaskName built in SubmitTask.
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
// RESOURCE_GROUP is passed as ResourceGroup in SubmitTask.
RESOURCE_GROUP = "wzhdtest"
// WorkPath is passed as WorkPath in SubmitTask.
// NOTE(review): hard-coded absolute path, presumably specific to one
// account/environment — confirm.
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
// TimeoutLimit is passed as TimeoutLimit in SubmitTask.
// NOTE(review): presumably HH:MM:SS — confirm against the hpcAC API.
TimeoutLimit = "10:00:00"
// PythonCodePath is passed as PythonCodePath in SubmitTask; it is a fixed
// path, so the task script does not depend on the SubmitTask arguments.
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
)
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext) *ShuguangAi {
@ -53,7 +60,34 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) {
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, resourceId string) (interface{}, error) {
// shuguangAi提交任务
req := &hpcAC.SubmitPytorchTaskReq{}
//根据imageId获取imagePath, version
imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId}
imageResp, err := s.svcCtx.ACRpc.GetImageAiById(s.ctx, imageReq)
if err != nil {
return nil, err
}
dateStr := timeutils.UnixTimeToString(time.Now().Unix())
req := &hpcAC.SubmitPytorchTaskReq{
Params: &hpcAC.SubmitPytorchTaskParams{
TaskName: TASK_PYTORCH_PREFIX + "_" + utils.RandomString(7) + dateStr,
WorkPath: WorkPath,
IsDistributed: false,
IsHvd: false,
//Env:
AcceleratorType: DCU,
Version: imageResp.Image.Version,
ImagePath: imageResp.Image.Path,
WorkerNumber: 1,
WorkerCpuNumber: "1",
WorkerGpuNumber: 1,
WorkerRamSize: 1024,
ResourceGroup: RESOURCE_GROUP,
TimeoutLimit: TimeoutLimit,
PythonCodePath: PythonCodePath,
},
}
resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req)
if err != nil {
return nil, err

View File

@ -23,11 +23,12 @@ type Linkage interface {
}
// Package-level string constants for the storeLink package.
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped, so most names appear twice (pre- and post-change lines
// interleaved) — confirm against the real file before editing.
const (
COMMA = ","
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
COMMA = ","
// The TYPE_* values are compared against participant.Type (see the switch in
// GetAISpecs).
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"
// NOTE(review): OCTOPUS and MODELARTS look like display names for the
// participant types; their use is not visible here — confirm.
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
)
var (
@ -77,6 +78,17 @@ func ConvertType[T any](in *T) (interface{}, error) {
}
return resp, nil
case *octopus.DeleteImageResp:
var resp types.DeleteLinkImageResp
inresp := (interface{})(in).(*octopus.DeleteImageResp)
resp.Success = inresp.Success
if !resp.Success {
resp.ErrorMsg = inresp.Error.Message
return resp, nil
}
return resp, nil
case *octopus.GetUserImageListResp:
var resp types.GetLinkImageListResp
inresp := (interface{})(in).(*octopus.GetUserImageListResp)
@ -92,7 +104,7 @@ func ConvertType[T any](in *T) (interface{}, error) {
image.ImageId = v.Image.Id
image.ImageName = v.Image.ImageName
image.ImageStatus = OctImgStatus[v.Image.ImageStatus]
resp.Images = append(resp.Images, image)
resp.Images = append(resp.Images, &image)
}
return resp, nil
case *modelarts.ListReposDetailsResp:
@ -111,8 +123,8 @@ func ConvertType[T any](in *T) (interface{}, error) {
var image types.ImageSl
image.ImageId = v.Namespace + "/" + v.Name + ":" + r
image.ImageName = v.Name
image.ImageStatus = "succeed"
resp.Images = append(resp.Images, image)
image.ImageStatus = "created"
resp.Images = append(resp.Images, &image)
}
}
return resp, nil
@ -122,20 +134,19 @@ func ConvertType[T any](in *T) (interface{}, error) {
if inresp.Code == "0" {
resp.Success = true
for _, img := range inresp.Data {
var image types.ImageSl
image.ImageId = img.ImageId
image.ImageName = img.Name
image.ImageStatus = "created"
resp.Images = append(resp.Images, &image)
}
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg
resp.Images = nil
}
return resp, nil
case *octopus.DeleteImageResp:
var resp types.DeleteLinkImageResp
inresp := (interface{})(in).(*octopus.DeleteImageResp)
resp.Success = inresp.Success
if !resp.Success {
resp.ErrorMsg = inresp.Error.Message
return resp, nil
}
return resp, nil
case *octopus.CreateTrainJobResp:
var resp types.SubmitLinkTaskResp
@ -168,8 +179,10 @@ func ConvertType[T any](in *T) (interface{}, error) {
if inresp.Code == "0" {
resp.Success = true
resp.TaskId = inresp.Data
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg
}
return resp, nil
case *octopus.GetTrainJobResp:
@ -217,6 +230,7 @@ func ConvertType[T any](in *T) (interface{}, error) {
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg
resp.Task = nil
}
return resp, nil

2
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/shopspring/decimal v1.3.1
github.com/zeromicro/go-queue v1.1.8
github.com/zeromicro/go-zero v1.5.5
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef
gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1

4
go.sum
View File

@ -1033,8 +1033,8 @@ github.com/zeromicro/go-zero v1.4.3/go.mod h1:UmDjuW7LHd9j7+nnnPBcXF0HLNmjJw6OjH
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
github.com/zeromicro/go-zero v1.5.3 h1:9poyd+raeL7gSMUu6P19N7bssTppieR2j7Oos2j1yFQ=
github.com/zeromicro/go-zero v1.5.3/go.mod h1:dmoBpgJTxt9KWmgrNGpv06XxZRPXMakrxUVgROFAR3g=
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141 h1:AN+syg6txsOn5YIuY59mjGz93dPx5vpOLBo0eijO5oc=
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231025091007-71bc3e55c141/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d h1:CY4pWM8JVRXBtD5CdVZC0fe4xUxjHmQegdwpHBaOBes=
gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231026084523-f76f3da5525d/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835 h1:WDCPqD8IrepGJXankkpG14Ny6inh9AldB0RX9WWa+ck=
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef h1:s7JfXjka2MhGaDjKMJ57fj0k3XuDB6w+UlYHFLyJlUY=

View File

@ -55,3 +55,9 @@ func StringToUnixTime(str string) int64 {
}
return dt.Unix()
}
// UnixTimeToString renders a Unix timestamp given in whole seconds as a
// "YYYY-MM-DD hh:mm:ss" string. The instant is interpreted in the process's
// local time zone, because time.Unix returns a local Time.
func UnixTimeToString(ut int64) string {
	const layout = "2006-01-02 15:04:05"
	return time.Unix(ut, 0).Format(layout)
}