存算联动添加曙光Hpc

This commit is contained in:
tzwang 2023-11-23 10:50:32 +08:00
parent 9905536031
commit e6103a5cd0
4 changed files with 71 additions and 22 deletions

View File

@ -0,0 +1,48 @@
package storeLink
import (
	"context"
	"errors"

	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
)
// ShuguangHpc bundles a context, the service context and a StorelinkCenter
// record; the methods declared on it in this file are still unimplemented stubs.
// NOTE(review): presumably meant to satisfy the Linkage interface like the
// other storeLink platform types — confirm.
type ShuguangHpc struct {
// ctx is the context.Context held by this value.
ctx context.Context
// svcCtx points at the shared service context.
svcCtx *svc.ServiceContext
// participant is the StorelinkCenter record this value refers to.
participant *models.StorelinkCenter
}
// UploadImage is not implemented for ShuguangHpc yet.
//
// The path argument is currently ignored. The method always returns a nil
// result and a non-nil error; it reports the missing implementation through
// the error return instead of panicking, so a caller that reaches this stub
// can handle it like any other failure.
func (s ShuguangHpc) UploadImage(path string) (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: UploadImage not implemented")
}
// DeleteImage is not implemented for ShuguangHpc yet.
//
// The imageId argument is currently ignored. The method always returns a nil
// result and a non-nil error; it reports the missing implementation through
// the error return instead of panicking, so a caller that reaches this stub
// can handle it like any other failure.
func (s ShuguangHpc) DeleteImage(imageId string) (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: DeleteImage not implemented")
}
// QueryImageList is not implemented for ShuguangHpc yet.
//
// The method always returns a nil result and a non-nil error; it reports the
// missing implementation through the error return instead of panicking, so a
// caller that reaches this stub can handle it like any other failure.
func (s ShuguangHpc) QueryImageList() (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: QueryImageList not implemented")
}
// SubmitTask is not implemented for ShuguangHpc yet.
//
// All arguments (imageId, cmd, envs, params, resourceId) are currently
// ignored. The method always returns a nil result and a non-nil error; it
// reports the missing implementation through the error return instead of
// panicking, so a caller that reaches this stub can handle it like any other
// failure.
func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: SubmitTask not implemented")
}
// QueryTask is not implemented for ShuguangHpc yet.
//
// The taskId argument is currently ignored. The method always returns a nil
// result and a non-nil error; it reports the missing implementation through
// the error return instead of panicking, so a caller that reaches this stub
// can handle it like any other failure.
func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: QueryTask not implemented")
}
// QuerySpecs is not implemented for ShuguangHpc yet.
//
// The method always returns a nil result and a non-nil error; it reports the
// missing implementation through the error return instead of panicking, so a
// caller that reaches this stub can handle it like any other failure.
func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: QuerySpecs not implemented")
}
// DeleteTask is not implemented for ShuguangHpc yet.
//
// The taskId argument is currently ignored. The method always returns a nil
// result and a non-nil error; it reports the missing implementation through
// the error return instead of panicking, so a caller that reaches this stub
// can handle it like any other failure.
func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
	// TODO: implement me
	return nil, errors.New("shuguang hpc: DeleteTask not implemented")
}

View File

@ -37,6 +37,14 @@ const (
WORKER_GPU_NUMBER = 1
SHUGUANGAI_CUSTOM_RESOURCE_ID = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi"
SHUGUANGAI_CUSTOM_RESOURCE_NAME = "1*DCU, CPU:5, 内存:10GB"
DCU = "dcu"
PYTORCH = "Pytorch"
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
)
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangAi {

View File

@ -38,24 +38,16 @@ type Linkage interface {
}
// String constants: a parameter prefix, separator characters, and per-platform
// type codes and display names (Octopus, Modelarts, ShuguangAi).
// NOTE(review): this listing is taken from a diff view, so the entries appear
// twice — the first run looks like the pre-change block and the second run
// (from the repeated PY_PARAM_PREFIX onward) like the post-change block, with
// DCU through PythonCodePath no longer declared here. Confirm against the file.
const (
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
COMMA = ","
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
SHUGUANGAI = "ShuguangAi"
DCU = "dcu"
PYTORCH = "Pytorch"
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
COMMA = ","
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
SHUGUANGAI = "ShuguangAi"
)
var (

View File

@ -139,15 +139,16 @@ func (s *scheduler) SaveToDb() error {
// obtainParamsforStrategy builds the task and the provider list that a
// scheduling strategy works on.
//
// It asks the schedule service for the task and its candidate providers,
// passes the provider list (by pointer) through filterAvailableProviders, and
// returns the task together with whatever providers remain.
//
// It returns an error when the schedule service yields no providers, when
// filterAvailableProviders fails, or when no provider is left after filtering.
func (s *scheduler) obtainParamsforStrategy() (*algo.Task, []*algo.Provider, error) {
	task, providers := s.scheduleService.genTaskAndProviders(s.task, s.dbEngin)
	if len(providers) < 1 {
		// Nothing came back from the schedule service ("failed to get clusters").
		return nil, nil, errors.New("获取集群失败")
	}

	// Keep only the available clusters; the list is handed over by pointer so
	// the filter can replace it.
	if filterErr := s.filterAvailableProviders(&providers); filterErr != nil {
		return nil, nil, filterErr
	}

	// Every candidate was filtered out ("could not get an available cluster").
	if len(providers) < 1 {
		return nil, nil, errors.New("未能获取可用集群")
	}

	return task, providers, nil
}