forked from JointCloud/pcm-coordinator
修改超算任务提交 返回jobId
This commit is contained in:
parent
b7c289a790
commit
b70c45167c
|
@ -112,7 +112,8 @@ type HpcInfo struct {
|
||||||
StdOutFile string `json:"stdOutFile"` // 工作路径/std.err.%j
|
StdOutFile string `json:"stdOutFile"` // 工作路径/std.err.%j
|
||||||
StdErrFile string `json:"stdErrFile"` // 工作路径/std.err.%j
|
StdErrFile string `json:"stdErrFile"` // 工作路径/std.err.%j
|
||||||
StdInput string `json:"stdInput"`
|
StdInput string `json:"stdInput"`
|
||||||
Environment string `json:"environment"`
|
EnvPath string `json:"envPath"`
|
||||||
|
EnvLdPath string `json:"envLdPath"`
|
||||||
DeletedFlag int64 `json:"deletedFlag"` // 是否删除(0-否,1-是)
|
DeletedFlag int64 `json:"deletedFlag"` // 是否删除(0-否,1-是)
|
||||||
CreatedBy int64 `json:"createdBy"` // 创建人
|
CreatedBy int64 `json:"createdBy"` // 创建人
|
||||||
CreatedTime time.Time `json:"createdTime"` // 创建时间
|
CreatedTime time.Time `json:"createdTime"` // 创建时间
|
||||||
|
|
|
@ -902,7 +902,7 @@ type (
|
||||||
}
|
}
|
||||||
ClusterInfo {
|
ClusterInfo {
|
||||||
Id string `json:"id,omitempty" db:"id"`
|
Id string `json:"id,omitempty" db:"id"`
|
||||||
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
|
AdapterId int64 `json:"adapterId,omitempty" db:"adapter_id"`
|
||||||
Name string `json:"name,omitempty" db:"name"`
|
Name string `json:"name,omitempty" db:"name"`
|
||||||
Nickname string `json:"nickname,omitempty" db:"nickname"`
|
Nickname string `json:"nickname,omitempty" db:"nickname"`
|
||||||
Description string `json:"description,omitempty" db:"description"`
|
Description string `json:"description,omitempty" db:"description"`
|
||||||
|
@ -923,7 +923,8 @@ type (
|
||||||
RegionDict string `json:"regionDict,omitempty" db:"region_dict"`
|
RegionDict string `json:"regionDict,omitempty" db:"region_dict"`
|
||||||
Location string `json:"location,omitempty" db:"location"`
|
Location string `json:"location,omitempty" db:"location"`
|
||||||
CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
|
CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
|
||||||
Environment string `json:"environment,omitempty" db:"environment"`
|
EnvPath string `json:"envPath,omitempty" db:"env_path"`
|
||||||
|
EnvLdPath string `json:"envLdPath,omitempty" db:"env_ld_path"`
|
||||||
WorkDir string `json:"workDir,omitempty" db:"work_dir"`
|
WorkDir string `json:"workDir,omitempty" db:"work_dir"`
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -33,9 +33,8 @@ type (
|
||||||
}
|
}
|
||||||
|
|
||||||
commitHpcTaskResp {
|
commitHpcTaskResp {
|
||||||
TaskId int64 `json:"taskId"`
|
ClusterId int64 `json:"clusterId"`
|
||||||
Code int32 `json:"code"`
|
JobId string `json:"jobId"`
|
||||||
Msg string `json:"msg"`
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ func (l *GetAdapterRelationLogic) GetAdapterRelation(req *types.AdapterRelationQ
|
||||||
cr = &types.ClusterRelationInfo{}
|
cr = &types.ClusterRelationInfo{}
|
||||||
utils.Convert(&v, &cr)
|
utils.Convert(&v, &cr)
|
||||||
cr.CId = c.Id
|
cr.CId = c.Id
|
||||||
cr.CAdapterId = c.AdapterId
|
cr.CAdapterId = string(c.AdapterId)
|
||||||
cr.CName = c.Name
|
cr.CName = c.Name
|
||||||
cr.CNickname = c.Nickname
|
cr.CNickname = c.Nickname
|
||||||
cr.CDescription = c.Description
|
cr.CDescription = c.Description
|
||||||
|
|
|
@ -49,7 +49,6 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
|
||||||
}
|
}
|
||||||
var clusterType string
|
var clusterType string
|
||||||
l.svcCtx.DbEngin.Raw("SELECT label FROM `t_cluster` where id = ? ", hpcInfo.ClusterId).Scan(&clusterType)
|
l.svcCtx.DbEngin.Raw("SELECT label FROM `t_cluster` where id = ? ", hpcInfo.ClusterId).Scan(&clusterType)
|
||||||
utils.Convert(hpcInfo.Environment, &resp.HpcInfoList[i].Environment)
|
|
||||||
resp.HpcInfoList[i].ClusterType = clusterType
|
resp.HpcInfoList[i].ClusterType = clusterType
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,6 @@ func NewCancelJobLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CancelJ
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *CancelJobLogic) CancelJob(req *types.CancelJobReq) error {
|
func (l *CancelJobLogic) CancelJob(req *types.CancelJobReq) error {
|
||||||
// todo: add your logic here and delete this line
|
|
||||||
var clusterInfo *types.ClusterInfo
|
var clusterInfo *types.ClusterInfo
|
||||||
tx := l.svcCtx.DbEngin.Raw("select * from t_cluster where id = ?", req.ClusterId).Scan(&clusterInfo)
|
tx := l.svcCtx.DbEngin.Raw("select * from t_cluster where id = ?", req.ClusterId).Scan(&clusterInfo)
|
||||||
if tx.Error != nil {
|
if tx.Error != nil {
|
||||||
|
|
|
@ -5,6 +5,8 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-hpc/slurm"
|
||||||
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||||
|
@ -33,9 +35,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
||||||
l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
|
l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
|
||||||
|
|
||||||
if len(clusterInfo.Id) == 0 {
|
if len(clusterInfo.Id) == 0 {
|
||||||
resp.Code = 400
|
return resp, errors.New("cluster not found")
|
||||||
resp.Msg = "no cluster found"
|
|
||||||
return resp, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 构建主任务结构体
|
// 构建主任务结构体
|
||||||
|
@ -43,7 +43,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
||||||
Name: req.Name,
|
Name: req.Name,
|
||||||
Description: req.Description,
|
Description: req.Description,
|
||||||
CommitTime: time.Now(),
|
CommitTime: time.Now(),
|
||||||
Status: "Saved",
|
Status: "Running",
|
||||||
AdapterTypeDict: "2",
|
AdapterTypeDict: "2",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,22 +53,18 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
||||||
return nil, tx.Error
|
return nil, tx.Error
|
||||||
}
|
}
|
||||||
|
|
||||||
var clusterName string
|
|
||||||
var adapterId int64
|
|
||||||
var adapterName string
|
var adapterName string
|
||||||
l.svcCtx.DbEngin.Raw("SELECT nickname FROM `t_cluster` where id = ?", req.ClusterId).Scan(&clusterName)
|
l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterName)
|
||||||
l.svcCtx.DbEngin.Raw("SELECT adapter_id FROM `t_cluster` where id = ?", req.ClusterId).Scan(&adapterId)
|
|
||||||
l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", adapterId).Scan(&adapterName)
|
|
||||||
if len(adapterName) == 0 || adapterName == "" {
|
if len(adapterName) == 0 || adapterName == "" {
|
||||||
return nil, errors.New("no corresponding adapter found")
|
return nil, errors.New("no corresponding adapter found")
|
||||||
}
|
}
|
||||||
|
|
||||||
hpcInfo := models.TaskHpc{
|
hpcInfo := models.TaskHpc{
|
||||||
TaskId: taskModel.Id,
|
TaskId: taskModel.Id,
|
||||||
AdapterId: adapterId,
|
AdapterId: clusterInfo.AdapterId,
|
||||||
AdapterName: adapterName,
|
AdapterName: adapterName,
|
||||||
ClusterId: req.ClusterId,
|
ClusterId: req.ClusterId,
|
||||||
ClusterName: clusterName,
|
ClusterName: clusterInfo.Name,
|
||||||
Name: taskModel.Name,
|
Name: taskModel.Name,
|
||||||
CmdScript: req.CmdScript,
|
CmdScript: req.CmdScript,
|
||||||
StartTime: time.Now().String(),
|
StartTime: time.Now().String(),
|
||||||
|
@ -83,21 +79,26 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
||||||
Account: clusterInfo.Username,
|
Account: clusterInfo.Username,
|
||||||
StdInput: req.StdInput,
|
StdInput: req.StdInput,
|
||||||
Partition: req.Partition,
|
Partition: req.Partition,
|
||||||
Environment: clusterInfo.Environment,
|
|
||||||
CreatedTime: time.Now(),
|
CreatedTime: time.Now(),
|
||||||
UpdatedTime: time.Now(),
|
UpdatedTime: time.Now(),
|
||||||
Status: "Saved",
|
Status: "Running",
|
||||||
}
|
}
|
||||||
hpcInfo.WorkDir = clusterInfo.WorkDir + req.WorkDir
|
hpcInfo.WorkDir = clusterInfo.WorkDir + req.WorkDir
|
||||||
tx = l.svcCtx.DbEngin.Create(&hpcInfo)
|
tx = l.svcCtx.DbEngin.Create(&hpcInfo)
|
||||||
if tx.Error != nil {
|
if tx.Error != nil {
|
||||||
return nil, tx.Error
|
return nil, tx.Error
|
||||||
}
|
}
|
||||||
|
// 提交job到指定集群
|
||||||
|
jobId, err := submitJob(&hpcInfo, &clusterInfo)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// 保存操作记录
|
||||||
noticeInfo := clientCore.NoticeInfo{
|
noticeInfo := clientCore.NoticeInfo{
|
||||||
AdapterId: adapterId,
|
AdapterId: clusterInfo.AdapterId,
|
||||||
AdapterName: adapterName,
|
AdapterName: adapterName,
|
||||||
ClusterId: req.ClusterId,
|
ClusterId: req.ClusterId,
|
||||||
ClusterName: clusterName,
|
ClusterName: clusterInfo.Name,
|
||||||
NoticeType: "create",
|
NoticeType: "create",
|
||||||
TaskName: req.Name,
|
TaskName: req.Name,
|
||||||
Incident: "任务创建中",
|
Incident: "任务创建中",
|
||||||
|
@ -108,9 +109,46 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
||||||
logx.Errorf("Task creation failure, err: %v", result.Error)
|
logx.Errorf("Task creation failure, err: %v", result.Error)
|
||||||
}
|
}
|
||||||
resp = &types.CommitHpcTaskResp{
|
resp = &types.CommitHpcTaskResp{
|
||||||
Code: 200,
|
JobId: string(jobId),
|
||||||
Msg: "success",
|
|
||||||
TaskId: taskModel.Id,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func submitJob(hpcInfo *models.TaskHpc, clusterInfo *types.ClusterInfo) (string, error) {
|
||||||
|
|
||||||
|
client, err := slurm.NewClient(slurm.ClientOptions{
|
||||||
|
URL: clusterInfo.Server,
|
||||||
|
ClientVersion: clusterInfo.Version,
|
||||||
|
RestUserName: clusterInfo.Username,
|
||||||
|
Token: clusterInfo.Token})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
job, err := client.Job(slurm.JobOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// 封装请求参数
|
||||||
|
submitReq := slurm.JobOptions{
|
||||||
|
Script: hpcInfo.CmdScript,
|
||||||
|
Job: &slurm.JobProperties{
|
||||||
|
Account: hpcInfo.Account,
|
||||||
|
Name: hpcInfo.Name,
|
||||||
|
NTasks: 1,
|
||||||
|
CurrentWorkingDirectory: hpcInfo.WorkDir,
|
||||||
|
Partition: hpcInfo.Partition,
|
||||||
|
Environment: map[string]string{"PATH": clusterInfo.EnvPath,
|
||||||
|
"LD_LIBRARY_PATH": clusterInfo.EnvLdPath},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
submitReq.Job.StandardOutput = submitReq.Job.CurrentWorkingDirectory + "/%j.out"
|
||||||
|
submitReq.Job.StandardError = submitReq.Job.CurrentWorkingDirectory + "/%j.err"
|
||||||
|
|
||||||
|
jobResp, err := job.SubmitJob(submitReq)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
jobId := strconv.Itoa(jobResp.JobId)
|
||||||
|
return jobId, nil
|
||||||
|
}
|
||||||
|
|
|
@ -835,7 +835,7 @@ type ClusterCreateReq struct {
|
||||||
|
|
||||||
type ClusterInfo struct {
|
type ClusterInfo struct {
|
||||||
Id string `json:"id,omitempty" db:"id"`
|
Id string `json:"id,omitempty" db:"id"`
|
||||||
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
|
AdapterId int64 `json:"adapterId,omitempty" db:"adapter_id"`
|
||||||
Name string `json:"name,omitempty" db:"name"`
|
Name string `json:"name,omitempty" db:"name"`
|
||||||
Nickname string `json:"nickname,omitempty" db:"nickname"`
|
Nickname string `json:"nickname,omitempty" db:"nickname"`
|
||||||
Description string `json:"description,omitempty" db:"description"`
|
Description string `json:"description,omitempty" db:"description"`
|
||||||
|
@ -856,7 +856,8 @@ type ClusterInfo struct {
|
||||||
RegionDict string `json:"regionDict,omitempty" db:"region_dict"`
|
RegionDict string `json:"regionDict,omitempty" db:"region_dict"`
|
||||||
Location string `json:"location,omitempty" db:"location"`
|
Location string `json:"location,omitempty" db:"location"`
|
||||||
CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
|
CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
|
||||||
Environment string `json:"environment,omitempty" db:"environment"`
|
EnvPath string `json:"envPath,omitempty" db:"env_path"`
|
||||||
|
EnvLdPath string `json:"envLdPath,omitempty" db:"env_ld_path"`
|
||||||
WorkDir string `json:"workDir,omitempty" db:"work_dir"`
|
WorkDir string `json:"workDir,omitempty" db:"work_dir"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1298,9 +1299,8 @@ type CommitHpcTaskReq struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type CommitHpcTaskResp struct {
|
type CommitHpcTaskResp struct {
|
||||||
TaskId int64 `json:"taskId"`
|
ClusterId int64 `json:"clusterId"`
|
||||||
Code int32 `json:"code"`
|
JobId string `json:"jobId"`
|
||||||
Msg string `json:"msg"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type HpcOverViewReq struct {
|
type HpcOverViewReq struct {
|
||||||
|
|
|
@ -1,29 +0,0 @@
|
||||||
package models
|
|
||||||
|
|
||||||
import "github.com/zeromicro/go-zero/core/stores/sqlx"
|
|
||||||
|
|
||||||
var _ TaskHpcModel = (*customTaskHpcModel)(nil)
|
|
||||||
|
|
||||||
type (
|
|
||||||
// TaskHpcModel is an interface to be customized, add more methods here,
|
|
||||||
// and implement the added methods in customTaskHpcModel.
|
|
||||||
TaskHpcModel interface {
|
|
||||||
taskHpcModel
|
|
||||||
withSession(session sqlx.Session) TaskHpcModel
|
|
||||||
}
|
|
||||||
|
|
||||||
customTaskHpcModel struct {
|
|
||||||
*defaultTaskHpcModel
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewTaskHpcModel returns a model for the database table.
|
|
||||||
func NewTaskHpcModel(conn sqlx.SqlConn) TaskHpcModel {
|
|
||||||
return &customTaskHpcModel{
|
|
||||||
defaultTaskHpcModel: newTaskHpcModel(conn),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *customTaskHpcModel) withSession(session sqlx.Session) TaskHpcModel {
|
|
||||||
return NewTaskHpcModel(sqlx.NewSqlConnFromSession(session))
|
|
||||||
}
|
|
|
@ -72,12 +72,12 @@ type (
|
||||||
StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j
|
StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j
|
||||||
StdInput string `db:"std_input"`
|
StdInput string `db:"std_input"`
|
||||||
Partition string `db:"partition"`
|
Partition string `db:"partition"`
|
||||||
Environment string `db:"environment"`
|
|
||||||
DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是)
|
DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是)
|
||||||
CreatedBy int64 `db:"created_by"` // 创建人
|
CreatedBy int64 `db:"created_by"` // 创建人
|
||||||
CreatedTime time.Time `db:"created_time"` // 创建时间
|
CreatedTime time.Time `db:"created_time"` // 创建时间
|
||||||
UpdatedBy int64 `db:"updated_by"` // 更新人
|
UpdatedBy int64 `db:"updated_by"` // 更新人
|
||||||
UpdatedTime time.Time `db:"updated_time"` // 更新时间
|
UpdatedTime time.Time `db:"updated_time"` // 更新时间
|
||||||
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -108,18 +108,6 @@ func (m *defaultTaskHpcModel) FindOne(ctx context.Context, id int64) (*TaskHpc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *defaultTaskHpcModel) Insert(ctx context.Context, data *TaskHpc) (sql.Result, error) {
|
|
||||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskHpcRowsExpectAutoSet)
|
|
||||||
ret, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.JobId, data.ClusterId, data.Name, data.Status, data.CmdScript, data.StartTime, data.RunningTime, data.DerivedEs, data.Cluster, data.BlockId, data.AllocNodes, data.AllocCpu, data.CardCount, data.Version, data.Account, data.WorkDir, data.AssocId, data.ExitCode, data.WallTime, data.Result, data.DeletedAt, data.YamlString, data.AppType, data.AppName, data.Queue, data.SubmitType, data.NNode, data.StdOutFile, data.StdErrFile, data.StdInput, data.Environment, data.DeletedFlag, data.CreatedBy, data.CreatedTime, data.UpdatedBy, data.UpdatedTime)
|
|
||||||
return ret, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *defaultTaskHpcModel) Update(ctx context.Context, data *TaskHpc) error {
|
|
||||||
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskHpcRowsWithPlaceHolder)
|
|
||||||
_, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.JobId, data.ClusterId, data.Name, data.Status, data.CmdScript, data.StartTime, data.RunningTime, data.DerivedEs, data.Cluster, data.BlockId, data.AllocNodes, data.AllocCpu, data.CardCount, data.Version, data.Account, data.WorkDir, data.AssocId, data.ExitCode, data.WallTime, data.Result, data.DeletedAt, data.YamlString, data.AppType, data.AppName, data.Queue, data.SubmitType, data.NNode, data.StdOutFile, data.StdErrFile, data.StdInput, data.Environment, data.DeletedFlag, data.CreatedBy, data.CreatedTime, data.UpdatedBy, data.UpdatedTime, data.Id)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *defaultTaskHpcModel) tableName() string {
|
func (m *defaultTaskHpcModel) tableName() string {
|
||||||
return m.table
|
return m.table
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue