145 lines
4.6 KiB
Go
145 lines
4.6 KiB
Go
package mq
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"fmt"
|
||
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||
|
||
"gitlink.org.cn/cloudream/common/consts/errorcode"
|
||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job/state"
|
||
)
|
||
|
||
// 提交任务集
|
||
func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetResp, *mq.CodeMessage) {
|
||
logger.Debugf("submitting job")
|
||
|
||
var jobs []jobmgr.SubmittingJob
|
||
for _, jobInfo := range msg.JobSet.Jobs {
|
||
switch info := jobInfo.(type) {
|
||
case *schsdk.NormalJobInfo:
|
||
job := job.NewNormalJob(*info)
|
||
|
||
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
||
if !ok {
|
||
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
|
||
}
|
||
|
||
jobs = append(jobs, jobmgr.SubmittingJob{
|
||
Body: job,
|
||
InitState: state.NewPreSchuduling(preSch),
|
||
})
|
||
|
||
case *schsdk.DataReturnJobInfo:
|
||
job := job.NewResourceJob(*info)
|
||
jobs = append(jobs, jobmgr.SubmittingJob{
|
||
Body: job,
|
||
InitState: state.NewWaitTargetComplete(),
|
||
})
|
||
|
||
case *schsdk.MultiInstanceJobInfo:
|
||
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
||
logger.Info(">>>localJobID: " + info.LocalJobID)
|
||
|
||
job := job.NewMultiInstanceJob(*info, preSch)
|
||
|
||
if !ok {
|
||
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
|
||
}
|
||
|
||
jobs = append(jobs, jobmgr.SubmittingJob{
|
||
Body: job,
|
||
InitState: state.NewMultiInstanceInit(),
|
||
})
|
||
|
||
}
|
||
}
|
||
|
||
return mq.ReplyOK(mgrmq.NewSubmitJobSetResp(svc.jobMgr.SubmitJobSet(jobs)))
|
||
}
|
||
|
||
func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) {
|
||
logger.Debugf("start create instance")
|
||
|
||
fut := future.NewSetValue[event.CreateInstanceResult]()
|
||
svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceCreate(instInfo.LocalPath, fut))
|
||
|
||
result, err := fut.WaitValue(context.TODO())
|
||
|
||
if err != nil {
|
||
return nil, mq.Failed(errorcode.OperationFailed, err.Error())
|
||
}
|
||
|
||
return mq.ReplyOK(mgrmq.NewCreateInstanceResp(result.JobID, result.FilesUploadScheme))
|
||
}
|
||
|
||
// 任务集中某个文件上传完成
|
||
func (svc *Service) JobSetLocalFileUploaded(msg *mgrmq.JobSetLocalFileUploaded) (*mgrmq.JobSetLocalFileUploadedResp, *mq.CodeMessage) {
|
||
logger.WithField("LocalPath", msg.LocalPath).
|
||
WithField("PackageID", msg.PackageID).
|
||
Debugf("local file uploaded")
|
||
|
||
svc.jobMgr.BroadcastEvent(msg.JobSetID, event.NewLocalFileUploaded(msg.LocalPath, errors.New(msg.Error), msg.PackageID))
|
||
return mq.ReplyOK(mgrmq.NewJobSetLocalFileUploadedResp())
|
||
}
|
||
|
||
func (svc *Service) GetJobSetDump(msg *mgrmq.GetJobSetDump) (*mgrmq.GetJobSetDumpResp, *mq.CodeMessage) {
|
||
jobs := svc.jobMgr.DumpJobSet(msg.JobSetID)
|
||
if len(jobs) == 0 {
|
||
return nil, mq.Failed(errorcode.OperationFailed, "job set not found")
|
||
}
|
||
|
||
return mq.ReplyOK(mgrmq.RespGetJobSetDump(jobs))
|
||
}
|
||
|
||
func (svc *Service) GetServiceList(msg *mgrmq.GetServiceList) (*mgrmq.GetServiceListResp, *mq.CodeMessage) {
|
||
jobs := svc.jobMgr.DumpJobSet(msg.JobSetID)
|
||
|
||
var jobSetServiceInfos []schsdk.JobSetServiceInfo
|
||
|
||
for _, jo := range jobs {
|
||
var cdsNodeID *cdssdk.NodeID
|
||
|
||
norJob, ok := jo.Body.(*jobmod.NormalJobDump)
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
_, ok = jo.State.(*jobmod.NormalJobExecutingDump)
|
||
if ok {
|
||
computingCenter, err := svc.db.ComputingCenter().GetByID(svc.db.SQLCtx(), norJob.TargetCCID)
|
||
if err != nil {
|
||
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("get cdsNodeID failed by CCID: %s", err.Error()))
|
||
}
|
||
|
||
cdsNodeID = &computingCenter.CDSNodeID
|
||
|
||
} else {
|
||
//返回空指针,表明查询任务不在执行状态,没有id
|
||
cdsNodeID = nil
|
||
}
|
||
|
||
norJobInfo := jo.Info.(*schsdk.NormalJobInfo)
|
||
for _, servicePortInfo := range norJobInfo.Services.ServicePortInfos {
|
||
jobSetServiceInfo := schsdk.JobSetServiceInfo{
|
||
Name: servicePortInfo.Name,
|
||
Port: servicePortInfo.Port,
|
||
CDSNodeID: cdsNodeID,
|
||
LocalJobID: norJobInfo.LocalJobID,
|
||
}
|
||
jobSetServiceInfos = append(jobSetServiceInfos, jobSetServiceInfo)
|
||
}
|
||
}
|
||
|
||
return mq.ReplyOK(mgrmq.NewGetServiceListResp(jobSetServiceInfos))
|
||
}
|