JCC-CSScheduler/manager/internal/mq/job.go

179 lines
5.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package mq
import (
"context"
"errors"
"fmt"
"gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/common/consts/errorcode"
"gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/mq"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job/state"
)
// SubmitJobSet submits a job set.
//
// Every job description in msg.JobSet.Jobs is converted into a
// jobmgr.SubmittingJob paired with its initial state, and the whole batch is
// then handed to the job manager in a single call.
//
// Normal and multi-instance jobs must have an entry in
// msg.PreScheduleScheme.JobSchemes keyed by their LocalJobID. An update of a
// multi-instance job must reference a job set that exists and contains a
// multi-instance job. If any of these lookups fails, nothing is submitted and
// an OperationFailed reply is returned.
func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetResp, *mq.CodeMessage) {
	logger.Debugf("submitting job")

	var jobs []jobmgr.SubmittingJob
	for _, jobInfo := range msg.JobSet.Jobs {
		switch info := jobInfo.(type) {
		case *schsdk.NormalJobInfo:
			jo := job.NewNormalJob(*info)

			preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
			if !ok {
				return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
			}

			jobs = append(jobs, jobmgr.SubmittingJob{
				Body:      jo,
				InitState: state.NewPreSchuduling(preSch),
			})

		case *schsdk.DataReturnJobInfo:
			jo := job.NewDataReturnJob(*info)

			jobs = append(jobs, jobmgr.SubmittingJob{
				Body:      jo,
				InitState: state.NewWaitTargetComplete(),
			})

		case *schsdk.MultiInstanceJobInfo:
			// Validate the scheme before it is used to build the job, so a
			// zero-value scheme never reaches the constructor.
			preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
			if !ok {
				return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
			}

			jo := job.NewMultiInstanceJob(*info, preSch)

			jobs = append(jobs, jobmgr.SubmittingJob{
				Body:      jo,
				InitState: state.NewMultiInstanceInit(),
			})

		case *schsdk.UpdateMultiInstanceJobInfo:
			modelJob := job.NewUpdateMultiInstanceJob(*info)
			jobSetID := modelJob.Info.MultiInstanceJobSetID

			instanceJobSets := svc.jobMgr.DumpJobSet(jobSetID)
			if len(instanceJobSets) == 0 {
				return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("job set %s is not found", jobSetID))
			}

			// Locate the multi-instance job itself inside the dumped job set.
			var multiInstanceJobDump jobmod.JobDump
			found := false
			for _, jobDump := range instanceJobSets {
				if _, ok := jobDump.Body.(*jobmod.MultiInstanceJobDump); ok {
					multiInstanceJobDump = jobDump
					found = true
					break
				}
			}
			// Without this check a zero-value dump would be handed to the
			// update state when the job set holds no multi-instance job.
			if !found {
				return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("multi instance job is not found in job set %s", jobSetID))
			}

			jobs = append(jobs, jobmgr.SubmittingJob{
				Body:      modelJob,
				InitState: state.NewMultiInstanceUpdate(multiInstanceJobDump),
			})

		default:
			// Unrecognised job kinds are still skipped, as before, but no
			// longer silently.
			logger.Warnf("unsupported job info type %T, the job is ignored", jobInfo)
		}
	}

	return mq.ReplyOK(mgrmq.NewSubmitJobSetResp(svc.jobMgr.SubmitJobSet(jobs)))
}
// CreateInstance posts an instance-operate event carrying instInfo.DataSet to
// the job identified by instInfo.JobID, then blocks until the associated
// future is completed.
//
// The wait uses context.TODO(), so no deadline or cancellation is applied
// here. An error from the wait is reported as an OperationFailed reply;
// otherwise the reply carries the JobID and FilesUploadScheme of the result.
func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) {
	logger.Debugf("start create instance")

	createInfo := &event.InstanceCreateInfo{DataSet: instInfo.DataSet}
	done := future.NewSetValue[event.OperateInstanceResult]()
	svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceOperate(createInfo, done))

	res, waitErr := done.Wait(context.TODO())
	if waitErr != nil {
		return nil, mq.Failed(errorcode.OperationFailed, waitErr.Error())
	}

	resp := mgrmq.NewCreateInstanceResp(res.JobID, res.FilesUploadScheme)
	return mq.ReplyOK(resp)
}
// GetAvailableNodes replies with whatever jobmgr.GetAvailableNodes reports.
// No field of msg is read.
func (svc *Service) GetAvailableNodes(msg *mgrmq.AvailableNodes) (*schsdk.AvailableNodesResp, *mq.CodeMessage) {
	resp := mgrmq.NewAvailableNodesResp(jobmgr.GetAvailableNodes())
	return mq.ReplyOK(resp)
}
// JobSetLocalFileUploaded handles the notification that a local file of a job
// set has finished uploading.
//
// A non-empty msg.Error is turned into an error value. The outcome is then
// broadcast as a LocalFileUploaded event to the job set named by msg.JobSetID.
// The reply is always OK, whether or not the upload itself failed.
func (svc *Service) JobSetLocalFileUploaded(msg *mgrmq.JobSetLocalFileUploaded) (*mgrmq.JobSetLocalFileUploadedResp, *mq.CodeMessage) {
	log := logger.WithField("LocalPath", msg.LocalPath).WithField("PackageID", msg.PackageID)
	log.Debugf("local file uploaded")

	// Stays nil when the uploader reported no error text.
	var uploadErr error
	if len(msg.Error) > 0 {
		uploadErr = errors.New(msg.Error)
	}

	uploaded := event.NewLocalFileUploaded(msg.LocalPath, uploadErr, msg.PackageID)
	svc.jobMgr.BroadcastEvent(msg.JobSetID, uploaded)

	return mq.ReplyOK(mgrmq.NewJobSetLocalFileUploadedResp())
}
// GetJobSetDump replies with the dumps of all jobs in the job set named by
// msg.JobSetID. An empty dump result is reported as an OperationFailed reply
// with the message "job set not found".
func (svc *Service) GetJobSetDump(msg *mgrmq.GetJobSetDump) (*mgrmq.GetJobSetDumpResp, *mq.CodeMessage) {
	dumps := svc.jobMgr.DumpJobSet(msg.JobSetID)

	if len(dumps) == 0 {
		notFound := mq.Failed(errorcode.OperationFailed, "job set not found")
		return nil, notFound
	}

	resp := mgrmq.RespGetJobSetDump(dumps)
	return mq.ReplyOK(resp)
}
// GetServiceList lists the service ports declared by the normal jobs of the
// job set named by msg.JobSetID.
//
// Jobs whose dump body is not a NormalJobDump are skipped. Each remaining job
// contributes one JobSetServiceInfo per entry in its
// Services.ServicePortInfos. CDSNodeID is filled in only while the job state
// is a NormalJobExecutingDump, by looking up the computing center identified
// by the job's TargetCCID; in any other state it is nil.
//
// The request fails with OperationFailed when the computing center lookup
// fails, or when a normal job carries job info that is not a
// *schsdk.NormalJobInfo.
func (svc *Service) GetServiceList(msg *mgrmq.GetServiceList) (*mgrmq.GetServiceListResp, *mq.CodeMessage) {
	jobs := svc.jobMgr.DumpJobSet(msg.JobSetID)

	var jobSetServiceInfos []schsdk.JobSetServiceInfo
	for _, jo := range jobs {
		norJob, ok := jo.Body.(*jobmod.NormalJobDump)
		if !ok {
			continue
		}

		// Checked assertion: an unexpected or nil Info must produce an error
		// reply rather than a panic in the message handler.
		norJobInfo, ok := jo.Info.(*schsdk.NormalJobInfo)
		if !ok || norJobInfo == nil {
			return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("unexpected job info type %T for normal job", jo.Info))
		}

		// A nil pointer tells the caller that the job is not currently
		// executing and therefore has no node ID.
		var cdsNodeID *cdssdk.NodeID
		if _, executing := jo.State.(*jobmod.NormalJobExecutingDump); executing {
			computingCenter, err := svc.db.ComputingCenter().GetByID(svc.db.SQLCtx(), norJob.TargetCCID)
			if err != nil {
				return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("get cdsNodeID failed by CCID: %s", err.Error()))
			}
			cdsNodeID = &computingCenter.CDSNodeID
		}

		for _, servicePortInfo := range norJobInfo.Services.ServicePortInfos {
			jobSetServiceInfos = append(jobSetServiceInfos, schsdk.JobSetServiceInfo{
				Name:       servicePortInfo.Name,
				Port:       servicePortInfo.Port,
				CDSNodeID:  cdsNodeID,
				LocalJobID: norJobInfo.LocalJobID,
			})
		}
	}

	return mq.ReplyOK(mgrmq.NewGetServiceListResp(jobSetServiceInfos))
}