JCC-CSScheduler/manager/internal/jobmgr/job/state/multiInstance_init.go

67 lines
2.0 KiB
Go

package state
import (
"context"
"fmt"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/prescheduler"
"gitlink.org.cn/cloudream/scheduler/common/utils"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
)
// MultiInstanceInit is the initial state of a multi-instance job. It carries
// no fields of its own; all the data it works on comes from the job passed to
// its methods.
type MultiInstanceInit struct{}
// NewMultiInstanceInit returns a pointer to a fresh, zero-valued
// MultiInstanceInit state.
func NewMultiInstanceInit() *MultiInstanceInit {
	return new(MultiInstanceInit)
}
// Run executes this state for the given job by delegating to do. It returns
// once do has returned.
func (s *MultiInstanceInit) Run(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) {
	s.do(rtx, jo)
}
// do derives one instance job from the multi-instance job stored in jo.Body,
// registers it with the job manager under the same job set, records the
// returned job ID in the multi-instance job's SubJobs, and finally switches jo
// to the multi-instance running state.
//
// jo.Body must hold a *job.MultiInstanceJob: the type assertion is unchecked
// and panics for any other body type.
func (s *MultiInstanceInit) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) {
	parent := jo.Body.(*job.MultiInstanceJob)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Watch the event set for a Cancel event and cancel ctx when WaitType
	// returns. NOTE(review): presumably WaitType also returns once ctx is
	// cancelled, so the deferred cancel above lets this goroutine exit when
	// do returns — confirm against event.WaitType.
	go func() {
		event.WaitType[*event.Cancel](ctx, rtx.EventSet)
		cancel()
	}()

	// The instance gets its own local job ID: the parent's local ID followed
	// by a generated random suffix.
	instanceLocalID := fmt.Sprintf("%s_%s", parent.Info.LocalJobID, utils.GenerateRandomID())

	// The instance reuses the parent's file, runtime, resource and model
	// settings unchanged.
	instanceInfo := schsdk.InstanceJobInfo{
		Type:         schsdk.JobTypeInstance,
		LocalJobID:   instanceLocalID,
		Files:        parent.Info.Files,
		Runtime:      parent.Info.Runtime,
		Resources:    parent.Info.Resources,
		ModelJobInfo: parent.Info.ModelJobInfo,
	}
	instanceFiles := jobmod.JobFiles{
		Dataset: parent.Files.Dataset,
		Code:    parent.Files.Code,
		Image:   parent.Files.Image,
	}

	// Create the instance job and start it in the pre-scheduling state.
	child := job.NewInstanceJob(instanceInfo, instanceFiles, jo.JobID)
	firstState := NewPreSchuduling(parent.PreScheduler)
	childID := rtx.Mgr.AddJob(jo.JobSetID, child, firstState)

	// Record the new instance's job ID on the multi-instance job.
	parent.SubJobs = append(parent.SubJobs, childID)

	nextState := NewMultiInstanceRunning(prescheduler.NewDefaultPreScheduler())
	rtx.Mgr.ChangeState(jo, nextState)
}
// Dump returns the dump representation of this state: a newly allocated,
// empty jobmod.MultiInstCreateInitDump. Neither argument is consulted.
func (s *MultiInstanceInit) Dump(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) jobmod.JobStateDump {
	dump := new(jobmod.MultiInstCreateInitDump)
	return dump
}