262 lines
5.4 KiB
Go
262 lines
5.4 KiB
Go
package jobmgr
|
||
|
||
import (
|
||
"fmt"
|
||
"sync"
|
||
"time"
|
||
|
||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||
"gitlink.org.cn/cloudream/scheduler/common/pkgs/db"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/advisormgr"
|
||
"gitlink.org.cn/cloudream/scheduler/manager/internal/executormgr"
|
||
)
|
||
|
||
type mgrJob struct {
|
||
job Job
|
||
eventSet EventSet
|
||
state JobState
|
||
isCompleted bool // 任务是否结束。注:任务状态为Completed时,此字段不一定为true,因为在Completed状态下也有工作要做。
|
||
}
|
||
|
||
type mgrJobSet struct {
|
||
jobs map[schsdk.JobID]*mgrJob
|
||
}
|
||
|
||
type Manager struct {
|
||
// 任何修改job、jobset的操作,都需要加这个锁
|
||
pubLock sync.Mutex
|
||
|
||
ExecMgr *executormgr.Manager
|
||
AdvMgr *advisormgr.Manager
|
||
DB *db.DB
|
||
|
||
jobSetIDIndex int
|
||
jobSets map[schsdk.JobSetID]*mgrJobSet
|
||
jobIDIndex int
|
||
jobs map[schsdk.JobID]*mgrJob
|
||
}
|
||
|
||
func NewManager(execMgr *executormgr.Manager, advMgr *advisormgr.Manager, db *db.DB) (*Manager, error) {
|
||
mgr := &Manager{
|
||
ExecMgr: execMgr,
|
||
AdvMgr: advMgr,
|
||
DB: db,
|
||
jobSets: make(map[schsdk.JobSetID]*mgrJobSet),
|
||
jobs: make(map[schsdk.JobID]*mgrJob),
|
||
}
|
||
|
||
return mgr, nil
|
||
}
|
||
|
||
func (m *Manager) Serve() error {
|
||
|
||
ticker := time.NewTicker(time.Minute)
|
||
defer ticker.Stop()
|
||
|
||
// TODO 应该要阻塞在这里
|
||
|
||
return nil
|
||
}
|
||
|
||
func (m *Manager) Stop() {
|
||
|
||
}
|
||
|
||
// 改变任务状态。注:将任务改变为Completed状态不会设置mgrJob.isCompleted为true
|
||
func (m *Manager) ChangeState(job *Job, state JobState) {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
mgrJob, ok := m.jobs[job.JobID]
|
||
if !ok {
|
||
return
|
||
}
|
||
|
||
lastState := mgrJob.state
|
||
mgrJob.state = state
|
||
logger.Info("jobID: %s change state from %s to %s", job.JobID, lastState, state)
|
||
|
||
go func() {
|
||
logger.WithField("JobID", job.JobID).Infof("state changed: %T -> %T", lastState, state)
|
||
|
||
state.Run(JobStateRunContext{
|
||
Mgr: m,
|
||
EventSet: &mgrJob.eventSet,
|
||
LastState: lastState,
|
||
}, job)
|
||
}()
|
||
}
|
||
|
||
// 将任务标记为结束
|
||
func (m *Manager) JobCompleted(job *Job) {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
mgrJob, ok := m.jobs[job.JobID]
|
||
if !ok {
|
||
return
|
||
}
|
||
|
||
mgrJob.isCompleted = true
|
||
|
||
// 如果任务集中的所有任务都完成了,则删除任务集
|
||
jobSet := m.jobSets[job.JobSetID]
|
||
for _, mjob := range jobSet.jobs {
|
||
if !mjob.isCompleted {
|
||
return
|
||
}
|
||
}
|
||
|
||
// TODO 可以考虑加个回调
|
||
delete(m.jobSets, job.JobSetID)
|
||
|
||
go func() {
|
||
logger.Infof("job set %s completed", job.JobSetID)
|
||
}()
|
||
}
|
||
|
||
// 向某个任务投递事件
|
||
func (m *Manager) PostEvent(jobID schsdk.JobID, evt Event) {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
mgrJob, ok := m.jobs[jobID]
|
||
if !ok {
|
||
return
|
||
}
|
||
|
||
go func() {
|
||
mgrJob.eventSet.Post(evt)
|
||
}()
|
||
}
|
||
|
||
// 向某个任务集中的所有任务投递事件
|
||
func (m *Manager) BroadcastEvent(jobSetID schsdk.JobSetID, evt Event) {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
jobSet, ok := m.jobSets[jobSetID]
|
||
if !ok {
|
||
// 如果作业集不存在,则直接返回
|
||
return
|
||
}
|
||
|
||
for _, mjob := range jobSet.jobs {
|
||
go func(j *mgrJob) {
|
||
j.eventSet.Post(evt)
|
||
}(mjob)
|
||
}
|
||
}
|
||
|
||
type SubmittingJob struct {
|
||
Body JobBody
|
||
InitState JobState
|
||
}
|
||
|
||
// 提交一个任务集
|
||
func (m *Manager) SubmitJobSet(jobs []SubmittingJob) schsdk.JobSetID {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
jobSetID := schsdk.JobSetID(fmt.Sprintf("%d", m.jobSetIDIndex))
|
||
m.jobSetIDIndex += 1
|
||
|
||
jobSet := &mgrJobSet{
|
||
jobs: make(map[schsdk.JobID]*mgrJob),
|
||
}
|
||
m.jobSets[jobSetID] = jobSet
|
||
|
||
var addedJobs []*mgrJob
|
||
for i, subJob := range jobs {
|
||
jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex+i))
|
||
job := &mgrJob{
|
||
job: Job{
|
||
JobSetID: jobSetID,
|
||
JobID: jobID,
|
||
Body: subJob.Body,
|
||
},
|
||
eventSet: NewEventSet(),
|
||
state: subJob.InitState,
|
||
}
|
||
jobSet.jobs[jobID] = job
|
||
m.jobs[jobID] = job
|
||
addedJobs = append(addedJobs, job)
|
||
}
|
||
m.jobIDIndex += len(jobs)
|
||
|
||
// 先添加完所有Job,再启动
|
||
for _, job := range addedJobs {
|
||
go func(j *mgrJob) {
|
||
j.state.Run(JobStateRunContext{
|
||
Mgr: m,
|
||
EventSet: &j.eventSet,
|
||
LastState: nil,
|
||
}, &j.job)
|
||
}(job)
|
||
}
|
||
|
||
return jobSetID
|
||
}
|
||
|
||
// 导出任务集中所有任务的状态
|
||
func (m *Manager) DumpJobSet(jobSetID schsdk.JobSetID) []jobmod.JobDump {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
jobSet, ok := m.jobSets[jobSetID]
|
||
if !ok {
|
||
return nil
|
||
}
|
||
|
||
var jobDumps []jobmod.JobDump
|
||
for _, mgrJob := range jobSet.jobs {
|
||
jobDumps = append(jobDumps, mgrJob.job.Dump(JobStateRunContext{
|
||
Mgr: m,
|
||
EventSet: &mgrJob.eventSet,
|
||
LastState: mgrJob.state,
|
||
}, &mgrJob.job, mgrJob.state))
|
||
}
|
||
|
||
return jobDumps
|
||
}
|
||
|
||
type PreSchedulerInstJob struct {
|
||
Body JobBody
|
||
InitState JobState
|
||
}
|
||
|
||
// AddJob 添加一个作业到指定的作业集。
|
||
func (m *Manager) AddJob(jobSetID schsdk.JobSetID, jobBody JobBody, jobState JobState) schsdk.JobID {
|
||
m.pubLock.Lock()
|
||
defer m.pubLock.Unlock()
|
||
|
||
jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex))
|
||
m.jobIDIndex += 1
|
||
|
||
job := &mgrJob{
|
||
job: Job{
|
||
JobSetID: jobSetID,
|
||
JobID: jobID,
|
||
Body: jobBody,
|
||
},
|
||
state: jobState,
|
||
eventSet: NewEventSet(),
|
||
}
|
||
|
||
m.jobs[jobID] = job
|
||
jobSet := m.jobSets[jobSetID]
|
||
jobSet.jobs[jobID] = job
|
||
|
||
go func() {
|
||
jobState.Run(JobStateRunContext{
|
||
Mgr: m,
|
||
EventSet: &job.eventSet,
|
||
LastState: nil,
|
||
}, &job.job)
|
||
}()
|
||
|
||
return jobID
|
||
}
|