163 lines
3.7 KiB
Go
163 lines
3.7 KiB
Go
package advisormgr
|
||
|
||
import (
|
||
"fmt"
|
||
"sync"
|
||
"time"
|
||
|
||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||
advmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor"
|
||
advtsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor/task"
|
||
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||
)
|
||
|
||
type jobTask struct {
|
||
JobID schsdk.JobID
|
||
TaskID string
|
||
FullTaskID string
|
||
}
|
||
|
||
type AdvisorInfo struct {
|
||
advisorID schmod.AdvisorID
|
||
jobTasks map[string]jobTask // key 为 TaskID
|
||
lastReportTime time.Time
|
||
}
|
||
|
||
type OnTaskUpdatedCallbackFn func(jobID schsdk.JobID, fullTaskID string, taskStatus advtsk.TaskStatus)
|
||
type OnTimeoutCallbackFn func(jobID schsdk.JobID, fullTaskID string)
|
||
|
||
type Manager struct {
|
||
advisors map[schmod.AdvisorID]*AdvisorInfo
|
||
lock sync.Mutex
|
||
advCli *advmq.Client
|
||
|
||
onTaskUpdated OnTaskUpdatedCallbackFn
|
||
onTaskTimeout OnTimeoutCallbackFn
|
||
|
||
reportTimeout time.Duration
|
||
}
|
||
|
||
func NewManager(reportTimeout time.Duration) (*Manager, error) {
|
||
advCli, err := schglb.AdvisorMQPool.Acquire()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("new executor client: %w", err)
|
||
}
|
||
|
||
return &Manager{
|
||
advisors: make(map[schmod.AdvisorID]*AdvisorInfo),
|
||
advCli: advCli,
|
||
reportTimeout: reportTimeout,
|
||
}, nil
|
||
}
|
||
|
||
func (m *Manager) OnTaskUpdated(callback OnTaskUpdatedCallbackFn) {
|
||
m.onTaskUpdated = callback
|
||
}
|
||
|
||
func (m *Manager) OnTaskTimeout(callback OnTimeoutCallbackFn) {
|
||
m.onTaskTimeout = callback
|
||
}
|
||
|
||
func (m *Manager) Report(advID schmod.AdvisorID, taskStatus []mgrmq.AdvisorTaskStatus) {
|
||
m.lock.Lock()
|
||
defer m.lock.Unlock()
|
||
|
||
info, ok := m.advisors[advID]
|
||
if !ok {
|
||
info := &AdvisorInfo{
|
||
advisorID: advID,
|
||
jobTasks: make(map[string]jobTask),
|
||
}
|
||
m.advisors[advID] = info
|
||
}
|
||
|
||
info.lastReportTime = time.Now()
|
||
|
||
for _, s := range taskStatus {
|
||
tsk, ok := info.jobTasks[s.TaskID]
|
||
if !ok {
|
||
continue
|
||
}
|
||
|
||
m.onTaskUpdated(tsk.JobID, tsk.FullTaskID, s.Status)
|
||
}
|
||
}
|
||
|
||
// 启动一个Task,并将其关联到指定的Job。返回一个在各Executor之间唯一的TaskID
|
||
func (m *Manager) StartTask(jobID schsdk.JobID, info advtsk.TaskInfo) (string, error) {
|
||
m.lock.Lock()
|
||
defer m.lock.Unlock()
|
||
|
||
resp, err := m.advCli.StartTask(advmq.NewStartTask(info))
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
fullTaskID := fmt.Sprintf("%s-%s", resp.AdvisorID, resp.TaskID)
|
||
|
||
exeInfo, ok := m.advisors[resp.AdvisorID]
|
||
if !ok {
|
||
exeInfo = &AdvisorInfo{
|
||
advisorID: resp.AdvisorID,
|
||
jobTasks: make(map[string]jobTask),
|
||
lastReportTime: time.Now(),
|
||
}
|
||
m.advisors[resp.AdvisorID] = exeInfo
|
||
}
|
||
|
||
exeInfo.jobTasks[resp.TaskID] = jobTask{
|
||
JobID: jobID,
|
||
TaskID: resp.TaskID,
|
||
FullTaskID: fullTaskID,
|
||
}
|
||
|
||
return fullTaskID, nil
|
||
}
|
||
|
||
// 放弃对指定任务进度的等待。调用此函数不会停止任务执行,只是回调里不会再收到此任务的进度更新
|
||
func (m *Manager) ForgetTask(fullTaskID string) {
|
||
m.lock.Lock()
|
||
defer m.lock.Unlock()
|
||
|
||
for _, exe := range m.advisors {
|
||
for _, tsk := range exe.jobTasks {
|
||
if tsk.FullTaskID == fullTaskID {
|
||
delete(exe.jobTasks, fullTaskID)
|
||
return
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func (m *Manager) Serve() error {
|
||
ticker := time.NewTicker(time.Second)
|
||
defer ticker.Stop()
|
||
|
||
for {
|
||
select {
|
||
case <-ticker.C:
|
||
func() {
|
||
m.lock.Lock()
|
||
defer m.lock.Unlock()
|
||
|
||
now := time.Now()
|
||
for exeID, exeInfo := range m.advisors {
|
||
dt := now.Sub(exeInfo.lastReportTime)
|
||
|
||
if dt < m.reportTimeout {
|
||
continue
|
||
}
|
||
|
||
for _, tsk := range exeInfo.jobTasks {
|
||
m.onTaskTimeout(tsk.JobID, tsk.FullTaskID)
|
||
}
|
||
|
||
delete(m.advisors, exeID)
|
||
}
|
||
}()
|
||
}
|
||
}
|
||
}
|