修复调试问题

This commit is contained in:
Sydonian 2023-10-07 11:13:48 +08:00
parent a8d61ae115
commit cce21aedea
50 changed files with 503 additions and 162 deletions

View File

@ -15,7 +15,7 @@ import (
type Reporter struct { type Reporter struct {
advisorID schmod.AdvisorID advisorID schmod.AdvisorID
reportInterval time.Duration reportInterval time.Duration
taskStatus map[string]advtsk.TaskStatus taskStatus map[string]advtsk.AdvTaskStatus
taskStatusLock sync.Mutex taskStatusLock sync.Mutex
reportNow chan bool reportNow chan bool
} }
@ -24,12 +24,12 @@ func NewReporter(advisorID schmod.AdvisorID, reportInterval time.Duration) *Repo
return &Reporter{ return &Reporter{
advisorID: advisorID, advisorID: advisorID,
reportInterval: reportInterval, reportInterval: reportInterval,
taskStatus: make(map[string]advtsk.TaskStatus), taskStatus: make(map[string]advtsk.AdvTaskStatus),
reportNow: make(chan bool), reportNow: make(chan bool),
} }
} }
func (r *Reporter) Report(taskID string, taskStatus advtsk.TaskStatus) { func (r *Reporter) Report(taskID string, taskStatus advtsk.AdvTaskStatus) {
r.taskStatusLock.Lock() r.taskStatusLock.Lock()
defer r.taskStatusLock.Unlock() defer r.taskStatusLock.Unlock()
@ -65,7 +65,7 @@ func (r *Reporter) Serve() error {
for taskID, status := range r.taskStatus { for taskID, status := range r.taskStatus {
taskStatus = append(taskStatus, mgrmq.NewAdvisorTaskStatus(taskID, status)) taskStatus = append(taskStatus, mgrmq.NewAdvisorTaskStatus(taskID, status))
} }
r.taskStatus = make(map[string]advtsk.TaskStatus) r.taskStatus = make(map[string]advtsk.AdvTaskStatus)
r.taskStatusLock.Unlock() r.taskStatusLock.Unlock()
_, err := magCli.ReportAdvisorTaskStatus(mgrmq.NewReportAdvisorTaskStatus(r.advisorID, taskStatus)) _, err := magCli.ReportAdvisorTaskStatus(mgrmq.NewReportAdvisorTaskStatus(r.advisorID, taskStatus))

View File

@ -183,14 +183,20 @@ func (s *DefaultScheduler) makeSchemeForNode(targetSlwNode *candidateSlwNode) jo
if !targetSlwNode.Files.Dataset.IsLoaded { if !targetSlwNode.Files.Dataset.IsLoaded {
scheme.Dataset.Action = jobmod.ActionLoad scheme.Dataset.Action = jobmod.ActionLoad
} else {
scheme.Dataset.Action = jobmod.ActionNo
} }
if !targetSlwNode.Files.Code.IsLoaded { if !targetSlwNode.Files.Code.IsLoaded {
scheme.Dataset.Action = jobmod.ActionLoad scheme.Code.Action = jobmod.ActionLoad
} else {
scheme.Code.Action = jobmod.ActionNo
} }
if !targetSlwNode.Files.Image.IsLoaded { if !targetSlwNode.Files.Image.IsLoaded {
scheme.Dataset.Action = jobmod.ActionImportImage scheme.Image.Action = jobmod.ActionImportImage
} else {
scheme.Image.Action = jobmod.ActionNo
} }
return scheme return scheme

View File

@ -39,7 +39,7 @@ func NewManager(reporter *reporter.Reporter, scheduleSvc *scheduler.Service) Man
} }
} }
func (m *Manager) StartByInfo(info advtsk.TaskInfo) (*Task, error) { func (m *Manager) StartByInfo(info advtsk.AdvTaskInfo) (*Task, error) {
infoType := myreflect.TypeOfValue(info) infoType := myreflect.TypeOfValue(info)
ctor, ok := taskFromInfoCtors[infoType] ctor, ok := taskFromInfoCtors[infoType]
@ -50,10 +50,10 @@ func (m *Manager) StartByInfo(info advtsk.TaskInfo) (*Task, error) {
return m.StartNew(ctor(info)), nil return m.StartNew(ctor(info)), nil
} }
var taskFromInfoCtors map[reflect.Type]func(advtsk.TaskInfo) TaskBody var taskFromInfoCtors map[reflect.Type]func(advtsk.AdvTaskInfo) TaskBody = make(map[reflect.Type]func(advtsk.AdvTaskInfo) task.TaskBody[TaskContext])
func Register[TInfo advtsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) { func Register[TInfo advtsk.AdvTaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) {
taskFromInfoCtors[myreflect.TypeOf[TInfo]()] = func(info advtsk.TaskInfo) TaskBody { taskFromInfoCtors[myreflect.TypeOf[TInfo]()] = func(info advtsk.AdvTaskInfo) TaskBody {
return ctor(info.(TInfo)) return ctor(info.(TInfo))
} }
} }

View File

@ -12,6 +12,7 @@ import (
"gitlink.org.cn/cloudream/scheduler/advisor/internal/scheduler" "gitlink.org.cn/cloudream/scheduler/advisor/internal/scheduler"
"gitlink.org.cn/cloudream/scheduler/advisor/internal/services" "gitlink.org.cn/cloudream/scheduler/advisor/internal/services"
"gitlink.org.cn/cloudream/scheduler/advisor/internal/task" "gitlink.org.cn/cloudream/scheduler/advisor/internal/task"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
advmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor" advmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor"
) )
@ -28,6 +29,7 @@ func main() {
os.Exit(1) os.Exit(1)
} }
schglb.InitMQPool(&config.Cfg().RabbitMQ)
myglbs.Init() myglbs.Init()
rpter := reporter.NewReporter(myglbs.AdvisorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec)) rpter := reporter.NewReporter(myglbs.AdvisorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec))
@ -41,9 +43,9 @@ func main() {
logger.Fatalf("new advisor server failed, err: %s", err.Error()) logger.Fatalf("new advisor server failed, err: %s", err.Error())
} }
mqSvr.OnError = func(err error) { mqSvr.OnError(func(err error) {
logger.Warnf("advisor server err: %s", err.Error()) logger.Warnf("advisor server err: %s", err.Error())
} })
// 启动服务 // 启动服务
go serveMQServer(mqSvr) go serveMQServer(mqSvr)

View File

@ -149,10 +149,14 @@ func (s *DefaultPreScheduler) Schedule(info *schsdk.JobSetInfo) (*jobmod.JobSetP
slwNodes[node.ID] = node slwNodes[node.ID] = node
} }
if len(slwNodes) == 0 {
return nil, nil, ErrNoAvailableScheme
}
// 先根据任务配置收集它们依赖的任务的LocalID // 先根据任务配置收集它们依赖的任务的LocalID
var schJobs []schedulingJob var schJobs []*schedulingJob
for _, job := range info.Jobs { for _, job := range info.Jobs {
j := schedulingJob{ j := &schedulingJob{
Job: job, Job: job,
} }
@ -171,8 +175,8 @@ func (s *DefaultPreScheduler) Schedule(info *schsdk.JobSetInfo) (*jobmod.JobSetP
schJobs = append(schJobs, j) schJobs = append(schJobs, j)
} }
// 然后根据引用进行排序 // 然后根据依赖进行排序
schJobs, ok := s.orderByReference(schJobs) schJobs, ok := s.orderByAfters(schJobs)
if !ok { if !ok {
return nil, nil, fmt.Errorf("circular reference detected between jobs in the job set") return nil, nil, fmt.Errorf("circular reference detected between jobs in the job set")
} }
@ -180,7 +184,7 @@ func (s *DefaultPreScheduler) Schedule(info *schsdk.JobSetInfo) (*jobmod.JobSetP
// 经过排序后,按顺序生成调度方案 // 经过排序后,按顺序生成调度方案
for _, job := range schJobs { for _, job := range schJobs {
if norJob, ok := job.Job.(*schsdk.NormalJobInfo); ok { if norJob, ok := job.Job.(*schsdk.NormalJobInfo); ok {
scheme, err := s.scheduleForNormalJob(info, &job, slwNodes, jobSetScheme.JobSchemes) scheme, err := s.scheduleForNormalJob(info, job, slwNodes, jobSetScheme.JobSchemes)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -199,16 +203,17 @@ func (s *DefaultPreScheduler) Schedule(info *schsdk.JobSetInfo) (*jobmod.JobSetP
}, nil }, nil
} }
func (s *DefaultPreScheduler) orderByReference(jobs []schedulingJob) ([]schedulingJob, bool) { func (s *DefaultPreScheduler) orderByAfters(jobs []*schedulingJob) ([]*schedulingJob, bool) {
type jobOrder struct { type jobOrder struct {
Job schedulingJob Job *schedulingJob
Afters []string Afters []string
} }
var jobOrders []jobOrder var jobOrders []*jobOrder
for _, job := range jobs { for _, job := range jobs {
od := jobOrder{ od := &jobOrder{
Job: job, Job: job,
Afters: make([]string, len(job.Afters)),
} }
copy(od.Afters, job.Afters) copy(od.Afters, job.Afters)
@ -217,7 +222,7 @@ func (s *DefaultPreScheduler) orderByReference(jobs []schedulingJob) ([]scheduli
} }
// 然后排序 // 然后排序
var orderedJob []schedulingJob var orderedJob []*schedulingJob
for { for {
rm := 0 rm := 0
for i, jo := range jobOrders { for i, jo := range jobOrders {
@ -231,6 +236,7 @@ func (s *DefaultPreScheduler) orderByReference(jobs []schedulingJob) ([]scheduli
} }
rm++ rm++
continue
} }
jobOrders[i-rm] = jobOrders[i] jobOrders[i-rm] = jobOrders[i]
@ -344,16 +350,18 @@ func (s *DefaultPreScheduler) makeSchemeForNode(targetSlwNode *candidateSlwNode)
TargetSlwNodeID: targetSlwNode.SlwNode.ID, TargetSlwNodeID: targetSlwNode.SlwNode.ID,
} }
// TODO 根据实际情况选择Move或者Load
if !targetSlwNode.Files.Dataset.IsLoaded { if !targetSlwNode.Files.Dataset.IsLoaded {
scheme.Dataset.Action = jobmod.ActionLoad scheme.Dataset.Action = jobmod.ActionLoad
} }
if !targetSlwNode.Files.Code.IsLoaded { if !targetSlwNode.Files.Code.IsLoaded {
scheme.Dataset.Action = jobmod.ActionLoad scheme.Code.Action = jobmod.ActionLoad
} }
if !targetSlwNode.Files.Image.IsLoaded { if !targetSlwNode.Files.Image.IsLoaded {
scheme.Dataset.Action = jobmod.ActionImportImage scheme.Image.Action = jobmod.ActionImportImage
} }
return scheme return scheme
@ -517,7 +525,7 @@ func (s *DefaultPreScheduler) calcResourceLevel(avai float64, need float64) int
// 计算节点得分情况 // 计算节点得分情况
func (s *DefaultPreScheduler) calcFileScore(files schsdk.JobFilesInfo, allSlwNodes map[uopsdk.SlwNodeID]*candidateSlwNode) error { func (s *DefaultPreScheduler) calcFileScore(files schsdk.JobFilesInfo, allSlwNodes map[uopsdk.SlwNodeID]*candidateSlwNode) error {
// 只计算运控返回的计算中心上的存储服务的数据权重 // 只计算运控返回的可用计算中心上的存储服务的数据权重
stgNodeToSlwNode := make(map[int64]*candidateSlwNode) stgNodeToSlwNode := make(map[int64]*candidateSlwNode)
for _, slwNode := range allSlwNodes { for _, slwNode := range allSlwNodes {
stgNodeToSlwNode[slwNode.SlwNode.StgNodeID] = slwNode stgNodeToSlwNode[slwNode.SlwNode.StgNodeID] = slwNode
@ -579,6 +587,7 @@ func (s *DefaultPreScheduler) calcPackageFileScore(packageID int64, stgNodeToSlw
slwNodeFileScores := make(map[uopsdk.SlwNodeID]*fileDetail) slwNodeFileScores := make(map[uopsdk.SlwNodeID]*fileDetail)
// TODO UserID
cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, packageID)) cachedResp, err := colCli.PackageGetCachedStgNodes(collector.NewPackageGetCachedStgNodes(0, packageID))
if err != nil { if err != nil {
return nil, err return nil, err
@ -596,6 +605,7 @@ func (s *DefaultPreScheduler) calcPackageFileScore(packageID int64, stgNodeToSlw
} }
} }
// TODO UserID
loadedResp, err := colCli.PackageGetLoadedStgNodes(collector.NewPackageGetLoadedStgNodes(0, packageID)) loadedResp, err := colCli.PackageGetLoadedStgNodes(collector.NewPackageGetLoadedStgNodes(0, packageID))
if err != nil { if err != nil {
return nil, err return nil, err
@ -607,14 +617,14 @@ func (s *DefaultPreScheduler) calcPackageFileScore(packageID int64, stgNodeToSlw
continue continue
} }
fsc, ok := slwNodeFileScores[slwNode.SlwNode.ID] sfc, ok := slwNodeFileScores[slwNode.SlwNode.ID]
if !ok { if !ok {
fsc = &fileDetail{} sfc = &fileDetail{}
slwNodeFileScores[slwNode.SlwNode.ID] = fsc slwNodeFileScores[slwNode.SlwNode.ID] = sfc
} }
fsc.LoadingScore = 1 * LoadedWeight sfc.LoadingScore = 1 * LoadedWeight
fsc.IsLoaded = true sfc.IsLoaded = true
} }
return slwNodeFileScores, nil return slwNodeFileScores, nil

View File

@ -0,0 +1,117 @@
package prescheduler
import (
"testing"
"github.com/samber/lo"
. "github.com/smartystreets/goconvey/convey"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
)
// TestOrderByAfters exercises DefaultPreScheduler.orderByAfters with several
// dependency layouts. Every case supplies jobs (each with a LocalJobID and the
// IDs listed in its Afters) together with the expected sequence of LocalJobIDs.
// A nil expectation means the call must report failure (ok == false).
func TestOrderByAfters(t *testing.T) {
	type orderCase struct {
		name     string
		input    []*schedulingJob
		expected []string
	}

	// newJob wraps a NormalJobInfo carrying the given local ID into a
	// schedulingJob whose Afters is exactly the slice passed in.
	newJob := func(localID string, afters []string) *schedulingJob {
		return &schedulingJob{
			Job:    &schsdk.NormalJobInfo{JobInfoBase: schsdk.JobInfoBase{LocalJobID: localID}},
			Afters: afters,
		}
	}

	table := []orderCase{
		{
			// Every job is linked to the others directly or transitively.
			name: "所有Job都有直接或间接的依赖关系",
			input: []*schedulingJob{
				newJob("1", []string{"2"}),
				newJob("2", []string{}),
				newJob("3", []string{"1"}),
			},
			expected: []string{"2", "1", "3"},
		},
		{
			// Two independent dependency chains in one job set.
			name: "部分Job之间无依赖关系",
			input: []*schedulingJob{
				newJob("1", []string{"2"}),
				newJob("2", []string{}),
				newJob("3", []string{"1"}),
				newJob("4", []string{"5"}),
				newJob("5", []string{}),
			},
			expected: []string{"2", "5", "1", "3", "4"},
		},
		{
			// Jobs 1 and 2 list each other in Afters, so ordering must fail.
			name: "存在循环依赖",
			input: []*schedulingJob{
				newJob("1", []string{"2"}),
				newJob("2", []string{"1"}),
			},
			expected: nil,
		},
		{
			// No job lists anything in Afters.
			name: "完全不依赖",
			input: []*schedulingJob{
				newJob("1", []string{}),
				newJob("2", []string{}),
			},
			expected: []string{"1", "2"},
		},
	}

	preScheduler := NewDefaultPreScheduler()

	for idx := range table {
		tc := table[idx]

		Convey(tc.name, t, func() {
			sorted, ok := preScheduler.orderByAfters(tc.input)

			// A nil expectation only checks that ordering is rejected.
			if tc.expected == nil {
				So(ok, ShouldBeFalse)
				return
			}

			So(ok, ShouldBeTrue)

			gotIDs := lo.Map(sorted, func(job *schedulingJob, _ int) string {
				return job.Job.GetLocalJobID()
			})
			So(gotIDs, ShouldResemble, tc.expected)
		})
	}
}

View File

@ -26,7 +26,7 @@ func (svc *JobSetService) Submit(info schsdk.JobSetInfo) (schsdk.JobSetID, *schs
schScheme, uploadScheme, err := svc.preScheduler.Schedule(&info) schScheme, uploadScheme, err := svc.preScheduler.Schedule(&info)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("") return "", nil, fmt.Errorf("pre scheduling: %w", err)
} }
resp, err := mgrCli.SubmitJobSet(mgrmq.NewSubmitJobSet(info, *schScheme)) resp, err := mgrCli.SubmitJobSet(mgrmq.NewSubmitJobSet(info, *schScheme))

View File

@ -32,9 +32,9 @@ func main() {
logger.Fatalf("new collector server failed, err: %s", err.Error()) logger.Fatalf("new collector server failed, err: %s", err.Error())
} }
mqSvr.OnError = func(err error) { mqSvr.OnError(func(err error) {
logger.Warnf("collector server err: %s", err.Error()) logger.Warnf("collector server err: %s", err.Error())
} })
// 启动服务 // 启动服务
go serveColServer(mqSvr) go serveColServer(mqSvr)

View File

@ -0,0 +1,21 @@
{
"logger": {
"output": "file",
"outputFileName": "advisor",
"outputDirectory": "log",
"level": "debug"
},
"rabbitMQ": {
"address": "127.0.0.1:5672",
"account": "cloudream",
"password": "123456",
"vhost": "/"
},
"cloudreamStorage": {
"url": "http://localhost:7890"
},
"pcm": {
"url": "http://localhost:7892"
},
"reportIntervalSec": 10
}

View File

@ -15,6 +15,13 @@
"url": "http://localhost:7890" "url": "http://localhost:7890"
}, },
"unifyOps": { "unifyOps": {
"url": "http://localhost:7890" "url": "http://localhost:7892"
} },
"slwNodes": [
{
"slwNodeID": 1,
"stgNodeID": 1,
"storageID": 1
}
]
} }

View File

@ -14,5 +14,8 @@
"cloudreamStorage": { "cloudreamStorage": {
"url": "http://localhost:7890" "url": "http://localhost:7890"
}, },
"pcm": {
"url": "http://localhost:7892"
},
"reportIntervalSec": 10 "reportIntervalSec": 10
} }

View File

@ -1,6 +1,8 @@
{ {
"logger": { "logger": {
"output": "stdout", "output": "file",
"outputFileName": "manager",
"outputDirectory": "log",
"level": "debug" "level": "debug"
}, },
"rabbitMQ": { "rabbitMQ": {

View File

@ -2,10 +2,10 @@ package jobmod
import ( import (
"github.com/samber/lo" "github.com/samber/lo"
"gitlink.org.cn/cloudream/common/pkgs/mq"
"gitlink.org.cn/cloudream/common/pkgs/types" "gitlink.org.cn/cloudream/common/pkgs/types"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler" schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops" uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops"
"gitlink.org.cn/cloudream/common/utils/reflect"
) )
type FileScheduleAction string type FileScheduleAction string
@ -73,9 +73,12 @@ type Job interface {
} }
var JobTypeUnion = types.NewTypeUnion[Job]( var JobTypeUnion = types.NewTypeUnion[Job](
reflect.TypeOf[NormalJob](), (*NormalJob)(nil),
reflect.TypeOf[ResourceJob](), (*ResourceJob)(nil),
) )
var _ = mq.RegisterUnionType(JobTypeUnion)
// TODO var _ = serder.RegisterNewTaggedTypeUnion(JobTypeUnion, "Type", "type")
type JobBase struct { type JobBase struct {
JobSetID schsdk.JobSetID `json:"jobSetID"` // 任务集ID JobSetID schsdk.JobSetID `json:"jobSetID"` // 任务集ID

View File

@ -1,8 +1,8 @@
package jobmod package jobmod
import ( import (
"gitlink.org.cn/cloudream/common/pkgs/mq"
"gitlink.org.cn/cloudream/common/pkgs/types" "gitlink.org.cn/cloudream/common/pkgs/types"
myreflect "gitlink.org.cn/cloudream/common/utils/reflect"
) )
type JobState interface { type JobState interface {
@ -11,15 +11,18 @@ type JobState interface {
type JobStateBase struct{} type JobStateBase struct{}
var JobStateTypeUnion = types.NewTypeUnion[JobState]( var JobStateTypeUnion = types.NewTypeUnion[JobState](
myreflect.TypeOf[StatePreScheduling](), (*StatePreScheduling)(nil),
myreflect.TypeOf[StateReadyToAdjust](), (*StateReadyToAdjust)(nil),
myreflect.TypeOf[StateMakingAdjustScheme](), (*StateMakingAdjustScheme)(nil),
myreflect.TypeOf[StateAdjusting](), (*StateAdjusting)(nil),
myreflect.TypeOf[StateReadyToExecute](), (*StateReadyToExecute)(nil),
myreflect.TypeOf[StateExecuting](), (*StateExecuting)(nil),
myreflect.TypeOf[StateFailed](), (*StateFailed)(nil),
myreflect.TypeOf[StateSuccess](), (*StateSuccess)(nil),
) )
var _ = mq.RegisterUnionType(JobStateTypeUnion)
// TODO var _ = serder.RegisterNewTaggedTypeUnion(JobStateTypeUnion, "Type", "type")
type FileSchedulingStep string type FileSchedulingStep string

View File

@ -16,8 +16,6 @@ type Service interface {
type Server struct { type Server struct {
service Service service Service
rabbitSvr mq.RabbitMQServer rabbitSvr mq.RabbitMQServer
OnError func(err error)
} }
func NewServer(svc Service, cfg *mymq.Config) (*Server, error) { func NewServer(svc Service, cfg *mymq.Config) (*Server, error) {
@ -48,6 +46,10 @@ func (s *Server) Serve() error {
return s.rabbitSvr.Serve() return s.rabbitSvr.Serve()
} }
func (s *Server) OnError(callback func(error)) {
s.rabbitSvr.OnError = callback
}
var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher() var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher()
// Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型 // Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型

View File

@ -15,7 +15,7 @@ var _ = Register(Service.StartTask)
type StartTask struct { type StartTask struct {
mq.MessageBodyBase mq.MessageBodyBase
Info advtsk.TaskInfo `json:"info"` Info advtsk.AdvTaskInfo `json:"info"`
} }
type StartTaskResp struct { type StartTaskResp struct {
mq.MessageBodyBase mq.MessageBodyBase
@ -23,7 +23,7 @@ type StartTaskResp struct {
TaskID string `json:"taskID"` TaskID string `json:"taskID"`
} }
func NewStartTask(info advtsk.TaskInfo) *StartTask { func NewStartTask(info advtsk.AdvTaskInfo) *StartTask {
return &StartTask{ return &StartTask{
Info: info, Info: info,
} }
@ -37,7 +37,3 @@ func NewStartTaskResp(advID schmod.AdvisorID, taskID string) *StartTaskResp {
func (c *Client) StartTask(msg *StartTask, opts ...mq.RequestOption) (*StartTaskResp, error) { func (c *Client) StartTask(msg *StartTask, opts ...mq.RequestOption) (*StartTaskResp, error) {
return mq.Request(Service.StartTask, c.rabbitCli, msg, opts...) return mq.Request(Service.StartTask, c.rabbitCli, msg, opts...)
} }
func init() {
mq.RegisterUnionType(advtsk.TaskInfoTypeUnion)
}

View File

@ -4,6 +4,8 @@ import (
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job" jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
) )
var _ = Register[*MakeAdjustScheme, *MakeAdjustSchemeStatus]()
type MakeAdjustScheme struct { type MakeAdjustScheme struct {
TaskInfoBase TaskInfoBase
Job jobmod.NormalJob `json:"job"` Job jobmod.NormalJob `json:"job"`
@ -27,7 +29,3 @@ func NewMakeAdjustSchemeStatus(err string, scheme jobmod.JobScheduleScheme) *Mak
Scheme: scheme, Scheme: scheme,
} }
} }
func init() {
Register[MakeAdjustScheme, MakeAdjustSchemeStatus]()
}

View File

@ -1,36 +1,48 @@
package task package task
import ( import (
"gitlink.org.cn/cloudream/common/pkgs/mq"
"gitlink.org.cn/cloudream/common/pkgs/types" "gitlink.org.cn/cloudream/common/pkgs/types"
myreflect "gitlink.org.cn/cloudream/common/utils/reflect" myreflect "gitlink.org.cn/cloudream/common/utils/reflect"
) )
// 任务 // 任务。
type TaskInfo interface { // 由于json-iter库的缺陷这个类型名必须加一点前缀否则会和executor中的重名导致代码异常
type AdvTaskInfo interface {
Noop() Noop()
} }
// 增加了新类型后需要在这里也同步添加 // 增加了新类型后需要在这里也同步添加
var TaskInfoTypeUnion = types.NewTypeUnion[TaskInfo]() var TaskInfoTypeUnion = types.NewTypeUnion[AdvTaskInfo]()
type TaskInfoBase struct{} type TaskInfoBase struct{}
func (s *TaskInfoBase) Noop() {} func (s *TaskInfoBase) Noop() {}
// 任务上报的状态 // 任务上报的状态
type TaskStatus interface { // 由于json-iter库的缺陷这个类型名必须加一点前缀否则会和executor中的重名导致代码异常
type AdvTaskStatus interface {
Noop() Noop()
} }
// 增加了新类型后需要在这里也同步添加 // 增加了新类型后需要在这里也同步添加
var TaskStatusTypeUnion = types.NewTypeUnion[TaskStatus]() var TaskStatusTypeUnion = types.NewTypeUnion[AdvTaskStatus]()
type TaskStatusBase struct{} type TaskStatusBase struct{}
func (s *TaskStatusBase) Noop() {} func (s *TaskStatusBase) Noop() {}
func Register[TTaskInfo any, TTaskStatus any]() { // 注此函数必须以var _ = Register[xxx, xxx]()的形式被调用这样才能保证init中RegisterUnionType时
// TypeUnion不是空的。因为包级变量初始化比init函数调用先进行
func Register[TTaskInfo AdvTaskInfo, TTaskStatus AdvTaskStatus]() any {
TaskInfoTypeUnion.Add(myreflect.TypeOf[TTaskInfo]()) TaskInfoTypeUnion.Add(myreflect.TypeOf[TTaskInfo]())
TaskStatusTypeUnion.Add(myreflect.TypeOf[TTaskStatus]()) TaskStatusTypeUnion.Add(myreflect.TypeOf[TTaskStatus]())
return nil
}
func init() {
mq.RegisterUnionType(TaskInfoTypeUnion)
mq.RegisterUnionType(TaskStatusTypeUnion)
} }

View File

@ -64,7 +64,3 @@ func NewGetAllResourceDataResp(datas []uopsdk.ResourceData) *GetAllResourceDataR
func (c *Client) GetAllResourceData(msg *GetAllResourceData, opts ...mq.RequestOption) (*GetAllResourceDataResp, error) { func (c *Client) GetAllResourceData(msg *GetAllResourceData, opts ...mq.RequestOption) (*GetAllResourceDataResp, error) {
return mq.Request(Service.GetAllResourceData, c.rabbitCli, msg, opts...) return mq.Request(Service.GetAllResourceData, c.rabbitCli, msg, opts...)
} }
func init() {
mq.RegisterUnionType(uopsdk.ResourceDataTypeUnion)
}

View File

@ -22,8 +22,6 @@ type Service interface {
type Server struct { type Server struct {
service Service service Service
rabbitSvr mq.RabbitMQServer rabbitSvr mq.RabbitMQServer
OnError func(err error)
} }
func NewServer(svc Service, cfg *mymq.Config) (*Server, error) { func NewServer(svc Service, cfg *mymq.Config) (*Server, error) {
@ -54,6 +52,10 @@ func (s *Server) Serve() error {
return s.rabbitSvr.Serve() return s.rabbitSvr.Serve()
} }
func (s *Server) OnError(callback func(error)) {
s.rabbitSvr.OnError = callback
}
var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher() var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher()
// Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型 // Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型

View File

@ -19,8 +19,6 @@ const (
type Server struct { type Server struct {
service Service service Service
rabbitSvr mq.RabbitMQServer rabbitSvr mq.RabbitMQServer
OnError func(err error)
} }
func NewServer(svc Service, cfg *mymq.Config) (*Server, error) { func NewServer(svc Service, cfg *mymq.Config) (*Server, error) {
@ -51,6 +49,10 @@ func (s *Server) Serve() error {
return s.rabbitSvr.Serve() return s.rabbitSvr.Serve()
} }
func (s *Server) OnError(callback func(error)) {
s.rabbitSvr.OnError = callback
}
var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher() var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher()
// Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型 // Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型

View File

@ -16,7 +16,7 @@ var _ = Register(Service.StartTask)
type StartTask struct { type StartTask struct {
mq.MessageBodyBase mq.MessageBodyBase
Info exectsk.TaskInfo `json:"info"` Info exectsk.ExeTaskInfo `json:"info"`
} }
type StartTaskResp struct { type StartTaskResp struct {
mq.MessageBodyBase mq.MessageBodyBase
@ -24,7 +24,7 @@ type StartTaskResp struct {
TaskID string `json:"taskID"` TaskID string `json:"taskID"`
} }
func NewStartTask(info exectsk.TaskInfo) *StartTask { func NewStartTask(info exectsk.ExeTaskInfo) *StartTask {
return &StartTask{ return &StartTask{
Info: info, Info: info,
} }
@ -38,7 +38,3 @@ func NewStartTaskResp(execID schmod.ExecutorID, taskID string) *StartTaskResp {
func (c *Client) StartTask(msg *StartTask, opts ...mq.RequestOption) (*StartTaskResp, error) { func (c *Client) StartTask(msg *StartTask, opts ...mq.RequestOption) (*StartTaskResp, error) {
return mq.Request(Service.StartTask, c.rabbitCli, msg, opts...) return mq.Request(Service.StartTask, c.rabbitCli, msg, opts...)
} }
func init() {
mq.RegisterUnionType(exectsk.TaskInfoTypeUnion)
}

View File

@ -2,6 +2,8 @@ package task
import stgsdk "gitlink.org.cn/cloudream/common/sdks/storage" import stgsdk "gitlink.org.cn/cloudream/common/sdks/storage"
var _ = Register[*CacheMovePackage, *CacheMovePackageStatus]()
type CacheMovePackage struct { type CacheMovePackage struct {
TaskInfoBase TaskInfoBase
UserID int64 `json:"userID"` UserID int64 `json:"userID"`
@ -27,7 +29,3 @@ func NewCacheMovePackageStatus(err string, cacheInfos []stgsdk.ObjectCacheInfo)
CacheInfos: cacheInfos, CacheInfos: cacheInfos,
} }
} }
func init() {
Register[CacheMovePackage, CacheMovePackageStatus]()
}

View File

@ -5,6 +5,8 @@ import (
uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops" uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops"
) )
var _ = Register[*ScheduleTask, *ScheduleTaskStatus]()
type ScheduleTask struct { type ScheduleTask struct {
TaskInfoBase TaskInfoBase
SlwNodeID uopsdk.SlwNodeID `json:"slwNodeID"` SlwNodeID uopsdk.SlwNodeID `json:"slwNodeID"`
@ -35,7 +37,3 @@ func NewScheduleTaskStatus(status string, err string, pcmJobID int64) *ScheduleT
PCMJobID: pcmJobID, PCMJobID: pcmJobID,
} }
} }
func init() {
Register[ScheduleTask, ScheduleTaskStatus]()
}

View File

@ -2,6 +2,8 @@ package task
import stgsdk "gitlink.org.cn/cloudream/common/sdks/storage" import stgsdk "gitlink.org.cn/cloudream/common/sdks/storage"
var _ = Register[*StorageCreatePackage, *StorageCreatePackageStatus]()
type StorageCreatePackage struct { type StorageCreatePackage struct {
TaskInfoBase TaskInfoBase
UserID int64 `json:"userID"` UserID int64 `json:"userID"`
@ -35,7 +37,3 @@ func NewStorageCreatePackageStatus(status string, err string, packageID int64) *
PackageID: packageID, PackageID: packageID,
} }
} }
func init() {
Register[StorageCreatePackage, StorageCreatePackageStatus]()
}

View File

@ -1,5 +1,7 @@
package task package task
var _ = Register[*StorageLoadPackage, *StorageLoadPackageStatus]()
type StorageLoadPackage struct { type StorageLoadPackage struct {
TaskInfoBase TaskInfoBase
UserID int64 `json:"userID"` UserID int64 `json:"userID"`
@ -25,7 +27,3 @@ func NewStorageLoadPackageStatus(status string, err string) *StorageLoadPackageS
Error: err, Error: err,
} }
} }
func init() {
Register[StorageLoadPackage, StorageCreatePackageStatus]()
}

View File

@ -1,36 +1,48 @@
package task package task
import ( import (
"gitlink.org.cn/cloudream/common/pkgs/mq"
"gitlink.org.cn/cloudream/common/pkgs/types" "gitlink.org.cn/cloudream/common/pkgs/types"
myreflect "gitlink.org.cn/cloudream/common/utils/reflect" myreflect "gitlink.org.cn/cloudream/common/utils/reflect"
) )
// 任务 // 任务
type TaskInfo interface { // 由于json-iter库的缺陷这个类型名必须加一点前缀否则会和advisor中的重名导致代码异常
type ExeTaskInfo interface {
Noop() Noop()
} }
// 增加了新类型后需要在这里也同步添加 // 增加了新类型后需要在这里也同步添加
var TaskInfoTypeUnion = types.NewTypeUnion[TaskInfo]() var TaskInfoTypeUnion = types.NewTypeUnion[ExeTaskInfo]()
type TaskInfoBase struct{} type TaskInfoBase struct{}
func (s *TaskInfoBase) Noop() {} func (s *TaskInfoBase) Noop() {}
// 任务上报的状态 // 任务上报的状态
type TaskStatus interface { // 由于json-iter库的缺陷这个类型名必须加一点前缀否则会和advisor中的重名导致代码异常
type ExeTaskStatus interface {
Noop() Noop()
} }
// 增加了新类型后需要在这里也同步添加 // 增加了新类型后需要在这里也同步添加
var TaskStatusTypeUnion = types.NewTypeUnion[TaskStatus]() var TaskStatusTypeUnion = types.NewTypeUnion[ExeTaskStatus]()
type TaskStatusBase struct{} type TaskStatusBase struct{}
func (s *TaskStatusBase) Noop() {} func (s *TaskStatusBase) Noop() {}
func Register[TTaskInfo any, TTaskStatus any]() { // 注此函数必须以var _ = Register[xxx, xxx]()的形式被调用这样才能保证init中RegisterUnionType时
// TypeUnion不是空的。因为包级变量初始化比init函数调用先进行
func Register[TTaskInfo ExeTaskInfo, TTaskStatus ExeTaskStatus]() any {
TaskInfoTypeUnion.Add(myreflect.TypeOf[TTaskInfo]()) TaskInfoTypeUnion.Add(myreflect.TypeOf[TTaskInfo]())
TaskStatusTypeUnion.Add(myreflect.TypeOf[TTaskStatus]()) TaskStatusTypeUnion.Add(myreflect.TypeOf[TTaskStatus]())
return nil
}
func init() {
mq.RegisterUnionType(TaskInfoTypeUnion)
mq.RegisterUnionType(TaskStatusTypeUnion)
} }

View File

@ -2,6 +2,8 @@ package task
import uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops" import uopsdk "gitlink.org.cn/cloudream/common/sdks/unifyops"
var _ = Register[*UploadImage, *UploadImageStatus]()
type UploadImage struct { type UploadImage struct {
TaskInfoBase TaskInfoBase
SlwNodeID uopsdk.SlwNodeID `json:"slwNodeID"` SlwNodeID uopsdk.SlwNodeID `json:"slwNodeID"`
@ -27,7 +29,3 @@ func NewUploadImageStatus(status string, err string, imageID uopsdk.SlwNodeImage
ImageID: imageID, ImageID: imageID,
} }
} }
func init() {
Register[UploadImage, UploadImageStatus]()
}

View File

@ -25,7 +25,7 @@ type ReportAdvisorTaskStatusResp struct {
} }
type AdvisorTaskStatus struct { type AdvisorTaskStatus struct {
TaskID string TaskID string
Status advtsk.TaskStatus Status advtsk.AdvTaskStatus
} }
func NewReportAdvisorTaskStatus(advisorID schmod.AdvisorID, taskStatus []AdvisorTaskStatus) *ReportAdvisorTaskStatus { func NewReportAdvisorTaskStatus(advisorID schmod.AdvisorID, taskStatus []AdvisorTaskStatus) *ReportAdvisorTaskStatus {
@ -37,7 +37,7 @@ func NewReportAdvisorTaskStatus(advisorID schmod.AdvisorID, taskStatus []Advisor
func NewReportAdvisorTaskStatusResp() *ReportAdvisorTaskStatusResp { func NewReportAdvisorTaskStatusResp() *ReportAdvisorTaskStatusResp {
return &ReportAdvisorTaskStatusResp{} return &ReportAdvisorTaskStatusResp{}
} }
func NewAdvisorTaskStatus(taskID string, status exectsk.TaskStatus) AdvisorTaskStatus { func NewAdvisorTaskStatus(taskID string, status exectsk.ExeTaskStatus) AdvisorTaskStatus {
return AdvisorTaskStatus{ return AdvisorTaskStatus{
TaskID: taskID, TaskID: taskID,
Status: status, Status: status,

View File

@ -24,7 +24,7 @@ type ReportExecutorTaskStatusResp struct {
} }
type ExecutorTaskStatus struct { type ExecutorTaskStatus struct {
TaskID string TaskID string
Status exectsk.TaskStatus Status exectsk.ExeTaskStatus
} }
func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus { func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus {
@ -36,7 +36,7 @@ func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []Exec
func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp { func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp {
return &ReportExecutorTaskStatusResp{} return &ReportExecutorTaskStatusResp{}
} }
func NewExecutorTaskStatus(taskID string, status exectsk.TaskStatus) ExecutorTaskStatus { func NewExecutorTaskStatus(taskID string, status exectsk.ExeTaskStatus) ExecutorTaskStatus {
return ExecutorTaskStatus{ return ExecutorTaskStatus{
TaskID: taskID, TaskID: taskID,
Status: status, Status: status,
@ -45,7 +45,3 @@ func NewExecutorTaskStatus(taskID string, status exectsk.TaskStatus) ExecutorTas
func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) { func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) {
return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...) return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...)
} }
func init() {
mq.RegisterUnionType(exectsk.TaskStatusTypeUnion)
}

View File

@ -122,7 +122,3 @@ func (c *Client) GetJobSetJobs(msg *GetJobSetJobs, opts ...mq.RequestOption) (*G
return mq.Request(Service.GetJobSetJobs, c.rabbitCli, msg, opts...) return mq.Request(Service.GetJobSetJobs, c.rabbitCli, msg, opts...)
} }
*/ */
func init() {
mq.RegisterUnionType(jobmod.JobTypeUnion)
}

View File

@ -22,8 +22,6 @@ type Service interface {
type Server struct { type Server struct {
service Service service Service
rabbitSvr mq.RabbitMQServer rabbitSvr mq.RabbitMQServer
OnError func(err error)
} }
func NewServer(svc Service, cfg *mymq.Config) (*Server, error) { func NewServer(svc Service, cfg *mymq.Config) (*Server, error) {
@ -54,6 +52,10 @@ func (s *Server) Serve() error {
return s.rabbitSvr.Serve() return s.rabbitSvr.Serve()
} }
// OnError installs callback as the error callback of the wrapped RabbitMQ
// server by assigning it to rabbitSvr.OnError. A later call replaces the
// previously installed callback.
func (s *Server) OnError(callback func(error)) {
s.rabbitSvr.OnError = callback
}
var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher() var msgDispatcher mq.MessageDispatcher = mq.NewMessageDispatcher()
// Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型 // Register 将Service中的一个接口函数作为指定类型消息的处理函数同时会注册请求和响应的消息类型

View File

@ -15,7 +15,7 @@ import (
type Reporter struct { type Reporter struct {
executorID schmod.ExecutorID executorID schmod.ExecutorID
reportInterval time.Duration reportInterval time.Duration
taskStatus map[string]exectsk.TaskStatus taskStatus map[string]exectsk.ExeTaskStatus
taskStatusLock sync.Mutex taskStatusLock sync.Mutex
reportNow chan bool reportNow chan bool
} }
@ -24,12 +24,12 @@ func NewReporter(executorID schmod.ExecutorID, reportInterval time.Duration) Rep
return Reporter{ return Reporter{
executorID: executorID, executorID: executorID,
reportInterval: reportInterval, reportInterval: reportInterval,
taskStatus: make(map[string]exectsk.TaskStatus), taskStatus: make(map[string]exectsk.ExeTaskStatus),
reportNow: make(chan bool), reportNow: make(chan bool),
} }
} }
func (r *Reporter) Report(taskID string, taskStatus exectsk.TaskStatus) { func (r *Reporter) Report(taskID string, taskStatus exectsk.ExeTaskStatus) {
r.taskStatusLock.Lock() r.taskStatusLock.Lock()
defer r.taskStatusLock.Unlock() defer r.taskStatusLock.Unlock()
@ -65,7 +65,7 @@ func (r *Reporter) Serve() error {
for taskID, status := range r.taskStatus { for taskID, status := range r.taskStatus {
taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status)) taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status))
} }
r.taskStatus = make(map[string]exectsk.TaskStatus) r.taskStatus = make(map[string]exectsk.ExeTaskStatus)
r.taskStatusLock.Unlock() r.taskStatusLock.Unlock()
_, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus)) _, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus))

View File

@ -36,7 +36,7 @@ func NewManager(reporter *reporter.Reporter) Manager {
} }
} }
func (m *Manager) StartByInfo(info exectsk.TaskInfo) (*Task, error) { func (m *Manager) StartByInfo(info exectsk.ExeTaskInfo) (*Task, error) {
infoType := myreflect.TypeOfValue(info) infoType := myreflect.TypeOfValue(info)
ctor, ok := taskFromInfoCtors[infoType] ctor, ok := taskFromInfoCtors[infoType]
@ -47,10 +47,10 @@ func (m *Manager) StartByInfo(info exectsk.TaskInfo) (*Task, error) {
return m.StartNew(ctor(info)), nil return m.StartNew(ctor(info)), nil
} }
var taskFromInfoCtors map[reflect.Type]func(exectsk.TaskInfo) TaskBody var taskFromInfoCtors map[reflect.Type]func(exectsk.ExeTaskInfo) TaskBody = make(map[reflect.Type]func(exectsk.ExeTaskInfo) task.TaskBody[TaskContext])
func Register[TInfo exectsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) { func Register[TInfo exectsk.ExeTaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) {
taskFromInfoCtors[myreflect.TypeOf[TInfo]()] = func(info exectsk.TaskInfo) TaskBody { taskFromInfoCtors[myreflect.TypeOf[TInfo]()] = func(info exectsk.ExeTaskInfo) TaskBody {
return ctor(info.(TInfo)) return ctor(info.(TInfo))
} }
} }

View File

@ -28,6 +28,7 @@ func main() {
os.Exit(1) os.Exit(1)
} }
schglb.InitMQPool(&config.Cfg().RabbitMQ)
schglb.InitCloudreamStoragePool(&config.Cfg().CloudreamStorage) schglb.InitCloudreamStoragePool(&config.Cfg().CloudreamStorage)
schglb.InitPCMPool(&config.Cfg().PCM) schglb.InitPCMPool(&config.Cfg().PCM)
@ -42,9 +43,9 @@ func main() {
logger.Fatalf("new executor server failed, err: %s", err.Error()) logger.Fatalf("new executor server failed, err: %s", err.Error())
} }
mqSvr.OnError = func(err error) { mqSvr.OnError(func(err error) {
logger.Warnf("executor server err: %s", err.Error()) logger.Warnf("executor server err: %s", err.Error())
} })
// 启动服务 // 启动服务
go serveMQServer(mqSvr) go serveMQServer(mqSvr)

7
go.mod
View File

@ -8,10 +8,17 @@ require (
github.com/gin-gonic/gin v1.9.1 github.com/gin-gonic/gin v1.9.1
github.com/google/uuid v1.3.0 github.com/google/uuid v1.3.0
github.com/samber/lo v1.38.1 github.com/samber/lo v1.38.1
github.com/smartystreets/goconvey v1.8.0
gitlink.org.cn/cloudream/common v0.0.0 gitlink.org.cn/cloudream/common v0.0.0
google.golang.org/grpc v1.54.0 google.golang.org/grpc v1.54.0
) )
require (
github.com/gopherjs/gopherjs v1.17.2 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/smartystreets/assertions v1.13.1 // indirect
)
require ( require (
github.com/antonfisher/nested-logrus-formatter v1.3.1 // indirect github.com/antonfisher/nested-logrus-formatter v1.3.1 // indirect
github.com/bytedance/sonic v1.9.1 // indirect github.com/bytedance/sonic v1.9.1 // indirect

4
go.sum
View File

@ -34,6 +34,7 @@ github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
@ -46,6 +47,7 @@ github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf/go.mod h1:yrqSX
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
@ -69,7 +71,9 @@ github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXn
github.com/sirupsen/logrus v1.9.2 h1:oxx1eChJGI6Uks2ZC4W1zpLlVgqB8ner4EuQwV4Ik1Y= github.com/sirupsen/logrus v1.9.2 h1:oxx1eChJGI6Uks2ZC4W1zpLlVgqB8ner4EuQwV4Ik1Y=
github.com/sirupsen/logrus v1.9.2/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.2/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smartystreets/assertions v1.13.1 h1:Ef7KhSmjZcK6AVf9YbJdvPYG9avaF0ZxudX+ThRdWfU= github.com/smartystreets/assertions v1.13.1 h1:Ef7KhSmjZcK6AVf9YbJdvPYG9avaF0ZxudX+ThRdWfU=
github.com/smartystreets/assertions v1.13.1/go.mod h1:cXr/IwVfSo/RbCSPhoAPv73p3hlSdrBH/b3SdnW/LMY=
github.com/smartystreets/goconvey v1.8.0 h1:Oi49ha/2MURE0WexF052Z0m+BNSGirfjg5RL+JXWq3w= github.com/smartystreets/goconvey v1.8.0 h1:Oi49ha/2MURE0WexF052Z0m+BNSGirfjg5RL+JXWq3w=
github.com/smartystreets/goconvey v1.8.0/go.mod h1:EdX8jtrTIj26jmjCOVNMVSIYAtgexqXKHOXW2Dx9JLg=
github.com/streadway/amqp v1.1.0 h1:py12iX8XSyI7aN/3dUT8DFIDJazNJsVJdxNVEpnQTZM= github.com/streadway/amqp v1.1.0 h1:py12iX8XSyI7aN/3dUT8DFIDJazNJsVJdxNVEpnQTZM=
github.com/streadway/amqp v1.1.0/go.mod h1:WYSrTEYHOXHd0nwFeUXAe2G2hRnQT+deZJJf88uS9Bg= github.com/streadway/amqp v1.1.0/go.mod h1:WYSrTEYHOXHd0nwFeUXAe2G2hRnQT+deZJJf88uS9Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

View File

@ -55,9 +55,15 @@ func Bin() error {
if err := Collector(); err != nil { if err := Collector(); err != nil {
return err return err
} }
if err := Advisor(); err != nil {
return err
}
if err := Executor(); err != nil { if err := Executor(); err != nil {
return err return err
} }
if err := Manager(); err != nil {
return err
}
return nil return nil
} }
@ -66,8 +72,13 @@ func Scripts() error {
scriptsDir := "./common/assets/scripts" scriptsDir := "./common/assets/scripts"
info, err := os.Stat(scriptsDir) info, err := os.Stat(scriptsDir)
if errors.Is(err, os.ErrNotExist) || !info.IsDir() { if errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("script directory not exists or is not a directory") fmt.Printf("no scripts.\n")
return nil
}
if !info.IsDir() {
return fmt.Errorf("scripts is not a directory")
} }
fullDirPath, err := filepath.Abs(filepath.Join(BuildDir, "scripts")) fullDirPath, err := filepath.Abs(filepath.Join(BuildDir, "scripts"))
@ -84,8 +95,13 @@ func Confs() error {
confDir := "./common/assets/confs" confDir := "./common/assets/confs"
info, err := os.Stat(confDir) info, err := os.Stat(confDir)
if errors.Is(err, os.ErrNotExist) || !info.IsDir() { if errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("conf directory not exists or is not a directory") fmt.Printf("no confs.\n")
return nil
}
if !info.IsDir() {
return fmt.Errorf("confs is not a directory")
} }
fullDirPath, err := filepath.Abs(filepath.Join(BuildDir, "confs")) fullDirPath, err := filepath.Abs(filepath.Join(BuildDir, "confs"))
@ -116,6 +132,15 @@ func Collector() error {
}) })
} }
// Advisor builds the advisor component. It hands magefiles.Build the output
// name and directory ("advisor"), the assets directory and the entry file
// advisor/main.go, and returns whatever error Build reports.
func Advisor() error {
	args := magefiles.BuildArgs{
		OutputName: "advisor",
		OutputDir:  "advisor",
		AssetsDir:  "assets",
		EntryFile:  "advisor/main.go",
	}
	return magefiles.Build(args)
}
func Executor() error { func Executor() error {
return magefiles.Build(magefiles.BuildArgs{ return magefiles.Build(magefiles.BuildArgs{
OutputName: "executor", OutputName: "executor",
@ -124,3 +149,12 @@ func Executor() error {
EntryFile: "executor/main.go", EntryFile: "executor/main.go",
}) })
} }
// Manager builds the manager component. It hands magefiles.Build the output
// name and directory ("manager"), the assets directory and the entry file
// manager/main.go, and returns whatever error Build reports.
func Manager() error {
	args := magefiles.BuildArgs{
		OutputName: "manager",
		OutputDir:  "manager",
		AssetsDir:  "assets",
		EntryFile:  "manager/main.go",
	}
	return magefiles.Build(args)
}

View File

@ -25,7 +25,7 @@ type AdvisorInfo struct {
lastReportTime time.Time lastReportTime time.Time
} }
type OnTaskUpdatedCallbackFn func(jobID schsdk.JobID, fullTaskID string, taskStatus advtsk.TaskStatus) type OnTaskUpdatedCallbackFn func(jobID schsdk.JobID, fullTaskID string, taskStatus advtsk.AdvTaskStatus)
type OnTimeoutCallbackFn func(jobID schsdk.JobID, fullTaskID string) type OnTimeoutCallbackFn func(jobID schsdk.JobID, fullTaskID string)
type Manager struct { type Manager struct {
@ -66,7 +66,7 @@ func (m *Manager) Report(advID schmod.AdvisorID, taskStatus []mgrmq.AdvisorTaskS
info, ok := m.advisors[advID] info, ok := m.advisors[advID]
if !ok { if !ok {
info := &AdvisorInfo{ info = &AdvisorInfo{
advisorID: advID, advisorID: advID,
jobTasks: make(map[string]jobTask), jobTasks: make(map[string]jobTask),
} }
@ -86,7 +86,7 @@ func (m *Manager) Report(advID schmod.AdvisorID, taskStatus []mgrmq.AdvisorTaskS
} }
// 启动一个Task并将其关联到指定的Job。返回一个在各Executor之间唯一的TaskID // 启动一个Task并将其关联到指定的Job。返回一个在各Executor之间唯一的TaskID
func (m *Manager) StartTask(jobID schsdk.JobID, info advtsk.TaskInfo) (string, error) { func (m *Manager) StartTask(jobID schsdk.JobID, info advtsk.AdvTaskInfo) (string, error) {
m.lock.Lock() m.lock.Lock()
defer m.lock.Unlock() defer m.lock.Unlock()

View File

@ -26,7 +26,7 @@ type ExecutorInfo struct {
lastReportTime time.Time lastReportTime time.Time
} }
type OnTaskUpdatedCallbackFn func(jobID schsdk.JobID, fullTaskID string, taskStatus exetsk.TaskStatus) type OnTaskUpdatedCallbackFn func(jobID schsdk.JobID, fullTaskID string, taskStatus exetsk.ExeTaskStatus)
type OnTimeoutCallbackFn func(jobID schsdk.JobID, fullTaskID string) type OnTimeoutCallbackFn func(jobID schsdk.JobID, fullTaskID string)
type Manager struct { type Manager struct {
@ -67,7 +67,7 @@ func (m *Manager) Report(execID schmod.ExecutorID, taskStatus []mgrmq.ExecutorTa
info, ok := m.executors[execID] info, ok := m.executors[execID]
if !ok { if !ok {
info := &ExecutorInfo{ info = &ExecutorInfo{
executorID: execID, executorID: execID,
jobTasks: make(map[string]jobTask), jobTasks: make(map[string]jobTask),
} }
@ -87,7 +87,7 @@ func (m *Manager) Report(execID schmod.ExecutorID, taskStatus []mgrmq.ExecutorTa
} }
// 启动一个Task并将其关联到指定的Job。返回一个在各Executor之间唯一的TaskID // 启动一个Task并将其关联到指定的Job。返回一个在各Executor之间唯一的TaskID
func (m *Manager) StartTask(jobID schsdk.JobID, info exetsk.TaskInfo) (string, error) { func (m *Manager) StartTask(jobID schsdk.JobID, info exetsk.ExeTaskInfo) (string, error) {
m.lock.Lock() m.lock.Lock()
defer m.lock.Unlock() defer m.lock.Unlock()

View File

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"reflect" "reflect"
"gitlink.org.cn/cloudream/common/pkgs/logger"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job" jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event" "gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
) )
@ -32,10 +33,17 @@ func (h *CompleteHandler) Handle(job jobmod.Job) {
} }
func (h *CompleteHandler) handleSuccess(job jobmod.Job, state *jobmod.StateSuccess) { func (h *CompleteHandler) handleSuccess(job jobmod.Job, state *jobmod.StateSuccess) {
logger.WithField("JobID", job.GetJobID()).Infof("job completed successfuly")
h.mgr.onEvent(event.ToJobSet(job.GetJobSetID()), event.NewJobCompleted(job)) h.mgr.onEvent(event.ToJobSet(job.GetJobSetID()), event.NewJobCompleted(job))
} }
func (h *CompleteHandler) handleFailed(job jobmod.Job, state *jobmod.StateFailed) { func (h *CompleteHandler) handleFailed(job jobmod.Job, state *jobmod.StateFailed) {
logger.
WithField("JobID", job.GetJobID()).
WithField("LastState", reflect.TypeOf(state.LastState).String()).
Infof("job failed with: %v", state.Error)
h.mgr.onEvent(event.ToJobSet(job.GetJobSetID()), event.NewJobCompleted(job)) h.mgr.onEvent(event.ToJobSet(job.GetJobSetID()), event.NewJobCompleted(job))
} }

View File

@ -0,0 +1,50 @@
package jobmgr
import (
"gitlink.org.cn/cloudream/common/pkgs/logger"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
)
// DefaultHandler is the handler the Manager keeps in its defaultHandler field,
// separate from the per-state handlers registered in its handlers map.
type DefaultHandler struct {
// mgr is the owning Manager; Handle calls back into it via handleState.
mgr *Manager
}
// NewDefaultHandler returns a DefaultHandler bound to the given Manager.
func NewDefaultHandler(mgr *Manager) *DefaultHandler {
	handler := new(DefaultHandler)
	handler.mgr = mgr
	return handler
}
// Handle processes a job. The global lock is held for the duration of the call.
//
// The job is moved to a failed state and handed back to the manager's
// handleState, with two variants:
//   - a nil state becomes StateFailed("unexpected nil state") with no last state;
//   - any other state becomes StateFailed("no handler for this state") that
//     records the state it replaced.
//
// A job that is already in StateFailed is left untouched and only a warning is
// logged.
func (h *DefaultHandler) Handle(job jobmod.Job) {
	switch current := job.GetState().(type) {
	case nil:
		job.SetState(jobmod.NewStateFailed("unexpected nil state", nil))

	case *jobmod.StateFailed:
		logger.Warnf("state failed should not be handled by default handler")
		return

	default:
		job.SetState(jobmod.NewStateFailed("no handler for this state", current))
	}

	h.mgr.handleState(job)
}
// OnEvent is called when an external event has occurred. The default handler
// ignores every event, so the body is intentionally empty.
func (h *DefaultHandler) OnEvent(broadcast event.Broadcast, evt event.Event) {
}
// Serve runs the handler. The default handler has no work of its own, so the
// body is intentionally empty and the call returns immediately.
func (h *DefaultHandler) Serve() {
}
// Stop stops this handler. The default handler has nothing to shut down, so
// the body is intentionally empty.
func (h *DefaultHandler) Stop() {
}

View File

@ -5,17 +5,17 @@ import advtsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor/task"
// advisor上报任务进度 // advisor上报任务进度
type AdvisorTaskUpdated struct { type AdvisorTaskUpdated struct {
FullTaskID string FullTaskID string
TaskStatus advtsk.TaskStatus TaskStatus advtsk.AdvTaskStatus
} }
func NewAdvisorTaskUpdated(fullTaskID string, taskStatus advtsk.TaskStatus) *AdvisorTaskUpdated { func NewAdvisorTaskUpdated(fullTaskID string, taskStatus advtsk.AdvTaskStatus) *AdvisorTaskUpdated {
return &AdvisorTaskUpdated{ return &AdvisorTaskUpdated{
FullTaskID: fullTaskID, FullTaskID: fullTaskID,
TaskStatus: taskStatus, TaskStatus: taskStatus,
} }
} }
func AssertAdvisorTaskStatus[T advtsk.TaskStatus](evt Event, fullTaskID string) (T, error) { func AssertAdvisorTaskStatus[T advtsk.AdvTaskStatus](evt Event, fullTaskID string) (T, error) {
var ret T var ret T
if evt == nil { if evt == nil {
return ret, ErrUnconcernedTask return ret, ErrUnconcernedTask

View File

@ -7,17 +7,17 @@ import (
// executor上报任务进度 // executor上报任务进度
type ExecutorTaskUpdated struct { type ExecutorTaskUpdated struct {
FullTaskID string FullTaskID string
TaskStatus exectsk.TaskStatus TaskStatus exectsk.ExeTaskStatus
} }
func NewExecutorTaskUpdated(fullTaskID string, taskStatus exectsk.TaskStatus) *ExecutorTaskUpdated { func NewExecutorTaskUpdated(fullTaskID string, taskStatus exectsk.ExeTaskStatus) *ExecutorTaskUpdated {
return &ExecutorTaskUpdated{ return &ExecutorTaskUpdated{
FullTaskID: fullTaskID, FullTaskID: fullTaskID,
TaskStatus: taskStatus, TaskStatus: taskStatus,
} }
} }
func AssertExecutorTaskStatus[T exectsk.TaskStatus](evt Event, fullTaskID string) (T, error) { func AssertExecutorTaskStatus[T exectsk.ExeTaskStatus](evt Event, fullTaskID string) (T, error) {
var ret T var ret T
if evt == nil { if evt == nil {
return ret, ErrUnconcernedTask return ret, ErrUnconcernedTask

View File

@ -74,7 +74,7 @@ func (h *ExecutingHandler) onNormalJobEvent(evt event.Event, job *executingJob,
return return
} }
fullTaskID, err := h.mgr.advMgr.StartTask(job.job.GetJobID(), fullTaskID, err := h.mgr.execMgr.StartTask(job.job.GetJobID(),
exetsk.NewScheduleTask( exetsk.NewScheduleTask(
norJob.TargetSlwNodeID, norJob.TargetSlwNodeID,
norJob.Info.Runtime.Envs, norJob.Info.Runtime.Envs,

View File

@ -5,7 +5,9 @@ import (
"fmt" "fmt"
"reflect" "reflect"
"sync" "sync"
"time"
"gitlink.org.cn/cloudream/common/pkgs/logger"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler" schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
myreflect "gitlink.org.cn/cloudream/common/utils/reflect" myreflect "gitlink.org.cn/cloudream/common/utils/reflect"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job" jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
@ -44,6 +46,10 @@ func NewManager(execMgr *executormgr.Manager, advMgr *advisormgr.Manager, imageM
execMgr: execMgr, execMgr: execMgr,
advMgr: advMgr, advMgr: advMgr,
imageMgr: imageMgr, imageMgr: imageMgr,
handlers: make(map[reflect.Type]StateHandler),
jobSets: make(map[schsdk.JobSetID]*jobmod.JobSet),
jobs: make(map[schsdk.JobID]*mgrJob),
} }
execMgr.OnTaskUpdated(mgr.executorTaskUpdated) execMgr.OnTaskUpdated(mgr.executorTaskUpdated)
@ -52,16 +58,20 @@ func NewManager(execMgr *executormgr.Manager, advMgr *advisormgr.Manager, imageM
advMgr.OnTaskUpdated(mgr.advisorTaskUpdated) advMgr.OnTaskUpdated(mgr.advisorTaskUpdated)
advMgr.OnTaskTimeout(mgr.advisorTaskTimeout) advMgr.OnTaskTimeout(mgr.advisorTaskTimeout)
mgr.handlers[myreflect.TypeOf[jobmod.StatePreScheduling]()] = NewPreSchedulingHandler(mgr) // TODO 考虑优化这部分逻辑
mgr.handlers[myreflect.TypeOf[jobmod.StateReadyToAdjust]()] = NewReadyToAdjustHandler(mgr)
mgr.handlers[myreflect.TypeOf[jobmod.StateMakingAdjustScheme]()] = NewMakingAdjustSchemeHandler(mgr) mgr.handlers[myreflect.TypeOf[*jobmod.StatePreScheduling]()] = NewPreSchedulingHandler(mgr)
mgr.handlers[myreflect.TypeOf[jobmod.StateAdjusting]()] = NewAdjustingHandler(mgr) mgr.handlers[myreflect.TypeOf[*jobmod.StateReadyToAdjust]()] = NewReadyToAdjustHandler(mgr)
mgr.handlers[myreflect.TypeOf[jobmod.StateReadyToExecute]()] = NewReadyToExecuteHandler(mgr) mgr.handlers[myreflect.TypeOf[*jobmod.StateMakingAdjustScheme]()] = NewMakingAdjustSchemeHandler(mgr)
mgr.handlers[myreflect.TypeOf[jobmod.StateExecuting]()] = NewExecutingHandler(mgr) mgr.handlers[myreflect.TypeOf[*jobmod.StateAdjusting]()] = NewAdjustingHandler(mgr)
mgr.handlers[myreflect.TypeOf[*jobmod.StateReadyToExecute]()] = NewReadyToExecuteHandler(mgr)
mgr.handlers[myreflect.TypeOf[*jobmod.StateExecuting]()] = NewExecutingHandler(mgr)
compHder := NewCompleteHandler(mgr) compHder := NewCompleteHandler(mgr)
mgr.handlers[myreflect.TypeOf[jobmod.StateFailed]()] = compHder mgr.handlers[myreflect.TypeOf[*jobmod.StateFailed]()] = compHder
mgr.handlers[myreflect.TypeOf[jobmod.StateSuccess]()] = compHder mgr.handlers[myreflect.TypeOf[*jobmod.StateSuccess]()] = compHder
mgr.defaultHandler = NewDefaultHandler(mgr)
return mgr, nil return mgr, nil
} }
@ -73,6 +83,19 @@ func (m *Manager) Serve() error {
go m.defaultHandler.Serve() go m.defaultHandler.Serve()
ticker := time.NewTicker(time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
// 每一分钟产生一个空事件,防止无限等待
m.pubLock.Lock()
m.onEvent(event.ToAll(), nil)
m.pubLock.Unlock()
}
}
return nil return nil
} }
@ -158,7 +181,7 @@ func (m *Manager) LocalFileUploaded(jobSetID schsdk.JobSetID, localPath string,
return nil return nil
} }
func (m *Manager) executorTaskUpdated(jobID schsdk.JobID, fullTaskID string, taskStatus exectsk.TaskStatus) { func (m *Manager) executorTaskUpdated(jobID schsdk.JobID, fullTaskID string, taskStatus exectsk.ExeTaskStatus) {
m.pubLock.Lock() m.pubLock.Lock()
defer m.pubLock.Unlock() defer m.pubLock.Unlock()
@ -182,7 +205,7 @@ func (m *Manager) executorTaskTimeout(jobID schsdk.JobID, fullTaskID string) {
job.Handler.OnEvent(event.ToJob(jobID), event.NewExecutorTaskTimeout(fullTaskID)) job.Handler.OnEvent(event.ToJob(jobID), event.NewExecutorTaskTimeout(fullTaskID))
} }
func (m *Manager) advisorTaskUpdated(jobID schsdk.JobID, fullTaskID string, taskStatus advtsk.TaskStatus) { func (m *Manager) advisorTaskUpdated(jobID schsdk.JobID, fullTaskID string, taskStatus advtsk.AdvTaskStatus) {
m.pubLock.Lock() m.pubLock.Lock()
defer m.pubLock.Unlock() defer m.pubLock.Unlock()
@ -224,6 +247,10 @@ func (m *Manager) CloneJob(jobID schsdk.JobID) (jobmod.Job, error) {
// 根据job状态选择handler进行处理。需要加锁 // 根据job状态选择handler进行处理。需要加锁
func (m *Manager) handleState(job jobmod.Job) { func (m *Manager) handleState(job jobmod.Job) {
logger.WithField("JobID", job.GetJobID()).
WithField("State", reflect.TypeOf(job.GetState()).String()).
Debugf("job state changed")
runtime, ok := m.jobs[job.GetJobID()] runtime, ok := m.jobs[job.GetJobID()]
if !ok { if !ok {
return return

View File

@ -135,6 +135,7 @@ func (h *PreSchedulingHandler) changeJobState(job jobmod.Job, state jobmod.JobSt
} }
func (h *PreSchedulingHandler) doPackageScheduling(evt event.Event, job *preSchedulingJob, fileInfo schsdk.JobFileInfo, file *jobmod.PackageJobFile, scheme *jobmod.FileScheduleScheme, state *jobmod.FileSchedulingState) error { func (h *PreSchedulingHandler) doPackageScheduling(evt event.Event, job *preSchedulingJob, fileInfo schsdk.JobFileInfo, file *jobmod.PackageJobFile, scheme *jobmod.FileScheduleScheme, state *jobmod.FileSchedulingState) error {
// TODO 考虑拆分成多个函数
if state.Step == jobmod.StepBegin { if state.Step == jobmod.StepBegin {
switch info := fileInfo.(type) { switch info := fileInfo.(type) {
case *schsdk.LocalJobFileInfo: case *schsdk.LocalJobFileInfo:
@ -163,6 +164,10 @@ func (h *PreSchedulingHandler) doPackageScheduling(evt event.Event, job *preSche
return nil return nil
} }
if localFileCmd.Error != "" {
return fmt.Errorf("local file uploading: %s", localFileCmd.Error)
}
file.PackageID = localFileCmd.PackageID file.PackageID = localFileCmd.PackageID
state.Step = jobmod.StepUploaded state.Step = jobmod.StepUploaded
} }
@ -243,6 +248,7 @@ func (h *PreSchedulingHandler) doPackageScheduling(evt event.Event, job *preSche
} }
func (h *PreSchedulingHandler) doImageScheduling(evt event.Event, job *preSchedulingJob, fileInfo schsdk.JobFileInfo, file *jobmod.ImageJobFile, scheme *jobmod.FileScheduleScheme, state *jobmod.FileSchedulingState) error { func (h *PreSchedulingHandler) doImageScheduling(evt event.Event, job *preSchedulingJob, fileInfo schsdk.JobFileInfo, file *jobmod.ImageJobFile, scheme *jobmod.FileScheduleScheme, state *jobmod.FileSchedulingState) error {
// TODO 考虑拆分成多个函数
if state.Step == jobmod.StepBegin { if state.Step == jobmod.StepBegin {
switch info := fileInfo.(type) { switch info := fileInfo.(type) {
case *schsdk.LocalJobFileInfo: case *schsdk.LocalJobFileInfo:
@ -277,6 +283,10 @@ func (h *PreSchedulingHandler) doImageScheduling(evt event.Event, job *preSchedu
return nil return nil
} }
if localFileCmd.Error != "" {
return fmt.Errorf("local file uploading: %s", localFileCmd.Error)
}
// 上传完毕,则可以新建一个空的镜像的记录 // 上传完毕,则可以新建一个空的镜像的记录
info, err := h.mgr.imageMgr.CreateImage(localFileCmd.PackageID) info, err := h.mgr.imageMgr.CreateImage(localFileCmd.PackageID)
if err != nil { if err != nil {
@ -325,8 +335,12 @@ func (h *PreSchedulingHandler) doImageScheduling(evt event.Event, job *preSchedu
return fmt.Errorf("cache move pacakge: %s", cacheMoveRet.Error) return fmt.Errorf("cache move pacakge: %s", cacheMoveRet.Error)
} }
if len(cacheMoveRet.CacheInfos) != 1 { if len(cacheMoveRet.CacheInfos) == 0 {
return fmt.Errorf("there must be only 1 object in the package that will be imported") return fmt.Errorf("no object in the package which will be imported")
}
if len(cacheMoveRet.CacheInfos) > 1 {
return fmt.Errorf("there must be only 1 object in the package which will be imported")
} }
fullTaskID, err := h.mgr.execMgr.StartTask(job.job.JobID, exectsk.NewUploadImage(job.slwNodeInfo.ID, stgsdk.MakeIPFSFilePath(cacheMoveRet.CacheInfos[0].FileHash))) fullTaskID, err := h.mgr.execMgr.StartTask(job.job.JobID, exectsk.NewUploadImage(job.slwNodeInfo.ID, stgsdk.MakeIPFSFilePath(cacheMoveRet.CacheInfos[0].FileHash)))

View File

@ -71,7 +71,7 @@ func (h *ReadyToAdjustHandler) onNormalJobEvent(evt event.Event, job *readyToAdj
return return
} }
needWait := true needWait := false
// 无论发生什么事件,都检查一下前置任务的状态 // 无论发生什么事件,都检查一下前置任务的状态
if resFile, ok := norJob.Info.Files.Dataset.(*schsdk.ResourceJobFileInfo); ok { if resFile, ok := norJob.Info.Files.Dataset.(*schsdk.ResourceJobFileInfo); ok {
@ -110,6 +110,9 @@ func (h *ReadyToAdjustHandler) onNormalJobEvent(evt event.Event, job *readyToAdj
job.state, job.state,
)) ))
return return
} else {
// 等待的Job不是失败或者成功状态则需要继续等待
needWait = true
} }
} }

View File

@ -9,6 +9,8 @@ import (
// 提交任务集 // 提交任务集
func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetResp, *mq.CodeMessage) { func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetResp, *mq.CodeMessage) {
logger.Debugf("submitting job")
jobSet, err := svc.jobMgr.SubmitJobSet(msg.JobSet, msg.PreScheduleScheme) jobSet, err := svc.jobMgr.SubmitJobSet(msg.JobSet, msg.PreScheduleScheme)
if err != nil { if err != nil {
logger.Warnf("submitting job set: %s", err.Error()) logger.Warnf("submitting job set: %s", err.Error())
@ -20,6 +22,10 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
// 任务集中某个文件上传完成 // 任务集中某个文件上传完成
func (svc *Service) JobSetLocalFileUploaded(msg *mgrmq.JobSetLocalFileUploaded) (*mgrmq.JobSetLocalFileUploadedResp, *mq.CodeMessage) { func (svc *Service) JobSetLocalFileUploaded(msg *mgrmq.JobSetLocalFileUploaded) (*mgrmq.JobSetLocalFileUploadedResp, *mq.CodeMessage) {
logger.WithField("LocalPath", msg.LocalPath).
WithField("PackageID", msg.PackageID).
Debugf("local file uploaded")
svc.jobMgr.LocalFileUploaded(msg.JobSetID, msg.LocalPath, msg.Error, msg.PackageID) svc.jobMgr.LocalFileUploaded(msg.JobSetID, msg.LocalPath, msg.Error, msg.PackageID)
return mq.ReplyOK(mgrmq.NewJobSetLocalFileUploadedResp()) return mq.ReplyOK(mgrmq.NewJobSetLocalFileUploadedResp())
} }

View File

@ -68,11 +68,13 @@ func main() {
logger.Fatalf("new manager mq server: %s", err.Error()) logger.Fatalf("new manager mq server: %s", err.Error())
} }
mqSvr.OnError = func(err error) { mqSvr.OnError(func(err error) {
logger.Warnf("manager server err: %s", err.Error()) logger.Warnf("manager server err: %s", err.Error())
} })
// 启动服务 // 启动服务
go serveJobManager(jobMgr)
go serveExecutorManager(exeMgr) go serveExecutorManager(exeMgr)
go serveAdvisorManager(advMgr) go serveAdvisorManager(advMgr)
@ -83,6 +85,17 @@ func main() {
<-forever <-forever
} }
// serveJobManager runs the job manager's Serve method and logs its lifecycle:
// one message before Serve is called, an error message if Serve returns a
// non-nil error, and a final message once Serve has returned.
func serveJobManager(mgr *jobmgr.Manager) {
	logger.Info("start serving job manager")

	if err := mgr.Serve(); err != nil {
		logger.Errorf("job manager stopped with error: %s", err.Error())
	}

	logger.Info("job manager stopped")
}
func serveMQServer(server *mgrmq.Server) { func serveMQServer(server *mgrmq.Server) {
logger.Info("start serving mq server") logger.Info("start serving mq server")