JCC-CSScheduler/advisor/internal/task/schedule_scheme.go

181 lines
4.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package task
import (
"fmt"
"time"
"gitlink.org.cn/cloudream/common/models"
"gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
"gitlink.org.cn/cloudream/common/utils/convertto"
"gitlink.org.cn/cloudream/scheduler/common/globals"
"gitlink.org.cn/cloudream/scheduler/common/models/job"
advtsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/advisor/task"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/collector"
)
// GetScheduleScheme is the advisor task that validates a job's pre-scheduling
// result by checking the resources of the pre-scheduled node.
type GetScheduleScheme struct {
	// Job is the job whose resource requests (Job.Info.Resources) are checked.
	Job job.NormalJob
	// preAdjustNodeID is the node ID sent to the collector when querying
	// the currently available resources.
	preAdjustNodeID int64
}
// NewGetScheduleScheme returns a GetScheduleScheme whose fields all hold
// their zero values.
func NewGetScheduleScheme() *GetScheduleScheme {
	return new(GetScheduleScheme)
}
// Execute runs the schedule-scheme check, reports the outcome through
// ctx.reporter, flushes the report immediately, and then signals completion.
//
// The error returned by do (nil on success) is forwarded to complete, together
// with a one-minute RemovingDelay.
func (t *GetScheduleScheme) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
	log := logger.WithType[GetScheduleScheme]("Task")
	log.Debugf("begin")
	defer log.Debugf("end")

	taskID := task.ID()

	err := t.do(taskID, ctx)
	if err != nil {
		// TODO: when the task fails, adjust the reported "failed" status field as appropriate.
		ctx.reporter.Report(taskID, advtsk.NewTaskStatus("failed", err.Error(), true, advtsk.AdjustedScheme{}))
	} else {
		// err is nil on this path, so calling err.Error() here would panic;
		// report an empty error message instead.
		// NOTE(review): the status literal is still "failed" on success because
		// the expected success value is not visible from this file — confirm
		// against the status consumer and replace it.
		ctx.reporter.Report(taskID, advtsk.NewTaskStatus("failed", "", false, advtsk.AdjustedScheme{}))
	}
	ctx.reporter.ReportNow()

	complete(err, CompleteOption{
		RemovingDelay: time.Minute,
	})
}
// do checks whether the pre-scheduled node still has enough resources for the
// job. Both follow-up branches are placeholders at present, so the only
// observable result is the error (if any) from CheckResourceAvailability.
func (t *GetScheduleScheme) do(taskID string, ctx TaskContext) error {
	enough, checkErr := t.CheckResourceAvailability()
	switch {
	case checkErr != nil:
		return checkErr
	case enough:
		// Confirm whether the code, dataset, and image have already been
		// scheduled to this center.
	default:
		// Re-run the pre-scheduling scheme to find the optimal node.
	}
	return nil
}
// CheckResourceAvailability reports whether the pre-scheduled node
// (t.preAdjustNodeID) has enough resources for the job.
//
// Each resource with a positive request in t.Job.Info.Resources is checked in
// the order CPU, NPU, GPU, MLU, storage, memory. For every checked resource the
// collector is queried and the available amount must be at least 1.5 times the
// requested amount. Storage and memory availability is converted with
// convertto.GBToBytes before the comparison.
//
// It returns (false, nil) at the first insufficient resource and (true, nil)
// when every requested resource passes. A non-nil error is returned when the
// collector client cannot be acquired, a collector query fails, or a response
// carries a payload of an unexpected type.
func (t *GetScheduleScheme) CheckResourceAvailability() (bool, error) {
	// headroom is the factor by which the available amount must exceed the request.
	const headroom = 1.5

	log := logger.WithType[GetScheduleScheme]("Task")

	colCli, err := globals.CollectorMQPool.Acquire()
	if err != nil {
		return false, fmt.Errorf("new collector client: %w", err)
	}
	defer colCli.Close()

	if neededCPU := t.Job.Info.Resources.CPU; neededCPU > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeCPU,
		})
		if err != nil {
			return false, fmt.Errorf("getting cpu resource data: %w", err)
		}
		// Checked assertion: an unexpected payload becomes an error, not a panic.
		data, ok := resp.Data.(models.CPUResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected cpu resource data type %T", resp.Data)
		}
		if float64(data.Available.Value) < headroom*neededCPU {
			log.Debugf("Schedule Scheme is wrong: Insufficient cpu")
			return false, nil
		}
	}

	if neededNPU := t.Job.Info.Resources.NPU; neededNPU > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeNPU,
		})
		if err != nil {
			return false, fmt.Errorf("getting npu resource data: %w", err)
		}
		data, ok := resp.Data.(models.NPUResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected npu resource data type %T", resp.Data)
		}
		if float64(data.Available.Value) < headroom*neededNPU {
			log.Debugf("Schedule Scheme is wrong: Insufficient npu")
			return false, nil
		}
	}

	if neededGPU := t.Job.Info.Resources.GPU; neededGPU > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeGPU,
		})
		if err != nil {
			return false, fmt.Errorf("getting gpu resource data: %w", err)
		}
		data, ok := resp.Data.(models.GPUResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected gpu resource data type %T", resp.Data)
		}
		if float64(data.Available.Value) < headroom*neededGPU {
			log.Debugf("Schedule Scheme is wrong: Insufficient gpu")
			return false, nil
		}
	}

	if neededMLU := t.Job.Info.Resources.MLU; neededMLU > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeMLU,
		})
		if err != nil {
			return false, fmt.Errorf("getting mlu resource data: %w", err)
		}
		data, ok := resp.Data.(models.MLUResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected mlu resource data type %T", resp.Data)
		}
		if float64(data.Available.Value) < headroom*neededMLU {
			log.Debugf("Schedule Scheme is wrong: Insufficient mlu")
			return false, nil
		}
	}

	if neededStorage := t.Job.Info.Resources.Storage; neededStorage > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeStorage,
		})
		if err != nil {
			return false, fmt.Errorf("getting storage resource data: %w", err)
		}
		data, ok := resp.Data.(models.StorageResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected storage resource data type %T", resp.Data)
		}
		// The available amount is converted with GBToBytes before comparing
		// it against the request.
		bytesStorage := convertto.GBToBytes(data.Available.Value)
		if bytesStorage < int64(headroom*float64(neededStorage)) {
			log.Debugf("Schedule Scheme is wrong: Insufficient storage")
			return false, nil
		}
	}

	if neededMemory := t.Job.Info.Resources.Memory; neededMemory > 0 {
		resp, err := colCli.GetOneResourceData(collector.GetOneResourceData{
			NodeId:       t.preAdjustNodeID,
			ResourceType: models.ResourceTypeMemory,
		})
		if err != nil {
			return false, fmt.Errorf("getting memory resource data: %w", err)
		}
		data, ok := resp.Data.(models.MemoryResourceData)
		if !ok {
			return false, fmt.Errorf("unexpected memory resource data type %T", resp.Data)
		}
		bytesMemory := convertto.GBToBytes(data.Available.Value)
		if bytesMemory < int64(headroom*float64(neededMemory)) {
			log.Debugf("Schedule Scheme is wrong: Insufficient memory")
			return false, nil
		}
	}

	return true, nil
}