1、新增增量模型更新任务

2、完成执行器任务机制与消息推送等优化
This commit is contained in:
JeshuaRen 2024-08-09 17:42:10 +08:00
parent 2f7250eda1
commit ae31802f19
43 changed files with 1278 additions and 571 deletions

View File

@ -44,7 +44,7 @@ func (s *Service) MakeScheme(dump jobmod.JobDump) (*jobmod.JobScheduleScheme, er
default: default:
} }
return callback.WaitValue(context.Background()) return callback.Wait(context.Background())
} }
func (s *Service) Serve() error { func (s *Service) Serve() error {

View File

@ -20,6 +20,7 @@ var _ = serder.UseTypeUnionExternallyTagged(types.Ref(types.NewTypeUnion[JobBody
(*DataReturnJobDump)(nil), (*DataReturnJobDump)(nil),
(*InstanceJobDump)(nil), (*InstanceJobDump)(nil),
(*MultiInstanceJobDump)(nil), (*MultiInstanceJobDump)(nil),
(*UpdateMultiInstanceJobDump)(nil),
))) )))
type NormalJobDump struct { type NormalJobDump struct {
@ -64,3 +65,13 @@ type MultiInstanceJobDump struct {
func (d *MultiInstanceJobDump) getType() JobBodyDumpType { func (d *MultiInstanceJobDump) getType() JobBodyDumpType {
return d.Type return d.Type
} }
// UpdateMultiInstanceJobDump is a job-body dump variant that carries the dump
// type and the job's files. It is registered in this file's JobBodyDump type
// union next to MultiInstanceJobDump.
//
// NOTE(review): the union tag below is "MultiInstanceJob", which does not match
// this type's name and looks copied from MultiInstanceJobDump. If both types
// carry the same tag in the externally tagged union, decoding cannot tell them
// apart — confirm against MultiInstanceJobDump's tag.
type UpdateMultiInstanceJobDump struct {
serder.Metadata `union:"MultiInstanceJob"`
Type JobBodyDumpType `json:"type"`
Files JobFiles `json:"files"`
}
// getType returns the dump type stored in the Type field.
func (d *UpdateMultiInstanceJobDump) getType() JobBodyDumpType {
return d.Type
}

View File

@ -56,6 +56,15 @@ func (dump *MultiInstCreateInitDump) getType() JobStateDumpType {
return dump.Type return dump.Type
} }
// MultiInstanceUpdateDump is a job-state dump variant that only carries the
// dump type.
//
// NOTE(review): neighbouring dumps use a union tag equal to the type name
// without the "Dump" suffix (e.g. MultiInstCreateRunningDump uses
// "MultiInstCreateRunning"). The tag below is "MultiInstCreateInit", which
// presumably belongs to MultiInstCreateInitDump — verify that the two types do
// not share a tag in the same union.
type MultiInstanceUpdateDump struct {
serder.Metadata `union:"MultiInstCreateInit"`
Type JobStateDumpType `json:"type"`
}
// getType returns the dump type stored in the Type field.
func (dump *MultiInstanceUpdateDump) getType() JobStateDumpType {
return dump.Type
}
type MultiInstCreateRunningDump struct { type MultiInstCreateRunningDump struct {
serder.Metadata `union:"MultiInstCreateRunning"` serder.Metadata `union:"MultiInstCreateRunning"`
Type JobStateDumpType `json:"type"` Type JobStateDumpType `json:"type"`

View File

@ -6,7 +6,6 @@ import (
"gitlink.org.cn/cloudream/common/pkgs/mq" "gitlink.org.cn/cloudream/common/pkgs/mq"
myhttp "gitlink.org.cn/cloudream/common/utils/http" myhttp "gitlink.org.cn/cloudream/common/utils/http"
"gitlink.org.cn/cloudream/common/utils/serder" "gitlink.org.cn/cloudream/common/utils/serder"
"log"
"net/http" "net/http"
"net/url" "net/url"
"strings" "strings"
@ -24,7 +23,8 @@ var _ = Register(Service.StartTask)
type StartTask struct { type StartTask struct {
mq.MessageBodyBase mq.MessageBodyBase
Info exectsk.TaskInfo `json:"info"` TaskID string `json:"taskID"`
Info exectsk.TaskInfo `json:"info"`
} }
type StartTaskResp struct { type StartTaskResp struct {
mq.MessageBodyBase mq.MessageBodyBase
@ -32,9 +32,10 @@ type StartTaskResp struct {
TaskID string `json:"taskID"` TaskID string `json:"taskID"`
} }
func NewStartTask(info exectsk.TaskInfo) *StartTask { func NewStartTask(taskID string, info exectsk.TaskInfo) *StartTask {
return &StartTask{ return &StartTask{
Info: info, TaskID: taskID,
Info: info,
} }
} }
func NewStartTaskResp(execID schmod.ExecutorID, taskID string) *StartTaskResp { func NewStartTaskResp(execID schmod.ExecutorID, taskID string) *StartTaskResp {
@ -53,9 +54,7 @@ func (c *HttpClient) SubmitTask(req *StartTask) (*StartTaskResp, error) {
return nil, err return nil, err
} }
//data, err := json.Marshal(req)
data, err := serder.ObjectToJSONEx(req) data, err := serder.ObjectToJSONEx(req)
log.Println("send data: " + string(data))
resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{ resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{
Body: data, Body: data,
}) })
@ -95,3 +94,56 @@ func (c *HttpClient) GetReportInfo() (*http.Response, error) {
return resp, nil return resp, nil
} }
// TaskOperateInfo is the request payload of the executor's operateTask call:
// it names the task to act on and the command to apply to it.
//
// The fields carry no json tags, so they are serialized under their Go names.
type TaskOperateInfo struct {
	// TaskID identifies the task the command is addressed to.
	TaskID string
	// Command is the operation to perform.
	// NOTE(review): presumably one of the executor's command constants
	// ("update", "stop", "restart", "destroy") — confirm with the task code.
	Command string
}

// NewTaskOperateInfo builds a TaskOperateInfo for the given task and command.
func NewTaskOperateInfo(taskID string, command string) *TaskOperateInfo {
	info := new(TaskOperateInfo)
	info.TaskID = taskID
	info.Command = command
	return info
}
// TaskOperateResp is the reply payload of the executor's operateTask call.
type TaskOperateResp struct {
	// Err holds the error reported for the operation, or nil.
	// NOTE(review): an interface-typed error field has no exported data of
	// its own, so a non-nil value may not survive a JSON round trip — confirm
	// how the serializer in use treats it, or consider a string field.
	Err error
}

// NewTaskOperateResp wraps err (which may be nil) in a TaskOperateResp.
func NewTaskOperateResp(err error) *TaskOperateResp {
	resp := new(TaskOperateResp)
	resp.Err = err
	return resp
}
// OperateTask posts a task operation (task ID plus command) to the executor's
// "/operateTask" HTTP endpoint and decodes the JSON reply.
//
// It returns the decoded TaskOperateResp when the reply code is errorcode.OK.
// An error is returned when the target URL cannot be built, the request cannot
// be serialized or sent, the reply body cannot be parsed, the reply carries a
// non-OK code, or the reply's content type is not JSON.
func (c *HttpClient) OperateTask(req *TaskOperateInfo) (*TaskOperateResp, error) {
	targetURL, err := url.JoinPath(c.baseURL + "/operateTask")
	if err != nil {
		return nil, err
	}

	data, err := serder.ObjectToJSONEx(req)
	if err != nil {
		// This error used to be overwritten by the next assignment to err,
		// which let an empty or partial body be posted.
		return nil, fmt.Errorf("serializing request: %w", err)
	}

	resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{
		Body: data,
	})
	if err != nil {
		return nil, err
	}
	// Close the body on every return path so the connection is released.
	defer resp.Body.Close()

	contType := resp.Header.Get("Content-Type")
	if !strings.Contains(contType, myhttp.ContentTypeJSON) {
		return nil, fmt.Errorf("unknow response content type: %s", contType)
	}

	var codeResp response[TaskOperateResp]
	if err := serder.JSONToObjectStream(resp.Body, &codeResp); err != nil {
		return nil, fmt.Errorf("parsing response: %w", err)
	}

	if codeResp.Code != errorcode.OK {
		return nil, codeResp.ToError()
	}
	return &codeResp.Data, nil
}

View File

@ -1,29 +1,36 @@
package task package task
import cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" import (
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
)
type ScheduleCreateECS struct { type ScheduleCreateECS struct {
TaskInfoBase TaskInfoBase
UserID cdssdk.UserID `json:"userID"` UserID cdssdk.UserID `json:"userID"`
PackageID cdssdk.PackageID `json:"packageID"` PackageID cdssdk.PackageID `json:"packageID"`
ModelID schsdk.ModelID `json:"modelID"`
} }
type ScheduleCreateECSStatus struct { type ScheduleCreateECSStatus struct {
TaskStatusBase TaskStatusBase
Error string `json:"error"` Error string `json:"error"`
Address string `json:"address"` Address string `json:"address"`
ModelID schsdk.ModelID `json:"modelID"`
} }
func NewScheduleCreateECS(userID cdssdk.UserID, packageID cdssdk.PackageID) *ScheduleCreateECS { func NewScheduleCreateECS(userID cdssdk.UserID, packageID cdssdk.PackageID, modelID schsdk.ModelID) *ScheduleCreateECS {
return &ScheduleCreateECS{ return &ScheduleCreateECS{
UserID: userID, UserID: userID,
PackageID: packageID, PackageID: packageID,
ModelID: modelID,
} }
} }
func NewScheduleCreateECSStatus(address string, err string) *ScheduleCreateECSStatus { func NewScheduleCreateECSStatus(address string, modelID schsdk.ModelID, err string) *ScheduleCreateECSStatus {
return &ScheduleCreateECSStatus{ return &ScheduleCreateECSStatus{
Address: address, Address: address,
ModelID: modelID,
Error: err, Error: err,
} }
} }

View File

@ -0,0 +1,29 @@
package task
import cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
// StorageMoveObject is the task info for a storage "move object" task. It
// embeds TaskInfoBase and carries the cdssdk.ObjectMove request to execute.
type StorageMoveObject struct {
TaskInfoBase
ObjectMove cdssdk.ObjectMove `json:"objectMove"`
}
// StorageMoveObjectStatus is the status type paired with StorageMoveObject.
// It embeds TaskStatusBase and carries an error message string.
// NOTE(review): an empty Error presumably means success — confirm with the
// code that produces and consumes this status.
type StorageMoveObjectStatus struct {
TaskStatusBase
Error string `json:"error"`
}
// NewStorageMoveObject builds a StorageMoveObject task info that wraps the
// given move request; the embedded TaskInfoBase is left at its zero value.
func NewStorageMoveObject(objectMove cdssdk.ObjectMove) *StorageMoveObject {
	info := new(StorageMoveObject)
	info.ObjectMove = objectMove
	return info
}
// NewStorageMoveObjectStatus builds a StorageMoveObjectStatus whose Error
// field is set to err; the embedded TaskStatusBase is left at its zero value.
func NewStorageMoveObjectStatus(err string) *StorageMoveObjectStatus {
	status := new(StorageMoveObjectStatus)
	status.Error = err
	return status
}
// init registers the StorageMoveObject info type together with its status
// type via the package's generic Register function.
func init() {
Register[*StorageMoveObject, *StorageMoveObjectStatus]()
}

View File

@ -38,3 +38,8 @@ func Register[TTaskInfo TaskInfo, TTaskStatus TaskStatus]() any {
return nil return nil
} }
// TaskOperateInfo names a task and the command to apply to it.
// The fields carry no json tags, so they are serialized under their Go names.
// NOTE(review): a type with the same name and fields is also declared in the
// executor mq package next to HttpClient.OperateTask — consider keeping a
// single definition.
type TaskOperateInfo struct {
TaskID string
Command string
}

View File

@ -1,47 +1,50 @@
package manager package manager
import ( import (
"gitlink.org.cn/cloudream/common/pkgs/mq"
schmod "gitlink.org.cn/cloudream/scheduler/common/models" schmod "gitlink.org.cn/cloudream/scheduler/common/models"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
) )
type ExecutorService interface { type ExecutorService interface {
ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus) (*ReportExecutorTaskStatusResp, *mq.CodeMessage) //ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus) (*ReportExecutorTaskStatusResp, *mq.CodeMessage)
} }
// 接收executor上报的存活状态及任务执行情况 // 接收executor上报的存活状态及任务执行情况
var _ = Register(Service.ReportExecutorTaskStatus) //var _ = Register(Service.ReportExecutorTaskStatus)
type ReportExecutorTaskStatus struct { // type ReportExecutorTaskStatus struct {
mq.MessageBodyBase // mq.MessageBodyBase
ExecutorID schmod.ExecutorID `json:"executorID"` // ExecutorID schmod.ExecutorID `json:"executorID"`
TaskStatus []ExecutorTaskStatus `json:"taskStatus"` // TaskStatus []ExecutorTaskStatus `json:"taskStatus"`
} // }
//
type ReportExecutorTaskStatusResp struct { // type ReportExecutorTaskStatusResp struct {
mq.MessageBodyBase // mq.MessageBodyBase
} // }
type ExecutorTaskStatus struct { type ExecutorTaskStatus struct {
TaskID string ExecutorID schmod.ExecutorID `json:"executorID"`
Status exectsk.TaskStatus TaskID string `json:"taskID"`
Status exectsk.TaskStatus `json:"status"`
} }
func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus { // func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus {
return &ReportExecutorTaskStatus{ // return &ReportExecutorTaskStatus{
ExecutorID: executorID, // ExecutorID: executorID,
TaskStatus: taskStatus, // TaskStatus: taskStatus,
} // }
} // }
func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp { //
return &ReportExecutorTaskStatusResp{} // func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp {
} // return &ReportExecutorTaskStatusResp{}
func NewExecutorTaskStatus(taskID string, status exectsk.TaskStatus) ExecutorTaskStatus { // }
func NewExecutorTaskStatus(executorID schmod.ExecutorID, taskID string, status exectsk.TaskStatus) ExecutorTaskStatus {
return ExecutorTaskStatus{ return ExecutorTaskStatus{
TaskID: taskID, ExecutorID: executorID,
Status: status, TaskID: taskID,
Status: status,
} }
} }
func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) {
return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...) //func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) {
} // return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...)
//}

View File

@ -2,7 +2,6 @@ package globals
import ( import (
schmod "gitlink.org.cn/cloudream/scheduler/common/models" schmod "gitlink.org.cn/cloudream/scheduler/common/models"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
) )
var ExecutorID schmod.ExecutorID var ExecutorID schmod.ExecutorID
@ -12,5 +11,9 @@ func Init(id schmod.ExecutorID) {
ExecutorID = id ExecutorID = id
} }
// 全局变量定义 const (
var EventChannel = make(chan manager.ReportExecutorTaskStatus) UPDATE = "update"
STOP = "stop"
RESTART = "restart"
DESTROY = "destroy"
)

View File

@ -41,5 +41,6 @@ func (s *Server) Serve() error {
func (s *Server) initRouters() { func (s *Server) initRouters() {
s.engine.POST("/submitTask", s.TaskSvc().SubmitTask) s.engine.POST("/submitTask", s.TaskSvc().SubmitTask)
s.engine.POST("/operateTask", s.TaskSvc().OperateTask)
s.engine.GET("/getReportInfo", s.TaskSvc().GetReportInfo) s.engine.GET("/getReportInfo", s.TaskSvc().GetReportInfo)
} }

View File

@ -1,13 +1,13 @@
package http package http
import ( import (
"encoding/json"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"gitlink.org.cn/cloudream/common/consts/errorcode" "gitlink.org.cn/cloudream/common/consts/errorcode"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/utils/reflect2" "gitlink.org.cn/cloudream/common/utils/reflect2"
"gitlink.org.cn/cloudream/common/utils/serder" "gitlink.org.cn/cloudream/common/utils/serder"
execmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor" execmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals" myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
"io" "io"
"net/http" "net/http"
@ -31,8 +31,6 @@ func (s *TaskService) SubmitTask(ctx *gin.Context) {
return return
} }
println(string(bodyData))
req, err := serder.JSONToObjectEx[execmq.StartTask](bodyData) req, err := serder.JSONToObjectEx[execmq.StartTask](bodyData)
if err != nil { if err != nil {
log.Warnf("parsing request body: %s", err.Error()) log.Warnf("parsing request body: %s", err.Error())
@ -48,7 +46,7 @@ func (s *TaskService) SubmitTask(ctx *gin.Context) {
return return
} }
ctx.JSON(http.StatusOK, OK(execmq.NewStartTaskResp(myglbs.ExecutorID, tsk.ID()))) ctx.JSON(http.StatusOK, OK(execmq.NewStartTaskResp(myglbs.ExecutorID, string(tsk.ID()))))
} }
func (s *TaskService) GetReportInfo(ctx *gin.Context) { func (s *TaskService) GetReportInfo(ctx *gin.Context) {
@ -56,16 +54,64 @@ func (s *TaskService) GetReportInfo(ctx *gin.Context) {
ctx.Header("Cache-Control", "no-cache") ctx.Header("Cache-Control", "no-cache")
ctx.Header("Connection", "keep-alive") ctx.Header("Connection", "keep-alive")
for report := range myglbs.EventChannel { taskChan := s.svc.TaskManager.GetTaskChan()
data, err := json.Marshal(report) defer taskChan.Chan.Close()
status := mgrmq.ExecutorTaskStatus{
ExecutorID: myglbs.ExecutorID,
}
bytes, err := serder.ObjectToJSONEx(status)
_, err = ctx.Writer.Write([]byte("data: " + string(bytes) + "\n\n"))
if err != nil {
logger.Errorf("write data: %s", err.Error())
return
}
ctx.Writer.Flush()
for {
receive, err := taskChan.Chan.Receive()
if err != nil { if err != nil {
return continue
} }
data, err := serder.ObjectToJSONEx(receive)
if err != nil {
logger.Errorf("marshal task: %s", err.Error())
continue
}
logger.Info("send task status: %s", string(data))
_, err = ctx.Writer.Write([]byte("data: " + string(data) + "\n\n")) _, err = ctx.Writer.Write([]byte("data: " + string(data) + "\n\n"))
if err != nil { if err != nil {
return logger.Errorf("write data: %s", err.Error())
continue
} }
ctx.Writer.Flush() // 确保数据立即发送到客户端 ctx.Writer.Flush() // 确保数据立即发送到客户端
println("report: " + string(data))
} }
} }
// OperateTask handles POST /operateTask. It reads the request body, decodes it
// as an execmq.TaskOperateInfo, looks the task up by ID in the task manager
// and forwards the operation to it.
//
// Every outcome is answered with HTTP 200; failures are reported through the
// errorcode.OperationFailed code in the JSON body.
//
// NOTE(review): TaskManager.Tasks is read here without holding the manager's
// lock, while the manager's StartByInfo writes the map under that lock —
// consider a locked accessor on the manager.
func (s *TaskService) OperateTask(ctx *gin.Context) {
	log := logger.WithField("HTTP", "TaskOperate")

	// reject writes the common failure reply with the given message.
	reject := func(message string) {
		ctx.JSON(http.StatusOK, Failed(errorcode.OperationFailed, message))
	}

	rawBody, readErr := io.ReadAll(ctx.Request.Body)
	if readErr != nil {
		log.Warnf("reading request body: %s", readErr.Error())
		reject("read request body failed")
		return
	}

	operate, parseErr := serder.JSONToObjectEx[execmq.TaskOperateInfo](rawBody)
	if parseErr != nil {
		log.Warnf("parsing request body: %s", parseErr.Error())
		reject("parse request body failed")
		return
	}

	target, found := s.svc.TaskManager.Tasks[operate.TaskID]
	if !found {
		reject("task not found")
		return
	}

	target.SendTaskOperate(operate)
	ctx.JSON(http.StatusOK, OK(execmq.NewTaskOperateResp(nil)))
}

View File

@ -0,0 +1,74 @@
package manager
import (
"fmt"
"gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/utils/reflect2"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
"gitlink.org.cn/cloudream/scheduler/executor/internal/task"
"sync"
)
// Manager keeps the tasks started on this executor and fans every status a
// task emits out to all subscribed status channels.
type Manager struct {
	// statusChans holds the subscriber channels handed out by GetTaskChan.
	// Guarded by lock.
	statusChans []*task.TaskChan[any]
	// Tasks maps task ID to the started task. StartByInfo writes it under
	// lock.
	// NOTE(review): the field is exported, so outside readers bypass the
	// lock — consider a locked accessor.
	Tasks map[string]*task.Task
	ctx   task.TaskContext
	lock  sync.Mutex
}

// NewManager returns a Manager with no subscribers and no tasks.
func NewManager() Manager {
	return Manager{
		statusChans: make([]*task.TaskChan[any], 0),
		Tasks:       make(map[string]*task.Task),
		ctx:         task.TaskContext{},
	}
}

// GetTaskChan creates a new subscriber channel, registers it so that it
// receives every subsequent task status, and returns it.
//
// NOTE(review): subscribers are never removed from the manager, so a channel
// the caller has finished with stays registered — consider an unsubscribe
// method.
func (m *Manager) GetTaskChan() *task.TaskChan[any] {
	taskChan := task.NewTaskChan[any]()

	// sendTaskChan goroutines read statusChans concurrently, so the append
	// must happen under the lock.
	m.lock.Lock()
	m.statusChans = append(m.statusChans, taskChan)
	m.lock.Unlock()

	return taskChan
}

// sendTaskChan forwards every value received from tskChan to all currently
// registered subscriber channels. It returns when receiving from tskChan
// fails.
func (m *Manager) sendTaskChan(tskChan task.TaskChan[any]) {
	for {
		receive, err := tskChan.Chan.Receive()
		if err != nil {
			// Retrying a failed Receive in a tight loop would spin forever
			// if the failure is permanent (presumably a closed channel), so
			// stop forwarding for this task instead.
			logger.Error(err.Error())
			return
		}

		// Snapshot the subscriber list under the lock and send outside it,
		// so a slow subscriber cannot block GetTaskChan or StartByInfo.
		m.lock.Lock()
		subscribers := append([]*task.TaskChan[any](nil), m.statusChans...)
		m.lock.Unlock()

		for _, sub := range subscribers {
			if err := sub.Chan.Send(receive); err != nil {
				logger.Error(err.Error())
			}
		}
	}
}

// StartByInfo creates a task with the given ID, records it in Tasks, starts
// executing it in a new goroutine using the constructor registered for info's
// type, and starts forwarding the task's statuses to all subscribers.
//
// It returns an error when no constructor is registered for info's type. An
// existing entry with the same taskID is replaced.
func (m *Manager) StartByInfo(taskID string, info exectsk.TaskInfo) (*task.Task, error) {
	m.lock.Lock()
	defer m.lock.Unlock()

	infoType := reflect2.TypeOfValue(info)

	ctor, ok := task.TaskFromInfoCtors[infoType]
	if !ok {
		return nil, fmt.Errorf("unknow info type")
	}

	newTask := task.NewTask(taskID)
	m.Tasks[taskID] = newTask

	go ctor(info).Execute(newTask, m.ctx)

	// Forward the task's statuses to every subscriber channel.
	go m.sendTaskChan(newTask.TaskStatusChan)

	return newTask, nil
}

View File

@ -1,13 +1,11 @@
package reporter package reporter
import ( import (
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
"sync" "sync"
"time" "time"
schmod "gitlink.org.cn/cloudream/scheduler/common/models" schmod "gitlink.org.cn/cloudream/scheduler/common/models"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
) )
type Reporter struct { type Reporter struct {
@ -58,17 +56,17 @@ func (r *Reporter) Serve() error {
ticker.Reset(r.reportInterval) ticker.Reset(r.reportInterval)
} }
r.taskStatusLock.Lock() //r.taskStatusLock.Lock()
var taskStatus []mgrmq.ExecutorTaskStatus //var taskStatus []mgrmq.ExecutorTaskStatus
for taskID, status := range r.taskStatus { //for taskID, status := range r.taskStatus {
taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status)) // taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status))
} //}
r.taskStatus = make(map[string]exectsk.TaskStatus) //r.taskStatus = make(map[string]exectsk.TaskStatus)
r.taskStatusLock.Unlock() //r.taskStatusLock.Unlock()
status := mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus) //status := mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus)
// 将数据发送到管道中 //// 将数据发送到管道中
globals.EventChannel <- *status //globals.EventChannel <- *status
//_, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus)) //_, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus))

View File

@ -1,15 +1,15 @@
package services package services
import ( import (
"gitlink.org.cn/cloudream/scheduler/executor/internal/task" "gitlink.org.cn/cloudream/scheduler/executor/internal/manager"
) )
type Service struct { type Service struct {
taskManager *task.Manager TaskManager *manager.Manager
} }
func NewService(tskmgr *task.Manager) *Service { func NewService(tskmgr *manager.Manager) *Service {
return &Service{ return &Service{
taskManager: tskmgr, TaskManager: tskmgr,
} }
} }

View File

@ -1,7 +1,6 @@
package services package services
import ( import (
"gitlink.org.cn/cloudream/common/consts/errorcode"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/mq" "gitlink.org.cn/cloudream/common/pkgs/mq"
"gitlink.org.cn/cloudream/common/utils/reflect2" "gitlink.org.cn/cloudream/common/utils/reflect2"
@ -11,18 +10,18 @@ import (
) )
func (svc *Service) StartTask(msg *execmq.StartTask) (*execmq.StartTaskResp, *mq.CodeMessage) { func (svc *Service) StartTask(msg *execmq.StartTask) (*execmq.StartTaskResp, *mq.CodeMessage) {
tsk, err := svc.taskManager.StartByInfo(msg.Info) //tsk, err := svc.TaskManager.StartByInfo(msg.Info)
if err != nil { //if err != nil {
logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()). // logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
Warnf("starting task by info: %s", err.Error()) // Warnf("starting task by info: %s", err.Error())
return nil, mq.Failed(errorcode.OperationFailed, "start task by info failed") // return nil, mq.Failed(errorcode.OperationFailed, "start task by info failed")
} //}
return mq.ReplyOK(execmq.NewStartTaskResp(myglbs.ExecutorID, tsk.ID())) return mq.ReplyOK(execmq.NewStartTaskResp(myglbs.ExecutorID, ""))
} }
func (svc *Service) SubmitTask(msg *execmq.StartTask) (*task.Task, error) { func (svc *Service) SubmitTask(msg *execmq.StartTask) (*task.Task, error) {
tsk, err := svc.taskManager.StartByInfo(msg.Info) tsk, err := svc.TaskManager.StartByInfo(msg.TaskID, msg.Info)
if err != nil { if err != nil {
logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()). logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
Warnf("starting task by info: %s", err.Error()) Warnf("starting task by info: %s", err.Error())

View File

@ -2,10 +2,7 @@ package task
import ( import (
"fmt" "fmt"
"time"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
@ -21,22 +18,17 @@ func NewCacheMovePackage(info *exectsk.CacheMovePackage) *CacheMovePackage {
} }
} }
func (t *CacheMovePackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *CacheMovePackage) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[CacheMovePackage]("Task") log := logger.WithType[CacheMovePackage]("Task")
log.Debugf("begin with %v", logger.FormatStruct(t.CacheMovePackage)) log.Debugf("begin with %v", logger.FormatStruct(t.CacheMovePackage))
defer log.Debugf("end") defer log.Debugf("end")
err := t.do(ctx) err := t.do(ctx)
if err != nil { if err != nil {
ctx.reporter.Report(task.ID(), exectsk.NewCacheMovePackageStatus(err.Error())) task.SendStatus(exectsk.NewCacheMovePackageStatus(err.Error()))
} else { } else {
ctx.reporter.Report(task.ID(), exectsk.NewCacheMovePackageStatus("")) task.SendStatus(exectsk.NewCacheMovePackageStatus(""))
} }
ctx.reporter.ReportNow()
complete(err, CompleteOption{
RemovingDelay: time.Minute,
})
} }
func (t *CacheMovePackage) do(ctx TaskContext) error { func (t *CacheMovePackage) do(ctx TaskContext) error {

View File

@ -9,7 +9,6 @@ import (
util "github.com/alibabacloud-go/tea-utils/v2/service" util "github.com/alibabacloud-go/tea-utils/v2/service"
"github.com/alibabacloud-go/tea/tea" "github.com/alibabacloud-go/tea/tea"
log "gitlink.org.cn/cloudream/common/pkgs/logger" log "gitlink.org.cn/cloudream/common/pkgs/logger"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
"time" "time"
) )
@ -49,10 +48,9 @@ func AliConfig(configMap map[string]interface{}) {
aliclient, _ = ecs.NewClient(config) aliclient, _ = ecs.NewClient(config)
} }
func (a *AliCloud) CreateServer(commands []string) (string, error) { // CreateServer 创建实例
func (a *AliCloud) CreateServer() (string, error) {
var instanceID string var instanceID string
var instanceIDArr string
var result string
tryErr := func() (_e error) { tryErr := func() (_e error) {
defer func() { defer func() {
@ -68,7 +66,7 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
return _err return _err
} }
instanceID = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet[0])) instanceID = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet[0]))
instanceIDArr = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet)) //instanceIDArr := tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet))
log.Info(tea.String("--------------------创建实例成功实例ID:" + tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet)) + "--------------------")) log.Info(tea.String("--------------------创建实例成功实例ID:" + tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet)) + "--------------------"))
return nil return nil
@ -86,35 +84,16 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
return "", tryErr return "", tryErr
} }
println("instance: " + instanceID)
println("instanceArr: " + instanceIDArr)
// 获取实例IP // 获取实例IP
ip, _ := getInstanceIP(instanceIDArr, *aliclient.RegionId) //ip, _ := getInstanceIP(instanceIDArr, *aliclient.RegionId)
println("ip: " + ip) //println("ip: " + ip)
CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID return instanceID, nil
CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID }
println("CDSRcloneID: " + CDSRcloneID)
println("CDSRcloneConfigID: " + CDSRcloneConfigID)
//commands := []string{}
//commandContent := "yum install -y fuse3"
//commands = append(commands, commandContent)
//commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/"
//commands = append(commands, commandContent)
//commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n"
//println(commandContent)
//commands = append(commands, commandContent)
//commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n"
//println(commandContent)
//commands = append(commands, commandContent)
//commandContent = "cd /opt/rclone \n chmod +x rclone"
//commands = append(commands, commandContent)
//commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &"
//commands = append(commands, commandContent)
//commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh"
//commands = append(commands, commandContent)
// RunCommand 执行指令
func (a *AliCloud) RunCommand(commands []string, instanceID string) (string, error) {
var result string
for i := 0; i < len(commands); i++ { for i := 0; i < len(commands); i++ {
log.Info("start execute command") log.Info("start execute command")
commandId, err := runShellCommand(commands[i], instanceID, *aliclient.RegionId) commandId, err := runShellCommand(commands[i], instanceID, *aliclient.RegionId)
@ -123,16 +102,27 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
} }
// 判断是否执行成功 // 判断是否执行成功
log.Info("describe result") log.Info("describe result")
_, _, err = describeInvocationResults(aliclient, instanceID, commandId, tea.String("utf-8"), 500) _, result, err = describeInvocationResults(aliclient, instanceID, commandId, tea.String("utf-8"), 500)
if err != nil { if err != nil {
log.Error("describeInvocationResults: " + err.Error()) log.Error("describeInvocationResults: " + err.Error())
return "", err return "", err
} }
} }
return result, nil return result, nil
} }
// DestroyServer force-deletes the ECS instance with the given ID through the
// package-level aliclient.
//
// It returns the request ID from the delete response, or the error reported
// by the client.
func (a *AliCloud) DestroyServer(instanceID string) (string, error) {
	request := &ecs.DeleteInstanceRequest{
		InstanceId: &instanceID,
		Force:      tea.Bool(true), // force deletion
	}

	response, err := aliclient.DeleteInstance(request)
	if err != nil {
		return "", err
	}

	requestID := tea.StringValue(response.Body.RequestId)
	return requestID, nil
}
func runShellCommand(commandContent string, instanceID string, regionId string) (*string, error) { func runShellCommand(commandContent string, instanceID string, regionId string) (*string, error) {
// 从CDS下载文件 // 从CDS下载文件
commandRequest := ecs.RunCommandRequest{ commandRequest := ecs.RunCommandRequest{

View File

@ -2,14 +2,11 @@ package create_ecs
// CloudProvider 是一个接口,定义了创建服务器的方法 // CloudProvider 是一个接口,定义了创建服务器的方法
type CloudProvider interface { type CloudProvider interface {
CreateServer(commands []string) (string, error) CreateServer() (string, error)
RunCommand(commands []string, instanceID string) (string, error)
DestroyServer(instanceID string) (string, error)
} }
// CloudFactory 是工厂接口
// 工厂模式中使用 CreateProvider 的设计原则是:
// 单一职责Factory 只负责创建 CloudProvider 实例CloudProvider 负责实际的服务器创建任务。
// 开闭原则Factory 可以扩展以支持新的 CloudProvider 实现,而无需修改现有代码。
// 依赖倒置原则:客户端代码依赖于 CloudProvider 接口而不是具体实现,从而减少了耦合。
type CloudFactory interface { type CloudFactory interface {
CreateProvider() CloudProvider CreateProvider() CloudProvider
} }

View File

@ -12,7 +12,7 @@ import (
// HuaweiCloud实现了CloudProvider接口 // HuaweiCloud实现了CloudProvider接口
type HuaweiCloud struct{} type HuaweiCloud struct{}
var req model.PostPaidServer var serverbody model.PrePaidServer
var hwConfigMap map[string]interface{} var hwConfigMap map[string]interface{}
var hwclient ecs.EcsClient var hwclient ecs.EcsClient
@ -24,7 +24,7 @@ func HWCloudConfig(configMap map[string]interface{}) {
return return
} }
err = json.Unmarshal(jsonData, &req) err = json.Unmarshal(jsonData, &serverbody)
if err != nil { if err != nil {
return return
} }
@ -45,13 +45,13 @@ func HWCloudConfig(configMap map[string]interface{}) {
} }
func (a *HuaweiCloud) CreateServer(commands []string) (string, error) { func (a *HuaweiCloud) CreateServer() (string, error) {
request := &model.CreatePostPaidServersRequest{} request := &model.CreateServersRequest{}
request.Body = &model.CreatePostPaidServersRequestBody{ request.Body = &model.CreateServersRequestBody{
Server: &req, Server: &serverbody,
} }
response, err := hwclient.CreatePostPaidServers(request) response, err := hwclient.CreateServers(request)
if err == nil { if err == nil {
fmt.Printf("%+v\n", response) fmt.Printf("%+v\n", response)
} else { } else {
@ -60,3 +60,26 @@ func (a *HuaweiCloud) CreateServer(commands []string) (string, error) {
//ids := response.ServerIds //ids := response.ServerIds
return "", nil return "", nil
} }
// RunCommand is not implemented for Huawei Cloud yet.
//
// It always returns an empty result and a non-nil error. Returning an error
// rather than panicking lets code that calls this provider through the
// CloudProvider interface fail the task instead of crashing the process.
func (a *HuaweiCloud) RunCommand(commands []string, instanceID string) (string, error) {
	// TODO: implement command execution on Huawei Cloud instances.
	return "", fmt.Errorf("huawei cloud: RunCommand is not implemented")
}
// DestroyServer asks Huawei Cloud, through the package-level hwclient, to
// delete the server with the given ID.
//
// It returns the string form of the delete response, or the error reported by
// the client.
func (a *HuaweiCloud) DestroyServer(instanceID string) (string, error) {
	request := &model.DeleteServersRequest{
		Body: &model.DeleteServersRequestBody{
			// The API takes a list of servers; only one is deleted here.
			Servers: []model.ServerId{{Id: instanceID}},
		},
	}

	response, err := hwclient.DeleteServers(request)
	if err != nil {
		return "", err
	}

	return response.String(), nil
}

View File

@ -2,12 +2,8 @@ package task
import ( import (
"fmt" "fmt"
"time"
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task" pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
) )
@ -22,24 +18,22 @@ func NewPCMSubmitTask(info *exectsk.SubmitTask) *PCMSubmitTask {
} }
} }
func (t *PCMSubmitTask) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *PCMSubmitTask) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[PCMSubmitTask]("Task") log := logger.WithType[PCMSubmitTask]("Task")
log.Debugf("begin with %v", logger.FormatStruct(t.SubmitTask)) log.Debugf("begin with %v", logger.FormatStruct(t.SubmitTask))
defer log.Debugf("end") defer log.Debugf("end")
err := t.do(task.ID(), ctx) //err := t.do(task, ctx)
err := error(nil)
if err != nil { if err != nil {
//TODO 若任务失败上报的状态failed字段根据情况修改 //TODO 若任务失败上报的状态failed字段根据情况修改
ctx.reporter.Report(task.ID(), exectsk.NewSubmitTaskStatus("failed", err.Error())) task.SendStatus(exectsk.NewSubmitTaskStatus("failed", err.Error()))
} else {
task.SendStatus(exectsk.NewSubmitTaskStatus("succeeded", ""))
} }
ctx.reporter.ReportNow()
complete(err, CompleteOption{
RemovingDelay: time.Minute,
})
} }
func (t *PCMSubmitTask) do(taskID string, ctx TaskContext) error { func (t *PCMSubmitTask) do(task *Task, ctx TaskContext) error {
log := logger.WithType[PCMSubmitTask]("Task") log := logger.WithType[PCMSubmitTask]("Task")
pcmCli, err := schglb.PCMPool.Acquire() pcmCli, err := schglb.PCMPool.Acquire()
@ -79,7 +73,7 @@ func (t *PCMSubmitTask) do(taskID string, ctx TaskContext) error {
} }
if tsResp.TaskStatus != prevStatus { if tsResp.TaskStatus != prevStatus {
ctx.reporter.Report(taskID, exectsk.NewSubmitTaskStatus(tsResp.TaskStatus, "")) task.SendStatus(exectsk.NewSubmitTaskStatus(tsResp.TaskStatus, ""))
} }
prevStatus = tsResp.TaskStatus prevStatus = tsResp.TaskStatus

View File

@ -2,10 +2,7 @@ package task
import ( import (
"fmt" "fmt"
"time"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm" pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
@ -21,24 +18,19 @@ func NewPCMUploadImage(info *exectsk.UploadImage) *PCMUploadImage {
} }
} }
func (t *PCMUploadImage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *PCMUploadImage) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[PCMUploadImage]("Task") log := logger.WithType[PCMUploadImage]("Task")
log.Debugf("begin") log.Debugf("begin")
defer log.Debugf("end") defer log.Debugf("end")
err := t.do(task.ID(), ctx) err := t.do(task, ctx)
if err != nil { if err != nil {
//TODO 若任务失败上报的状态failed字段根据情况修改 //TODO 若任务失败上报的状态failed字段根据情况修改
ctx.reporter.Report(task.ID(), exectsk.NewUploadImageStatus("failed", err.Error(), pcmsdk.ImageID(""), "")) task.SendStatus(exectsk.NewUploadImageStatus("failed", err.Error(), pcmsdk.ImageID(""), ""))
} }
ctx.reporter.ReportNow()
complete(err, CompleteOption{
RemovingDelay: time.Minute,
})
} }
func (t *PCMUploadImage) do(taskID string, ctx TaskContext) error { func (t *PCMUploadImage) do(task *Task, ctx TaskContext) error {
pcmCli, err := schglb.PCMPool.Acquire() pcmCli, err := schglb.PCMPool.Acquire()
if err != nil { if err != nil {
return fmt.Errorf("new pcm client: %w", err) return fmt.Errorf("new pcm client: %w", err)
@ -53,7 +45,7 @@ func (t *PCMUploadImage) do(taskID string, ctx TaskContext) error {
return err return err
} }
ctx.reporter.Report(taskID, exectsk.NewUploadImageStatus(resp.Result, "", resp.ImageID, resp.Name)) task.SendStatus(exectsk.NewUploadImageStatus(resp.Result, "", resp.ImageID, resp.Name))
return nil return nil
} }

View File

@ -3,11 +3,12 @@ package task
import ( import (
"fmt" "fmt"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
"gitlink.org.cn/cloudream/scheduler/executor/internal/config" "gitlink.org.cn/cloudream/scheduler/executor/internal/config"
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
"gitlink.org.cn/cloudream/scheduler/executor/internal/task/create_ecs" "gitlink.org.cn/cloudream/scheduler/executor/internal/task/create_ecs"
) )
@ -21,22 +22,21 @@ func NewScheduleCreateECS(info *exectsk.ScheduleCreateECS) *ScheduleCreateECS {
} }
} }
func (t *ScheduleCreateECS) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *ScheduleCreateECS) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[ScheduleCreateECS]("Task") log := logger.WithType[ScheduleCreateECS]("Task")
log.Debugf("begin") log.Debugf("begin")
defer log.Debugf("end") defer log.Debugf("end")
err := t.do(task.ID(), ctx) err := t.do(task, ctx)
if err != nil { if err != nil {
log.Error(err) log.Error(err)
return return
} }
ctx.reporter.ReportNow()
log.Info("ScheduleCreateECS...") log.Info("ScheduleCreateECS...")
} }
func (t *ScheduleCreateECS) do(taskID string, ctx TaskContext) error { func (t *ScheduleCreateECS) do(task *Task, ctx TaskContext) error {
stgCli, err := schglb.CloudreamStoragePool.Acquire() stgCli, err := schglb.CloudreamStoragePool.Acquire()
if err != nil { if err != nil {
return fmt.Errorf("new cloudream storage client: %w", err) return fmt.Errorf("new cloudream storage client: %w", err)
@ -52,51 +52,84 @@ func (t *ScheduleCreateECS) do(taskID string, ctx TaskContext) error {
} }
println(resp.Name) println(resp.Name)
//factory := create_ecs.GetFactory(config.CloudName)
//provider := factory.CreateProvider()
//address, err := provider.CreateServer(resp.Name)
//if err != nil {
// ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("", err.Error()))
// return err
//}
//
//ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("http://"+address+":5001", ""))
CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID
CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID
println("CDSRcloneID: " + CDSRcloneID) println("CDSRcloneID: " + CDSRcloneID)
println("CDSRcloneConfigID: " + CDSRcloneConfigID) println("CDSRcloneConfigID: " + CDSRcloneConfigID)
commands := []string{} var commands []string
commandContent := "yum install -y fuse3" commandContent := "yum install -y fuse3"
commands = append(commands, commandContent) commands = append(commands, commandContent)
commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/" commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/"
commands = append(commands, commandContent) commands = append(commands, commandContent)
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n" commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n"
println(commandContent)
commands = append(commands, commandContent) commands = append(commands, commandContent)
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n" commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n"
println(commandContent)
commands = append(commands, commandContent) commands = append(commands, commandContent)
commandContent = "cd /opt/rclone \n chmod +x rclone" commandContent = "cd /opt/rclone \n chmod +x rclone"
commands = append(commands, commandContent) commands = append(commands, commandContent)
commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &" commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &"
commands = append(commands, commandContent) commands = append(commands, commandContent)
//commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple \n python3 -m pip install --upgrade pip setuptools \n python3 -m pip install transformers --ignore-installed pyyaml \n python3 -m pip install -r requirement.txt \n sh start.sh"
commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh" commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh"
commands = append(commands, commandContent) commands = append(commands, commandContent)
// 创建云主机
factory := create_ecs.GetFactory(config.CloudName) factory := create_ecs.GetFactory(config.CloudName)
provider := factory.CreateProvider() provider := factory.CreateProvider()
address, err := provider.CreateServer(commands)
instanceID, err := provider.CreateServer()
if err != nil { if err != nil {
ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("", err.Error())) task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
return err return err
} }
ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus(address, "")) address, err := provider.RunCommand(commands, instanceID)
if err != nil {
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
return err
}
return nil // 返回执行结果
task.SendStatus(exectsk.NewScheduleCreateECSStatus(address, t.ModelID, ""))
println("create ECS success, waiting msg...")
// 监听更新操作
for {
taskOperate, err := task.taskChan.Chan.Receive()
if err != nil {
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
return err
}
info, ok := taskOperate.(executor.TaskOperateInfo)
if !ok {
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, "invalid task operate info"))
return fmt.Errorf("invalid task operate info")
}
switch info.Command {
case globals.RESTART:
var commands []string
commandContent := "yum install -y fuse3"
commands = append(commands, commandContent)
result, err := provider.RunCommand(commands, instanceID)
if err != nil {
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
return err
}
task.SendStatus(exectsk.NewScheduleCreateECSStatus(result, t.ModelID, ""))
case globals.STOP:
println("STOP")
case globals.DESTROY:
result, err := provider.DestroyServer(instanceID)
if err != nil {
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
return err
}
task.SendStatus(exectsk.NewScheduleCreateECSStatus(result, t.ModelID, ""))
}
}
} }
func init() { func init() {

View File

@ -2,10 +2,7 @@ package task
import ( import (
"fmt" "fmt"
"time"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
@ -21,24 +18,19 @@ func NewStorageCreatePackage(info *exectsk.StorageCreatePackage) *StorageCreateP
} }
} }
func (t *StorageCreatePackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *StorageCreatePackage) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[StorageCreatePackage]("Task") log := logger.WithType[StorageCreatePackage]("Task")
log.Debugf("begin") log.Debugf("begin")
defer log.Debugf("end") defer log.Debugf("end")
err := t.do(task.ID(), ctx) err := t.do(task, ctx)
if err != nil { if err != nil {
//TODO 若任务失败上报的状态failed字段根据情况修改 //TODO 若任务失败上报的状态failed字段根据情况修改
ctx.reporter.Report(task.ID(), exectsk.NewStorageCreatePackageStatus("failed", err.Error(), 0)) task.SendStatus(exectsk.NewStorageCreatePackageStatus("failed", err.Error(), 0))
} }
ctx.reporter.ReportNow()
complete(err, CompleteOption{
RemovingDelay: time.Minute,
})
} }
func (t *StorageCreatePackage) do(taskID string, ctx TaskContext) error { func (t *StorageCreatePackage) do(task *Task, ctx TaskContext) error {
stgCli, err := schglb.CloudreamStoragePool.Acquire() stgCli, err := schglb.CloudreamStoragePool.Acquire()
if err != nil { if err != nil {
return fmt.Errorf("new cloudream storage client: %w", err) return fmt.Errorf("new cloudream storage client: %w", err)
@ -57,7 +49,7 @@ func (t *StorageCreatePackage) do(taskID string, ctx TaskContext) error {
} }
// TODO 根据接口result返回情况修改 // TODO 根据接口result返回情况修改
ctx.reporter.Report(taskID, exectsk.NewStorageCreatePackageStatus("completed", "", resp.PackageID)) task.SendStatus(exectsk.NewStorageCreatePackageStatus("completed", "", resp.PackageID))
return nil return nil
} }

View File

@ -2,10 +2,7 @@ package task
import ( import (
"fmt" "fmt"
"time"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/pkgs/task"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
@ -21,7 +18,7 @@ func NewStorageLoadPackage(info *exectsk.StorageLoadPackage) *StorageLoadPackage
} }
} }
func (t *StorageLoadPackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) { func (t *StorageLoadPackage) Execute(task *Task, ctx TaskContext) {
log := logger.WithType[StorageLoadPackage]("Task") log := logger.WithType[StorageLoadPackage]("Task")
log.Debugf("begin with %v", logger.FormatStruct(t.StorageLoadPackage)) log.Debugf("begin with %v", logger.FormatStruct(t.StorageLoadPackage))
defer log.Debugf("end") defer log.Debugf("end")
@ -29,15 +26,10 @@ func (t *StorageLoadPackage) Execute(task *task.Task[TaskContext], ctx TaskConte
packagePath, err := t.do(ctx) packagePath, err := t.do(ctx)
if err != nil { if err != nil {
//TODO 若任务失败上报的状态failed字段根据情况修改 //TODO 若任务失败上报的状态failed字段根据情况修改
ctx.reporter.Report(task.ID(), exectsk.NewStorageLoadPackageStatus(err.Error(), "")) task.SendStatus(exectsk.NewStorageLoadPackageStatus(err.Error(), ""))
} else { } else {
ctx.reporter.Report(task.ID(), exectsk.NewStorageLoadPackageStatus("", packagePath)) task.SendStatus(exectsk.NewStorageLoadPackageStatus("", packagePath))
} }
ctx.reporter.ReportNow()
complete(err, CompleteOption{
RemovingDelay: time.Minute,
})
} }
func (t *StorageLoadPackage) do(ctx TaskContext) (string, error) { func (t *StorageLoadPackage) do(ctx TaskContext) (string, error) {

View File

@ -0,0 +1,53 @@
package task
import (
"fmt"
"gitlink.org.cn/cloudream/common/pkgs/logger"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
)
// StorageMoveObject is the task body for an object-move request. It embeds
// the exectsk.StorageMoveObject info it was created from, so the info's
// fields (such as ObjectMove) are accessible directly on the task.
type StorageMoveObject struct {
*exectsk.StorageMoveObject
}
// NewStorageMoveObject wraps the given task info in a StorageMoveObject task
// body. The info pointer is stored as-is, not copied.
func NewStorageMoveObject(info *exectsk.StorageMoveObject) *StorageMoveObject {
	body := new(StorageMoveObject)
	body.StorageMoveObject = info
	return body
}
// Execute runs the object move and reports the outcome through task.
//
// Exactly one status is sent: it carries the error text when the move
// failed, or an empty string when it succeeded. ctx is not used.
func (t *StorageMoveObject) Execute(task *Task, ctx TaskContext) {
	log := logger.WithType[StorageMoveObject]("Task")
	log.Debugf("begin with %v", logger.FormatStruct(t.StorageMoveObject))
	defer log.Debugf("end")

	// Start from the "no error" message and overwrite it only on failure.
	errMsg := ""
	if err := t.do(); err != nil {
		errMsg = err.Error()
	}
	task.SendStatus(exectsk.NewStorageMoveObjectStatus(errMsg))
}
// do performs the move described by t.ObjectMove using a client taken from
// the shared cloudream storage pool.
//
// It returns an error when no client can be acquired, when the move call
// itself fails, or when the number of reported successes differs from the
// number of requested movings.
func (t *StorageMoveObject) do() error {
	stgCli, err := schglb.CloudreamStoragePool.Acquire()
	if err != nil {
		return fmt.Errorf("new cloudream storage client: %w", err)
	}
	// Hand the client back to the pool however this function exits.
	defer schglb.CloudreamStoragePool.Release(stgCli)

	result, err := stgCli.Object().Move(t.ObjectMove)
	if err != nil {
		return fmt.Errorf("move object: %w", err)
	}

	// Check that every requested object was moved successfully.
	if failed := len(t.ObjectMove.Movings) - len(result.Successes); failed != 0 {
		return fmt.Errorf("move object: %d objects failed", failed)
	}
	return nil
}
// init registers NewStorageMoveObject with the package's task registry at
// package load time, keyed by the constructor's info type.
func init() {
Register(NewStorageMoveObject)
}

View File

@ -1,56 +1,80 @@
package task package task
import ( import (
"fmt" "gitlink.org.cn/cloudream/common/pkgs/logger"
"reflect"
"gitlink.org.cn/cloudream/common/pkgs/task"
"gitlink.org.cn/cloudream/common/utils/reflect2" "gitlink.org.cn/cloudream/common/utils/reflect2"
"gitlink.org.cn/cloudream/common/utils/sync2"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task" exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
reporter "gitlink.org.cn/cloudream/scheduler/executor/internal/reporter" mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
"reflect"
) )
type TaskContext struct { type TaskChan[T any] struct {
reporter *reporter.Reporter Chan sync2.UnboundChannel[T]
} }
// 需要在Task结束后主动调用completing函数将在Manager加锁期间被调用 func NewTaskChan[T any]() *TaskChan[T] {
// 因此适合进行执行结果的设置 return &TaskChan[T]{Chan: *sync2.NewUnboundChannel[T]()}
type CompleteFn = task.CompleteFn
type Manager struct {
task.Manager[TaskContext]
} }
type TaskBody = task.TaskBody[TaskContext] type Task struct {
id string
taskChan TaskChan[any]
TaskStatusChan TaskChan[any]
}
type Task = task.Task[TaskContext] type TaskContext struct{}
type CompleteOption = task.CompleteOption func NewTask(id string) *Task {
return &Task{
func NewManager(reporter *reporter.Reporter) Manager { taskChan: *NewTaskChan[any](),
return Manager{ TaskStatusChan: *NewTaskChan[any](),
Manager: task.NewManager(TaskContext{ id: id,
reporter: reporter, //body: body,
}),
} }
} }
func (m *Manager) StartByInfo(info exectsk.TaskInfo) (*Task, error) { type TaskBody interface {
infoType := reflect2.TypeOfValue(info) Execute(task *Task, ctx TaskContext)
ctor, ok := taskFromInfoCtors[infoType]
if !ok {
return nil, fmt.Errorf("unknow info type")
}
return m.StartNew(ctor(info)), nil
} }
var taskFromInfoCtors map[reflect.Type]func(exectsk.TaskInfo) TaskBody = make(map[reflect.Type]func(exectsk.TaskInfo) task.TaskBody[TaskContext]) func (c *Task) SendStatus(status exectsk.TaskStatus) {
taskStatus := mgrmq.NewExecutorTaskStatus(globals.ExecutorID, c.ID(), status)
err := c.TaskStatusChan.Chan.Send(taskStatus)
if err != nil {
logger.Error("send task status error: ", err.Error())
}
}
// SendTaskOperate queues an operate instruction on the task's internal
// operate channel. A send failure is logged and otherwise ignored; nothing
// is reported back to the caller.
func (c *Task) SendTaskOperate(info executor.TaskOperateInfo) {
	if err := c.taskChan.Chan.Send(info); err != nil {
		logger.Error(err.Error())
	}
}
// WaitTaskOperate receives the next value from the task's internal operate
// channel and returns a pointer to it. If the receive fails, the error is
// logged and nil is returned.
func (c *Task) WaitTaskOperate() *any {
	op, err := c.taskChan.Chan.Receive()
	if err == nil {
		return &op
	}

	logger.Error(err.Error())
	return nil
}
// ID returns the identifier the task was created with.
func (c *Task) ID() string {
	return c.id
}
var TaskFromInfoCtors map[reflect.Type]func(exectsk.TaskInfo) TaskBody = make(map[reflect.Type]func(exectsk.TaskInfo) TaskBody)
func Register[TInfo exectsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) { func Register[TInfo exectsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) {
taskFromInfoCtors[reflect2.TypeOf[TInfo]()] = func(info exectsk.TaskInfo) TaskBody { TaskFromInfoCtors[reflect2.TypeOf[TInfo]()] = func(info exectsk.TaskInfo) TaskBody {
return ctor(info.(TInfo)) return ctor(info.(TInfo))
} }
} }

View File

@ -8,11 +8,9 @@ import (
"gitlink.org.cn/cloudream/scheduler/executor/internal/config" "gitlink.org.cn/cloudream/scheduler/executor/internal/config"
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals" myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
"gitlink.org.cn/cloudream/scheduler/executor/internal/http" "gitlink.org.cn/cloudream/scheduler/executor/internal/http"
"gitlink.org.cn/cloudream/scheduler/executor/internal/reporter" "gitlink.org.cn/cloudream/scheduler/executor/internal/manager"
"gitlink.org.cn/cloudream/scheduler/executor/internal/services" "gitlink.org.cn/cloudream/scheduler/executor/internal/services"
"gitlink.org.cn/cloudream/scheduler/executor/internal/task"
"os" "os"
"time"
) )
func main() { func main() {
@ -36,9 +34,9 @@ func main() {
myglbs.Init(config.Cfg().Application.ExecutorID) myglbs.Init(config.Cfg().Application.ExecutorID)
schglb.InitRcloneConfig(config.Cfg().Rclone.CDSRcloneID, config.Cfg().Rclone.CDSRcloneConfigID) schglb.InitRcloneConfig(config.Cfg().Rclone.CDSRcloneID, config.Cfg().Rclone.CDSRcloneConfigID)
rpter := reporter.NewReporter(myglbs.ExecutorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec)) //rpter := reporter.NewReporter(myglbs.ExecutorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec))
// //
taskMgr := task.NewManager(&rpter) taskMgr := manager.NewManager()
// //
//mqSvr, err := execmq.NewServer(services.NewService(&taskMgr), &config.Cfg().RabbitMQ) //mqSvr, err := execmq.NewServer(services.NewService(&taskMgr), &config.Cfg().RabbitMQ)
//if err != nil { //if err != nil {
@ -52,7 +50,7 @@ func main() {
// 启动服务 // 启动服务
//go serveMQServer(mqSvr) //go serveMQServer(mqSvr)
go serveReporter(&rpter) //go serveReporter(&rpter)
svc := services.NewService(&taskMgr) svc := services.NewService(&taskMgr)
server, err := http.NewServer(config.Cfg().Application.Address, svc) server, err := http.NewServer(config.Cfg().Application.Address, svc)
@ -82,13 +80,13 @@ func serveMQServer(server *execmq.Server) {
logger.Info("mq server stopped") logger.Info("mq server stopped")
} }
func serveReporter(rpt *reporter.Reporter) { //func serveReporter(rpt *reporter.Reporter) {
logger.Info("start serving reporter") // logger.Info("start serving reporter")
//
err := rpt.Serve() // err := rpt.Serve()
if err != nil { // if err != nil {
logger.Errorf("rpt stopped with error: %s", err.Error()) // logger.Errorf("rpt stopped with error: %s", err.Error())
} // }
//
logger.Info("rpt stopped") // logger.Info("rpt stopped")
} //}

View File

@ -2,16 +2,16 @@ package executormgr
import ( import (
"bufio" "bufio"
"encoding/json"
"fmt" "fmt"
"gitlink.org.cn/cloudream/common/pkgs/async"
log "gitlink.org.cn/cloudream/common/pkgs/logger" log "gitlink.org.cn/cloudream/common/pkgs/logger"
"gitlink.org.cn/cloudream/common/utils/serder"
jobTask "gitlink.org.cn/cloudream/scheduler/manager/internal/task"
"io" "io"
"strings" "strings"
"sync" "sync"
"time" "time"
"gitlink.org.cn/cloudream/common/utils/sync2"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals" schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
schmod "gitlink.org.cn/cloudream/scheduler/common/models" schmod "gitlink.org.cn/cloudream/scheduler/common/models"
exemq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor" exemq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
@ -20,12 +20,11 @@ import (
) )
type task struct { type task struct {
statusChan *sync2.Channel[exetsk.TaskStatus] statusChan *async.UnboundChannel[mgrmq.ExecutorTaskStatus]
} }
type ExecutorStatus struct { type ExecutorStatus struct {
executorID schmod.ExecutorID executorID schmod.ExecutorID
tasks map[string]task // key 为 TaskID tasks map[string]task // key 为 TaskID
lastReportTime time.Time
} }
var ErrWaitReportTimeout = fmt.Errorf("wait report timeout") var ErrWaitReportTimeout = fmt.Errorf("wait report timeout")
@ -57,149 +56,139 @@ func NewManager(reportTimeout time.Duration) (*Manager, error) {
}, nil }, nil
} }
func (m *Manager) ReceiveExecutorTaskStatus(url string) { func (m *Manager) ReceiveExecutorTaskStatus(url string) (*mgrmq.ExecutorTaskStatus, error) {
client, err := ExecutorPool.AcquireByUrl(url) client, err := ExecutorPool.AcquireByUrl(url)
if err != nil { if err != nil {
log.Error(err) log.Error(err)
return return &mgrmq.ExecutorTaskStatus{}, err
} }
resp, err := client.GetReportInfo() resp, err := client.GetReportInfo()
if err != nil { if err != nil {
log.Error(err) log.Error(err)
return return &mgrmq.ExecutorTaskStatus{}, err
} }
defer resp.Body.Close()
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
for {
line, err := reader.ReadString('\n')
if err != nil && err != io.EOF {
log.Error("Error reading from response body:", err)
return
}
if line == "" {
continue // Skip empty lines
}
line = strings.TrimPrefix(line, "data: ") line, err := reader.ReadString('\n')
line = strings.TrimSpace(line) if err != nil && err != io.EOF {
if len(line) > 0 { log.Error("Error reading from response body:", err)
var msg mgrmq.ReportExecutorTaskStatus return &mgrmq.ExecutorTaskStatus{}, err
if err := json.Unmarshal([]byte(line), &msg); err != nil { }
fmt.Println("Error unmarshalling JSON:", err) // TODO 第一次获取的值包含执行器所有任务,用于失败重试
executorInfo := convertLine(line)
// 将第一次的executor放入到池子中
exec := &ExecutorStatus{
executorID: executorInfo.ExecutorID,
tasks: make(map[string]task),
}
m.executors[executorInfo.ExecutorID] = exec
go func() {
for {
line, err = reader.ReadString('\n')
if err != nil {
if err != io.EOF {
log.Error("Error reading from response body:", err)
}
return
}
status := convertLine(line)
if status == nil {
continue continue
} }
log.Info("Received: %s", msg)
m.Report(msg.ExecutorID, msg.TaskStatus) m.Report(*status)
} }
if err == io.EOF { }()
break
} return executorInfo, nil
}
} }
func (m *Manager) Report(execID schmod.ExecutorID, taskStatus []mgrmq.ExecutorTaskStatus) { func convertLine(line string) *mgrmq.ExecutorTaskStatus {
if line == "" {
return nil
}
line = strings.TrimPrefix(line, "data: ")
line = strings.TrimSpace(line)
if len(line) == 0 {
return nil
}
readResp, err := serder.JSONToObjectEx[mgrmq.ExecutorTaskStatus]([]byte(line))
if err != nil {
log.Error(err)
return nil
}
return &readResp
}
func (m *Manager) Report(status mgrmq.ExecutorTaskStatus) {
m.lock.Lock() m.lock.Lock()
defer m.lock.Unlock() defer m.lock.Unlock()
exec, ok := m.executors[execID] exec := m.executors[status.ExecutorID]
if !ok { if exec == nil {
exec = &ExecutorStatus{ log.Error("Executor not found: ", status.ExecutorID)
executorID: execID, return
tasks: make(map[string]task),
}
m.executors[execID] = exec
} }
// 由于先将task chan放入到池子中再执行的task所以这里的task必存在
tsk := exec.tasks[status.TaskID]
exec.lastReportTime = time.Now() // TODO 考虑主动检测channel是否关闭然后取消task
if tsk.statusChan.Send(status) != nil {
delete(exec.tasks, status.TaskID)
for _, s := range taskStatus { if len(exec.tasks) == 0 {
tsk, ok := exec.tasks[s.TaskID] delete(m.executors, exec.executorID)
if !ok {
continue
}
// TODO 考虑主动检测channel是否关闭然后取消task
if tsk.statusChan.Send(s.Status) != nil {
delete(exec.tasks, s.TaskID)
if len(exec.tasks) == 0 {
delete(m.executors, execID)
}
} }
} }
} }
// 启动一个Task // 启动一个Task
func (m *Manager) StartTask(info exetsk.TaskInfo, ccInfo schmod.ComputingCenter) *sync2.Channel[exetsk.TaskStatus] { func (m *Manager) StartTask(info exetsk.TaskInfo, ccInfo schmod.ComputingCenter) (*jobTask.JobTask[mgrmq.ExecutorTaskStatus], error) {
m.lock.Lock() m.lock.Lock()
defer m.lock.Unlock() defer m.lock.Unlock()
ch := sync2.NewChannel[exetsk.TaskStatus]() newJobTask := jobTask.NewJobTask[mgrmq.ExecutorTaskStatus]()
ch := newJobTask.Chan()
client, err := ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL) client, err := ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL)
//resp, err := m.exeCli.StartTask(exemq.NewStartTask(info))
if err != nil { if err != nil {
ch.CloseWithError(fmt.Errorf("start task: %w", err)) ch.CloseWithError(fmt.Errorf("start task: %w", err))
return ch return newJobTask, err
} }
executorID := schmod.ExecutorID(ccInfo.ExecutorID)
// 检测是否连接过这个Executor如果第一次连则发送请求监听上报信息 // 检测是否连接过这个Executor如果第一次连则发送请求监听上报信息
_, ok := m.executors[schmod.ExecutorID(ccInfo.ExecutorID)] _, ok := m.executors[executorID]
if !ok { if !ok {
go m.ReceiveExecutorTaskStatus(ccInfo.ExecutorURL) _, err = m.ReceiveExecutorTaskStatus(ccInfo.ExecutorURL)
} if err != nil {
ch.CloseWithError(fmt.Errorf("start task: %w", err))
resp, err := client.SubmitTask(exemq.NewStartTask(info)) return newJobTask, err
if err != nil {
ch.CloseWithError(fmt.Errorf("start task: %w", err))
return ch
}
exeInfo, ok := m.executors[resp.ExecutorID]
if !ok {
exeInfo = &ExecutorStatus{
executorID: resp.ExecutorID,
tasks: make(map[string]task),
lastReportTime: time.Now(),
} }
m.executors[resp.ExecutorID] = exeInfo
} }
exeInfo.tasks[resp.TaskID] = task{ // 上面已经将executor放入到池子中了这里的executor必存在
exeInfo := m.executors[executorID]
exeInfo.tasks[newJobTask.ID()] = task{
statusChan: ch, statusChan: ch,
} }
return ch _, err = client.SubmitTask(exemq.NewStartTask(newJobTask.ID(), info))
} if err != nil {
ch.CloseWithError(fmt.Errorf("start task: %w", err))
func (m *Manager) Serve() error { return newJobTask, err
InitExecutorPool()
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
func() {
m.lock.Lock()
defer m.lock.Unlock()
now := time.Now()
for exeID, exeInfo := range m.executors {
dt := now.Sub(exeInfo.lastReportTime)
if dt < m.reportTimeout {
continue
}
for _, tsk := range exeInfo.tasks {
tsk.statusChan.CloseWithError(ErrWaitReportTimeout)
}
delete(m.executors, exeID)
}
}()
}
} }
return newJobTask, nil
}
func (m *Manager) Serve() {
InitExecutorPool()
} }

View File

@ -2,27 +2,61 @@ package event
import ( import (
"gitlink.org.cn/cloudream/common/pkgs/future" "gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/common/pkgs/types"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler" schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
"gitlink.org.cn/cloudream/common/utils/serder"
) )
type CreateInstanceFuture = *future.SetValueFuture[CreateInstanceResult] type OperateInstanceFuture = *future.SetValueFuture[OperateInstanceResult]
type InstanceCreate struct { type InstanceOperate struct {
DataSet schsdk.JobFileInfo Info InstanceOperateInfo
Result CreateInstanceFuture Result OperateInstanceFuture
} }
type CreateInstanceResult struct { type OperateInstanceResult struct {
OperateResult string
Err error
JobID schsdk.JobID JobID schsdk.JobID
FilesUploadScheme schsdk.JobFilesUploadScheme FilesUploadScheme schsdk.JobFilesUploadScheme
} }
func NewInstanceCreate(dataSet schsdk.JobFileInfo, future CreateInstanceFuture) *InstanceCreate { type InstanceOperateInfo interface {
return &InstanceCreate{ Instance()
DataSet: dataSet, }
Result: future,
type InstanceInfoBase struct{}
func (i *InstanceInfoBase) Instance() {}
var InstanceOperateInfoTypeUnion = types.NewTypeUnion[InstanceOperateInfo](
(*InstanceCreateInfo)(nil),
(*InstanceUpdateInfo)(nil),
)
var _ = serder.UseTypeUnionInternallyTagged(&InstanceOperateInfoTypeUnion, "type")
type InstanceCreateInfo struct {
serder.Metadata `union:"Create"`
InstanceInfoBase
DataSet schsdk.JobFileInfo
}
type InstanceUpdateInfo struct {
serder.Metadata `union:"Update"`
InstanceInfoBase
Type string `json:"type"`
Info schsdk.UpdateMultiInstanceJobInfo `json:"info"`
PackageID cdssdk.PackageID `json:"packageID"`
}
func NewInstanceOperate(info InstanceOperateInfo, future OperateInstanceFuture) *InstanceOperate {
return &InstanceOperate{
Info: info,
Result: future,
} }
} }
func (s *InstanceCreate) Noop() { func (s *InstanceOperate) Noop() {
} }

View File

@ -0,0 +1,23 @@
package event
import "gitlink.org.cn/cloudream/common/pkgs/future"
// JobUpdateFuture is the future through which the handler of an Update event
// reports its UpdateResult back to the poster.
type JobUpdateFuture = *future.SetValueFuture[UpdateResult]

// Update is an event asking a job to apply a command; the outcome is
// delivered through Result.
type Update struct {
	// Command is the operation to perform.
	Command string
	// Result is completed by whoever handles the event.
	Result JobUpdateFuture
}

// Noop is an empty marker method.
func (s *Update) Noop() {}

// UpdateResult is the value delivered through a JobUpdateFuture.
type UpdateResult struct {
	// Err is non-nil when the update was handled but failed.
	Err error
}
// NewUpdate builds an Update event carrying the given command and the future
// through which the handler reports its result.
func NewUpdate(command string, jobUpdateFuture JobUpdateFuture) *Update {
	evt := Update{Command: command, Result: jobUpdateFuture}
	return &evt
}

View File

@ -2,6 +2,7 @@ package event
import ( import (
"context" "context"
"gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr" "gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
) )
@ -51,3 +52,25 @@ func WaitTypeAnd[T jobmgr.Event](ctx context.Context, set *jobmgr.EventSet, cond
// 断言返回的事件为类型T并返回该事件和操作成功标志。 // 断言返回的事件为类型T并返回该事件和操作成功标志。
return ret.(T), ok return ret.(T), ok
} }
// BeginWaitType starts waiting, without blocking, for an event whose dynamic
// type is T and returns the future handed back by EventSet.BeginWait.
func BeginWaitType[T jobmgr.Event](set *jobmgr.EventSet) future.Future1[jobmgr.Event] {
	isT := func(evt jobmgr.Event) bool {
		_, matched := evt.(T)
		return matched
	}
	return set.BeginWait(isT)
}
// BeginWaitTypeAnd starts waiting, without blocking, for an event that is of
// type T and also satisfies cond. cond is only invoked for events that are
// already known to be of type T.
func BeginWaitTypeAnd[T jobmgr.Event](set *jobmgr.EventSet, cond func(val T) bool) future.Future1[jobmgr.Event] {
	return set.BeginWait(func(evt jobmgr.Event) bool {
		typed, isT := evt.(T)
		// Short-circuit keeps cond from ever seeing an event of another type.
		return isT && cond(typed)
	})
}

View File

@ -3,6 +3,7 @@ package jobmgr
import ( import (
"context" "context"
"errors" "errors"
"gitlink.org.cn/cloudream/common/pkgs/logger"
"sync" "sync"
"gitlink.org.cn/cloudream/common/pkgs/future" "gitlink.org.cn/cloudream/common/pkgs/future"
@ -69,10 +70,11 @@ func (s *EventSet) Wait(ctx context.Context, cond EventWaitCondition) (Event, bo
future: fut, future: fut,
} }
s.waiters = append(s.waiters, waiter) s.waiters = append(s.waiters, waiter)
logger.Info("append waiter: %p", &waiter)
s.lock.Unlock() s.lock.Unlock()
val, err := fut.WaitValue(ctx) val, err := fut.Wait(ctx)
if err != nil { if err != nil {
return nil, false return nil, false
@ -80,3 +82,26 @@ func (s *EventSet) Wait(ctx context.Context, cond EventWaitCondition) (Event, bo
return val, true return val, true
} }
// BeginWait is the non-blocking form of waiting for an event. If a stored
// event already satisfies cond, that event is removed from the set and an
// already-completed future holding it is returned. Otherwise a waiter is
// registered and its still-pending future is returned.
func (s *EventSet) BeginWait(cond EventWaitCondition) future.Future1[Event] {
	s.lock.Lock()
	defer s.lock.Unlock()

	for idx := range s.events {
		if pending := s.events[idx]; cond(pending) {
			s.events = lo2.RemoveAt(s.events, idx)
			return future.NewReadyValue1(pending)
		}
	}

	pendingFut := future.NewSetValue[Event]()
	s.waiters = append(s.waiters, EventWaiter{
		condition: cond,
		future:    pendingFut,
	})
	return pendingFut
}

View File

@ -0,0 +1,30 @@
package job
import (
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
)
// UpdateMultiInstanceJob is the job body of a job that updates an existing
// multi-instance job.
type UpdateMultiInstanceJob struct {
	// Info is the submitted description of the update job.
	Info schsdk.UpdateMultiInstanceJobInfo
	// Files is copied into the dump by Dump; the constructor leaves it at its zero value.
	Files jobmod.JobFiles
	//InstanceIDs    []schsdk.JobID
	//UpdateStrategy string
}
// NewUpdateMultiInstanceJob wraps info in a new UpdateMultiInstanceJob; Files
// is left at its zero value.
func NewUpdateMultiInstanceJob(info schsdk.UpdateMultiInstanceJobInfo) *UpdateMultiInstanceJob {
	body := new(UpdateMultiInstanceJob)
	body.Info = info
	return body
}
// GetInfo returns a pointer to the job's own Info field (not a copy).
func (j *UpdateMultiInstanceJob) GetInfo() schsdk.JobInfo {
	info := &j.Info
	return info
}
// Dump produces the serializable form of this job body; only Files is populated.
func (j *UpdateMultiInstanceJob) Dump() jobmod.JobBodyDump {
	dump := new(jobmod.UpdateMultiInstanceJobDump)
	dump.Files = j.Files
	return dump
}

View File

@ -128,15 +128,14 @@ func (s *Adjusting) doPackageScheduling(ctx context.Context, rtx jobmgr.JobState
if scheme.Action == jobmod.ActionMove { if scheme.Action == jobmod.ActionMove {
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID) logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("moving package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
moveStatus := status.(*exectsk.CacheMovePackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
if moveStatus.Error != "" { if moveStatus.Error != "" {
return fmt.Errorf("moving package: %s", moveStatus.Error) return fmt.Errorf("moving package: %s", moveStatus.Error)
} }
@ -147,15 +146,15 @@ func (s *Adjusting) doPackageScheduling(ctx context.Context, rtx jobmgr.JobState
if scheme.Action == jobmod.ActionLoad { if scheme.Action == jobmod.ActionLoad {
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID) logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("loading package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
loadStatus := status.(*exectsk.StorageLoadPackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
loadStatus := status.Value.Status.(*exectsk.StorageLoadPackageStatus)
if loadStatus.Error != "" { if loadStatus.Error != "" {
return fmt.Errorf("loading package: %s", loadStatus.Error) return fmt.Errorf("loading package: %s", loadStatus.Error)
} }
@ -175,15 +174,15 @@ func (s *Adjusting) doImageScheduling(ctx context.Context, rtx jobmgr.JobStateRu
} }
// TODO UserID // TODO UserID
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("moving package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
moveStatus := status.(*exectsk.CacheMovePackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
if moveStatus.Error != "" { if moveStatus.Error != "" {
return fmt.Errorf("moving package: %s", moveStatus.Error) return fmt.Errorf("moving package: %s", moveStatus.Error)
} }
@ -208,15 +207,18 @@ func (s *Adjusting) doImageScheduling(ctx context.Context, rtx jobmgr.JobStateRu
return fmt.Errorf("there must be only 1 object in the package which will be imported") return fmt.Errorf("there must be only 1 object in the package which will be imported")
} }
wt2 := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo) taskStatus2, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
defer wt2.Close() if err != nil {
return fmt.Errorf("moving package: %w", err)
}
status2, err := wt2.Receive(ctx) fut2 := taskStatus2.Receive()
status2 := <-fut2.Chan()
if err != nil { if err != nil {
return fmt.Errorf("uploading image: %w", err) return fmt.Errorf("uploading image: %w", err)
} }
uploadStatus := status2.(*exectsk.UploadImageStatus) uploadStatus := status2.Value.Status.(*exectsk.UploadImageStatus)
if uploadStatus.Error != "" { if uploadStatus.Error != "" {
return fmt.Errorf("uploading image: %s", uploadStatus.Error) return fmt.Errorf("uploading image: %s", uploadStatus.Error)
} }

View File

@ -6,6 +6,11 @@ import (
"github.com/samber/lo" "github.com/samber/lo"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler" schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage" cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
"gitlink.org.cn/cloudream/scheduler/manager/internal/executormgr"
jobTask "gitlink.org.cn/cloudream/scheduler/manager/internal/task"
"path/filepath" "path/filepath"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
@ -73,9 +78,6 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
packageID = runningJob.Files.Dataset.PackageID packageID = runningJob.Files.Dataset.PackageID
} }
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
pcmImgInfo, err := rtx.Mgr.DB.PCMImage().GetByImageIDAndCCID(rtx.Mgr.DB.SQLCtx(), jobFiles.Image.ImageID, targetCCID) pcmImgInfo, err := rtx.Mgr.DB.PCMImage().GetByImageIDAndCCID(rtx.Mgr.DB.SQLCtx(), jobFiles.Image.ImageID, targetCCID)
if err != nil { if err != nil {
return fmt.Errorf("getting pcm image info: %w", err) return fmt.Errorf("getting pcm image info: %w", err)
@ -96,37 +98,20 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
// TODO 判断是否是模型推理任务,如果是,则进行扩缩容管理 // TODO 判断是否是模型推理任务,如果是,则进行扩缩容管理
if modelJobInfo != nil { if modelJobInfo != nil {
//address := nodeExpansion(jobFiles)
//
//node := schsdk.NodeInfo{
// InstanceID: jo.JobID,
// Address: address,
//}
//jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), schsdk.ModelID(modelJobInfo.ModelID), node)
// 发送扩容任务 // 发送扩容任务
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewScheduleCreateECS( ecs := exetsk.NewScheduleCreateECS(
userID, userID,
packageID, packageID,
), ccInfo) schsdk.ModelID(modelJobInfo.ModelID),
)
task, err := rtx.Mgr.ExecMgr.StartTask(ecs, ccInfo)
for { if err != nil {
status, err := wt.Receive(ctx) log.Error(err.Error())
if err != nil { return err
return err
}
taskStatus := status.(*exetsk.ScheduleCreateECSStatus)
if taskStatus.Error != "" {
log.Error(taskStatus.Error)
return nil
}
node := schsdk.NodeInfo{
InstanceID: jo.JobID,
Address: schsdk.Address(taskStatus.Address),
}
jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), schsdk.ModelID(modelJobInfo.ModelID), node)
log.Infof("node expansion: %v", taskStatus.Address)
} }
return s.listen(rtx, jo, task, ccInfo)
} }
stgCli, err := schglb.CloudreamStoragePool.Acquire() stgCli, err := schglb.CloudreamStoragePool.Acquire()
@ -166,7 +151,7 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
cmd = runtime.Command cmd = runtime.Command
} }
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewSubmitTask( task, err := rtx.Mgr.ExecMgr.StartTask(exetsk.NewSubmitTask(
ccInfo.PCMParticipantID, ccInfo.PCMParticipantID,
pcmImgInfo.PCMImageID, pcmImgInfo.PCMImageID,
// TODO 选择资源的算法 // TODO 选择资源的算法
@ -176,17 +161,16 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
// params, TODO params不应该是kv数组而应该是字符串数组 // params, TODO params不应该是kv数组而应该是字符串数组
[]schsdk.KVPair{}, []schsdk.KVPair{},
), ccInfo) ), ccInfo)
defer wt.Close()
if err != nil {
log.Error(err.Error())
return err
}
taskFut := task.Receive()
for { for {
status, err := wt.Receive(ctx) msg := <-taskFut.Chan()
if err != nil { tskStatus := msg.Value.Status.(*exetsk.SubmitTaskStatus)
return err
}
tskStatus := status.(*exetsk.SubmitTaskStatus)
if tskStatus.Error != "" {
return fmt.Errorf("submitting task: %s", tskStatus.Error)
}
if tskStatus.Status != s.lastStatus { if tskStatus.Status != s.lastStatus {
log.Infof("task %s -> %s", s.lastStatus, tskStatus.Status) log.Infof("task %s -> %s", s.lastStatus, tskStatus.Status)
@ -203,37 +187,63 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
case pcmsdk.TaskStatusFailed: case pcmsdk.TaskStatusFailed:
return fmt.Errorf("task failed") return fmt.Errorf("task failed")
} }
taskFut = task.Receive()
} }
} }
// 模拟 func (s *NormalJobExecuting) listen(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job, task *jobTask.JobTask[mgrmq.ExecutorTaskStatus], ccInfo schmod.ComputingCenter) error {
var nodesAddress = []string{"120.46.183.86:22", "121.36.5.116:22"} log := logger.WithType[NormalJobExecuting]("State").WithField("TaskID", task.ID())
var count = 0
func nodeExpansion(files *jobmod.JobFiles) schsdk.Address { waitFut := event.BeginWaitType[*event.Update](rtx.EventSet)
if count >= 2 { taskFut := task.Receive()
logger.Info("There is no available node")
return "" for {
select {
case v1 := <-waitFut.Chan():
// 对任务进行更新操作
client, err := executormgr.ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL)
if err != nil {
return fmt.Errorf("getting executor client: %w", err)
}
evt := v1.Value.(*event.Update)
operateResp, err := client.OperateTask(executor.NewTaskOperateInfo(task.ID(), evt.Command))
if err != nil {
return fmt.Errorf("operate task: %w", err)
}
evt.Result.SetValue(event.UpdateResult{
Err: operateResp.Err,
})
if operateResp.Err != nil {
return fmt.Errorf("operate task: %w", operateResp.Err)
}
// 持续等待
waitFut = event.BeginWaitType[*event.Update](rtx.EventSet)
case msg := <-taskFut.Chan():
switch v2 := msg.Value.Status.(type) {
case *exetsk.ScheduleCreateECSStatus:
// 扩容任务,将结果放到池子中
node := schsdk.NodeInfo{
InstanceID: jo.JobID,
Address: schsdk.Address(v2.Address),
}
jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), v2.ModelID, node)
log.Infof("node expansion: %v", v2.Address)
case error:
fmt.Println("Received error:", v2.Error())
default:
fmt.Println("Received unexpected type")
}
// 持续接收
taskFut = task.Receive()
}
} }
address := nodesAddress[count]
count++
client := utils.GetSSHClient("pcm", "", address)
defer client.Close()
// 创建SSH会话
session, err := client.NewSession()
if err != nil {
logger.Warn("Failed to create session: %s", err)
}
defer session.Close()
// 执行远程命令
output, err := session.CombinedOutput("sh /home/pcm/modeltest/http/start.sh")
if err != nil {
logger.Warn("Failed to run command: %s", err)
}
return schsdk.Address(output)
} }
type DataReturnJobExecuting struct { type DataReturnJobExecuting struct {
@ -277,20 +287,24 @@ func (s *DataReturnJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Jo
} }
packageName := utils.MakeResourcePackageName(reJob.TargetJobID) packageName := utils.MakeResourcePackageName(reJob.TargetJobID)
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewStorageCreatePackage( task, err := rtx.Mgr.ExecMgr.StartTask(exetsk.NewStorageCreatePackage(
userID, // TOOD 用户ID userID, // TOOD 用户ID
ccInfo.CDSStorageID, ccInfo.CDSStorageID,
reJob.TargetJobOutputPath, reJob.TargetJobOutputPath,
reJob.Info.BucketID, reJob.Info.BucketID,
packageName, packageName,
), ccInfo) ), ccInfo)
defer wt.Close() if err != nil {
log.Error(err.Error())
return err
}
status, err := wt.Receive(ctx) fut := task.Receive()
if err != nil { if err != nil {
return err return err
} }
tskStatus := status.(*exetsk.StorageCreatePackageStatus) status := <-fut.Chan()
tskStatus := status.Value.Status.(*exetsk.StorageCreatePackageStatus)
if tskStatus.Error != "" { if tskStatus.Error != "" {
return fmt.Errorf("creating package: %s", tskStatus.Error) return fmt.Errorf("creating package: %s", tskStatus.Error)
} }

View File

@ -3,6 +3,7 @@ package state
import ( import (
"context" "context"
"fmt" "fmt"
"gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/common/pkgs/logger" "gitlink.org.cn/cloudream/common/pkgs/logger"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler" schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job" jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
@ -11,6 +12,8 @@ import (
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr" "gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event" "gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job" "gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
"strings"
"sync"
) )
type MultiInstanceRunning struct { type MultiInstanceRunning struct {
@ -39,67 +42,120 @@ func (s *MultiInstanceRunning) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job)
multInstJob := jo.Body.(*job.MultiInstanceJob) multInstJob := jo.Body.(*job.MultiInstanceJob)
waitFut := event.BeginWaitType[*event.InstanceOperate](rtx.EventSet)
for { for {
// 监听创建实例事件 chanValue := <-waitFut.Chan()
ic, ok := event.WaitType[*event.InstanceCreate](ctx, rtx.EventSet) instanceInfo := chanValue.Value.(*event.InstanceOperate)
if !ok { instanceFuture := instanceInfo.Result
logger.Info("MultiInstanceRunning canceled")
break
}
logger.Info("wait a event happened") logger.Info("wait a event happened")
waitFut = event.BeginWaitType[*event.InstanceOperate](rtx.EventSet)
dataSet := ic.DataSet switch info := instanceInfo.Info.(type) {
//如果是模型扩容任务直接使用父Job的资源文件 case *event.InstanceCreateInfo:
if &multInstJob.Info.ModelJobInfo != nil { createInstance(rtx, info, s.preScheduler, jo, multInstJob, instanceFuture)
dataSet = multInstJob.Info.Files.Dataset case *event.InstanceUpdateInfo:
updateInstance(rtx, info, multInstJob, instanceFuture)
} }
// 构建InstanceJobInfo
infoFiles := schsdk.JobFilesInfo{
Dataset: dataSet,
Code: multInstJob.Info.Files.Code,
Image: multInstJob.Info.Files.Image,
}
newLocalJobID := fmt.Sprintf("%s_%s", multInstJob.Info.LocalJobID, utils.GenerateRandomID())
instJobInfo := &schsdk.InstanceJobInfo{
Type: schsdk.JobTypeInstance,
LocalJobID: newLocalJobID,
Files: infoFiles,
Runtime: multInstJob.Info.Runtime,
Resources: multInstJob.Info.Resources,
ModelJobInfo: multInstJob.Info.ModelJobInfo,
}
files := jobmod.JobFiles{
Code: multInstJob.Files.Code,
Image: multInstJob.Files.Image,
}
// 生成预调度方案和文件上传方案
jobSchedule, filesUploadScheme, err := s.preScheduler.ScheduleJob(instJobInfo)
if err != nil {
ic.Result.SetError(err)
continue
}
// 创建实例并运行
instanceJob := job.NewInstanceJob(*instJobInfo, files)
jobID := rtx.Mgr.AddJob(jo.JobSetID, instanceJob, NewPreSchuduling(*jobSchedule))
// 在多实例任务中新增这个实例的任务ID
multInstJob.SubJobs = append(multInstJob.SubJobs, jobID)
// 将实例ID和文件上传方案返回
ic.Result.SetValue(event.CreateInstanceResult{
JobID: jobID,
FilesUploadScheme: *filesUploadScheme,
})
} }
} }
// updateInstance asks every sub job of parentJob to update itself and reports
// the aggregate outcome through updateInstanceFuture.
//
// One Update event is posted per sub job, concurrently. A sub job counts as
// failed when waiting on its future fails or when the delivered
// UpdateResult.Err is non-nil. If none fail, the future is completed with an
// empty result. Otherwise OperateResult holds the comma-separated IDs of the
// failed sub jobs (in no particular order) and Err is non-nil.
//
// NOTE(review): each wait uses context.TODO(), so a sub job that never handles
// the event blocks this function indefinitely.
func updateInstance(rtx jobmgr.JobStateRunContext, updateInfo *event.InstanceUpdateInfo, parentJob *job.MultiInstanceJob, updateInstanceFuture event.OperateInstanceFuture) {
	// The strategy is currently only logged; it does not affect how updates are dispatched.
	strategy := updateInfo.Info.UpdateStrategy
	logger.Debugf("update strategy: %v", strategy)

	var (
		mu       sync.Mutex // guards failJobs, which the goroutines below append to
		failJobs []string
		wg       sync.WaitGroup
	)

	for _, instanceID := range parentJob.SubJobs {
		instanceID := instanceID // per-iteration copy for the closure (needed before Go 1.22)

		wg.Add(1)
		go func() {
			defer wg.Done()

			fut := future.NewSetValue[event.UpdateResult]()
			rtx.Mgr.PostEvent(instanceID, event.NewUpdate("update", fut))

			result, err := fut.Wait(context.TODO())
			if err == nil {
				// The handler may report failure through the value rather than the future's error.
				err = result.Err
			}
			if err != nil {
				logger.Error(err.Error())

				mu.Lock()
				failJobs = append(failJobs, string(instanceID))
				mu.Unlock()
			}
		}()
	}

	wg.Wait()

	if len(failJobs) == 0 {
		updateInstanceFuture.SetValue(event.OperateInstanceResult{
			Err: nil,
		})
		return
	}

	// Report which instances failed to update.
	result := strings.Join(failJobs, ",")
	updateInstanceFuture.SetValue(event.OperateInstanceResult{
		OperateResult: result,
		Err:           fmt.Errorf("updating instances failed: %s", result),
	})
}
// createInstance creates one new instance job under the multi-instance job
// multInstJob and reports the outcome through future.
//
// The instance inherits code, image, runtime, resources and model info from
// the parent job. After pre-scheduling succeeds, the instance job is added to
// the parent's job set, its ID is appended to multInstJob.SubJobs, and the ID
// is delivered together with the file upload scheme. If pre-scheduling fails,
// the future is completed with that error and nothing is created.
func createInstance(rtx jobmgr.JobStateRunContext, info *event.InstanceCreateInfo, preScheduler prescheduler.PreScheduler, jo *jobmgr.Job, multInstJob *job.MultiInstanceJob, future event.OperateInstanceFuture) {
	parentInfo := &multInstJob.Info

	// A model scale-out job reuses the parent job's dataset directly.
	// NOTE(review): the address of a struct field is never nil, so this
	// condition is always true and info.DataSet is always overridden — confirm
	// the intended check against the declared type of ModelJobInfo.
	instanceDataset := info.DataSet
	if &parentInfo.ModelJobInfo != nil {
		instanceDataset = parentInfo.Files.Dataset
	}

	// Describe the instance job.
	instInfo := &schsdk.InstanceJobInfo{
		Type:       schsdk.JobTypeInstance,
		LocalJobID: fmt.Sprintf("%s_%s", parentInfo.LocalJobID, utils.GenerateRandomID()),
		Files: schsdk.JobFilesInfo{
			Dataset: instanceDataset,
			Code:    parentInfo.Files.Code,
			Image:   parentInfo.Files.Image,
		},
		Runtime:      parentInfo.Runtime,
		Resources:    parentInfo.Resources,
		ModelJobInfo: parentInfo.ModelJobInfo,
	}

	// Produce the pre-schedule scheme and the file upload scheme.
	schedule, uploadScheme, err := preScheduler.ScheduleJob(instInfo)
	if err != nil {
		future.SetError(err)
		return
	}

	// Create the instance job and start it in the pre-scheduling state.
	instFiles := jobmod.JobFiles{
		Code:  multInstJob.Files.Code,
		Image: multInstJob.Files.Image,
	}
	newJobID := rtx.Mgr.AddJob(jo.JobSetID, job.NewInstanceJob(*instInfo, instFiles), NewPreSchuduling(*schedule))

	// Record the new instance on the parent multi-instance job.
	multInstJob.SubJobs = append(multInstJob.SubJobs, newJobID)

	// Hand the instance ID and the upload scheme back to the requester.
	future.SetValue(event.OperateInstanceResult{
		JobID:             newJobID,
		FilesUploadScheme: *uploadScheme,
	})
}
func (s *MultiInstanceRunning) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump { func (s *MultiInstanceRunning) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump {
return &jobmod.MultiInstCreateRunningDump{} return &jobmod.MultiInstCreateRunningDump{}
} }

View File

@ -0,0 +1,134 @@
package state
import (
"context"
"fmt"
"gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/common/pkgs/logger"
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
)
// MultiInstanceUpdate is the job state that merges an incremental package into
// an existing multi-instance job and then asks that job to update its instances.
type MultiInstanceUpdate struct {
	// originalJob is the dump of the multi-instance job being updated.
	originalJob jobmod.JobDump
}
// NewMultiInstanceUpdate creates the update state for the multi-instance job
// described by originalJob.
func NewMultiInstanceUpdate(originalJob jobmod.JobDump) *MultiInstanceUpdate {
	state := new(MultiInstanceUpdate)
	state.originalJob = originalJob
	return state
}
// Run executes the update and logs the error if it fails, instead of
// discarding it.
//
// NOTE(review): no state transition happens here on either success or
// failure — confirm whether the job should be moved to a completed or failed
// state once do returns.
func (s *MultiInstanceUpdate) Run(rtx jobmgr.JobStateRunContext, job *jobmgr.Job) {
	if err := s.do(rtx, job); err != nil {
		logger.Error(err.Error())
	}
}
// do performs the incremental update of a multi-instance job:
//
//  1. If the update job's code comes from a data-return job, wait for that job
//     to complete and take its package as the incremental package.
//  2. List the objects of the incremental package and move them into the code
//     package of the original multi-instance job, through an executor task in
//     the original job's computing center.
//  3. Post an instance-operate event to the original job and wait for the
//     aggregate result of updating its instances.
//
// It returns jobmgr.ErrJobCancelled when a Cancel event arrives while waiting
// for the data-return job. If the cancel arrives while waiting for the
// instance update instead, the context error from that wait is returned.
// Unexpected body or status types are reported as errors rather than panics.
func (s *MultiInstanceUpdate) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) error {
	updateJob := jo.Body.(*job.UpdateMultiInstanceJob)

	// Validate the target first so a bad submission fails before any waiting.
	originalMultiInstanceJobBody, ok := s.originalJob.Body.(*jobmod.MultiInstanceJobDump)
	if !ok {
		return fmt.Errorf("original job %s is not a multi-instance job (which is %T)", s.originalJob.JobID, s.originalJob.Body)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Cancel ctx when a Cancel event arrives.
	go func() {
		event.WaitType[*event.Cancel](ctx, rtx.EventSet)
		cancel()
	}()

	// NOTE(review): when the code is not a data-return file, pkgID stays at its
	// zero value and is still used to list objects — confirm that is intended.
	var pkgID cdssdk.PackageID
	// Wait for the data-return job to finish.
	if rt, ok := updateJob.Info.Files.Code.(*schsdk.DataReturnJobFileInfo); ok {
		evt, ok := event.WaitTypeAnd[*event.JobCompleted](ctx, rtx.EventSet, func(val *event.JobCompleted) bool {
			return val.Job.GetInfo().GetLocalJobID() == rt.DataReturnLocalJobID
		})
		if !ok {
			return jobmgr.ErrJobCancelled
		}
		if evt.Err != nil {
			return fmt.Errorf("depended job %s was failed", evt.Job.JobID)
		}
		rtJob, ok := evt.Job.Body.(*job.DataReturnJob)
		if !ok {
			return fmt.Errorf("job %s is not a DataReturn job(which is %T)", evt.Job.JobID, evt.Job.Body)
		}
		pkgID = rtJob.DataReturnPackageID
	}

	// List the objects of the incremental package.
	stgCli, err := schglb.CloudreamStoragePool.Acquire()
	if err != nil {
		return fmt.Errorf("new cloudream storage client: %w", err)
	}
	defer schglb.CloudreamStoragePool.Release(stgCli)
	// TODO UserID
	pkgObjs, err := stgCli.Object().GetPackageObjects(cdssdk.ObjectGetPackageObjects{UserID: 1, PackageID: pkgID})
	if err != nil {
		return fmt.Errorf("getting package objects: %w", err)
	}

	// Every object is moved into the original job's code package, keeping its path.
	originalPackageID := originalMultiInstanceJobBody.Files.Code.PackageID
	objArr := make([]cdssdk.MovingObject, 0, len(pkgObjs.Objects))
	for _, obj := range pkgObjs.Objects {
		objArr = append(objArr, cdssdk.MovingObject{
			ObjectID:  obj.ObjectID,
			PackageID: originalPackageID,
			Path:      obj.Path,
		})
	}
	// TODO UserID
	objMoveParam := cdssdk.ObjectMove{
		UserID:  1,
		Movings: objArr,
	}

	ccInfo, err := rtx.Mgr.DB.ComputingCenter().GetByID(rtx.Mgr.DB.SQLCtx(), originalMultiInstanceJobBody.TargetCCID)
	if err != nil {
		return fmt.Errorf("getting computing center info: %w", err)
	}

	// Merge the incremental package into the original package.
	taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageMoveObject(objMoveParam), ccInfo)
	if err != nil {
		return fmt.Errorf("moving package: %w", err)
	}
	statusFut := taskStatus.Receive()
	status := <-statusFut.Chan()
	// A failed receive can leave Status nil; report that instead of panicking.
	moveStatus, ok := status.Value.Status.(*exectsk.StorageMoveObjectStatus)
	if !ok {
		return fmt.Errorf("moving package: unexpected task status %T", status.Value.Status)
	}
	if moveStatus.Error != "" {
		return fmt.Errorf("moving package: %s", moveStatus.Error)
	}

	// Ask the original job to update each of its instances.
	updateInfo := event.InstanceUpdateInfo{
		Info: updateJob.Info,
	}
	fut := future.NewSetValue[event.OperateInstanceResult]()
	rtx.Mgr.PostEvent(s.originalJob.JobID, event.NewInstanceOperate(&updateInfo, fut))

	// Wait with ctx so that a Cancel event also interrupts this wait.
	result, err := fut.Wait(ctx)
	if err != nil {
		return err
	}

	if result.Err != nil {
		return fmt.Errorf("update instance failed: %s: %w", result.OperateResult, result.Err)
	}

	logger.Info("update instance success!")
	return nil
}
// Dump returns an empty MultiInstanceUpdateDump; neither argument is consulted.
func (s *MultiInstanceUpdate) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump {
	dump := new(jobmod.MultiInstanceUpdateDump)
	return dump
}

View File

@ -150,15 +150,15 @@ func (s *PreScheduling) doPackageScheduling(ctx context.Context, rtx jobmgr.JobS
if scheme.Action == jobmod.ActionMove { if scheme.Action == jobmod.ActionMove {
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSNodeID) logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSNodeID)
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("moving package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
moveStatus := status.(*exectsk.CacheMovePackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
if moveStatus.Error != "" { if moveStatus.Error != "" {
return fmt.Errorf("moving package: %s", moveStatus.Error) return fmt.Errorf("moving package: %s", moveStatus.Error)
} }
@ -169,15 +169,15 @@ func (s *PreScheduling) doPackageScheduling(ctx context.Context, rtx jobmgr.JobS
if scheme.Action == jobmod.ActionLoad { if scheme.Action == jobmod.ActionLoad {
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID) logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("moving package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
loadStatus := status.(*exectsk.StorageLoadPackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
loadStatus := status.Value.Status.(*exectsk.StorageLoadPackageStatus)
if loadStatus.Error != "" { if loadStatus.Error != "" {
return fmt.Errorf("moving package: %s", loadStatus.Error) return fmt.Errorf("moving package: %s", loadStatus.Error)
} }
@ -228,15 +228,15 @@ func (s *PreScheduling) doImageScheduling(ctx context.Context, rtx jobmgr.JobSta
} }
// TODO UserID // TODO UserID
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo) taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
defer wt.Close()
status, err := wt.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("moving package: %w", err) return fmt.Errorf("moving package: %w", err)
} }
moveStatus := status.(*exectsk.CacheMovePackageStatus) fut := taskStatus.Receive()
status := <-fut.Chan()
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
if moveStatus.Error != "" { if moveStatus.Error != "" {
return fmt.Errorf("moving package: %s", moveStatus.Error) return fmt.Errorf("moving package: %s", moveStatus.Error)
} }
@ -261,15 +261,15 @@ func (s *PreScheduling) doImageScheduling(ctx context.Context, rtx jobmgr.JobSta
return fmt.Errorf("there must be only 1 object in the package which will be imported") return fmt.Errorf("there must be only 1 object in the package which will be imported")
} }
wt2 := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo) taskStatus2, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
defer wt2.Close()
status2, err := wt2.Receive(ctx)
if err != nil { if err != nil {
return fmt.Errorf("uploading image: %w", err) return fmt.Errorf("moving package: %w", err)
} }
uploadStatus := status2.(*exectsk.UploadImageStatus) fut2 := taskStatus2.Receive()
status2 := <-fut2.Chan()
uploadStatus := status2.Value.Status.(*exectsk.UploadImageStatus)
if uploadStatus.Error != "" { if uploadStatus.Error != "" {
return fmt.Errorf("uploading image: %s", uploadStatus.Error) return fmt.Errorf("uploading image: %s", uploadStatus.Error)
} }

View File

@ -232,7 +232,7 @@ func (m *Manager) AddJob(jobSetID schsdk.JobSetID, jobBody JobBody, jobState Job
m.pubLock.Lock() m.pubLock.Lock()
defer m.pubLock.Unlock() defer m.pubLock.Unlock()
jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex+1)) jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex))
m.jobIDIndex += 1 m.jobIDIndex += 1
job := &mgrJob{ job := &mgrJob{

View File

@ -1,11 +1,6 @@
package mq package mq
import ( //func (svc *Service) ReportExecutorTaskStatus(msg *mgrmq.ReportExecutorTaskStatus) (*mgrmq.ReportExecutorTaskStatusResp, *mq.CodeMessage) {
"gitlink.org.cn/cloudream/common/pkgs/mq" // svc.exeMgr.Report(msg.ExecutorID, msg.TaskStatus)
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager" // return mq.ReplyOK(mgrmq.NewReportExecutorTaskStatusResp())
) //}
func (svc *Service) ReportExecutorTaskStatus(msg *mgrmq.ReportExecutorTaskStatus) (*mgrmq.ReportExecutorTaskStatusResp, *mq.CodeMessage) {
//svc.exeMgr.Report(msg.ExecutorID, msg.TaskStatus)
return mq.ReplyOK(mgrmq.NewReportExecutorTaskStatusResp())
}

View File

@ -28,7 +28,7 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
for _, jobInfo := range msg.JobSet.Jobs { for _, jobInfo := range msg.JobSet.Jobs {
switch info := jobInfo.(type) { switch info := jobInfo.(type) {
case *schsdk.NormalJobInfo: case *schsdk.NormalJobInfo:
job := job.NewNormalJob(*info) jo := job.NewNormalJob(*info)
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID] preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
if !ok { if !ok {
@ -36,31 +36,52 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
} }
jobs = append(jobs, jobmgr.SubmittingJob{ jobs = append(jobs, jobmgr.SubmittingJob{
Body: job, Body: jo,
InitState: state.NewPreSchuduling(preSch), InitState: state.NewPreSchuduling(preSch),
}) })
case *schsdk.DataReturnJobInfo: case *schsdk.DataReturnJobInfo:
job := job.NewDataReturnJob(*info) jo := job.NewDataReturnJob(*info)
jobs = append(jobs, jobmgr.SubmittingJob{ jobs = append(jobs, jobmgr.SubmittingJob{
Body: job, Body: jo,
InitState: state.NewWaitTargetComplete(), InitState: state.NewWaitTargetComplete(),
}) })
case *schsdk.MultiInstanceJobInfo: case *schsdk.MultiInstanceJobInfo:
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID] preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
job := job.NewMultiInstanceJob(*info, preSch) jo := job.NewMultiInstanceJob(*info, preSch)
if !ok { if !ok {
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID)) return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
} }
jobs = append(jobs, jobmgr.SubmittingJob{ jobs = append(jobs, jobmgr.SubmittingJob{
Body: job, Body: jo,
InitState: state.NewMultiInstanceInit(), InitState: state.NewMultiInstanceInit(),
}) })
case *schsdk.UpdateMultiInstanceJobInfo:
modelJob := job.NewUpdateMultiInstanceJob(*info)
instanceJobSets := svc.jobMgr.DumpJobSet(modelJob.Info.MultiInstanceJobSetID)
if len(instanceJobSets) == 0 {
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("job set %s is not found", modelJob.Info.MultiInstanceJobSetID))
}
// 找到多实例任务本身
var multiInstanceJobDump jobmod.JobDump
for i := 0; i < len(instanceJobSets); i++ {
jobDump := instanceJobSets[i]
if _, ok := jobDump.Body.(*jobmod.MultiInstanceJobDump); ok {
multiInstanceJobDump = jobDump
break
}
}
jobs = append(jobs, jobmgr.SubmittingJob{
Body: modelJob,
InitState: state.NewMultiInstanceUpdate(multiInstanceJobDump),
})
} }
} }
@ -70,10 +91,13 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) { func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) {
logger.Debugf("start create instance") logger.Debugf("start create instance")
fut := future.NewSetValue[event.CreateInstanceResult]() fut := future.NewSetValue[event.OperateInstanceResult]()
svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceCreate(instInfo.DataSet, fut)) info := event.InstanceCreateInfo{
DataSet: instInfo.DataSet,
}
svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceOperate(&info, fut))
result, err := fut.WaitValue(context.TODO()) result, err := fut.Wait(context.TODO())
if err != nil { if err != nil {
return nil, mq.Failed(errorcode.OperationFailed, err.Error()) return nil, mq.Failed(errorcode.OperationFailed, err.Error())

View File

@ -0,0 +1,51 @@
package jobTask
import (
"fmt"
"gitlink.org.cn/cloudream/common/pkgs/async"
"gitlink.org.cn/cloudream/common/pkgs/future"
"gitlink.org.cn/cloudream/common/pkgs/logger"
"math/rand"
"time"
)
// JobTask pairs a task identifier with an async.UnboundChannel carrying
// values of type T.
type JobTask[T any] struct {
// id is the identifier assigned to the task when it is constructed.
id string
// taskChan is the channel exposed through Receive and Chan.
// NOTE(review): held by value rather than by pointer; confirm that
// async.UnboundChannel is safe to copy.
taskChan async.UnboundChannel[T]
}
// NewJobTask allocates a JobTask for values of type T, giving it a freshly
// generated identifier and a new unbound channel.
func NewJobTask[T any]() *JobTask[T] {
	task := new(JobTask[T])
	task.id = getTaskID()
	task.taskChan = *async.NewUnboundChannel[T]()
	return task
}
// getTaskID builds a task identifier of the form
// "id_<unix-nanoseconds>_<number>", where <number> is a pseudo-random value
// in the range 1000..9999.
//
// The shared math/rand generator is deliberately NOT reseeded here. Reseeding
// it from the clock on every call disturbs every other user of the global
// generator and makes the suffix a function of the timestamp, so it would add
// no uniqueness beyond the nanosecond value. Since Go 1.20 the global
// generator is seeded randomly at start-up and rand.Seed is deprecated; on
// older toolchains, seed once during program initialisation if a
// per-process-unique sequence is required.
func getTaskID() string {
	nano := time.Now().UnixNano()
	// Pick a number between 1000 and 9999 inclusive.
	suffix := rand.Intn(9000) + 1000
	return fmt.Sprintf("id_%d_%d", nano, suffix)
}
// Receive forwards to the underlying channel's Receive and returns the
// resulting future for a value of type T.
func (c *JobTask[T]) Receive() future.Future1[T] {
	var pending future.Future1[T] = c.taskChan.Receive()
	return pending
}
// Send is currently a stub: it only writes an informational log entry and
// does not use info.
func (c *JobTask[T]) Send(info any) {
	const notice = "send http"
	logger.Info(notice)
}
// Chan returns a pointer to the channel stored inside the task, so callers
// operate on the task's own channel rather than on a copy.
func (c *JobTask[T]) Chan() *async.UnboundChannel[T] {
	ch := &c.taskChan
	return ch
}
// ID returns the identifier stored in the task.
func (c *JobTask[T]) ID() string {
	taskID := c.id
	return taskID
}

View File

@ -76,7 +76,8 @@ func main() {
// 启动服务 // 启动服务
go serveJobManager(jobMgr) go serveJobManager(jobMgr)
go serveExecutorManager(exeMgr) //go serveExecutorManager(exeMgr)
go exeMgr.Serve()
go serveAdvisorManager(advMgr) go serveAdvisorManager(advMgr)
@ -108,17 +109,6 @@ func serveMQServer(server *mgrmq.Server) {
logger.Info("mq server stopped") logger.Info("mq server stopped")
} }
func serveExecutorManager(mgr *executormgr.Manager) {
logger.Info("start serving executor manager")
err := mgr.Serve()
if err != nil {
logger.Errorf("executor manager stopped with error: %s", err.Error())
}
logger.Info("executor manager stopped")
}
func serveAdvisorManager(mgr *advisormgr.Manager) { func serveAdvisorManager(mgr *advisormgr.Manager) {
logger.Info("start serving advisor manager") logger.Info("start serving advisor manager")