parent
2f7250eda1
commit
ae31802f19
|
@ -44,7 +44,7 @@ func (s *Service) MakeScheme(dump jobmod.JobDump) (*jobmod.JobScheduleScheme, er
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
return callback.WaitValue(context.Background())
|
return callback.Wait(context.Background())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Service) Serve() error {
|
func (s *Service) Serve() error {
|
||||||
|
|
|
@ -20,6 +20,7 @@ var _ = serder.UseTypeUnionExternallyTagged(types.Ref(types.NewTypeUnion[JobBody
|
||||||
(*DataReturnJobDump)(nil),
|
(*DataReturnJobDump)(nil),
|
||||||
(*InstanceJobDump)(nil),
|
(*InstanceJobDump)(nil),
|
||||||
(*MultiInstanceJobDump)(nil),
|
(*MultiInstanceJobDump)(nil),
|
||||||
|
(*UpdateMultiInstanceJobDump)(nil),
|
||||||
)))
|
)))
|
||||||
|
|
||||||
type NormalJobDump struct {
|
type NormalJobDump struct {
|
||||||
|
@ -64,3 +65,13 @@ type MultiInstanceJobDump struct {
|
||||||
func (d *MultiInstanceJobDump) getType() JobBodyDumpType {
|
func (d *MultiInstanceJobDump) getType() JobBodyDumpType {
|
||||||
return d.Type
|
return d.Type
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type UpdateMultiInstanceJobDump struct {
|
||||||
|
serder.Metadata `union:"MultiInstanceJob"`
|
||||||
|
Type JobBodyDumpType `json:"type"`
|
||||||
|
Files JobFiles `json:"files"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *UpdateMultiInstanceJobDump) getType() JobBodyDumpType {
|
||||||
|
return d.Type
|
||||||
|
}
|
||||||
|
|
|
@ -56,6 +56,15 @@ func (dump *MultiInstCreateInitDump) getType() JobStateDumpType {
|
||||||
return dump.Type
|
return dump.Type
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MultiInstanceUpdateDump struct {
|
||||||
|
serder.Metadata `union:"MultiInstCreateInit"`
|
||||||
|
Type JobStateDumpType `json:"type"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dump *MultiInstanceUpdateDump) getType() JobStateDumpType {
|
||||||
|
return dump.Type
|
||||||
|
}
|
||||||
|
|
||||||
type MultiInstCreateRunningDump struct {
|
type MultiInstCreateRunningDump struct {
|
||||||
serder.Metadata `union:"MultiInstCreateRunning"`
|
serder.Metadata `union:"MultiInstCreateRunning"`
|
||||||
Type JobStateDumpType `json:"type"`
|
Type JobStateDumpType `json:"type"`
|
||||||
|
|
|
@ -6,7 +6,6 @@ import (
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
||||||
myhttp "gitlink.org.cn/cloudream/common/utils/http"
|
myhttp "gitlink.org.cn/cloudream/common/utils/http"
|
||||||
"gitlink.org.cn/cloudream/common/utils/serder"
|
"gitlink.org.cn/cloudream/common/utils/serder"
|
||||||
"log"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -24,7 +23,8 @@ var _ = Register(Service.StartTask)
|
||||||
|
|
||||||
type StartTask struct {
|
type StartTask struct {
|
||||||
mq.MessageBodyBase
|
mq.MessageBodyBase
|
||||||
Info exectsk.TaskInfo `json:"info"`
|
TaskID string `json:"taskID"`
|
||||||
|
Info exectsk.TaskInfo `json:"info"`
|
||||||
}
|
}
|
||||||
type StartTaskResp struct {
|
type StartTaskResp struct {
|
||||||
mq.MessageBodyBase
|
mq.MessageBodyBase
|
||||||
|
@ -32,9 +32,10 @@ type StartTaskResp struct {
|
||||||
TaskID string `json:"taskID"`
|
TaskID string `json:"taskID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewStartTask(info exectsk.TaskInfo) *StartTask {
|
func NewStartTask(taskID string, info exectsk.TaskInfo) *StartTask {
|
||||||
return &StartTask{
|
return &StartTask{
|
||||||
Info: info,
|
TaskID: taskID,
|
||||||
|
Info: info,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func NewStartTaskResp(execID schmod.ExecutorID, taskID string) *StartTaskResp {
|
func NewStartTaskResp(execID schmod.ExecutorID, taskID string) *StartTaskResp {
|
||||||
|
@ -53,9 +54,7 @@ func (c *HttpClient) SubmitTask(req *StartTask) (*StartTaskResp, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
//data, err := json.Marshal(req)
|
|
||||||
data, err := serder.ObjectToJSONEx(req)
|
data, err := serder.ObjectToJSONEx(req)
|
||||||
log.Println("send data: " + string(data))
|
|
||||||
resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{
|
resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{
|
||||||
Body: data,
|
Body: data,
|
||||||
})
|
})
|
||||||
|
@ -95,3 +94,56 @@ func (c *HttpClient) GetReportInfo() (*http.Response, error) {
|
||||||
|
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TaskOperateInfo is the request payload sent to the /operateTask endpoint.
type TaskOperateInfo struct {
	// TaskID identifies the task the command applies to.
	TaskID string
	// Command names the operation to perform.
	// NOTE(review): presumably one of the executor's operation strings
	// ("update", "stop", ...) — confirm against the handler.
	Command string
}

// NewTaskOperateInfo builds a TaskOperateInfo from the given task ID and command.
func NewTaskOperateInfo(taskID string, command string) *TaskOperateInfo {
	info := new(TaskOperateInfo)
	info.TaskID = taskID
	info.Command = command
	return info
}
|
||||||
|
|
||||||
|
type TaskOperateResp struct {
|
||||||
|
Err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewTaskOperateResp(err error) *TaskOperateResp {
|
||||||
|
return &TaskOperateResp{
|
||||||
|
Err: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *HttpClient) OperateTask(req *TaskOperateInfo) (*TaskOperateResp, error) {
|
||||||
|
targetURL, err := url.JoinPath(c.baseURL + "/operateTask")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := serder.ObjectToJSONEx(req)
|
||||||
|
resp, err := myhttp.PostJSONRow(targetURL, myhttp.RequestParam{
|
||||||
|
Body: data,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
contType := resp.Header.Get("Content-Type")
|
||||||
|
if strings.Contains(contType, myhttp.ContentTypeJSON) {
|
||||||
|
var codeResp response[TaskOperateResp]
|
||||||
|
if err := serder.JSONToObjectStream(resp.Body, &codeResp); err != nil {
|
||||||
|
return nil, fmt.Errorf("parsing response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if codeResp.Code == errorcode.OK {
|
||||||
|
return &codeResp.Data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, codeResp.ToError()
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, fmt.Errorf("unknow response content type: %s", contType)
|
||||||
|
}
|
||||||
|
|
|
@ -1,29 +1,36 @@
|
||||||
package task
|
package task
|
||||||
|
|
||||||
import cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
import (
|
||||||
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
|
)
|
||||||
|
|
||||||
type ScheduleCreateECS struct {
|
type ScheduleCreateECS struct {
|
||||||
TaskInfoBase
|
TaskInfoBase
|
||||||
UserID cdssdk.UserID `json:"userID"`
|
UserID cdssdk.UserID `json:"userID"`
|
||||||
PackageID cdssdk.PackageID `json:"packageID"`
|
PackageID cdssdk.PackageID `json:"packageID"`
|
||||||
|
ModelID schsdk.ModelID `json:"modelID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ScheduleCreateECSStatus struct {
|
type ScheduleCreateECSStatus struct {
|
||||||
TaskStatusBase
|
TaskStatusBase
|
||||||
Error string `json:"error"`
|
Error string `json:"error"`
|
||||||
Address string `json:"address"`
|
Address string `json:"address"`
|
||||||
|
ModelID schsdk.ModelID `json:"modelID"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduleCreateECS(userID cdssdk.UserID, packageID cdssdk.PackageID) *ScheduleCreateECS {
|
func NewScheduleCreateECS(userID cdssdk.UserID, packageID cdssdk.PackageID, modelID schsdk.ModelID) *ScheduleCreateECS {
|
||||||
return &ScheduleCreateECS{
|
return &ScheduleCreateECS{
|
||||||
UserID: userID,
|
UserID: userID,
|
||||||
PackageID: packageID,
|
PackageID: packageID,
|
||||||
|
ModelID: modelID,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduleCreateECSStatus(address string, err string) *ScheduleCreateECSStatus {
|
func NewScheduleCreateECSStatus(address string, modelID schsdk.ModelID, err string) *ScheduleCreateECSStatus {
|
||||||
return &ScheduleCreateECSStatus{
|
return &ScheduleCreateECSStatus{
|
||||||
Address: address,
|
Address: address,
|
||||||
|
ModelID: modelID,
|
||||||
Error: err,
|
Error: err,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
package task
|
||||||
|
|
||||||
|
import cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
|
|
||||||
|
type StorageMoveObject struct {
|
||||||
|
TaskInfoBase
|
||||||
|
ObjectMove cdssdk.ObjectMove `json:"objectMove"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type StorageMoveObjectStatus struct {
|
||||||
|
TaskStatusBase
|
||||||
|
Error string `json:"error"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewStorageMoveObject(objectMove cdssdk.ObjectMove) *StorageMoveObject {
|
||||||
|
return &StorageMoveObject{
|
||||||
|
ObjectMove: objectMove,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewStorageMoveObjectStatus(err string) *StorageMoveObjectStatus {
|
||||||
|
return &StorageMoveObjectStatus{
|
||||||
|
Error: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
Register[*StorageMoveObject, *StorageMoveObjectStatus]()
|
||||||
|
}
|
|
@ -38,3 +38,8 @@ func Register[TTaskInfo TaskInfo, TTaskStatus TaskStatus]() any {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TaskOperateInfo pairs a task ID with the command to apply to that task.
type TaskOperateInfo struct {
	// TaskID identifies the target task.
	TaskID string
	// Command names the operation to perform.
	Command string
}
|
||||||
|
|
|
@ -1,47 +1,50 @@
|
||||||
package manager
|
package manager
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
|
||||||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ExecutorService interface {
|
type ExecutorService interface {
|
||||||
ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus) (*ReportExecutorTaskStatusResp, *mq.CodeMessage)
|
//ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus) (*ReportExecutorTaskStatusResp, *mq.CodeMessage)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 接收executor上报的存活状态及任务执行情况
|
// 接收executor上报的存活状态及任务执行情况
|
||||||
var _ = Register(Service.ReportExecutorTaskStatus)
|
//var _ = Register(Service.ReportExecutorTaskStatus)
|
||||||
|
|
||||||
type ReportExecutorTaskStatus struct {
|
// type ReportExecutorTaskStatus struct {
|
||||||
mq.MessageBodyBase
|
// mq.MessageBodyBase
|
||||||
ExecutorID schmod.ExecutorID `json:"executorID"`
|
// ExecutorID schmod.ExecutorID `json:"executorID"`
|
||||||
TaskStatus []ExecutorTaskStatus `json:"taskStatus"`
|
// TaskStatus []ExecutorTaskStatus `json:"taskStatus"`
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
type ReportExecutorTaskStatusResp struct {
|
// type ReportExecutorTaskStatusResp struct {
|
||||||
mq.MessageBodyBase
|
// mq.MessageBodyBase
|
||||||
}
|
// }
|
||||||
type ExecutorTaskStatus struct {
|
type ExecutorTaskStatus struct {
|
||||||
TaskID string
|
ExecutorID schmod.ExecutorID `json:"executorID"`
|
||||||
Status exectsk.TaskStatus
|
TaskID string `json:"taskID"`
|
||||||
|
Status exectsk.TaskStatus `json:"status"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus {
|
// func NewReportExecutorTaskStatus(executorID schmod.ExecutorID, taskStatus []ExecutorTaskStatus) *ReportExecutorTaskStatus {
|
||||||
return &ReportExecutorTaskStatus{
|
// return &ReportExecutorTaskStatus{
|
||||||
ExecutorID: executorID,
|
// ExecutorID: executorID,
|
||||||
TaskStatus: taskStatus,
|
// TaskStatus: taskStatus,
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp {
|
//
|
||||||
return &ReportExecutorTaskStatusResp{}
|
// func NewReportExecutorTaskStatusResp() *ReportExecutorTaskStatusResp {
|
||||||
}
|
// return &ReportExecutorTaskStatusResp{}
|
||||||
func NewExecutorTaskStatus(taskID string, status exectsk.TaskStatus) ExecutorTaskStatus {
|
// }
|
||||||
|
func NewExecutorTaskStatus(executorID schmod.ExecutorID, taskID string, status exectsk.TaskStatus) ExecutorTaskStatus {
|
||||||
return ExecutorTaskStatus{
|
return ExecutorTaskStatus{
|
||||||
TaskID: taskID,
|
ExecutorID: executorID,
|
||||||
Status: status,
|
TaskID: taskID,
|
||||||
|
Status: status,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) {
|
|
||||||
return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...)
|
//func (c *Client) ReportExecutorTaskStatus(msg *ReportExecutorTaskStatus, opts ...mq.RequestOption) (*ReportExecutorTaskStatusResp, error) {
|
||||||
}
|
// return mq.Request(Service.ReportExecutorTaskStatus, c.roundTripper, msg, opts...)
|
||||||
|
//}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package globals
|
||||||
|
|
||||||
import (
|
import (
|
||||||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||||||
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var ExecutorID schmod.ExecutorID
|
var ExecutorID schmod.ExecutorID
|
||||||
|
@ -12,5 +11,9 @@ func Init(id schmod.ExecutorID) {
|
||||||
ExecutorID = id
|
ExecutorID = id
|
||||||
}
|
}
|
||||||
|
|
||||||
// 全局变量定义
|
const (
|
||||||
var EventChannel = make(chan manager.ReportExecutorTaskStatus)
|
UPDATE = "update"
|
||||||
|
STOP = "stop"
|
||||||
|
RESTART = "restart"
|
||||||
|
DESTROY = "destroy"
|
||||||
|
)
|
||||||
|
|
|
@ -41,5 +41,6 @@ func (s *Server) Serve() error {
|
||||||
|
|
||||||
func (s *Server) initRouters() {
|
func (s *Server) initRouters() {
|
||||||
s.engine.POST("/submitTask", s.TaskSvc().SubmitTask)
|
s.engine.POST("/submitTask", s.TaskSvc().SubmitTask)
|
||||||
|
s.engine.POST("/operateTask", s.TaskSvc().OperateTask)
|
||||||
s.engine.GET("/getReportInfo", s.TaskSvc().GetReportInfo)
|
s.engine.GET("/getReportInfo", s.TaskSvc().GetReportInfo)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
package http
|
package http
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"gitlink.org.cn/cloudream/common/consts/errorcode"
|
"gitlink.org.cn/cloudream/common/consts/errorcode"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
||||||
"gitlink.org.cn/cloudream/common/utils/serder"
|
"gitlink.org.cn/cloudream/common/utils/serder"
|
||||||
execmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
execmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
||||||
|
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||||||
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
@ -31,8 +31,6 @@ func (s *TaskService) SubmitTask(ctx *gin.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
println(string(bodyData))
|
|
||||||
|
|
||||||
req, err := serder.JSONToObjectEx[execmq.StartTask](bodyData)
|
req, err := serder.JSONToObjectEx[execmq.StartTask](bodyData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("parsing request body: %s", err.Error())
|
log.Warnf("parsing request body: %s", err.Error())
|
||||||
|
@ -48,7 +46,7 @@ func (s *TaskService) SubmitTask(ctx *gin.Context) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.JSON(http.StatusOK, OK(execmq.NewStartTaskResp(myglbs.ExecutorID, tsk.ID())))
|
ctx.JSON(http.StatusOK, OK(execmq.NewStartTaskResp(myglbs.ExecutorID, string(tsk.ID()))))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *TaskService) GetReportInfo(ctx *gin.Context) {
|
func (s *TaskService) GetReportInfo(ctx *gin.Context) {
|
||||||
|
@ -56,16 +54,64 @@ func (s *TaskService) GetReportInfo(ctx *gin.Context) {
|
||||||
ctx.Header("Cache-Control", "no-cache")
|
ctx.Header("Cache-Control", "no-cache")
|
||||||
ctx.Header("Connection", "keep-alive")
|
ctx.Header("Connection", "keep-alive")
|
||||||
|
|
||||||
for report := range myglbs.EventChannel {
|
taskChan := s.svc.TaskManager.GetTaskChan()
|
||||||
data, err := json.Marshal(report)
|
defer taskChan.Chan.Close()
|
||||||
|
|
||||||
|
status := mgrmq.ExecutorTaskStatus{
|
||||||
|
ExecutorID: myglbs.ExecutorID,
|
||||||
|
}
|
||||||
|
bytes, err := serder.ObjectToJSONEx(status)
|
||||||
|
_, err = ctx.Writer.Write([]byte("data: " + string(bytes) + "\n\n"))
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("write data: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx.Writer.Flush()
|
||||||
|
|
||||||
|
for {
|
||||||
|
receive, err := taskChan.Chan.Receive()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
continue
|
||||||
}
|
}
|
||||||
|
data, err := serder.ObjectToJSONEx(receive)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("marshal task: %s", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("send task status: %s", string(data))
|
||||||
|
|
||||||
_, err = ctx.Writer.Write([]byte("data: " + string(data) + "\n\n"))
|
_, err = ctx.Writer.Write([]byte("data: " + string(data) + "\n\n"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
logger.Errorf("write data: %s", err.Error())
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
ctx.Writer.Flush() // 确保数据立即发送到客户端
|
ctx.Writer.Flush() // 确保数据立即发送到客户端
|
||||||
println("report: " + string(data))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *TaskService) OperateTask(ctx *gin.Context) {
|
||||||
|
log := logger.WithField("HTTP", "TaskOperate")
|
||||||
|
|
||||||
|
bodyData, err := io.ReadAll(ctx.Request.Body)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("reading request body: %s", err.Error())
|
||||||
|
ctx.JSON(http.StatusOK, Failed(errorcode.OperationFailed, "read request body failed"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := serder.JSONToObjectEx[execmq.TaskOperateInfo](bodyData)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("parsing request body: %s", err.Error())
|
||||||
|
ctx.JSON(http.StatusOK, Failed(errorcode.OperationFailed, "parse request body failed"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
task, ok := s.svc.TaskManager.Tasks[req.TaskID]
|
||||||
|
if !ok {
|
||||||
|
ctx.JSON(http.StatusOK, Failed(errorcode.OperationFailed, "task not found"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
task.SendTaskOperate(req)
|
||||||
|
ctx.JSON(http.StatusOK, OK(execmq.NewTaskOperateResp(nil)))
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
package manager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
||||||
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/task"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Manager struct {
|
||||||
|
statusChans []*task.TaskChan[any]
|
||||||
|
Tasks map[string]*task.Task
|
||||||
|
ctx task.TaskContext
|
||||||
|
lock sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewManager() Manager {
|
||||||
|
return Manager{
|
||||||
|
statusChans: make([]*task.TaskChan[any], 0),
|
||||||
|
Tasks: make(map[string]*task.Task),
|
||||||
|
ctx: task.TaskContext{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) GetTaskChan() *task.TaskChan[any] {
|
||||||
|
|
||||||
|
// 创建 TaskChan[any] 实例,并赋值 UnboundChannel[any]
|
||||||
|
taskChan := task.NewTaskChan[any]()
|
||||||
|
|
||||||
|
m.statusChans = append(m.statusChans, taskChan)
|
||||||
|
|
||||||
|
return taskChan
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) sendTaskChan(tskChan task.TaskChan[any]) {
|
||||||
|
for {
|
||||||
|
receive, err := tskChan.Chan.Receive()
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(m.statusChans); i++ {
|
||||||
|
err := m.statusChans[i].Chan.Send(receive)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) StartByInfo(taskID string, info exectsk.TaskInfo) (*task.Task, error) {
|
||||||
|
m.lock.Lock()
|
||||||
|
defer m.lock.Unlock()
|
||||||
|
|
||||||
|
infoType := reflect2.TypeOfValue(info)
|
||||||
|
ctor, ok := task.TaskFromInfoCtors[infoType]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("unknow info type")
|
||||||
|
}
|
||||||
|
|
||||||
|
newTask := task.NewTask(taskID)
|
||||||
|
m.Tasks[taskID] = newTask
|
||||||
|
|
||||||
|
go ctor(info).Execute(newTask, m.ctx)
|
||||||
|
|
||||||
|
// 将task的状态发送到所有channel
|
||||||
|
go m.sendTaskChan(newTask.TaskStatusChan)
|
||||||
|
|
||||||
|
return newTask, nil
|
||||||
|
}
|
|
@ -1,13 +1,11 @@
|
||||||
package reporter
|
package reporter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Reporter struct {
|
type Reporter struct {
|
||||||
|
@ -58,17 +56,17 @@ func (r *Reporter) Serve() error {
|
||||||
ticker.Reset(r.reportInterval)
|
ticker.Reset(r.reportInterval)
|
||||||
}
|
}
|
||||||
|
|
||||||
r.taskStatusLock.Lock()
|
//r.taskStatusLock.Lock()
|
||||||
var taskStatus []mgrmq.ExecutorTaskStatus
|
//var taskStatus []mgrmq.ExecutorTaskStatus
|
||||||
for taskID, status := range r.taskStatus {
|
//for taskID, status := range r.taskStatus {
|
||||||
taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status))
|
// taskStatus = append(taskStatus, mgrmq.NewExecutorTaskStatus(taskID, status))
|
||||||
}
|
//}
|
||||||
r.taskStatus = make(map[string]exectsk.TaskStatus)
|
//r.taskStatus = make(map[string]exectsk.TaskStatus)
|
||||||
r.taskStatusLock.Unlock()
|
//r.taskStatusLock.Unlock()
|
||||||
|
|
||||||
status := mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus)
|
//status := mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus)
|
||||||
// 将数据发送到管道中
|
//// 将数据发送到管道中
|
||||||
globals.EventChannel <- *status
|
//globals.EventChannel <- *status
|
||||||
|
|
||||||
//_, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus))
|
//_, err := magCli.ReportExecutorTaskStatus(mgrmq.NewReportExecutorTaskStatus(r.executorID, taskStatus))
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
package services
|
package services
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/task"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/manager"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Service struct {
|
type Service struct {
|
||||||
taskManager *task.Manager
|
TaskManager *manager.Manager
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewService(tskmgr *task.Manager) *Service {
|
func NewService(tskmgr *manager.Manager) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
taskManager: tskmgr,
|
TaskManager: tskmgr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package services
|
package services
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/cloudream/common/consts/errorcode"
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
||||||
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
||||||
|
@ -11,18 +10,18 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func (svc *Service) StartTask(msg *execmq.StartTask) (*execmq.StartTaskResp, *mq.CodeMessage) {
|
func (svc *Service) StartTask(msg *execmq.StartTask) (*execmq.StartTaskResp, *mq.CodeMessage) {
|
||||||
tsk, err := svc.taskManager.StartByInfo(msg.Info)
|
//tsk, err := svc.TaskManager.StartByInfo(msg.Info)
|
||||||
if err != nil {
|
//if err != nil {
|
||||||
logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
|
// logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
|
||||||
Warnf("starting task by info: %s", err.Error())
|
// Warnf("starting task by info: %s", err.Error())
|
||||||
return nil, mq.Failed(errorcode.OperationFailed, "start task by info failed")
|
// return nil, mq.Failed(errorcode.OperationFailed, "start task by info failed")
|
||||||
}
|
//}
|
||||||
|
|
||||||
return mq.ReplyOK(execmq.NewStartTaskResp(myglbs.ExecutorID, tsk.ID()))
|
return mq.ReplyOK(execmq.NewStartTaskResp(myglbs.ExecutorID, ""))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (svc *Service) SubmitTask(msg *execmq.StartTask) (*task.Task, error) {
|
func (svc *Service) SubmitTask(msg *execmq.StartTask) (*task.Task, error) {
|
||||||
tsk, err := svc.taskManager.StartByInfo(msg.Info)
|
tsk, err := svc.TaskManager.StartByInfo(msg.TaskID, msg.Info)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
|
logger.WithField("Info", reflect2.TypeOfValue(msg.Info).Name()).
|
||||||
Warnf("starting task by info: %s", err.Error())
|
Warnf("starting task by info: %s", err.Error())
|
||||||
|
|
|
@ -2,10 +2,7 @@ package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
@ -21,22 +18,17 @@ func NewCacheMovePackage(info *exectsk.CacheMovePackage) *CacheMovePackage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *CacheMovePackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *CacheMovePackage) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[CacheMovePackage]("Task")
|
log := logger.WithType[CacheMovePackage]("Task")
|
||||||
log.Debugf("begin with %v", logger.FormatStruct(t.CacheMovePackage))
|
log.Debugf("begin with %v", logger.FormatStruct(t.CacheMovePackage))
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
|
||||||
err := t.do(ctx)
|
err := t.do(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewCacheMovePackageStatus(err.Error()))
|
task.SendStatus(exectsk.NewCacheMovePackageStatus(err.Error()))
|
||||||
} else {
|
} else {
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewCacheMovePackageStatus(""))
|
task.SendStatus(exectsk.NewCacheMovePackageStatus(""))
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
complete(err, CompleteOption{
|
|
||||||
RemovingDelay: time.Minute,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *CacheMovePackage) do(ctx TaskContext) error {
|
func (t *CacheMovePackage) do(ctx TaskContext) error {
|
||||||
|
|
|
@ -9,7 +9,6 @@ import (
|
||||||
util "github.com/alibabacloud-go/tea-utils/v2/service"
|
util "github.com/alibabacloud-go/tea-utils/v2/service"
|
||||||
"github.com/alibabacloud-go/tea/tea"
|
"github.com/alibabacloud-go/tea/tea"
|
||||||
log "gitlink.org.cn/cloudream/common/pkgs/logger"
|
log "gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -49,10 +48,9 @@ func AliConfig(configMap map[string]interface{}) {
|
||||||
aliclient, _ = ecs.NewClient(config)
|
aliclient, _ = ecs.NewClient(config)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *AliCloud) CreateServer(commands []string) (string, error) {
|
// CreateServer 创建实例
|
||||||
|
func (a *AliCloud) CreateServer() (string, error) {
|
||||||
var instanceID string
|
var instanceID string
|
||||||
var instanceIDArr string
|
|
||||||
var result string
|
|
||||||
|
|
||||||
tryErr := func() (_e error) {
|
tryErr := func() (_e error) {
|
||||||
defer func() {
|
defer func() {
|
||||||
|
@ -68,7 +66,7 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
|
||||||
return _err
|
return _err
|
||||||
}
|
}
|
||||||
instanceID = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet[0]))
|
instanceID = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet[0]))
|
||||||
instanceIDArr = tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet))
|
//instanceIDArr := tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet))
|
||||||
log.Info(tea.String("--------------------创建实例成功,实例ID:" + tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet)) + "--------------------"))
|
log.Info(tea.String("--------------------创建实例成功,实例ID:" + tea.StringValue(util.ToJSONString(responces.Body.InstanceIdSets.InstanceIdSet)) + "--------------------"))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -86,35 +84,16 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
|
||||||
return "", tryErr
|
return "", tryErr
|
||||||
}
|
}
|
||||||
|
|
||||||
println("instance: " + instanceID)
|
|
||||||
println("instanceArr: " + instanceIDArr)
|
|
||||||
// 获取实例IP
|
// 获取实例IP
|
||||||
ip, _ := getInstanceIP(instanceIDArr, *aliclient.RegionId)
|
//ip, _ := getInstanceIP(instanceIDArr, *aliclient.RegionId)
|
||||||
println("ip: " + ip)
|
//println("ip: " + ip)
|
||||||
|
|
||||||
CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID
|
return instanceID, nil
|
||||||
CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID
|
}
|
||||||
println("CDSRcloneID: " + CDSRcloneID)
|
|
||||||
println("CDSRcloneConfigID: " + CDSRcloneConfigID)
|
|
||||||
|
|
||||||
//commands := []string{}
|
|
||||||
//commandContent := "yum install -y fuse3"
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/"
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n"
|
|
||||||
//println(commandContent)
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n"
|
|
||||||
//println(commandContent)
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "cd /opt/rclone \n chmod +x rclone"
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &"
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
//commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh"
|
|
||||||
//commands = append(commands, commandContent)
|
|
||||||
|
|
||||||
|
// RunCommand 执行指令
|
||||||
|
func (a *AliCloud) RunCommand(commands []string, instanceID string) (string, error) {
|
||||||
|
var result string
|
||||||
for i := 0; i < len(commands); i++ {
|
for i := 0; i < len(commands); i++ {
|
||||||
log.Info("start execute command")
|
log.Info("start execute command")
|
||||||
commandId, err := runShellCommand(commands[i], instanceID, *aliclient.RegionId)
|
commandId, err := runShellCommand(commands[i], instanceID, *aliclient.RegionId)
|
||||||
|
@ -123,16 +102,27 @@ func (a *AliCloud) CreateServer(commands []string) (string, error) {
|
||||||
}
|
}
|
||||||
// 判断是否执行成功
|
// 判断是否执行成功
|
||||||
log.Info("describe result")
|
log.Info("describe result")
|
||||||
_, _, err = describeInvocationResults(aliclient, instanceID, commandId, tea.String("utf-8"), 500)
|
_, result, err = describeInvocationResults(aliclient, instanceID, commandId, tea.String("utf-8"), 500)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("describeInvocationResults: " + err.Error())
|
log.Error("describeInvocationResults: " + err.Error())
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DestroyServer 强制销毁实例
|
||||||
|
func (a *AliCloud) DestroyServer(instanceID string) (string, error) {
|
||||||
|
result, err := aliclient.DeleteInstance(&ecs.DeleteInstanceRequest{
|
||||||
|
InstanceId: &instanceID,
|
||||||
|
Force: tea.Bool(true),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return tea.StringValue(result.Body.RequestId), nil
|
||||||
|
}
|
||||||
|
|
||||||
func runShellCommand(commandContent string, instanceID string, regionId string) (*string, error) {
|
func runShellCommand(commandContent string, instanceID string, regionId string) (*string, error) {
|
||||||
// 从CDS下载文件
|
// 从CDS下载文件
|
||||||
commandRequest := ecs.RunCommandRequest{
|
commandRequest := ecs.RunCommandRequest{
|
||||||
|
|
|
@ -2,14 +2,11 @@ package create_ecs
|
||||||
|
|
||||||
// CloudProvider 是一个接口,定义了创建服务器的方法
|
// CloudProvider 是一个接口,定义了创建服务器的方法
|
||||||
type CloudProvider interface {
|
type CloudProvider interface {
|
||||||
CreateServer(commands []string) (string, error)
|
CreateServer() (string, error)
|
||||||
|
RunCommand(commands []string, instanceID string) (string, error)
|
||||||
|
DestroyServer(instanceID string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CloudFactory 是工厂接口
|
|
||||||
// 工厂模式中使用 CreateProvider 的设计原则是:
|
|
||||||
// 单一职责:Factory 只负责创建 CloudProvider 实例,CloudProvider 负责实际的服务器创建任务。
|
|
||||||
// 开闭原则:Factory 可以扩展以支持新的 CloudProvider 实现,而无需修改现有代码。
|
|
||||||
// 依赖倒置原则:客户端代码依赖于 CloudProvider 接口而不是具体实现,从而减少了耦合。
|
|
||||||
type CloudFactory interface {
|
type CloudFactory interface {
|
||||||
CreateProvider() CloudProvider
|
CreateProvider() CloudProvider
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@ import (
|
||||||
// HuaweiCloud实现了CloudProvider接口
|
// HuaweiCloud实现了CloudProvider接口
|
||||||
type HuaweiCloud struct{}
|
type HuaweiCloud struct{}
|
||||||
|
|
||||||
var req model.PostPaidServer
|
var serverbody model.PrePaidServer
|
||||||
var hwConfigMap map[string]interface{}
|
var hwConfigMap map[string]interface{}
|
||||||
var hwclient ecs.EcsClient
|
var hwclient ecs.EcsClient
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ func HWCloudConfig(configMap map[string]interface{}) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err = json.Unmarshal(jsonData, &req)
|
err = json.Unmarshal(jsonData, &serverbody)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -45,13 +45,13 @@ func HWCloudConfig(configMap map[string]interface{}) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *HuaweiCloud) CreateServer(commands []string) (string, error) {
|
func (a *HuaweiCloud) CreateServer() (string, error) {
|
||||||
|
|
||||||
request := &model.CreatePostPaidServersRequest{}
|
request := &model.CreateServersRequest{}
|
||||||
request.Body = &model.CreatePostPaidServersRequestBody{
|
request.Body = &model.CreateServersRequestBody{
|
||||||
Server: &req,
|
Server: &serverbody,
|
||||||
}
|
}
|
||||||
response, err := hwclient.CreatePostPaidServers(request)
|
response, err := hwclient.CreateServers(request)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
fmt.Printf("%+v\n", response)
|
fmt.Printf("%+v\n", response)
|
||||||
} else {
|
} else {
|
||||||
|
@ -60,3 +60,26 @@ func (a *HuaweiCloud) CreateServer(commands []string) (string, error) {
|
||||||
//ids := response.ServerIds
|
//ids := response.ServerIds
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *HuaweiCloud) RunCommand(commands []string, instanceID string) (string, error) {
|
||||||
|
//TODO implement me
|
||||||
|
panic("implement me")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *HuaweiCloud) DestroyServer(instanceID string) (string, error) {
|
||||||
|
request := &model.DeleteServersRequest{}
|
||||||
|
var listServersbody = []model.ServerId{
|
||||||
|
{
|
||||||
|
Id: instanceID,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
request.Body = &model.DeleteServersRequestBody{
|
||||||
|
Servers: listServersbody,
|
||||||
|
}
|
||||||
|
response, err := hwclient.DeleteServers(request)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return response.String(), nil
|
||||||
|
}
|
||||||
|
|
|
@ -2,12 +2,8 @@ package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
)
|
)
|
||||||
|
@ -22,24 +18,22 @@ func NewPCMSubmitTask(info *exectsk.SubmitTask) *PCMSubmitTask {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *PCMSubmitTask) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *PCMSubmitTask) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[PCMSubmitTask]("Task")
|
log := logger.WithType[PCMSubmitTask]("Task")
|
||||||
log.Debugf("begin with %v", logger.FormatStruct(t.SubmitTask))
|
log.Debugf("begin with %v", logger.FormatStruct(t.SubmitTask))
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
|
||||||
err := t.do(task.ID(), ctx)
|
//err := t.do(task, ctx)
|
||||||
|
err := error(nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewSubmitTaskStatus("failed", err.Error()))
|
task.SendStatus(exectsk.NewSubmitTaskStatus("failed", err.Error()))
|
||||||
|
} else {
|
||||||
|
task.SendStatus(exectsk.NewSubmitTaskStatus("succeeded", ""))
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
complete(err, CompleteOption{
|
|
||||||
RemovingDelay: time.Minute,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *PCMSubmitTask) do(taskID string, ctx TaskContext) error {
|
func (t *PCMSubmitTask) do(task *Task, ctx TaskContext) error {
|
||||||
log := logger.WithType[PCMSubmitTask]("Task")
|
log := logger.WithType[PCMSubmitTask]("Task")
|
||||||
|
|
||||||
pcmCli, err := schglb.PCMPool.Acquire()
|
pcmCli, err := schglb.PCMPool.Acquire()
|
||||||
|
@ -79,7 +73,7 @@ func (t *PCMSubmitTask) do(taskID string, ctx TaskContext) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if tsResp.TaskStatus != prevStatus {
|
if tsResp.TaskStatus != prevStatus {
|
||||||
ctx.reporter.Report(taskID, exectsk.NewSubmitTaskStatus(tsResp.TaskStatus, ""))
|
task.SendStatus(exectsk.NewSubmitTaskStatus(tsResp.TaskStatus, ""))
|
||||||
}
|
}
|
||||||
|
|
||||||
prevStatus = tsResp.TaskStatus
|
prevStatus = tsResp.TaskStatus
|
||||||
|
|
|
@ -2,10 +2,7 @@ package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
|
pcmsdk "gitlink.org.cn/cloudream/common/sdks/pcm"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
@ -21,24 +18,19 @@ func NewPCMUploadImage(info *exectsk.UploadImage) *PCMUploadImage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *PCMUploadImage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *PCMUploadImage) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[PCMUploadImage]("Task")
|
log := logger.WithType[PCMUploadImage]("Task")
|
||||||
log.Debugf("begin")
|
log.Debugf("begin")
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
|
||||||
err := t.do(task.ID(), ctx)
|
err := t.do(task, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewUploadImageStatus("failed", err.Error(), pcmsdk.ImageID(""), ""))
|
task.SendStatus(exectsk.NewUploadImageStatus("failed", err.Error(), pcmsdk.ImageID(""), ""))
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
complete(err, CompleteOption{
|
|
||||||
RemovingDelay: time.Minute,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *PCMUploadImage) do(taskID string, ctx TaskContext) error {
|
func (t *PCMUploadImage) do(task *Task, ctx TaskContext) error {
|
||||||
pcmCli, err := schglb.PCMPool.Acquire()
|
pcmCli, err := schglb.PCMPool.Acquire()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("new pcm client: %w", err)
|
return fmt.Errorf("new pcm client: %w", err)
|
||||||
|
@ -53,7 +45,7 @@ func (t *PCMUploadImage) do(taskID string, ctx TaskContext) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reporter.Report(taskID, exectsk.NewUploadImageStatus(resp.Result, "", resp.ImageID, resp.Name))
|
task.SendStatus(exectsk.NewUploadImageStatus(resp.Result, "", resp.ImageID, resp.Name))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,12 @@ package task
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/config"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/config"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/task/create_ecs"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/task/create_ecs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -21,22 +22,21 @@ func NewScheduleCreateECS(info *exectsk.ScheduleCreateECS) *ScheduleCreateECS {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *ScheduleCreateECS) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *ScheduleCreateECS) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[ScheduleCreateECS]("Task")
|
log := logger.WithType[ScheduleCreateECS]("Task")
|
||||||
log.Debugf("begin")
|
log.Debugf("begin")
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
|
||||||
err := t.do(task.ID(), ctx)
|
err := t.do(task, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
log.Info("ScheduleCreateECS...")
|
log.Info("ScheduleCreateECS...")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *ScheduleCreateECS) do(taskID string, ctx TaskContext) error {
|
func (t *ScheduleCreateECS) do(task *Task, ctx TaskContext) error {
|
||||||
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("new cloudream storage client: %w", err)
|
return fmt.Errorf("new cloudream storage client: %w", err)
|
||||||
|
@ -52,51 +52,84 @@ func (t *ScheduleCreateECS) do(taskID string, ctx TaskContext) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
println(resp.Name)
|
println(resp.Name)
|
||||||
//factory := create_ecs.GetFactory(config.CloudName)
|
|
||||||
//provider := factory.CreateProvider()
|
|
||||||
//address, err := provider.CreateServer(resp.Name)
|
|
||||||
//if err != nil {
|
|
||||||
// ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("", err.Error()))
|
|
||||||
// return err
|
|
||||||
//}
|
|
||||||
//
|
|
||||||
//ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("http://"+address+":5001", ""))
|
|
||||||
|
|
||||||
CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID
|
CDSRcloneID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneID
|
||||||
CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID
|
CDSRcloneConfigID := schglb.CloudreamStorageConfig.URL + "/object/download?userID=1&objectID=" + schglb.CDSRclone.CDSRcloneConfigID
|
||||||
println("CDSRcloneID: " + CDSRcloneID)
|
println("CDSRcloneID: " + CDSRcloneID)
|
||||||
println("CDSRcloneConfigID: " + CDSRcloneConfigID)
|
println("CDSRcloneConfigID: " + CDSRcloneConfigID)
|
||||||
|
|
||||||
commands := []string{}
|
var commands []string
|
||||||
commandContent := "yum install -y fuse3"
|
commandContent := "yum install -y fuse3"
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/"
|
commandContent = "mkdir -p /opt/rclone/ \n mkdir -p /mnt/cds/"
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n"
|
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone\",\"wb\").write(body);print(\"success\")'\n"
|
||||||
println(commandContent)
|
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n"
|
commandContent = "cd /opt/rclone \n python3 -c 'import requests;response=requests.get(\"" + CDSRcloneConfigID + "\",stream=True);response.raise_for_status();boundary=response.headers.get(\"Content-Type\").split(\"boundary=\")[-1].encode();content=response.content;body=[part.split(b\"\\r\\n\\r\\n\",1)[1].rsplit(b\"\\r\\n--\",1)[0] for part in content.split(b\"--\"+boundary+b\"\\r\\n\") if b\"filename=\" in part][0];open(\"rclone.conf\",\"wb\").write(body);print(\"success\")'\n"
|
||||||
println(commandContent)
|
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
commandContent = "cd /opt/rclone \n chmod +x rclone"
|
commandContent = "cd /opt/rclone \n chmod +x rclone"
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &"
|
commandContent = "cd /opt/rclone \n nohup ./rclone mount cds: /mnt/cds --vfs-cache-mode full --vfs-read-wait 0 --vfs-read-chunk-size 128M --cache-db-purge -vv > rclone.log 2>&1 &"
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
//commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple \n python3 -m pip install --upgrade pip setuptools \n python3 -m pip install transformers --ignore-installed pyyaml \n python3 -m pip install -r requirement.txt \n sh start.sh"
|
|
||||||
commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh"
|
commandContent = "cd /mnt/cds/bkt1/tiny_model/ \n sh execute.sh"
|
||||||
commands = append(commands, commandContent)
|
commands = append(commands, commandContent)
|
||||||
|
|
||||||
|
// 创建云主机
|
||||||
factory := create_ecs.GetFactory(config.CloudName)
|
factory := create_ecs.GetFactory(config.CloudName)
|
||||||
provider := factory.CreateProvider()
|
provider := factory.CreateProvider()
|
||||||
address, err := provider.CreateServer(commands)
|
|
||||||
|
instanceID, err := provider.CreateServer()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus("", err.Error()))
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reporter.Report(taskID, exectsk.NewScheduleCreateECSStatus(address, ""))
|
address, err := provider.RunCommand(commands, instanceID)
|
||||||
|
if err != nil {
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
// 返回执行结果
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus(address, t.ModelID, ""))
|
||||||
|
println("create ECS success, waiting msg...")
|
||||||
|
|
||||||
|
// 监听更新操作
|
||||||
|
for {
|
||||||
|
taskOperate, err := task.taskChan.Chan.Receive()
|
||||||
|
if err != nil {
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
info, ok := taskOperate.(executor.TaskOperateInfo)
|
||||||
|
if !ok {
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, "invalid task operate info"))
|
||||||
|
return fmt.Errorf("invalid task operate info")
|
||||||
|
}
|
||||||
|
|
||||||
|
switch info.Command {
|
||||||
|
case globals.RESTART:
|
||||||
|
var commands []string
|
||||||
|
commandContent := "yum install -y fuse3"
|
||||||
|
commands = append(commands, commandContent)
|
||||||
|
result, err := provider.RunCommand(commands, instanceID)
|
||||||
|
if err != nil {
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus(result, t.ModelID, ""))
|
||||||
|
case globals.STOP:
|
||||||
|
println("STOP")
|
||||||
|
case globals.DESTROY:
|
||||||
|
result, err := provider.DestroyServer(instanceID)
|
||||||
|
if err != nil {
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus("", t.ModelID, err.Error()))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
task.SendStatus(exectsk.NewScheduleCreateECSStatus(result, t.ModelID, ""))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
|
|
@ -2,10 +2,7 @@ package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
@ -21,24 +18,19 @@ func NewStorageCreatePackage(info *exectsk.StorageCreatePackage) *StorageCreateP
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *StorageCreatePackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *StorageCreatePackage) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[StorageCreatePackage]("Task")
|
log := logger.WithType[StorageCreatePackage]("Task")
|
||||||
log.Debugf("begin")
|
log.Debugf("begin")
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
|
||||||
err := t.do(task.ID(), ctx)
|
err := t.do(task, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewStorageCreatePackageStatus("failed", err.Error(), 0))
|
task.SendStatus(exectsk.NewStorageCreatePackageStatus("failed", err.Error(), 0))
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
complete(err, CompleteOption{
|
|
||||||
RemovingDelay: time.Minute,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *StorageCreatePackage) do(taskID string, ctx TaskContext) error {
|
func (t *StorageCreatePackage) do(task *Task, ctx TaskContext) error {
|
||||||
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("new cloudream storage client: %w", err)
|
return fmt.Errorf("new cloudream storage client: %w", err)
|
||||||
|
@ -57,7 +49,7 @@ func (t *StorageCreatePackage) do(taskID string, ctx TaskContext) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO 根据接口result返回情况修改
|
// TODO 根据接口result返回情况修改
|
||||||
ctx.reporter.Report(taskID, exectsk.NewStorageCreatePackageStatus("completed", "", resp.PackageID))
|
task.SendStatus(exectsk.NewStorageCreatePackageStatus("completed", "", resp.PackageID))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,10 +2,7 @@ package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
@ -21,7 +18,7 @@ func NewStorageLoadPackage(info *exectsk.StorageLoadPackage) *StorageLoadPackage
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *StorageLoadPackage) Execute(task *task.Task[TaskContext], ctx TaskContext, complete CompleteFn) {
|
func (t *StorageLoadPackage) Execute(task *Task, ctx TaskContext) {
|
||||||
log := logger.WithType[StorageLoadPackage]("Task")
|
log := logger.WithType[StorageLoadPackage]("Task")
|
||||||
log.Debugf("begin with %v", logger.FormatStruct(t.StorageLoadPackage))
|
log.Debugf("begin with %v", logger.FormatStruct(t.StorageLoadPackage))
|
||||||
defer log.Debugf("end")
|
defer log.Debugf("end")
|
||||||
|
@ -29,15 +26,10 @@ func (t *StorageLoadPackage) Execute(task *task.Task[TaskContext], ctx TaskConte
|
||||||
packagePath, err := t.do(ctx)
|
packagePath, err := t.do(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
//TODO 若任务失败,上报的状态failed字段根据情况修改
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewStorageLoadPackageStatus(err.Error(), ""))
|
task.SendStatus(exectsk.NewStorageLoadPackageStatus(err.Error(), ""))
|
||||||
} else {
|
} else {
|
||||||
ctx.reporter.Report(task.ID(), exectsk.NewStorageLoadPackageStatus("", packagePath))
|
task.SendStatus(exectsk.NewStorageLoadPackageStatus("", packagePath))
|
||||||
}
|
}
|
||||||
ctx.reporter.ReportNow()
|
|
||||||
|
|
||||||
complete(err, CompleteOption{
|
|
||||||
RemovingDelay: time.Minute,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *StorageLoadPackage) do(ctx TaskContext) (string, error) {
|
func (t *StorageLoadPackage) do(ctx TaskContext) (string, error) {
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
package task
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
)
|
||||||
|
|
||||||
|
type StorageMoveObject struct {
|
||||||
|
*exectsk.StorageMoveObject
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewStorageMoveObject(info *exectsk.StorageMoveObject) *StorageMoveObject {
|
||||||
|
return &StorageMoveObject{StorageMoveObject: info}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *StorageMoveObject) Execute(task *Task, ctx TaskContext) {
|
||||||
|
log := logger.WithType[StorageMoveObject]("Task")
|
||||||
|
log.Debugf("begin with %v", logger.FormatStruct(t.StorageMoveObject))
|
||||||
|
defer log.Debugf("end")
|
||||||
|
|
||||||
|
err := t.do()
|
||||||
|
if err != nil {
|
||||||
|
task.SendStatus(exectsk.NewStorageMoveObjectStatus(err.Error()))
|
||||||
|
} else {
|
||||||
|
task.SendStatus(exectsk.NewStorageMoveObjectStatus(""))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *StorageMoveObject) do() error {
|
||||||
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("new cloudream storage client: %w", err)
|
||||||
|
}
|
||||||
|
defer schglb.CloudreamStoragePool.Release(stgCli)
|
||||||
|
|
||||||
|
move, err := stgCli.Object().Move(t.ObjectMove)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("move object: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 判断全部object是否都移动成功
|
||||||
|
if len(move.Successes) != len(t.ObjectMove.Movings) {
|
||||||
|
return fmt.Errorf("move object: %d objects failed", len(t.ObjectMove.Movings)-len(move.Successes))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
Register(NewStorageMoveObject)
|
||||||
|
}
|
|
@ -1,56 +1,80 @@
|
||||||
package task
|
package task
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"reflect"
|
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/task"
|
|
||||||
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
"gitlink.org.cn/cloudream/common/utils/reflect2"
|
||||||
|
"gitlink.org.cn/cloudream/common/utils/sync2"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
||||||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
reporter "gitlink.org.cn/cloudream/scheduler/executor/internal/reporter"
|
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
||||||
|
"reflect"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TaskContext struct {
|
type TaskChan[T any] struct {
|
||||||
reporter *reporter.Reporter
|
Chan sync2.UnboundChannel[T]
|
||||||
}
|
}
|
||||||
|
|
||||||
// 需要在Task结束后主动调用,completing函数将在Manager加锁期间被调用,
|
func NewTaskChan[T any]() *TaskChan[T] {
|
||||||
// 因此适合进行执行结果的设置
|
return &TaskChan[T]{Chan: *sync2.NewUnboundChannel[T]()}
|
||||||
type CompleteFn = task.CompleteFn
|
|
||||||
|
|
||||||
type Manager struct {
|
|
||||||
task.Manager[TaskContext]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type TaskBody = task.TaskBody[TaskContext]
|
type Task struct {
|
||||||
|
id string
|
||||||
|
taskChan TaskChan[any]
|
||||||
|
TaskStatusChan TaskChan[any]
|
||||||
|
}
|
||||||
|
|
||||||
type Task = task.Task[TaskContext]
|
type TaskContext struct{}
|
||||||
|
|
||||||
type CompleteOption = task.CompleteOption
|
func NewTask(id string) *Task {
|
||||||
|
return &Task{
|
||||||
func NewManager(reporter *reporter.Reporter) Manager {
|
taskChan: *NewTaskChan[any](),
|
||||||
return Manager{
|
TaskStatusChan: *NewTaskChan[any](),
|
||||||
Manager: task.NewManager(TaskContext{
|
id: id,
|
||||||
reporter: reporter,
|
//body: body,
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) StartByInfo(info exectsk.TaskInfo) (*Task, error) {
|
type TaskBody interface {
|
||||||
infoType := reflect2.TypeOfValue(info)
|
Execute(task *Task, ctx TaskContext)
|
||||||
|
|
||||||
ctor, ok := taskFromInfoCtors[infoType]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknow info type")
|
|
||||||
}
|
|
||||||
|
|
||||||
return m.StartNew(ctor(info)), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var taskFromInfoCtors map[reflect.Type]func(exectsk.TaskInfo) TaskBody = make(map[reflect.Type]func(exectsk.TaskInfo) task.TaskBody[TaskContext])
|
func (c *Task) SendStatus(status exectsk.TaskStatus) {
|
||||||
|
|
||||||
|
taskStatus := mgrmq.NewExecutorTaskStatus(globals.ExecutorID, c.ID(), status)
|
||||||
|
err := c.TaskStatusChan.Chan.Send(taskStatus)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("send task status error: ", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Task) SendTaskOperate(info executor.TaskOperateInfo) {
|
||||||
|
|
||||||
|
err := c.taskChan.Chan.Send(info)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Task) WaitTaskOperate() *any {
|
||||||
|
|
||||||
|
receive, err := c.taskChan.Chan.Receive()
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(err.Error())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &receive
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Task) ID() string {
|
||||||
|
return t.id
|
||||||
|
}
|
||||||
|
|
||||||
|
var TaskFromInfoCtors map[reflect.Type]func(exectsk.TaskInfo) TaskBody = make(map[reflect.Type]func(exectsk.TaskInfo) TaskBody)
|
||||||
|
|
||||||
func Register[TInfo exectsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) {
|
func Register[TInfo exectsk.TaskInfo, TTaskBody TaskBody](ctor func(info TInfo) TTaskBody) {
|
||||||
taskFromInfoCtors[reflect2.TypeOf[TInfo]()] = func(info exectsk.TaskInfo) TaskBody {
|
TaskFromInfoCtors[reflect2.TypeOf[TInfo]()] = func(info exectsk.TaskInfo) TaskBody {
|
||||||
return ctor(info.(TInfo))
|
return ctor(info.(TInfo))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,11 +8,9 @@ import (
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/config"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/config"
|
||||||
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
myglbs "gitlink.org.cn/cloudream/scheduler/executor/internal/globals"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/http"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/http"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/reporter"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/manager"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/services"
|
"gitlink.org.cn/cloudream/scheduler/executor/internal/services"
|
||||||
"gitlink.org.cn/cloudream/scheduler/executor/internal/task"
|
|
||||||
"os"
|
"os"
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -36,9 +34,9 @@ func main() {
|
||||||
myglbs.Init(config.Cfg().Application.ExecutorID)
|
myglbs.Init(config.Cfg().Application.ExecutorID)
|
||||||
schglb.InitRcloneConfig(config.Cfg().Rclone.CDSRcloneID, config.Cfg().Rclone.CDSRcloneConfigID)
|
schglb.InitRcloneConfig(config.Cfg().Rclone.CDSRcloneID, config.Cfg().Rclone.CDSRcloneConfigID)
|
||||||
|
|
||||||
rpter := reporter.NewReporter(myglbs.ExecutorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec))
|
//rpter := reporter.NewReporter(myglbs.ExecutorID, time.Second*time.Duration(config.Cfg().ReportIntervalSec))
|
||||||
//
|
//
|
||||||
taskMgr := task.NewManager(&rpter)
|
taskMgr := manager.NewManager()
|
||||||
//
|
//
|
||||||
//mqSvr, err := execmq.NewServer(services.NewService(&taskMgr), &config.Cfg().RabbitMQ)
|
//mqSvr, err := execmq.NewServer(services.NewService(&taskMgr), &config.Cfg().RabbitMQ)
|
||||||
//if err != nil {
|
//if err != nil {
|
||||||
|
@ -52,7 +50,7 @@ func main() {
|
||||||
// 启动服务
|
// 启动服务
|
||||||
//go serveMQServer(mqSvr)
|
//go serveMQServer(mqSvr)
|
||||||
|
|
||||||
go serveReporter(&rpter)
|
//go serveReporter(&rpter)
|
||||||
|
|
||||||
svc := services.NewService(&taskMgr)
|
svc := services.NewService(&taskMgr)
|
||||||
server, err := http.NewServer(config.Cfg().Application.Address, svc)
|
server, err := http.NewServer(config.Cfg().Application.Address, svc)
|
||||||
|
@ -82,13 +80,13 @@ func serveMQServer(server *execmq.Server) {
|
||||||
logger.Info("mq server stopped")
|
logger.Info("mq server stopped")
|
||||||
}
|
}
|
||||||
|
|
||||||
func serveReporter(rpt *reporter.Reporter) {
|
//func serveReporter(rpt *reporter.Reporter) {
|
||||||
logger.Info("start serving reporter")
|
// logger.Info("start serving reporter")
|
||||||
|
//
|
||||||
err := rpt.Serve()
|
// err := rpt.Serve()
|
||||||
if err != nil {
|
// if err != nil {
|
||||||
logger.Errorf("rpt stopped with error: %s", err.Error())
|
// logger.Errorf("rpt stopped with error: %s", err.Error())
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
logger.Info("rpt stopped")
|
// logger.Info("rpt stopped")
|
||||||
}
|
//}
|
||||||
|
|
|
@ -2,16 +2,16 @@ package executormgr
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/async"
|
||||||
log "gitlink.org.cn/cloudream/common/pkgs/logger"
|
log "gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
"gitlink.org.cn/cloudream/common/utils/serder"
|
||||||
|
jobTask "gitlink.org.cn/cloudream/scheduler/manager/internal/task"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/utils/sync2"
|
|
||||||
|
|
||||||
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||||||
exemq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
exemq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
||||||
|
@ -20,12 +20,11 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type task struct {
|
type task struct {
|
||||||
statusChan *sync2.Channel[exetsk.TaskStatus]
|
statusChan *async.UnboundChannel[mgrmq.ExecutorTaskStatus]
|
||||||
}
|
}
|
||||||
type ExecutorStatus struct {
|
type ExecutorStatus struct {
|
||||||
executorID schmod.ExecutorID
|
executorID schmod.ExecutorID
|
||||||
tasks map[string]task // key 为 TaskID
|
tasks map[string]task // key 为 TaskID
|
||||||
lastReportTime time.Time
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var ErrWaitReportTimeout = fmt.Errorf("wait report timeout")
|
var ErrWaitReportTimeout = fmt.Errorf("wait report timeout")
|
||||||
|
@ -57,149 +56,139 @@ func NewManager(reportTimeout time.Duration) (*Manager, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) ReceiveExecutorTaskStatus(url string) {
|
func (m *Manager) ReceiveExecutorTaskStatus(url string) (*mgrmq.ExecutorTaskStatus, error) {
|
||||||
|
|
||||||
client, err := ExecutorPool.AcquireByUrl(url)
|
client, err := ExecutorPool.AcquireByUrl(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
return
|
return &mgrmq.ExecutorTaskStatus{}, err
|
||||||
}
|
}
|
||||||
resp, err := client.GetReportInfo()
|
resp, err := client.GetReportInfo()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
return
|
return &mgrmq.ExecutorTaskStatus{}, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
reader := bufio.NewReader(resp.Body)
|
reader := bufio.NewReader(resp.Body)
|
||||||
for {
|
|
||||||
line, err := reader.ReadString('\n')
|
|
||||||
if err != nil && err != io.EOF {
|
|
||||||
log.Error("Error reading from response body:", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if line == "" {
|
|
||||||
continue // Skip empty lines
|
|
||||||
}
|
|
||||||
|
|
||||||
line = strings.TrimPrefix(line, "data: ")
|
line, err := reader.ReadString('\n')
|
||||||
line = strings.TrimSpace(line)
|
if err != nil && err != io.EOF {
|
||||||
if len(line) > 0 {
|
log.Error("Error reading from response body:", err)
|
||||||
var msg mgrmq.ReportExecutorTaskStatus
|
return &mgrmq.ExecutorTaskStatus{}, err
|
||||||
if err := json.Unmarshal([]byte(line), &msg); err != nil {
|
}
|
||||||
fmt.Println("Error unmarshalling JSON:", err)
|
// TODO 第一次获取的值包含执行器所有任务,用于失败重试
|
||||||
|
executorInfo := convertLine(line)
|
||||||
|
// 将第一次的executor放入到池子中
|
||||||
|
exec := &ExecutorStatus{
|
||||||
|
executorID: executorInfo.ExecutorID,
|
||||||
|
tasks: make(map[string]task),
|
||||||
|
}
|
||||||
|
|
||||||
|
m.executors[executorInfo.ExecutorID] = exec
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
line, err = reader.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
if err != io.EOF {
|
||||||
|
log.Error("Error reading from response body:", err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
status := convertLine(line)
|
||||||
|
if status == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Info("Received: %s", msg)
|
|
||||||
m.Report(msg.ExecutorID, msg.TaskStatus)
|
m.Report(*status)
|
||||||
}
|
}
|
||||||
if err == io.EOF {
|
}()
|
||||||
break
|
|
||||||
}
|
return executorInfo, nil
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Manager) Report(execID schmod.ExecutorID, taskStatus []mgrmq.ExecutorTaskStatus) {
|
func convertLine(line string) *mgrmq.ExecutorTaskStatus {
|
||||||
|
if line == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
line = strings.TrimPrefix(line, "data: ")
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if len(line) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
readResp, err := serder.JSONToObjectEx[mgrmq.ExecutorTaskStatus]([]byte(line))
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &readResp
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) Report(status mgrmq.ExecutorTaskStatus) {
|
||||||
m.lock.Lock()
|
m.lock.Lock()
|
||||||
defer m.lock.Unlock()
|
defer m.lock.Unlock()
|
||||||
|
|
||||||
exec, ok := m.executors[execID]
|
exec := m.executors[status.ExecutorID]
|
||||||
if !ok {
|
if exec == nil {
|
||||||
exec = &ExecutorStatus{
|
log.Error("Executor not found: ", status.ExecutorID)
|
||||||
executorID: execID,
|
return
|
||||||
tasks: make(map[string]task),
|
|
||||||
}
|
|
||||||
m.executors[execID] = exec
|
|
||||||
}
|
}
|
||||||
|
// 由于先将task chan放入到池子中再执行的task,所以这里的task必存在
|
||||||
|
tsk := exec.tasks[status.TaskID]
|
||||||
|
|
||||||
exec.lastReportTime = time.Now()
|
// TODO 考虑主动检测channel是否关闭,然后取消task
|
||||||
|
if tsk.statusChan.Send(status) != nil {
|
||||||
|
delete(exec.tasks, status.TaskID)
|
||||||
|
|
||||||
for _, s := range taskStatus {
|
if len(exec.tasks) == 0 {
|
||||||
tsk, ok := exec.tasks[s.TaskID]
|
delete(m.executors, exec.executorID)
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO 考虑主动检测channel是否关闭,然后取消task
|
|
||||||
if tsk.statusChan.Send(s.Status) != nil {
|
|
||||||
delete(exec.tasks, s.TaskID)
|
|
||||||
|
|
||||||
if len(exec.tasks) == 0 {
|
|
||||||
delete(m.executors, execID)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 启动一个Task
|
// 启动一个Task
|
||||||
func (m *Manager) StartTask(info exetsk.TaskInfo, ccInfo schmod.ComputingCenter) *sync2.Channel[exetsk.TaskStatus] {
|
func (m *Manager) StartTask(info exetsk.TaskInfo, ccInfo schmod.ComputingCenter) (*jobTask.JobTask[mgrmq.ExecutorTaskStatus], error) {
|
||||||
m.lock.Lock()
|
m.lock.Lock()
|
||||||
defer m.lock.Unlock()
|
defer m.lock.Unlock()
|
||||||
ch := sync2.NewChannel[exetsk.TaskStatus]()
|
newJobTask := jobTask.NewJobTask[mgrmq.ExecutorTaskStatus]()
|
||||||
|
ch := newJobTask.Chan()
|
||||||
|
|
||||||
client, err := ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL)
|
client, err := ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL)
|
||||||
//resp, err := m.exeCli.StartTask(exemq.NewStartTask(info))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ch.CloseWithError(fmt.Errorf("start task: %w", err))
|
ch.CloseWithError(fmt.Errorf("start task: %w", err))
|
||||||
return ch
|
return newJobTask, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
executorID := schmod.ExecutorID(ccInfo.ExecutorID)
|
||||||
// 检测是否连接过这个Executor,如果第一次连,则发送请求监听上报信息
|
// 检测是否连接过这个Executor,如果第一次连,则发送请求监听上报信息
|
||||||
_, ok := m.executors[schmod.ExecutorID(ccInfo.ExecutorID)]
|
_, ok := m.executors[executorID]
|
||||||
if !ok {
|
if !ok {
|
||||||
go m.ReceiveExecutorTaskStatus(ccInfo.ExecutorURL)
|
_, err = m.ReceiveExecutorTaskStatus(ccInfo.ExecutorURL)
|
||||||
}
|
if err != nil {
|
||||||
|
ch.CloseWithError(fmt.Errorf("start task: %w", err))
|
||||||
resp, err := client.SubmitTask(exemq.NewStartTask(info))
|
return newJobTask, err
|
||||||
if err != nil {
|
|
||||||
ch.CloseWithError(fmt.Errorf("start task: %w", err))
|
|
||||||
return ch
|
|
||||||
}
|
|
||||||
|
|
||||||
exeInfo, ok := m.executors[resp.ExecutorID]
|
|
||||||
if !ok {
|
|
||||||
exeInfo = &ExecutorStatus{
|
|
||||||
executorID: resp.ExecutorID,
|
|
||||||
tasks: make(map[string]task),
|
|
||||||
lastReportTime: time.Now(),
|
|
||||||
}
|
}
|
||||||
m.executors[resp.ExecutorID] = exeInfo
|
|
||||||
}
|
}
|
||||||
|
|
||||||
exeInfo.tasks[resp.TaskID] = task{
|
// 上面已经将executor放入到池子中了,这里的executor必存在
|
||||||
|
exeInfo := m.executors[executorID]
|
||||||
|
exeInfo.tasks[newJobTask.ID()] = task{
|
||||||
statusChan: ch,
|
statusChan: ch,
|
||||||
}
|
}
|
||||||
|
|
||||||
return ch
|
_, err = client.SubmitTask(exemq.NewStartTask(newJobTask.ID(), info))
|
||||||
}
|
if err != nil {
|
||||||
|
ch.CloseWithError(fmt.Errorf("start task: %w", err))
|
||||||
func (m *Manager) Serve() error {
|
return newJobTask, err
|
||||||
InitExecutorPool()
|
|
||||||
|
|
||||||
ticker := time.NewTicker(time.Second)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ticker.C:
|
|
||||||
func() {
|
|
||||||
m.lock.Lock()
|
|
||||||
defer m.lock.Unlock()
|
|
||||||
|
|
||||||
now := time.Now()
|
|
||||||
for exeID, exeInfo := range m.executors {
|
|
||||||
dt := now.Sub(exeInfo.lastReportTime)
|
|
||||||
|
|
||||||
if dt < m.reportTimeout {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tsk := range exeInfo.tasks {
|
|
||||||
tsk.statusChan.CloseWithError(ErrWaitReportTimeout)
|
|
||||||
}
|
|
||||||
|
|
||||||
delete(m.executors, exeID)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return newJobTask, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) Serve() {
|
||||||
|
InitExecutorPool()
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,27 +2,61 @@ package event
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/future"
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/types"
|
||||||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
|
"gitlink.org.cn/cloudream/common/utils/serder"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CreateInstanceFuture = *future.SetValueFuture[CreateInstanceResult]
|
type OperateInstanceFuture = *future.SetValueFuture[OperateInstanceResult]
|
||||||
|
|
||||||
type InstanceCreate struct {
|
type InstanceOperate struct {
|
||||||
DataSet schsdk.JobFileInfo
|
Info InstanceOperateInfo
|
||||||
Result CreateInstanceFuture
|
Result OperateInstanceFuture
|
||||||
}
|
}
|
||||||
|
|
||||||
type CreateInstanceResult struct {
|
type OperateInstanceResult struct {
|
||||||
|
OperateResult string
|
||||||
|
Err error
|
||||||
JobID schsdk.JobID
|
JobID schsdk.JobID
|
||||||
FilesUploadScheme schsdk.JobFilesUploadScheme
|
FilesUploadScheme schsdk.JobFilesUploadScheme
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewInstanceCreate(dataSet schsdk.JobFileInfo, future CreateInstanceFuture) *InstanceCreate {
|
type InstanceOperateInfo interface {
|
||||||
return &InstanceCreate{
|
Instance()
|
||||||
DataSet: dataSet,
|
}
|
||||||
Result: future,
|
|
||||||
|
// InstanceInfoBase is an empty struct whose pointer provides the Instance
// marker method of InstanceOperateInfo; concrete info types embed it.
type InstanceInfoBase struct{}

// Instance is a no-op marker method.
func (*InstanceInfoBase) Instance() {}
|
||||||
|
|
||||||
|
var InstanceOperateInfoTypeUnion = types.NewTypeUnion[InstanceOperateInfo](
|
||||||
|
(*InstanceCreateInfo)(nil),
|
||||||
|
(*InstanceUpdateInfo)(nil),
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = serder.UseTypeUnionInternallyTagged(&InstanceOperateInfoTypeUnion, "type")
|
||||||
|
|
||||||
|
type InstanceCreateInfo struct {
|
||||||
|
serder.Metadata `union:"Create"`
|
||||||
|
InstanceInfoBase
|
||||||
|
DataSet schsdk.JobFileInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
type InstanceUpdateInfo struct {
|
||||||
|
serder.Metadata `union:"Update"`
|
||||||
|
InstanceInfoBase
|
||||||
|
Type string `json:"type"`
|
||||||
|
Info schsdk.UpdateMultiInstanceJobInfo `json:"info"`
|
||||||
|
PackageID cdssdk.PackageID `json:"packageID"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewInstanceOperate(info InstanceOperateInfo, future OperateInstanceFuture) *InstanceOperate {
|
||||||
|
return &InstanceOperate{
|
||||||
|
Info: info,
|
||||||
|
Result: future,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *InstanceCreate) Noop() {
|
func (s *InstanceOperate) Noop() {
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
package event
|
||||||
|
|
||||||
|
import "gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
|
||||||
|
type JobUpdateFuture = *future.SetValueFuture[UpdateResult]
|
||||||
|
|
||||||
|
type Update struct {
|
||||||
|
Command string
|
||||||
|
Result JobUpdateFuture
|
||||||
|
}
|
||||||
|
|
||||||
|
// Noop does nothing.
func (*Update) Noop() {}
|
||||||
|
|
||||||
|
// UpdateResult is the value type of JobUpdateFuture.
type UpdateResult struct {
	// Err holds the error of the update operation, if any.
	Err error
}
|
||||||
|
|
||||||
|
func NewUpdate(command string, jobUpdateFuture JobUpdateFuture) *Update {
|
||||||
|
return &Update{
|
||||||
|
Command: command,
|
||||||
|
Result: jobUpdateFuture,
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,6 +2,7 @@ package event
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
||||||
)
|
)
|
||||||
|
@ -51,3 +52,25 @@ func WaitTypeAnd[T jobmgr.Event](ctx context.Context, set *jobmgr.EventSet, cond
|
||||||
// 断言返回的事件为类型T,并返回该事件和操作成功标志。
|
// 断言返回的事件为类型T,并返回该事件和操作成功标志。
|
||||||
return ret.(T), ok
|
return ret.(T), ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BeginWaitType[T jobmgr.Event](set *jobmgr.EventSet) future.Future1[jobmgr.Event] {
|
||||||
|
// 等待一个满足特定类型和条件的事件。
|
||||||
|
return set.BeginWait(func(evt jobmgr.Event) bool {
|
||||||
|
_, ok := evt.(T)
|
||||||
|
return ok
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BeginWaitTypeAnd[T jobmgr.Event](set *jobmgr.EventSet, cond func(val T) bool) future.Future1[jobmgr.Event] {
|
||||||
|
// 等待一个满足特定类型和条件的事件。
|
||||||
|
return set.BeginWait(func(evt jobmgr.Event) bool {
|
||||||
|
// 尝试将事件断言为特定类型T,并检查断言是否成功。
|
||||||
|
e, ok := evt.(T)
|
||||||
|
if !ok {
|
||||||
|
return false // 如果事件不是期望的类型T,则返回false。
|
||||||
|
}
|
||||||
|
|
||||||
|
// 如果事件是类型T且满足给定条件,则返回true。
|
||||||
|
return cond(e)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package jobmgr
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/future"
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
@ -69,10 +70,11 @@ func (s *EventSet) Wait(ctx context.Context, cond EventWaitCondition) (Event, bo
|
||||||
future: fut,
|
future: fut,
|
||||||
}
|
}
|
||||||
s.waiters = append(s.waiters, waiter)
|
s.waiters = append(s.waiters, waiter)
|
||||||
|
logger.Info("append waiter: %p", &waiter)
|
||||||
|
|
||||||
s.lock.Unlock()
|
s.lock.Unlock()
|
||||||
|
|
||||||
val, err := fut.WaitValue(ctx)
|
val, err := fut.Wait(ctx)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, false
|
return nil, false
|
||||||
|
@ -80,3 +82,26 @@ func (s *EventSet) Wait(ctx context.Context, cond EventWaitCondition) (Event, bo
|
||||||
|
|
||||||
return val, true
|
return val, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *EventSet) BeginWait(cond EventWaitCondition) future.Future1[Event] {
|
||||||
|
s.lock.Lock()
|
||||||
|
|
||||||
|
for i, evt := range s.events {
|
||||||
|
if cond(evt) {
|
||||||
|
s.events = lo2.RemoveAt(s.events, i)
|
||||||
|
s.lock.Unlock()
|
||||||
|
return future.NewReadyValue1(evt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fut := future.NewSetValue[Event]()
|
||||||
|
waiter := EventWaiter{
|
||||||
|
condition: cond,
|
||||||
|
future: fut,
|
||||||
|
}
|
||||||
|
s.waiters = append(s.waiters, waiter)
|
||||||
|
|
||||||
|
s.lock.Unlock()
|
||||||
|
|
||||||
|
return fut
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
package job
|
||||||
|
|
||||||
|
import (
|
||||||
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
|
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||||||
|
)
|
||||||
|
|
||||||
|
type UpdateMultiInstanceJob struct {
|
||||||
|
Info schsdk.UpdateMultiInstanceJobInfo
|
||||||
|
Files jobmod.JobFiles
|
||||||
|
|
||||||
|
//InstanceIDs []schsdk.JobID
|
||||||
|
//UpdateStrategy string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewUpdateMultiInstanceJob(info schsdk.UpdateMultiInstanceJobInfo) *UpdateMultiInstanceJob {
|
||||||
|
return &UpdateMultiInstanceJob{
|
||||||
|
Info: info,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (j *UpdateMultiInstanceJob) GetInfo() schsdk.JobInfo {
|
||||||
|
return &j.Info
|
||||||
|
}
|
||||||
|
|
||||||
|
func (j *UpdateMultiInstanceJob) Dump() jobmod.JobBodyDump {
|
||||||
|
return &jobmod.UpdateMultiInstanceJobDump{
|
||||||
|
Files: j.Files,
|
||||||
|
}
|
||||||
|
}
|
|
@ -128,15 +128,14 @@ func (s *Adjusting) doPackageScheduling(ctx context.Context, rtx jobmgr.JobState
|
||||||
if scheme.Action == jobmod.ActionMove {
|
if scheme.Action == jobmod.ActionMove {
|
||||||
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
||||||
|
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("moving package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
moveStatus := status.(*exectsk.CacheMovePackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
||||||
if moveStatus.Error != "" {
|
if moveStatus.Error != "" {
|
||||||
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -147,15 +146,15 @@ func (s *Adjusting) doPackageScheduling(ctx context.Context, rtx jobmgr.JobState
|
||||||
if scheme.Action == jobmod.ActionLoad {
|
if scheme.Action == jobmod.ActionLoad {
|
||||||
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
||||||
|
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("loading package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
loadStatus := status.(*exectsk.StorageLoadPackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
|
||||||
|
loadStatus := status.Value.Status.(*exectsk.StorageLoadPackageStatus)
|
||||||
if loadStatus.Error != "" {
|
if loadStatus.Error != "" {
|
||||||
return fmt.Errorf("loading package: %s", loadStatus.Error)
|
return fmt.Errorf("loading package: %s", loadStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -175,15 +174,15 @@ func (s *Adjusting) doImageScheduling(ctx context.Context, rtx jobmgr.JobStateRu
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO UserID
|
// TODO UserID
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("moving package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
moveStatus := status.(*exectsk.CacheMovePackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
|
||||||
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
||||||
if moveStatus.Error != "" {
|
if moveStatus.Error != "" {
|
||||||
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -208,15 +207,18 @@ func (s *Adjusting) doImageScheduling(ctx context.Context, rtx jobmgr.JobStateRu
|
||||||
return fmt.Errorf("there must be only 1 object in the package which will be imported")
|
return fmt.Errorf("there must be only 1 object in the package which will be imported")
|
||||||
}
|
}
|
||||||
|
|
||||||
wt2 := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
|
taskStatus2, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
|
||||||
defer wt2.Close()
|
if err != nil {
|
||||||
|
return fmt.Errorf("moving package: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
status2, err := wt2.Receive(ctx)
|
fut2 := taskStatus2.Receive()
|
||||||
|
status2 := <-fut2.Chan()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("uploading image: %w", err)
|
return fmt.Errorf("uploading image: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
uploadStatus := status2.(*exectsk.UploadImageStatus)
|
uploadStatus := status2.Value.Status.(*exectsk.UploadImageStatus)
|
||||||
if uploadStatus.Error != "" {
|
if uploadStatus.Error != "" {
|
||||||
return fmt.Errorf("uploading image: %s", uploadStatus.Error)
|
return fmt.Errorf("uploading image: %s", uploadStatus.Error)
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,11 @@ import (
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor"
|
||||||
|
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/executormgr"
|
||||||
|
jobTask "gitlink.org.cn/cloudream/scheduler/manager/internal/task"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
@ -73,9 +78,6 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
|
||||||
packageID = runningJob.Files.Dataset.PackageID
|
packageID = runningJob.Files.Dataset.PackageID
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
pcmImgInfo, err := rtx.Mgr.DB.PCMImage().GetByImageIDAndCCID(rtx.Mgr.DB.SQLCtx(), jobFiles.Image.ImageID, targetCCID)
|
pcmImgInfo, err := rtx.Mgr.DB.PCMImage().GetByImageIDAndCCID(rtx.Mgr.DB.SQLCtx(), jobFiles.Image.ImageID, targetCCID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("getting pcm image info: %w", err)
|
return fmt.Errorf("getting pcm image info: %w", err)
|
||||||
|
@ -96,37 +98,20 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
|
||||||
|
|
||||||
// TODO 判断是否是模型推理任务,如果是,则进行扩缩容管理
|
// TODO 判断是否是模型推理任务,如果是,则进行扩缩容管理
|
||||||
if modelJobInfo != nil {
|
if modelJobInfo != nil {
|
||||||
//address := nodeExpansion(jobFiles)
|
|
||||||
//
|
|
||||||
//node := schsdk.NodeInfo{
|
|
||||||
// InstanceID: jo.JobID,
|
|
||||||
// Address: address,
|
|
||||||
//}
|
|
||||||
//jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), schsdk.ModelID(modelJobInfo.ModelID), node)
|
|
||||||
|
|
||||||
// 发送扩容任务
|
// 发送扩容任务
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewScheduleCreateECS(
|
ecs := exetsk.NewScheduleCreateECS(
|
||||||
userID,
|
userID,
|
||||||
packageID,
|
packageID,
|
||||||
), ccInfo)
|
schsdk.ModelID(modelJobInfo.ModelID),
|
||||||
|
)
|
||||||
|
task, err := rtx.Mgr.ExecMgr.StartTask(ecs, ccInfo)
|
||||||
|
|
||||||
for {
|
if err != nil {
|
||||||
status, err := wt.Receive(ctx)
|
log.Error(err.Error())
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
|
||||||
}
|
|
||||||
taskStatus := status.(*exetsk.ScheduleCreateECSStatus)
|
|
||||||
if taskStatus.Error != "" {
|
|
||||||
log.Error(taskStatus.Error)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
node := schsdk.NodeInfo{
|
|
||||||
InstanceID: jo.JobID,
|
|
||||||
Address: schsdk.Address(taskStatus.Address),
|
|
||||||
}
|
|
||||||
jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), schsdk.ModelID(modelJobInfo.ModelID), node)
|
|
||||||
log.Infof("node expansion: %v", taskStatus.Address)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return s.listen(rtx, jo, task, ccInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
||||||
|
@ -166,7 +151,7 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
|
||||||
cmd = runtime.Command
|
cmd = runtime.Command
|
||||||
}
|
}
|
||||||
|
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewSubmitTask(
|
task, err := rtx.Mgr.ExecMgr.StartTask(exetsk.NewSubmitTask(
|
||||||
ccInfo.PCMParticipantID,
|
ccInfo.PCMParticipantID,
|
||||||
pcmImgInfo.PCMImageID,
|
pcmImgInfo.PCMImageID,
|
||||||
// TODO 选择资源的算法
|
// TODO 选择资源的算法
|
||||||
|
@ -176,17 +161,16 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
|
||||||
// params, TODO params不应该是kv数组,而应该是字符串数组
|
// params, TODO params不应该是kv数组,而应该是字符串数组
|
||||||
[]schsdk.KVPair{},
|
[]schsdk.KVPair{},
|
||||||
), ccInfo)
|
), ccInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
taskFut := task.Receive()
|
||||||
for {
|
for {
|
||||||
status, err := wt.Receive(ctx)
|
msg := <-taskFut.Chan()
|
||||||
if err != nil {
|
tskStatus := msg.Value.Status.(*exetsk.SubmitTaskStatus)
|
||||||
return err
|
|
||||||
}
|
|
||||||
tskStatus := status.(*exetsk.SubmitTaskStatus)
|
|
||||||
if tskStatus.Error != "" {
|
|
||||||
return fmt.Errorf("submitting task: %s", tskStatus.Error)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tskStatus.Status != s.lastStatus {
|
if tskStatus.Status != s.lastStatus {
|
||||||
log.Infof("task %s -> %s", s.lastStatus, tskStatus.Status)
|
log.Infof("task %s -> %s", s.lastStatus, tskStatus.Status)
|
||||||
|
@ -203,37 +187,63 @@ func (s *NormalJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) e
|
||||||
case pcmsdk.TaskStatusFailed:
|
case pcmsdk.TaskStatusFailed:
|
||||||
return fmt.Errorf("task failed")
|
return fmt.Errorf("task failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
taskFut = task.Receive()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 模拟
|
func (s *NormalJobExecuting) listen(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job, task *jobTask.JobTask[mgrmq.ExecutorTaskStatus], ccInfo schmod.ComputingCenter) error {
|
||||||
var nodesAddress = []string{"120.46.183.86:22", "121.36.5.116:22"}
|
log := logger.WithType[NormalJobExecuting]("State").WithField("TaskID", task.ID())
|
||||||
var count = 0
|
|
||||||
|
|
||||||
func nodeExpansion(files *jobmod.JobFiles) schsdk.Address {
|
waitFut := event.BeginWaitType[*event.Update](rtx.EventSet)
|
||||||
if count >= 2 {
|
taskFut := task.Receive()
|
||||||
logger.Info("There is no available node")
|
|
||||||
return ""
|
for {
|
||||||
|
select {
|
||||||
|
case v1 := <-waitFut.Chan():
|
||||||
|
// 对任务进行更新操作
|
||||||
|
client, err := executormgr.ExecutorPool.AcquireByUrl(ccInfo.ExecutorURL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("getting executor client: %w", err)
|
||||||
|
}
|
||||||
|
evt := v1.Value.(*event.Update)
|
||||||
|
operateResp, err := client.OperateTask(executor.NewTaskOperateInfo(task.ID(), evt.Command))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("operate task: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
evt.Result.SetValue(event.UpdateResult{
|
||||||
|
Err: operateResp.Err,
|
||||||
|
})
|
||||||
|
|
||||||
|
if operateResp.Err != nil {
|
||||||
|
return fmt.Errorf("operate task: %w", operateResp.Err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 持续等待
|
||||||
|
waitFut = event.BeginWaitType[*event.Update](rtx.EventSet)
|
||||||
|
case msg := <-taskFut.Chan():
|
||||||
|
switch v2 := msg.Value.Status.(type) {
|
||||||
|
case *exetsk.ScheduleCreateECSStatus:
|
||||||
|
// 扩容任务,将结果放到池子中
|
||||||
|
node := schsdk.NodeInfo{
|
||||||
|
InstanceID: jo.JobID,
|
||||||
|
Address: schsdk.Address(v2.Address),
|
||||||
|
}
|
||||||
|
|
||||||
|
jobmgr.SetNodeData(schsdk.JobID(jo.JobSetID), v2.ModelID, node)
|
||||||
|
log.Infof("node expansion: %v", v2.Address)
|
||||||
|
case error:
|
||||||
|
fmt.Println("Received error:", v2.Error())
|
||||||
|
default:
|
||||||
|
fmt.Println("Received unexpected type")
|
||||||
|
}
|
||||||
|
|
||||||
|
// 持续接收
|
||||||
|
taskFut = task.Receive()
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
address := nodesAddress[count]
|
|
||||||
count++
|
|
||||||
|
|
||||||
client := utils.GetSSHClient("pcm", "", address)
|
|
||||||
defer client.Close()
|
|
||||||
// 创建SSH会话
|
|
||||||
session, err := client.NewSession()
|
|
||||||
if err != nil {
|
|
||||||
logger.Warn("Failed to create session: %s", err)
|
|
||||||
}
|
|
||||||
defer session.Close()
|
|
||||||
|
|
||||||
// 执行远程命令
|
|
||||||
output, err := session.CombinedOutput("sh /home/pcm/modeltest/http/start.sh")
|
|
||||||
if err != nil {
|
|
||||||
logger.Warn("Failed to run command: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return schsdk.Address(output)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type DataReturnJobExecuting struct {
|
type DataReturnJobExecuting struct {
|
||||||
|
@ -277,20 +287,24 @@ func (s *DataReturnJobExecuting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Jo
|
||||||
}
|
}
|
||||||
|
|
||||||
packageName := utils.MakeResourcePackageName(reJob.TargetJobID)
|
packageName := utils.MakeResourcePackageName(reJob.TargetJobID)
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exetsk.NewStorageCreatePackage(
|
task, err := rtx.Mgr.ExecMgr.StartTask(exetsk.NewStorageCreatePackage(
|
||||||
userID, // TOOD 用户ID
|
userID, // TOOD 用户ID
|
||||||
ccInfo.CDSStorageID,
|
ccInfo.CDSStorageID,
|
||||||
reJob.TargetJobOutputPath,
|
reJob.TargetJobOutputPath,
|
||||||
reJob.Info.BucketID,
|
reJob.Info.BucketID,
|
||||||
packageName,
|
packageName,
|
||||||
), ccInfo)
|
), ccInfo)
|
||||||
defer wt.Close()
|
if err != nil {
|
||||||
|
log.Error(err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
fut := task.Receive()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
tskStatus := status.(*exetsk.StorageCreatePackageStatus)
|
status := <-fut.Chan()
|
||||||
|
tskStatus := status.Value.Status.(*exetsk.StorageCreatePackageStatus)
|
||||||
if tskStatus.Error != "" {
|
if tskStatus.Error != "" {
|
||||||
return fmt.Errorf("creating package: %s", tskStatus.Error)
|
return fmt.Errorf("creating package: %s", tskStatus.Error)
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package state
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||||||
|
@ -11,6 +12,8 @@ import (
|
||||||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
||||||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
|
||||||
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MultiInstanceRunning struct {
|
type MultiInstanceRunning struct {
|
||||||
|
@ -39,67 +42,120 @@ func (s *MultiInstanceRunning) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job)
|
||||||
|
|
||||||
multInstJob := jo.Body.(*job.MultiInstanceJob)
|
multInstJob := jo.Body.(*job.MultiInstanceJob)
|
||||||
|
|
||||||
|
waitFut := event.BeginWaitType[*event.InstanceOperate](rtx.EventSet)
|
||||||
for {
|
for {
|
||||||
// 监听创建实例事件
|
chanValue := <-waitFut.Chan()
|
||||||
ic, ok := event.WaitType[*event.InstanceCreate](ctx, rtx.EventSet)
|
instanceInfo := chanValue.Value.(*event.InstanceOperate)
|
||||||
if !ok {
|
instanceFuture := instanceInfo.Result
|
||||||
logger.Info("MultiInstanceRunning canceled")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
logger.Info("wait a event happened")
|
logger.Info("wait a event happened")
|
||||||
|
waitFut = event.BeginWaitType[*event.InstanceOperate](rtx.EventSet)
|
||||||
|
|
||||||
dataSet := ic.DataSet
|
switch info := instanceInfo.Info.(type) {
|
||||||
//如果是模型扩容任务,直接使用父Job的资源文件
|
case *event.InstanceCreateInfo:
|
||||||
if &multInstJob.Info.ModelJobInfo != nil {
|
createInstance(rtx, info, s.preScheduler, jo, multInstJob, instanceFuture)
|
||||||
dataSet = multInstJob.Info.Files.Dataset
|
case *event.InstanceUpdateInfo:
|
||||||
|
updateInstance(rtx, info, multInstJob, instanceFuture)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 构建InstanceJobInfo
|
|
||||||
infoFiles := schsdk.JobFilesInfo{
|
|
||||||
Dataset: dataSet,
|
|
||||||
Code: multInstJob.Info.Files.Code,
|
|
||||||
Image: multInstJob.Info.Files.Image,
|
|
||||||
}
|
|
||||||
|
|
||||||
newLocalJobID := fmt.Sprintf("%s_%s", multInstJob.Info.LocalJobID, utils.GenerateRandomID())
|
|
||||||
|
|
||||||
instJobInfo := &schsdk.InstanceJobInfo{
|
|
||||||
Type: schsdk.JobTypeInstance,
|
|
||||||
LocalJobID: newLocalJobID,
|
|
||||||
Files: infoFiles,
|
|
||||||
Runtime: multInstJob.Info.Runtime,
|
|
||||||
Resources: multInstJob.Info.Resources,
|
|
||||||
ModelJobInfo: multInstJob.Info.ModelJobInfo,
|
|
||||||
}
|
|
||||||
|
|
||||||
files := jobmod.JobFiles{
|
|
||||||
Code: multInstJob.Files.Code,
|
|
||||||
Image: multInstJob.Files.Image,
|
|
||||||
}
|
|
||||||
|
|
||||||
// 生成预调度方案和文件上传方案
|
|
||||||
jobSchedule, filesUploadScheme, err := s.preScheduler.ScheduleJob(instJobInfo)
|
|
||||||
if err != nil {
|
|
||||||
ic.Result.SetError(err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// 创建实例并运行
|
|
||||||
instanceJob := job.NewInstanceJob(*instJobInfo, files)
|
|
||||||
jobID := rtx.Mgr.AddJob(jo.JobSetID, instanceJob, NewPreSchuduling(*jobSchedule))
|
|
||||||
|
|
||||||
// 在多实例任务中新增这个实例的任务ID
|
|
||||||
multInstJob.SubJobs = append(multInstJob.SubJobs, jobID)
|
|
||||||
|
|
||||||
// 将实例ID和文件上传方案返回
|
|
||||||
ic.Result.SetValue(event.CreateInstanceResult{
|
|
||||||
JobID: jobID,
|
|
||||||
FilesUploadScheme: *filesUploadScheme,
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func updateInstance(rtx jobmgr.JobStateRunContext, updateInfo *event.InstanceUpdateInfo, parentJob *job.MultiInstanceJob, updateInstanceFuture event.OperateInstanceFuture) {
|
||||||
|
|
||||||
|
// 更新策略
|
||||||
|
strategy := updateInfo.Info.UpdateStrategy
|
||||||
|
println("update strategy: " + strategy)
|
||||||
|
|
||||||
|
var failJobs []string
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
for i := 0; i < len(parentJob.SubJobs); i++ {
|
||||||
|
// 发送请求进行任务更新
|
||||||
|
instanceID := parentJob.SubJobs[i]
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
fut := future.NewSetValue[event.UpdateResult]()
|
||||||
|
rtx.Mgr.PostEvent(instanceID, event.NewUpdate("update", fut))
|
||||||
|
_, err := fut.Wait(context.TODO())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(err.Error())
|
||||||
|
failJobs = append(failJobs, string(instanceID))
|
||||||
|
}
|
||||||
|
println()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if len(failJobs) == 0 {
|
||||||
|
updateInstanceFuture.SetValue(event.OperateInstanceResult{
|
||||||
|
Err: nil,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// 返回更新失败的instance
|
||||||
|
result := strings.Join(failJobs, ",")
|
||||||
|
updateInstanceFuture.SetValue(event.OperateInstanceResult{
|
||||||
|
OperateResult: result,
|
||||||
|
Err: fmt.Errorf("error"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func createInstance(rtx jobmgr.JobStateRunContext, info *event.InstanceCreateInfo, preScheduler prescheduler.PreScheduler, jo *jobmgr.Job, multInstJob *job.MultiInstanceJob, future event.OperateInstanceFuture) {
|
||||||
|
dataSet := info.DataSet
|
||||||
|
|
||||||
|
//如果是模型扩容任务,直接使用父Job的资源文件
|
||||||
|
if &multInstJob.Info.ModelJobInfo != nil {
|
||||||
|
dataSet = multInstJob.Info.Files.Dataset
|
||||||
|
}
|
||||||
|
|
||||||
|
// 构建InstanceJobInfo
|
||||||
|
infoFiles := schsdk.JobFilesInfo{
|
||||||
|
Dataset: dataSet,
|
||||||
|
Code: multInstJob.Info.Files.Code,
|
||||||
|
Image: multInstJob.Info.Files.Image,
|
||||||
|
}
|
||||||
|
|
||||||
|
newLocalJobID := fmt.Sprintf("%s_%s", multInstJob.Info.LocalJobID, utils.GenerateRandomID())
|
||||||
|
|
||||||
|
instJobInfo := &schsdk.InstanceJobInfo{
|
||||||
|
Type: schsdk.JobTypeInstance,
|
||||||
|
LocalJobID: newLocalJobID,
|
||||||
|
Files: infoFiles,
|
||||||
|
Runtime: multInstJob.Info.Runtime,
|
||||||
|
Resources: multInstJob.Info.Resources,
|
||||||
|
ModelJobInfo: multInstJob.Info.ModelJobInfo,
|
||||||
|
}
|
||||||
|
|
||||||
|
files := jobmod.JobFiles{
|
||||||
|
Code: multInstJob.Files.Code,
|
||||||
|
Image: multInstJob.Files.Image,
|
||||||
|
}
|
||||||
|
|
||||||
|
// 生成预调度方案和文件上传方案
|
||||||
|
jobSchedule, filesUploadScheme, err := preScheduler.ScheduleJob(instJobInfo)
|
||||||
|
if err != nil {
|
||||||
|
future.SetError(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// 创建实例并运行
|
||||||
|
instanceJob := job.NewInstanceJob(*instJobInfo, files)
|
||||||
|
jobID := rtx.Mgr.AddJob(jo.JobSetID, instanceJob, NewPreSchuduling(*jobSchedule))
|
||||||
|
|
||||||
|
// 在多实例任务中新增这个实例的任务ID
|
||||||
|
multInstJob.SubJobs = append(multInstJob.SubJobs, jobID)
|
||||||
|
|
||||||
|
// 将实例ID和文件上传方案返回
|
||||||
|
future.SetValue(event.OperateInstanceResult{
|
||||||
|
JobID: jobID,
|
||||||
|
FilesUploadScheme: *filesUploadScheme,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (s *MultiInstanceRunning) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump {
|
func (s *MultiInstanceRunning) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump {
|
||||||
return &jobmod.MultiInstCreateRunningDump{}
|
return &jobmod.MultiInstCreateRunningDump{}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,134 @@
|
||||||
|
package state
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||||||
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
||||||
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
||||||
|
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
||||||
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
|
||||||
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
|
||||||
|
)
|
||||||
|
|
||||||
|
type MultiInstanceUpdate struct {
|
||||||
|
originalJob jobmod.JobDump
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMultiInstanceUpdate(originalJob jobmod.JobDump) *MultiInstanceUpdate {
|
||||||
|
return &MultiInstanceUpdate{
|
||||||
|
originalJob: originalJob,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MultiInstanceUpdate) Run(rtx jobmgr.JobStateRunContext, job *jobmgr.Job) {
|
||||||
|
s.do(rtx, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MultiInstanceUpdate) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) error {
|
||||||
|
updateJob := jo.Body.(*job.UpdateMultiInstanceJob)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// 监听取消事件
|
||||||
|
go func() {
|
||||||
|
event.WaitType[*event.Cancel](ctx, rtx.EventSet)
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
|
var pkgID cdssdk.PackageID
|
||||||
|
// 等待回源任务完成
|
||||||
|
if rt, ok := updateJob.Info.Files.Code.(*schsdk.DataReturnJobFileInfo); ok {
|
||||||
|
evt, ok := event.WaitTypeAnd[*event.JobCompleted](ctx, rtx.EventSet, func(val *event.JobCompleted) bool {
|
||||||
|
return val.Job.GetInfo().GetLocalJobID() == rt.DataReturnLocalJobID
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
return jobmgr.ErrJobCancelled
|
||||||
|
}
|
||||||
|
if evt.Err != nil {
|
||||||
|
return fmt.Errorf("depended job %s was failed", evt.Job.JobID)
|
||||||
|
}
|
||||||
|
rtJob, ok := evt.Job.Body.(*job.DataReturnJob)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("job %s is not a DataReturn job(which is %T)", evt.Job.JobID, evt.Job)
|
||||||
|
}
|
||||||
|
pkgID = rtJob.DataReturnPackageID
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取包对象列表
|
||||||
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("new cloudream storage client: %w", err)
|
||||||
|
}
|
||||||
|
defer schglb.CloudreamStoragePool.Release(stgCli)
|
||||||
|
// TODO UserID
|
||||||
|
pkgObjs, err := stgCli.Object().GetPackageObjects(cdssdk.ObjectGetPackageObjects{UserID: 1, PackageID: pkgID})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("getting package objects: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取原始任务信息
|
||||||
|
originalMultiInstanceJobBody := s.originalJob.Body.(*jobmod.MultiInstanceJobDump)
|
||||||
|
originalPackageID := originalMultiInstanceJobBody.Files.Code.PackageID
|
||||||
|
var objArr []cdssdk.MovingObject
|
||||||
|
for _, obj := range pkgObjs.Objects {
|
||||||
|
objArr = append(objArr, cdssdk.MovingObject{
|
||||||
|
ObjectID: obj.ObjectID,
|
||||||
|
PackageID: originalPackageID,
|
||||||
|
Path: obj.Path,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
// TODO UserID
|
||||||
|
objMoveParam := cdssdk.ObjectMove{
|
||||||
|
UserID: 1,
|
||||||
|
Movings: objArr,
|
||||||
|
}
|
||||||
|
|
||||||
|
ccInfo, err := rtx.Mgr.DB.ComputingCenter().GetByID(rtx.Mgr.DB.SQLCtx(), originalMultiInstanceJobBody.TargetCCID)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("getting computing center info: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将增量包合并到原有包中
|
||||||
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageMoveObject(objMoveParam), ccInfo)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("moving package: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
statusFut := taskStatus.Receive()
|
||||||
|
status := <-statusFut.Chan()
|
||||||
|
moveStatus := status.Value.Status.(*exectsk.StorageMoveObjectStatus)
|
||||||
|
if moveStatus.Error != "" {
|
||||||
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 发送事件,更新各个instance
|
||||||
|
updateInfo := event.InstanceUpdateInfo{
|
||||||
|
Info: updateJob.Info,
|
||||||
|
}
|
||||||
|
fut := future.NewSetValue[event.OperateInstanceResult]()
|
||||||
|
rtx.Mgr.PostEvent(s.originalJob.JobID, event.NewInstanceOperate(&updateInfo, fut))
|
||||||
|
|
||||||
|
result, err := fut.Wait(context.TODO())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
println(result.JobID)
|
||||||
|
|
||||||
|
if result.Err != nil {
|
||||||
|
return fmt.Errorf("update instance failed: %s", result.OperateResult)
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info("update instance success!")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MultiInstanceUpdate) Dump(ctx jobmgr.JobStateRunContext, job *jobmgr.Job) jobmod.JobStateDump {
|
||||||
|
return &jobmod.MultiInstanceUpdateDump{}
|
||||||
|
}
|
|
@ -150,15 +150,15 @@ func (s *PreScheduling) doPackageScheduling(ctx context.Context, rtx jobmgr.JobS
|
||||||
if scheme.Action == jobmod.ActionMove {
|
if scheme.Action == jobmod.ActionMove {
|
||||||
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSNodeID)
|
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSNodeID)
|
||||||
|
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("moving package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
moveStatus := status.(*exectsk.CacheMovePackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
|
||||||
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
||||||
if moveStatus.Error != "" {
|
if moveStatus.Error != "" {
|
||||||
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -169,15 +169,15 @@ func (s *PreScheduling) doPackageScheduling(ctx context.Context, rtx jobmgr.JobS
|
||||||
if scheme.Action == jobmod.ActionLoad {
|
if scheme.Action == jobmod.ActionLoad {
|
||||||
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
||||||
|
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("moving package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
loadStatus := status.(*exectsk.StorageLoadPackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
|
||||||
|
loadStatus := status.Value.Status.(*exectsk.StorageLoadPackageStatus)
|
||||||
if loadStatus.Error != "" {
|
if loadStatus.Error != "" {
|
||||||
return fmt.Errorf("moving package: %s", loadStatus.Error)
|
return fmt.Errorf("moving package: %s", loadStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -228,15 +228,15 @@ func (s *PreScheduling) doImageScheduling(ctx context.Context, rtx jobmgr.JobSta
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO UserID
|
// TODO UserID
|
||||||
wt := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSNodeID), s.targetCCInfo)
|
||||||
defer wt.Close()
|
|
||||||
|
|
||||||
status, err := wt.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("moving package: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
moveStatus := status.(*exectsk.CacheMovePackageStatus)
|
fut := taskStatus.Receive()
|
||||||
|
status := <-fut.Chan()
|
||||||
|
|
||||||
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
||||||
if moveStatus.Error != "" {
|
if moveStatus.Error != "" {
|
||||||
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
||||||
}
|
}
|
||||||
|
@ -261,15 +261,15 @@ func (s *PreScheduling) doImageScheduling(ctx context.Context, rtx jobmgr.JobSta
|
||||||
return fmt.Errorf("there must be only 1 object in the package which will be imported")
|
return fmt.Errorf("there must be only 1 object in the package which will be imported")
|
||||||
}
|
}
|
||||||
|
|
||||||
wt2 := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
|
taskStatus2, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdssdk.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
|
||||||
defer wt2.Close()
|
|
||||||
|
|
||||||
status2, err := wt2.Receive(ctx)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("uploading image: %w", err)
|
return fmt.Errorf("moving package: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
uploadStatus := status2.(*exectsk.UploadImageStatus)
|
fut2 := taskStatus2.Receive()
|
||||||
|
status2 := <-fut2.Chan()
|
||||||
|
|
||||||
|
uploadStatus := status2.Value.Status.(*exectsk.UploadImageStatus)
|
||||||
if uploadStatus.Error != "" {
|
if uploadStatus.Error != "" {
|
||||||
return fmt.Errorf("uploading image: %s", uploadStatus.Error)
|
return fmt.Errorf("uploading image: %s", uploadStatus.Error)
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,7 +232,7 @@ func (m *Manager) AddJob(jobSetID schsdk.JobSetID, jobBody JobBody, jobState Job
|
||||||
m.pubLock.Lock()
|
m.pubLock.Lock()
|
||||||
defer m.pubLock.Unlock()
|
defer m.pubLock.Unlock()
|
||||||
|
|
||||||
jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex+1))
|
jobID := schsdk.JobID(fmt.Sprintf("%d", m.jobIDIndex))
|
||||||
m.jobIDIndex += 1
|
m.jobIDIndex += 1
|
||||||
|
|
||||||
job := &mgrJob{
|
job := &mgrJob{
|
||||||
|
|
|
@ -1,11 +1,6 @@
|
||||||
package mq
|
package mq
|
||||||
|
|
||||||
import (
|
//func (svc *Service) ReportExecutorTaskStatus(msg *mgrmq.ReportExecutorTaskStatus) (*mgrmq.ReportExecutorTaskStatusResp, *mq.CodeMessage) {
|
||||||
"gitlink.org.cn/cloudream/common/pkgs/mq"
|
// svc.exeMgr.Report(msg.ExecutorID, msg.TaskStatus)
|
||||||
mgrmq "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/manager"
|
// return mq.ReplyOK(mgrmq.NewReportExecutorTaskStatusResp())
|
||||||
)
|
//}
|
||||||
|
|
||||||
func (svc *Service) ReportExecutorTaskStatus(msg *mgrmq.ReportExecutorTaskStatus) (*mgrmq.ReportExecutorTaskStatusResp, *mq.CodeMessage) {
|
|
||||||
//svc.exeMgr.Report(msg.ExecutorID, msg.TaskStatus)
|
|
||||||
return mq.ReplyOK(mgrmq.NewReportExecutorTaskStatusResp())
|
|
||||||
}
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
|
||||||
for _, jobInfo := range msg.JobSet.Jobs {
|
for _, jobInfo := range msg.JobSet.Jobs {
|
||||||
switch info := jobInfo.(type) {
|
switch info := jobInfo.(type) {
|
||||||
case *schsdk.NormalJobInfo:
|
case *schsdk.NormalJobInfo:
|
||||||
job := job.NewNormalJob(*info)
|
jo := job.NewNormalJob(*info)
|
||||||
|
|
||||||
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -36,31 +36,52 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
|
||||||
}
|
}
|
||||||
|
|
||||||
jobs = append(jobs, jobmgr.SubmittingJob{
|
jobs = append(jobs, jobmgr.SubmittingJob{
|
||||||
Body: job,
|
Body: jo,
|
||||||
InitState: state.NewPreSchuduling(preSch),
|
InitState: state.NewPreSchuduling(preSch),
|
||||||
})
|
})
|
||||||
|
|
||||||
case *schsdk.DataReturnJobInfo:
|
case *schsdk.DataReturnJobInfo:
|
||||||
job := job.NewDataReturnJob(*info)
|
jo := job.NewDataReturnJob(*info)
|
||||||
jobs = append(jobs, jobmgr.SubmittingJob{
|
jobs = append(jobs, jobmgr.SubmittingJob{
|
||||||
Body: job,
|
Body: jo,
|
||||||
InitState: state.NewWaitTargetComplete(),
|
InitState: state.NewWaitTargetComplete(),
|
||||||
})
|
})
|
||||||
|
|
||||||
case *schsdk.MultiInstanceJobInfo:
|
case *schsdk.MultiInstanceJobInfo:
|
||||||
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
preSch, ok := msg.PreScheduleScheme.JobSchemes[info.LocalJobID]
|
||||||
|
|
||||||
job := job.NewMultiInstanceJob(*info, preSch)
|
jo := job.NewMultiInstanceJob(*info, preSch)
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
|
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("pre schedule scheme for job %s is not found", info.LocalJobID))
|
||||||
}
|
}
|
||||||
|
|
||||||
jobs = append(jobs, jobmgr.SubmittingJob{
|
jobs = append(jobs, jobmgr.SubmittingJob{
|
||||||
Body: job,
|
Body: jo,
|
||||||
InitState: state.NewMultiInstanceInit(),
|
InitState: state.NewMultiInstanceInit(),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
case *schsdk.UpdateMultiInstanceJobInfo:
|
||||||
|
modelJob := job.NewUpdateMultiInstanceJob(*info)
|
||||||
|
instanceJobSets := svc.jobMgr.DumpJobSet(modelJob.Info.MultiInstanceJobSetID)
|
||||||
|
if len(instanceJobSets) == 0 {
|
||||||
|
return nil, mq.Failed(errorcode.OperationFailed, fmt.Sprintf("job set %s is not found", modelJob.Info.MultiInstanceJobSetID))
|
||||||
|
}
|
||||||
|
|
||||||
|
// 找到多实例任务本身
|
||||||
|
var multiInstanceJobDump jobmod.JobDump
|
||||||
|
for i := 0; i < len(instanceJobSets); i++ {
|
||||||
|
jobDump := instanceJobSets[i]
|
||||||
|
if _, ok := jobDump.Body.(*jobmod.MultiInstanceJobDump); ok {
|
||||||
|
multiInstanceJobDump = jobDump
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs = append(jobs, jobmgr.SubmittingJob{
|
||||||
|
Body: modelJob,
|
||||||
|
InitState: state.NewMultiInstanceUpdate(multiInstanceJobDump),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,10 +91,13 @@ func (svc *Service) SubmitJobSet(msg *mgrmq.SubmitJobSet) (*mgrmq.SubmitJobSetRe
|
||||||
func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) {
|
func (svc *Service) CreateInstance(instInfo *mgrmq.CreateInstance) (*mgrmq.CreateInstanceResp, *mq.CodeMessage) {
|
||||||
logger.Debugf("start create instance")
|
logger.Debugf("start create instance")
|
||||||
|
|
||||||
fut := future.NewSetValue[event.CreateInstanceResult]()
|
fut := future.NewSetValue[event.OperateInstanceResult]()
|
||||||
svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceCreate(instInfo.DataSet, fut))
|
info := event.InstanceCreateInfo{
|
||||||
|
DataSet: instInfo.DataSet,
|
||||||
|
}
|
||||||
|
svc.jobMgr.PostEvent(instInfo.JobID, event.NewInstanceOperate(&info, fut))
|
||||||
|
|
||||||
result, err := fut.WaitValue(context.TODO())
|
result, err := fut.Wait(context.TODO())
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, mq.Failed(errorcode.OperationFailed, err.Error())
|
return nil, mq.Failed(errorcode.OperationFailed, err.Error())
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
package jobTask
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/async"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/future"
|
||||||
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||||||
|
"math/rand"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
type JobTask[T any] struct {
|
||||||
|
id string
|
||||||
|
taskChan async.UnboundChannel[T]
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewJobTask[T any]() *JobTask[T] {
|
||||||
|
return &JobTask[T]{
|
||||||
|
id: getTaskID(),
|
||||||
|
taskChan: *async.NewUnboundChannel[T](),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTaskID() string {
|
||||||
|
now := time.Now()
|
||||||
|
nano := now.UnixNano()
|
||||||
|
rand.Seed(time.Now().UnixNano())
|
||||||
|
randomNumber := rand.Intn(9000) + 1000 // 生成1000到9999之间的随机数
|
||||||
|
|
||||||
|
taskID := fmt.Sprintf("id_%d_%d", nano, randomNumber)
|
||||||
|
|
||||||
|
return taskID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *JobTask[T]) Receive() future.Future1[T] {
|
||||||
|
|
||||||
|
return c.taskChan.Receive()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *JobTask[T]) Send(info any) {
|
||||||
|
|
||||||
|
logger.Info("send http")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *JobTask[T]) Chan() *async.UnboundChannel[T] {
|
||||||
|
return &c.taskChan
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *JobTask[T]) ID() string {
|
||||||
|
return c.id
|
||||||
|
}
|
|
@ -76,7 +76,8 @@ func main() {
|
||||||
// 启动服务
|
// 启动服务
|
||||||
go serveJobManager(jobMgr)
|
go serveJobManager(jobMgr)
|
||||||
|
|
||||||
go serveExecutorManager(exeMgr)
|
//go serveExecutorManager(exeMgr)
|
||||||
|
go exeMgr.Serve()
|
||||||
|
|
||||||
go serveAdvisorManager(advMgr)
|
go serveAdvisorManager(advMgr)
|
||||||
|
|
||||||
|
@ -108,17 +109,6 @@ func serveMQServer(server *mgrmq.Server) {
|
||||||
logger.Info("mq server stopped")
|
logger.Info("mq server stopped")
|
||||||
}
|
}
|
||||||
|
|
||||||
func serveExecutorManager(mgr *executormgr.Manager) {
|
|
||||||
logger.Info("start serving executor manager")
|
|
||||||
|
|
||||||
err := mgr.Serve()
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("executor manager stopped with error: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.Info("executor manager stopped")
|
|
||||||
}
|
|
||||||
|
|
||||||
func serveAdvisorManager(mgr *advisormgr.Manager) {
|
func serveAdvisorManager(mgr *advisormgr.Manager) {
|
||||||
logger.Info("start serving advisor manager")
|
logger.Info("start serving advisor manager")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue