240 lines
7.1 KiB
Go
240 lines
7.1 KiB
Go
package state
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
|
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
|
cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
|
|
"gitlink.org.cn/cloudream/common/sdks/storage/cdsapi"
|
|
schglb "gitlink.org.cn/cloudream/scheduler/common/globals"
|
|
schmod "gitlink.org.cn/cloudream/scheduler/common/models"
|
|
jobmod "gitlink.org.cn/cloudream/scheduler/common/models/job"
|
|
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
|
"gitlink.org.cn/cloudream/scheduler/common/utils"
|
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr"
|
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/event"
|
|
"gitlink.org.cn/cloudream/scheduler/manager/internal/jobmgr/job"
|
|
)
|
|
|
|
type Adjusting struct {
|
|
scheme jobmod.JobScheduleScheme
|
|
targetCCInfo schmod.ComputingCenter
|
|
}
|
|
|
|
func NewAdjusting(scheme jobmod.JobScheduleScheme) *Adjusting {
|
|
return &Adjusting{
|
|
scheme: scheme,
|
|
}
|
|
}
|
|
|
|
func (s *Adjusting) Run(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) {
|
|
err := s.do(rtx, jo)
|
|
if err != nil {
|
|
rtx.Mgr.ChangeState(jo, FailureComplete(err))
|
|
} else {
|
|
rtx.Mgr.ChangeState(jo, NewNormalJobReadyToExecute())
|
|
}
|
|
}
|
|
|
|
func (s *Adjusting) Dump(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) jobmod.JobStateDump {
|
|
return &jobmod.AdjustingDump{
|
|
Scheme: s.scheme,
|
|
}
|
|
}
|
|
|
|
func (s *Adjusting) do(rtx jobmgr.JobStateRunContext, jo *jobmgr.Job) error {
|
|
userID := cdssdk.UserID(1)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
// 监听取消事件
|
|
go func() {
|
|
event.WaitType[*event.Cancel](ctx, rtx.EventSet)
|
|
cancel()
|
|
}()
|
|
|
|
ccInfo, err := rtx.Mgr.DB.ComputingCenter().GetByID(rtx.Mgr.DB.SQLCtx(), s.scheme.TargetCCID)
|
|
if err != nil {
|
|
return fmt.Errorf("getting computing center info: %w", err)
|
|
}
|
|
s.targetCCInfo = ccInfo
|
|
|
|
logger.WithField("JobID", jo.JobID).Infof("job is scheduled to %v(%v)", ccInfo.Name, ccInfo.CCID)
|
|
|
|
// 已经确定最终执行的目标计算中心,则可以生成结果输出路径了
|
|
// TODO UserID
|
|
outputPath := utils.MakeJobOutputPath(userID, jo.JobID)
|
|
|
|
var jobFilesInfo schsdk.JobFilesInfo
|
|
var jobFiles *jobmod.JobFiles
|
|
|
|
switch runningJob := jo.Body.(type) {
|
|
case *job.NormalJob:
|
|
jobFilesInfo = runningJob.Info.Files
|
|
jobFiles = &runningJob.Files
|
|
runningJob.TargetCCID = s.scheme.TargetCCID
|
|
runningJob.OutputPath = outputPath
|
|
case *job.MultiInstanceJob:
|
|
jobFilesInfo = runningJob.Info.Files
|
|
jobFiles = &runningJob.Files
|
|
runningJob.TargetCCID = s.scheme.TargetCCID
|
|
case *job.InstanceJob:
|
|
jobFilesInfo = runningJob.Info.Files
|
|
jobFiles = &runningJob.Files
|
|
runningJob.TargetCCID = s.scheme.TargetCCID
|
|
runningJob.OutputPath = outputPath
|
|
}
|
|
|
|
wg := sync.WaitGroup{}
|
|
wg.Add(3)
|
|
|
|
var e1, e2, e3 error
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
e1 = s.doPackageScheduling(ctx, rtx, jobFilesInfo.Dataset, &jobFiles.Dataset, &s.scheme.Dataset)
|
|
if e1 != nil {
|
|
cancel()
|
|
}
|
|
}()
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
e2 = s.doPackageScheduling(ctx, rtx, jobFilesInfo.Code, &jobFiles.Code, &s.scheme.Code)
|
|
if e2 != nil {
|
|
cancel()
|
|
}
|
|
}()
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
e3 = s.doImageScheduling(ctx, rtx, s.scheme.TargetCCID, jobFilesInfo.Image, &jobFiles.Image, &s.scheme.Image)
|
|
if e3 != nil {
|
|
cancel()
|
|
}
|
|
}()
|
|
|
|
wg.Wait()
|
|
|
|
return errors.Join(e1, e2, e3)
|
|
}
|
|
|
|
func (s *Adjusting) doPackageScheduling(ctx context.Context, rtx jobmgr.JobStateRunContext, fileInfo schsdk.JobFileInfo, file *jobmod.PackageJobFile, scheme *jobmod.FileScheduleScheme) error {
|
|
if scheme.Action == jobmod.ActionMove {
|
|
logger.Debugf("begin move pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
|
|
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
|
if err != nil {
|
|
return fmt.Errorf("moving package: %w", err)
|
|
}
|
|
|
|
fut := taskStatus.Receive()
|
|
status := <-fut.Chan()
|
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
|
if moveStatus.Error != "" {
|
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
if scheme.Action == jobmod.ActionLoad {
|
|
logger.Debugf("begin load pacakge %v to %v", file.PackageID, s.targetCCInfo.CDSStorageID)
|
|
|
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewStorageLoadPackage(1, file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
|
if err != nil {
|
|
return fmt.Errorf("moving package: %w", err)
|
|
}
|
|
|
|
fut := taskStatus.Receive()
|
|
status := <-fut.Chan()
|
|
|
|
loadStatus := status.Value.Status.(*exectsk.StorageLoadPackageStatus)
|
|
if loadStatus.Error != "" {
|
|
return fmt.Errorf("loading package: %s", loadStatus.Error)
|
|
}
|
|
|
|
// file.PackagePath = loadStatus.PackagePath TODO 路径谁来产生的问题
|
|
|
|
return nil
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *Adjusting) doImageScheduling(ctx context.Context, rtx jobmgr.JobStateRunContext, targetCCID schsdk.CCID, fileInfo schsdk.JobFileInfo, file *jobmod.ImageJobFile, scheme *jobmod.FileScheduleScheme) error {
|
|
if scheme.Action == jobmod.ActionImportImage {
|
|
// TODO 镜像文件位置需要重新设计
|
|
return fmt.Errorf("not implemented yet")
|
|
|
|
if file.PackageID == nil {
|
|
return fmt.Errorf("image %v has no associated package, which cannot be uploaded to %v", file.ImageID, s.targetCCInfo.CCID)
|
|
}
|
|
|
|
// TODO UserID
|
|
taskStatus, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewCacheMovePackage(1, *file.PackageID, s.targetCCInfo.CDSStorageID), s.targetCCInfo)
|
|
if err != nil {
|
|
return fmt.Errorf("moving package: %w", err)
|
|
}
|
|
|
|
fut := taskStatus.Receive()
|
|
status := <-fut.Chan()
|
|
|
|
moveStatus := status.Value.Status.(*exectsk.CacheMovePackageStatus)
|
|
if moveStatus.Error != "" {
|
|
return fmt.Errorf("moving package: %s", moveStatus.Error)
|
|
}
|
|
|
|
stgCli, err := schglb.CloudreamStoragePool.Acquire()
|
|
if err != nil {
|
|
return fmt.Errorf("new cloudream storage client: %w", err)
|
|
}
|
|
defer schglb.CloudreamStoragePool.Release(stgCli)
|
|
|
|
// TODO UserID
|
|
pkgObjs, err := stgCli.Object().GetPackageObjects(cdsapi.ObjectGetPackageObjects{UserID: 1, PackageID: *file.PackageID})
|
|
if err != nil {
|
|
return fmt.Errorf("getting package objects: %w", err)
|
|
}
|
|
|
|
if len(pkgObjs.Objects) == 0 {
|
|
return fmt.Errorf("no object in the package which will be imported")
|
|
}
|
|
|
|
if len(pkgObjs.Objects) > 1 {
|
|
return fmt.Errorf("there must be only 1 object in the package which will be imported")
|
|
}
|
|
|
|
// taskStatus2, err := rtx.Mgr.ExecMgr.StartTask(exectsk.NewUploadImage(s.targetCCInfo.PCMParticipantID, cdsapi.MakeIPFSFilePath(pkgObjs.Objects[0].FileHash)), s.targetCCInfo)
|
|
// if err != nil {
|
|
// return fmt.Errorf("moving package: %w", err)
|
|
// }
|
|
|
|
// fut2 := taskStatus2.Receive()
|
|
// status2 := <-fut2.Chan()
|
|
// if err != nil {
|
|
// return fmt.Errorf("uploading image: %w", err)
|
|
// }
|
|
|
|
// uploadStatus := status2.Value.Status.(*exectsk.UploadImageStatus)
|
|
// if uploadStatus.Error != "" {
|
|
// return fmt.Errorf("uploading image: %s", uploadStatus.Error)
|
|
// }
|
|
|
|
// // TODO 镜像名称
|
|
// err = rtx.Mgr.DB.PCMImage().Create(rtx.Mgr.DB.SQLCtx(), file.ImageID, targetCCID, uploadStatus.PCMImageID, uploadStatus.Name, time.Now())
|
|
// if err != nil {
|
|
// return fmt.Errorf("creating image info: %w", err)
|
|
// }
|
|
|
|
return nil
|
|
}
|
|
|
|
return nil
|
|
}
|