// Forked from JointCloud/pcm-coordinator (104 lines, 3.3 KiB, Go).

package hpc
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"github.com/zeromicro/go-zero/core/logx"
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
|
)
|
|
|
|
type CancelJobLogic struct {
|
|
logx.Logger
|
|
ctx context.Context
|
|
svcCtx *svc.ServiceContext
|
|
hpcService *service.HpcService
|
|
}
|
|
|
|
// TaskHPCResult receives one row of the task / task_hpc join that CancelJob
// runs to locate the job to cancel.
type TaskHPCResult struct {
	ID        uint   `gorm:"column:id"`         // corresponds to t.id
	JobID     string `gorm:"column:job_id"`     // corresponds to hpc.job_id
	AdapterId string `gorm:"column:adapter_id"` // corresponds to hpc.adapter_id
	ClusterId string `gorm:"column:cluster_id"` // corresponds to hpc.cluster_id
}
|
|
|
|
func NewCancelJobLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CancelJobLogic {
|
|
cache := make(map[string]interface{}, 10)
|
|
hpcService, err := service.NewHpcService(&svcCtx.Config, svcCtx.Scheduler.HpcStorages, cache)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return &CancelJobLogic{
|
|
Logger: logx.WithContext(ctx),
|
|
ctx: ctx,
|
|
svcCtx: svcCtx,
|
|
hpcService: hpcService,
|
|
}
|
|
}
|
|
|
|
func (l *CancelJobLogic) CancelJob(req *types.CancelJobReq) error {
|
|
//var clusterInfo *types.ClusterInfo
|
|
//tx := l.svcCtx.DbEngin.Raw("select * from t_cluster where id = ?", req.ClusterId).Scan(&clusterInfo)
|
|
//if tx.Error != nil {
|
|
// return tx.Error
|
|
//}
|
|
//// 查询p端调用地址
|
|
//var adapterAddress string
|
|
//l.svcCtx.DbEngin.Raw("SELECT server FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterAddress)
|
|
//var jobResp slurm.GetJobResp
|
|
//httpClient := resty.New().R()
|
|
//_, err := httpClient.SetHeader("Content-Type", "application/json").
|
|
// SetQueryParams(map[string]string{
|
|
// "jobId": req.JobId,
|
|
// "server": clusterInfo.Server,
|
|
// "version": clusterInfo.Version,
|
|
// "token": clusterInfo.Token,
|
|
// "username": clusterInfo.Username,
|
|
// }).
|
|
// SetResult(&jobResp).
|
|
// Delete(adapterAddress + "/api/v1/job/cancel")
|
|
//if err != nil {
|
|
// return err
|
|
//}
|
|
//if len(jobResp.Errors) != 0 {
|
|
// return errors.Errorf(jobResp.Errors[0].Description)
|
|
//}
|
|
//return nil
|
|
var hpcR TaskHPCResult
|
|
tx := l.svcCtx.DbEngin.Raw(
|
|
"SELECT t.id, hpc.job_id ,hpc.adapter_id, hpc.cluster_id FROM task t "+
|
|
"INNER JOIN task_hpc hpc ON t.id = hpc.task_id "+
|
|
"WHERE adapter_type_dict = 2 AND t.id = ?",
|
|
req.TaskId,
|
|
).Scan(&hpcR).Error
|
|
if tx != nil {
|
|
return fmt.Errorf("数据库查询失败: %v", tx.Error)
|
|
}
|
|
if hpcR.ID == 0 || hpcR.JobID == "" {
|
|
return fmt.Errorf("作业不存在")
|
|
}
|
|
var adapterInfo types.AdapterInfo
|
|
l.svcCtx.DbEngin.Raw("SELECT * FROM `t_adapter` where id = ?", hpcR.AdapterId).Scan(&adapterInfo)
|
|
if adapterInfo.Id == "" {
|
|
return fmt.Errorf("adapter not found")
|
|
}
|
|
// 取消作业
|
|
err := l.hpcService.HpcExecutorAdapterMap[adapterInfo.Id].CancelTask(l.ctx, hpcR.JobID, hpcR.ClusterId)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// 更新数据库状态
|
|
tx = l.svcCtx.DbEngin.Model(&types.Task{}).Where("id = ?", hpcR.ID).Update("status", "Canceled").Error
|
|
if tx != nil {
|
|
return fmt.Errorf("数据库更新失败: %v", tx.Error)
|
|
}
|
|
// 更新数据库状态
|
|
tx = l.svcCtx.DbEngin.Model(&models.TaskHpc{}).Where("task_id = ?", hpcR.ID).Update("status", "Canceled").Error
|
|
if tx != nil {
|
|
return fmt.Errorf("数据库更新失败: %v", tx.Error)
|
|
}
|
|
return nil
|
|
}
|