pcm-coordinator/internal/scheduler/service/hpc/slurm.go

157 lines
4.3 KiB
Go

package hpcservice
import (
"context"
"fmt"
"github.com/go-resty/resty/v2"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/restyclient"
"net/http"
)
// ParticipantHpc is a client for one HPC participant's job API.
//
// NewHpc fills in participantId, platform, host and the embedded client;
// userName and accessToken are not set by any code in this file.
type ParticipantHpc struct {
	// participantId is the identifier passed to NewHpc.
	participantId int64
	// platform is the platform name passed to NewHpc; host is the base
	// address that the job API paths are appended to.
	platform, host string
	// userName and accessToken are declared here but not read or written
	// elsewhere in this file.
	userName, accessToken string

	// RestyClient is the embedded HTTP helper created by NewHpc via
	// restyclient.InitClient; GetTaskLogs sends its request through it.
	*restyclient.RestyClient
}
// Paths of the HPC job API. The {clusterId} and {jobId} placeholders are
// substituted through resty path parameters by the methods that use them.
const (
	// SubmitTaskUrl is the path a new job is POSTed to.
	SubmitTaskUrl = "/api/v1/jobs"

	// JobDetailUrl is the path a single job's detail is read from.
	JobDetailUrl = "/api/v1/jobs/detail/{clusterId}/{jobId}"

	// JobLogUrl is the path a job's logs are read from.
	JobLogUrl = "/api/v1/jobs/logs/{clusterId}/{jobId}"

	// CancelTaskUrl is the path used to cancel a job.
	CancelTaskUrl = "/api/v1/jobs/cancel/{clusterId}/{jobId}"
)
// NewHpc builds a ParticipantHpc for the participant identified by id.
//
// host is stored as the base address for the job API and is also handed to
// restyclient.InitClient (with an empty second argument) to create the
// embedded client. platform is stored as given.
func NewHpc(host string, id int64, platform string) *ParticipantHpc {
	hpc := new(ParticipantHpc)
	hpc.participantId = id
	hpc.platform = platform
	hpc.host = host
	hpc.RestyClient = restyclient.InitClient(host, "")
	return hpc
}
// GetTask fetches the detail of job taskId on cluster clusterId from the HPC
// service and converts it into a collector.Task.
//
// The request is a GET to c.host + JobDetailUrl with the clusterId/jobId path
// parameters filled in; the trace ID taken from ctx is sent in the "traceId"
// header. ctx is used only to derive that trace ID: the HTTP request itself
// is not bound to ctx's cancellation or deadline.
//
// Start and End are formatted with constants.Layout and stay empty when the
// corresponding timestamp is zero. A StatusText that matches none of the known
// values yields the status "undefined".
//
// An error is returned when the request fails or when the service answers
// with an HTTP error status (>= 400); in that case no Task is returned.
func (c *ParticipantHpc) GetTask(ctx context.Context, taskId string, clusterId string) (*collector.Task, error) {
	reqUrl := c.host + JobDetailUrl
	hpcResp := &collector.HpcJobDetailResp{}

	// NOTE(review): a new resty client is created on every call; consider
	// reusing a shared client so connections can be pooled.
	httpResp, err := resty.New().R().
		SetHeaders(map[string]string{
			"Content-Type": "application/json",
			"traceId":      result.TraceIDFromContext(ctx),
		}).
		SetPathParams(map[string]string{
			"clusterId": clusterId,
			"jobId":     taskId,
		}).
		SetResult(hpcResp). // hpcResp is already a pointer
		Get(reqUrl)
	if err != nil {
		return nil, err
	}
	// On an HTTP error status the body is not decoded into hpcResp, so the
	// fields below would all be zero values; report the failure instead of
	// returning an empty task with status "undefined".
	if httpResp.IsError() {
		return nil, fmt.Errorf("get hpc job detail: unexpected status %s", httpResp.Status())
	}

	var resp collector.Task
	resp.Id = hpcResp.Data.ID
	if !hpcResp.Data.StartTime.IsZero() {
		resp.Start = hpcResp.Data.StartTime.Format(constants.Layout)
	}
	if !hpcResp.Data.EndTime.IsZero() {
		resp.End = hpcResp.Data.EndTime.Format(constants.Layout)
	}

	// Map the service's status text onto the coordinator's status constants.
	switch hpcResp.Data.StatusText {
	case "COMPLETED":
		resp.Status = constants.Completed
	case "FAILED", "CREATED_FAILED":
		resp.Status = constants.Failed
	case "RUNNING":
		resp.Status = constants.Running
	case "STOPPED":
		resp.Status = constants.Stopped
	case "PENDING":
		resp.Status = constants.Pending
	case "WAITING":
		resp.Status = constants.Waiting
	case "CANCELLED":
		resp.Status = constants.Cancelled
	default:
		resp.Status = "undefined"
	}
	return &resp, nil
}
// SubmitTask posts req as the JSON body to c.host + SubmitTaskUrl and returns
// the decoded response.
//
// The trace ID taken from ctx is sent in the "traceId" header and ctx is used
// for logging; the HTTP request itself is not bound to ctx's cancellation.
//
// An error is returned when the request fails, when the service answers with
// an HTTP error status (>= 400), or when the decoded response's Code is not
// http.StatusOK, in which case the error text is the response's Msg.
func (c *ParticipantHpc) SubmitTask(ctx context.Context, req types.SubmitHpcTaskReq) (*types.CommitHpcTaskResp, error) {
	reqUrl := c.host + SubmitTaskUrl
	logx.WithContext(ctx).Infof("提交任务到超算集群, url: %s, req: %+v", reqUrl, req)

	resp := types.CommitHpcTaskResp{}
	// NOTE(review): a new resty client is created on every call; consider
	// reusing a shared client so connections can be pooled.
	httpResp, err := resty.New().R().
		SetHeaders(map[string]string{
			"Content-Type": "application/json",
			"traceId":      result.TraceIDFromContext(ctx),
		}).
		SetBody(req).
		SetResult(&resp).
		Post(reqUrl)
	if err != nil {
		return nil, err
	}
	// On an HTTP error status resp is left at its zero value, which would
	// otherwise surface below as an error with an empty message.
	if httpResp.IsError() {
		return nil, fmt.Errorf("submit hpc task: unexpected status %s", httpResp.Status())
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return nil, fmt.Errorf("%s", resp.Msg)
	}
	return &resp, nil
}
// CancelTask asks the HPC service to cancel job jobId on cluster clusterId by
// sending a DELETE to c.host + CancelTaskUrl with the path parameters filled
// in.
//
// The trace ID taken from ctx is sent in the "traceId" header and ctx is used
// for logging; the HTTP request itself is not bound to ctx's cancellation.
//
// An error is returned when the request fails, when the service answers with
// an HTTP error status (>= 400), or when the decoded response's Code is not
// http.StatusOK, in which case the error text is the response's Msg.
func (c *ParticipantHpc) CancelTask(ctx context.Context, jobId string, clusterId string) error {
	reqUrl := c.host + CancelTaskUrl
	logx.WithContext(ctx).Infof("取消超算集群任务, url: %s, jobId: %s", reqUrl, jobId)

	resp := types.CommonResp{}
	// NOTE(review): a new resty client is created on every call; consider
	// reusing a shared client so connections can be pooled.
	httpResp, err := resty.New().R().
		SetHeaders(map[string]string{
			"Content-Type": "application/json",
			"traceId":      result.TraceIDFromContext(ctx),
		}).
		SetPathParams(map[string]string{
			"clusterId": clusterId,
			"jobId":     jobId,
		}).
		SetResult(&resp).
		Delete(reqUrl)
	if err != nil {
		return err
	}
	// On an HTTP error status resp is left at its zero value, which would
	// otherwise surface below as an error with an empty message.
	if httpResp.IsError() {
		return fmt.Errorf("cancel hpc task: unexpected status %s", httpResp.Status())
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return fmt.Errorf("%s", resp.Msg)
	}
	return nil
}
// GetTaskLogs retrieves the logs of job jobId on cluster clusterId.
//
// The request goes through the embedded client's Request method as a GET to
// JobLogUrl with the clusterId/jobId path parameters filled in and the trace
// ID from ctx in the "traceId" header. JobLogUrl is passed without the host
// prefix; presumably the embedded client resolves it against the host given
// to NewHpc — confirm against restyclient.
//
// On success the decoded types.CommonResp is returned by value. An error is
// returned when jobId is empty, when Request fails, or when the response's
// Code is not http.StatusOK, in which case the error text is the response's
// Msg.
func (c *ParticipantHpc) GetTaskLogs(ctx context.Context, jobId string, clusterId string) (interface{}, error) {
	logx.WithContext(ctx).Infof("获取超算集群任务日志, url: %s, jobId: %s", JobLogUrl, jobId)
	if jobId == "" {
		return nil, fmt.Errorf("jobId is empty")
	}

	resp := types.CommonResp{}
	_, err := c.Request(JobLogUrl, http.MethodGet, func(req *resty.Request) {
		req.SetHeaders(map[string]string{
			"Content-Type": "application/json",
			"traceId":      result.TraceIDFromContext(ctx),
		}).SetPathParams(map[string]string{
			"clusterId": clusterId,
			"jobId":     jobId,
		}).SetResult(&resp)
	})
	if err != nil {
		return nil, err
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return nil, fmt.Errorf("%s", resp.Msg)
	}
	return resp, nil
}