added memberJobs and nodeResources rpc functions

This commit is contained in:
tzwang 2024-01-31 22:20:30 +08:00
parent 0fb7a72e0f
commit 34aa5fed00
12 changed files with 1178 additions and 244 deletions

View File

@ -53,8 +53,13 @@ GetResourceSpec : "/sothisai/api/tasks/resources"
# 文件
GetFileList : "/openapi/v2/file/list"
#用户资源
# 容器
GetNodeResources: "/ai/openapi/v2/instance-service/resources"
# 用户资源
GetUserInfo : "/ac/openapi/v2/user"
GetGroupMembers: "/ac/openapi/v2/groupmembers" #查询成员信息
GetMemberJobs: "/ac/openapi/v2/clusters/{clusterId}/groups/{groupId}/clusterUserNames/{clusterUserName}/jobs"
#链路追踪
#Telemetry:

File diff suppressed because it is too large Load Diff

View File

@ -49,6 +49,8 @@ const (
HpcAC_GetResourceSpec_FullMethodName = "/hpcAC.hpcAC/GetResourceSpec"
HpcAC_GetFileList_FullMethodName = "/hpcAC.hpcAC/GetFileList"
HpcAC_GetUserInfo_FullMethodName = "/hpcAC.hpcAC/GetUserInfo"
HpcAC_GetMemberJobs_FullMethodName = "/hpcAC.hpcAC/GetMemberJobs"
HpcAC_GetNodeResources_FullMethodName = "/hpcAC.hpcAC/GetNodeResources"
)
// HpcACClient is the client API for HpcAC service.
@ -105,6 +107,9 @@ type HpcACClient interface {
GetFileList(ctx context.Context, in *GetFileListReq, opts ...grpc.CallOption) (*GetFileListResp, error)
//用户资源
GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts ...grpc.CallOption) (*GetUserInfoResp, error)
GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error)
//获取节点资源限额
GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error)
}
type hpcACClient struct {
@ -385,6 +390,24 @@ func (c *hpcACClient) GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts
return out, nil
}
// GetMemberJobs performs the unary HpcAC GetMemberJobs call over the client
// connection and returns the decoded response, or nil and the invoke error.
func (c *hpcACClient) GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error) {
	resp := &GetMemberJobsResp{}
	if invokeErr := c.cc.Invoke(ctx, HpcAC_GetMemberJobs_FullMethodName, in, resp, opts...); invokeErr != nil {
		return nil, invokeErr
	}
	return resp, nil
}
// GetNodeResources performs the unary HpcAC GetNodeResources call over the
// client connection and returns the decoded response, or nil and the invoke
// error.
func (c *hpcACClient) GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error) {
	resp := &GetNodeResourcesResp{}
	if invokeErr := c.cc.Invoke(ctx, HpcAC_GetNodeResources_FullMethodName, in, resp, opts...); invokeErr != nil {
		return nil, invokeErr
	}
	return resp, nil
}
// HpcACServer is the server API for HpcAC service.
// All implementations must embed UnimplementedHpcACServer
// for forward compatibility
@ -439,6 +462,9 @@ type HpcACServer interface {
GetFileList(context.Context, *GetFileListReq) (*GetFileListResp, error)
//用户资源
GetUserInfo(context.Context, *GetUserInfoReq) (*GetUserInfoResp, error)
GetMemberJobs(context.Context, *GetMemberJobsReq) (*GetMemberJobsResp, error)
//获取节点资源限额
GetNodeResources(context.Context, *GetNodeResourcesReq) (*GetNodeResourcesResp, error)
mustEmbedUnimplementedHpcACServer()
}
@ -536,6 +562,12 @@ func (UnimplementedHpcACServer) GetFileList(context.Context, *GetFileListReq) (*
func (UnimplementedHpcACServer) GetUserInfo(context.Context, *GetUserInfoReq) (*GetUserInfoResp, error) {
return nil, status.Errorf(codes.Unimplemented, "method GetUserInfo not implemented")
}
// GetMemberJobs is the fallback implementation; it always reports
// codes.Unimplemented.
func (UnimplementedHpcACServer) GetMemberJobs(context.Context, *GetMemberJobsReq) (*GetMemberJobsResp, error) {
	// The message has no format verbs, so Error yields the same status as Errorf.
	return nil, status.Error(codes.Unimplemented, "method GetMemberJobs not implemented")
}
// GetNodeResources is the fallback implementation; it always reports
// codes.Unimplemented.
func (UnimplementedHpcACServer) GetNodeResources(context.Context, *GetNodeResourcesReq) (*GetNodeResourcesResp, error) {
	// The message has no format verbs, so Error yields the same status as Errorf.
	return nil, status.Error(codes.Unimplemented, "method GetNodeResources not implemented")
}
func (UnimplementedHpcACServer) mustEmbedUnimplementedHpcACServer() {}
// UnsafeHpcACServer may be embedded to opt out of forward compatibility for this service.
@ -1089,6 +1121,42 @@ func _HpcAC_GetUserInfo_Handler(srv interface{}, ctx context.Context, dec func(i
return interceptor(ctx, in, info, handler)
}
// _HpcAC_GetMemberJobs_Handler decodes a GetMemberJobsReq and dispatches it to
// the HpcACServer implementation, routing through the unary interceptor when
// one is supplied.
func _HpcAC_GetMemberJobs_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	msg := &GetMemberJobsReq{}
	if decodeErr := dec(msg); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Wrap the server call so the interceptor decides when (and whether) to run it.
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(HpcACServer).GetMemberJobs(ctx, req.(*GetMemberJobsReq))
		}
		meta := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: HpcAC_GetMemberJobs_FullMethodName,
		}
		return interceptor(ctx, msg, meta, invoke)
	}
	return srv.(HpcACServer).GetMemberJobs(ctx, msg)
}
// _HpcAC_GetNodeResources_Handler decodes a GetNodeResourcesReq and dispatches
// it to the HpcACServer implementation, routing through the unary interceptor
// when one is supplied.
func _HpcAC_GetNodeResources_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	msg := &GetNodeResourcesReq{}
	if decodeErr := dec(msg); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Wrap the server call so the interceptor decides when (and whether) to run it.
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(HpcACServer).GetNodeResources(ctx, req.(*GetNodeResourcesReq))
		}
		meta := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: HpcAC_GetNodeResources_FullMethodName,
		}
		return interceptor(ctx, msg, meta, invoke)
	}
	return srv.(HpcACServer).GetNodeResources(ctx, msg)
}
// HpcAC_ServiceDesc is the grpc.ServiceDesc for HpcAC service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
@ -1216,6 +1284,14 @@ var HpcAC_ServiceDesc = grpc.ServiceDesc{
MethodName: "GetUserInfo",
Handler: _HpcAC_GetUserInfo_Handler,
},
{
MethodName: "GetMemberJobs",
Handler: _HpcAC_GetMemberJobs_Handler,
},
{
MethodName: "GetNodeResources",
Handler: _HpcAC_GetNodeResources_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "hpcAC.proto",

View File

@ -42,6 +42,12 @@ type (
GetImageListAiReq = hpcAC.GetImageListAiReq
GetImageListAiResp = hpcAC.GetImageListAiResp
GetJobDetailResp = hpcAC.GetJobDetailResp
GetMemberJobsData = hpcAC.GetMemberJobsData
GetMemberJobsReq = hpcAC.GetMemberJobsReq
GetMemberJobsResp = hpcAC.GetMemberJobsResp
GetNodeResourcesData = hpcAC.GetNodeResourcesData
GetNodeResourcesReq = hpcAC.GetNodeResourcesReq
GetNodeResourcesResp = hpcAC.GetNodeResourcesResp
GetPytorchTaskReq = hpcAC.GetPytorchTaskReq
GetPytorchTaskResp = hpcAC.GetPytorchTaskResp
GetPytorchTaskRespParams = hpcAC.GetPytorchTaskRespParams
@ -155,6 +161,9 @@ type (
GetFileList(ctx context.Context, in *GetFileListReq, opts ...grpc.CallOption) (*GetFileListResp, error)
// 用户资源
GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts ...grpc.CallOption) (*GetUserInfoResp, error)
GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error)
// 获取节点资源限额
GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error)
}
defaultHpcAC struct {
@ -337,3 +346,14 @@ func (m *defaultHpcAC) GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts
client := hpcAC.NewHpcACClient(m.cli.Conn())
return client.GetUserInfo(ctx, in, opts...)
}
// GetMemberJobs forwards the request to the HpcAC gRPC client built on the
// wrapper's underlying connection.
func (m *defaultHpcAC) GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error) {
	return hpcAC.NewHpcACClient(m.cli.Conn()).GetMemberJobs(ctx, in, opts...)
}
// GetNodeResources gets the node resource quota by forwarding the request to
// the HpcAC gRPC client built on the wrapper's underlying connection.
func (m *defaultHpcAC) GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error) {
	return hpcAC.NewHpcACClient(m.cli.Conn()).GetNodeResources(ctx, in, opts...)
}

View File

@ -13,10 +13,12 @@ var (
// AuthService bundles the loaded configuration with the token, identifiers and
// URL prefixes that NewAuthService resolves from it.
type AuthService struct {
// C is the configuration loaded from the config file.
C *config.Config
Token string // regional user authentication token
// ClusterId is the cluster id returned together with Token by getTokenAndClusterId.
ClusterId string
// AiCenterUrlPrefix is the value returned by getAiCenterUrl.
AiCenterUrlPrefix string
// HpcCenterUrlPrefix is the value returned by getHpcCenterUrl.
HpcCenterUrlPrefix string
// EFileUrlPrefix is the value returned by getEFileUrlPrefix.
EFileUrlPrefix string
// JobManagerId is the value returned by getJobManagerId.
JobManagerId int
// GroupId is the value returned by getGroupId ("" when the lookup fails).
GroupId string
}
func NewAuthService() *AuthService {
@ -24,19 +26,21 @@ func NewAuthService() *AuthService {
configFile := flag.String("c", "etc/hpcac.yaml", "the config file")
conf.MustLoad(*configFile, &cfg)
flag.Parse()
token := getToken(&cfg)
token, clusterId := getTokenAndClusterId(&cfg)
return &AuthService{
C: &cfg,
Token: token,
ClusterId: clusterId,
AiCenterUrlPrefix: getAiCenterUrl(&cfg, token),
HpcCenterUrlPrefix: getHpcCenterUrl(&cfg, token),
EFileUrlPrefix: getEFileUrlPrefix(&cfg, token),
JobManagerId: getJobManagerId(&cfg, token),
GroupId: getGroupId(&cfg, token),
}
}
func (a *AuthService) reGet() {
a.Token = getToken(a.C)
a.Token, a.ClusterId = getTokenAndClusterId(a.C)
}
func (a *AuthService) valid() bool {
@ -78,7 +82,7 @@ func EFileUrlPrefix() string {
return as.EFileUrlPrefix
}
func getToken(cfg *config.Config) string {
func getTokenAndClusterId(cfg *config.Config) (string, string) {
authUrl := cfg.AiConf.BaseUrlAi + cfg.AuthUrl
var tr TokenResp
req := GetRestyRequest(3)
@ -90,16 +94,16 @@ func getToken(cfg *config.Config) string {
Post(authUrl)
if err != nil || tr.Code != "0" {
return ""
return "", ""
}
for _, datum := range tr.Data {
if datum.ClusterId != "0" {
return datum.Token
return datum.Token, datum.ClusterId
}
}
return ""
return "", ""
}
func getAiCenterUrl(cfg *config.Config, token string) string {
@ -183,3 +187,24 @@ func getJobManagerId(cfg *config.Config, token string) int {
}
return 0
}
// getGroupId queries the group-members endpoint (cfg.AcBaseUrl +
// cfg.UserConf.GetGroupMembers) with the given token and returns the GroupId
// of the first entry in the response.
//
// It returns "" when the request fails, when the response code is not "0", or
// when the response carries no group entries.
func getGroupId(cfg *config.Config, token string) string {
	endpoint := cfg.AcBaseUrl + cfg.UserConf.GetGroupMembers

	var gm GroupMembers
	_, err := GetRestyRequest(3).
		SetHeader("token", token).
		SetResult(&gm).
		Get(endpoint)
	if err != nil || gm.Code != "0" {
		return ""
	}

	// Todo multiple groups filtering
	// Until filtering exists, only the first group is considered. An explicit
	// index replaces the former range loop that returned on its first iteration.
	if len(gm.Data) == 0 {
		return ""
	}
	return gm.Data[0].GroupId
}

View File

@ -155,3 +155,21 @@ type TokenState struct {
Code string `json:"code"`
Data string `json:"data"`
}
// GroupMembers is the JSON body decoded from the group-members endpoint
// (getGroupId uses it as the resty result target).
type GroupMembers struct {
// Code is the response status; getGroupId treats any value other than "0" as failure.
Code string `json:"code"`
Msg string `json:"msg"`
// Data lists the group entries; getGroupId currently reads only the first one.
Data []struct {
AccountId string `json:"accountId"`
AccountName string `json:"accountName"`
GroupDisplayName string `json:"groupDisplayName"`
GroupId string `json:"groupId"`
GroupRole string `json:"groupRole"`
// GroupUsersInfos presumably lists the users belonging to the group — TODO confirm against the API docs.
GroupUsersInfos []struct {
UserId string `json:"userId"`
UserName string `json:"userName"`
ClusterId string `json:"clusterId"`
ClusterUserName string `json:"clusterUserName"`
} `json:"groupUsersInfos"`
} `json:"data"`
}

View File

@ -47,5 +47,12 @@ type FileConf struct {
// 曙光用户资源
type UserConf struct {
GetUserInfo string
GetUserInfo string
GetGroupMembers string
GetMemberJobs string
}
// ContainerConf holds the Sugon container API settings.
type ContainerConf struct {
// GetNodeResources is presumably the URL path of the node-resources endpoint
// (the YAML config defines a GetNodeResources path) — TODO confirm the mapping.
GetNodeResources string
}

View File

@ -139,9 +139,12 @@ func (l *GetGeneralInfoLogic) GetGeneralInfo(in *hpcAC.ResourceReq) (*hpcAC.GiRe
//获取节点资源限额
resourceUrl := ai_prefix_url + cpConf.AiResourceUrl
resourceGroups := []string{
resourceGroupResp.Data.Cpu[0],
resourceGroupResp.Data.Dcu[0],
resourceGroups := []string{}
if len(resourceGroupResp.Data.Dcu) != 0 {
resourceGroups = append(resourceGroups, resourceGroupResp.Data.Dcu[0])
}
if len(resourceGroupResp.Data.Cpu) != 0 {
resourceGroups = append(resourceGroups, resourceGroupResp.Data.Cpu[0])
}
var memorySize int32

View File

@ -0,0 +1,30 @@
package logic
import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-ac/internal/svc"
"github.com/zeromicro/go-zero/core/logx"
)
// GetMemberJobsLogic carries the per-request state for the GetMemberJobs RPC:
// the request context, the shared service context and a logger.
type GetMemberJobsLogic struct {
ctx context.Context
svcCtx *svc.ServiceContext
// Logger is embedded so logging methods can be called on the logic value directly.
logx.Logger
}
// NewGetMemberJobsLogic builds a GetMemberJobsLogic bound to ctx and svcCtx,
// with a logger derived from ctx.
func NewGetMemberJobsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetMemberJobsLogic {
	l := new(GetMemberJobsLogic)
	l.ctx = ctx
	l.svcCtx = svcCtx
	l.Logger = logx.WithContext(ctx)
	return l
}
// GetMemberJobs is still a placeholder: it ignores the request and returns an
// empty response with a nil error.
func (l *GetMemberJobsLogic) GetMemberJobs(in *hpcAC.GetMemberJobsReq) (*hpcAC.GetMemberJobsResp, error) {
	// todo: add your logic here and delete this line
	resp := new(hpcAC.GetMemberJobsResp)
	return resp, nil
}

View File

@ -0,0 +1,31 @@
package logic
import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-ac/internal/svc"
"github.com/zeromicro/go-zero/core/logx"
)
// GetNodeResourcesLogic carries the per-request state for the GetNodeResources
// RPC: the request context, the shared service context and a logger.
type GetNodeResourcesLogic struct {
ctx context.Context
svcCtx *svc.ServiceContext
// Logger is embedded so logging methods can be called on the logic value directly.
logx.Logger
}
// NewGetNodeResourcesLogic builds a GetNodeResourcesLogic bound to ctx and
// svcCtx, with a logger derived from ctx.
func NewGetNodeResourcesLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetNodeResourcesLogic {
	l := new(GetNodeResourcesLogic)
	l.ctx = ctx
	l.svcCtx = svcCtx
	l.Logger = logx.WithContext(ctx)
	return l
}
// GetNodeResources is meant to get the node resource quota. It is still a
// placeholder: it ignores the request and returns an empty response with a nil
// error.
func (l *GetNodeResourcesLogic) GetNodeResources(in *hpcAC.GetNodeResourcesReq) (*hpcAC.GetNodeResourcesResp, error) {
	// todo: add your logic here and delete this line
	resp := new(hpcAC.GetNodeResourcesResp)
	return resp, nil
}

View File

@ -191,3 +191,14 @@ func (s *HpcACServer) GetUserInfo(ctx context.Context, in *hpcAC.GetUserInfoReq)
l := logic.NewGetUserInfoLogic(ctx, s.svcCtx)
return l.GetUserInfo(in)
}
// GetMemberJobs handles the GetMemberJobs RPC by delegating to a freshly
// constructed GetMemberJobsLogic.
func (s *HpcACServer) GetMemberJobs(ctx context.Context, in *hpcAC.GetMemberJobsReq) (*hpcAC.GetMemberJobsResp, error) {
	return logic.NewGetMemberJobsLogic(ctx, s.svcCtx).GetMemberJobs(in)
}
// GetNodeResources gets the node resource quota by delegating to a freshly
// constructed GetNodeResourcesLogic.
func (s *HpcACServer) GetNodeResources(ctx context.Context, in *hpcAC.GetNodeResourcesReq) (*hpcAC.GetNodeResourcesResp, error) {
	return logic.NewGetNodeResourcesLogic(ctx, s.svcCtx).GetNodeResources(in)
}

View File

@ -893,6 +893,33 @@ message Children{
}
/******************曙光文件接口 End*************************/
/******************曙光容器 Start*************************/
// GetNodeResourcesReq is the request message of the GetNodeResources RPC.
message GetNodeResourcesReq{
// Presumably the accelerator kind to query — TODO confirm accepted values.
string acceleratorType = 1;
// Presumably the resource group to query — TODO confirm.
string resourceGroup = 2;
}
// GetNodeResourcesResp is the response message of the GetNodeResources RPC.
message GetNodeResourcesResp{
// Status code as a string; presumably "0" means success — TODO confirm.
string code = 1;
// Message accompanying the status code.
string msg = 2;
// Node resource payload.
GetNodeResourcesData data = 3;
}
// GetNodeResourcesData is the payload carried in GetNodeResourcesResp.data.
// NOTE(review): field meanings and units are not visible in this file; the
// names suggest per-node resource limits — confirm against the upstream API.
message GetNodeResourcesData{
string id = 1;
int64 cpuNumber = 2;
int64 mluLimits = 3;
int64 dcuLimits = 4;
int64 nvLimits = 5;
int64 gpuNumber = 6;
// Unit not specified here — TODO confirm.
int64 memorySize = 7;
string resourceGroup = 8;
string userName = 9;
int64 nodeNumber = 10;
// Format not specified here — TODO confirm.
string maxTime = 11;
}
/******************曙光容器 End*************************/
/******************用户资源 Start*************************/
message GetUserInfoReq{
}
@ -915,6 +942,31 @@ message GetUserInfoData{
string accountStatus = 9;
string accountBalance = 10;
}
// GetMemberJobsReq is the request message of the GetMemberJobs RPC.
// NOTE(review): clusterId and groupId are int64 here, while AuthService keeps
// ClusterId and GroupId as strings — confirm the intended type.
message GetMemberJobsReq{
int64 clusterId = 1;
int64 groupId = 2;
string clusterUserName = 3;
}
// GetMemberJobsResp is the response message of the GetMemberJobs RPC.
message GetMemberJobsResp{
// Status code as a string; presumably "0" means success — TODO confirm.
string code = 1;
// Message accompanying the status code.
string msg = 2;
// One entry per job.
repeated GetMemberJobsData data = 3;
}
// GetMemberJobsData describes a single job entry in GetMemberJobsResp.data.
// NOTE(review): the exact meaning of the *Req / *Used counters is not visible
// in this file; presumably requested vs. used amounts — confirm against the
// upstream API.
message GetMemberJobsData{
string id = 1;
string name = 2;
string status = 3;
int64 nodeNumReq = 4;
int64 cpuCore = 5;
int64 procNumReq = 6;
int64 gpuNum = 7;
int64 gpuNumReq = 8;
int64 dcuNumUsed = 9;
int64 dcuNumReq = 10;
}
/******************用户资源 End*************************/
// HPC Services for AC
@ -992,5 +1044,10 @@ service hpcAC {
//
rpc GetUserInfo(GetUserInfoReq) returns (GetUserInfoResp);
rpc GetMemberJobs(GetMemberJobsReq) returns (GetMemberJobsResp); //
//
rpc GetNodeResources(GetNodeResourcesReq) returns (GetNodeResourcesResp);
}