forked from JointCloud/pcm-ac
added memberJobs and nodeResources rpc functions
This commit is contained in:
parent
0fb7a72e0f
commit
34aa5fed00
|
@ -53,8 +53,13 @@ GetResourceSpec : "/sothisai/api/tasks/resources"
|
|||
# 文件
|
||||
GetFileList : "/openapi/v2/file/list"
|
||||
|
||||
#用户资源
|
||||
# 容器
|
||||
GetNodeResources: "/ai/openapi/v2/instance-service/resources"
|
||||
|
||||
# 用户资源
|
||||
GetUserInfo : "/ac/openapi/v2/user"
|
||||
GetGroupMembers: "/ac/openapi/v2/groupmembers" #查询成员信息
|
||||
GetMemberJobs: "/ac/openapi/v2/clusters/{clusterId}/groups/{groupId}/clusterUserNames/{clusterUserName}/jobs"
|
||||
|
||||
#链路追踪
|
||||
#Telemetry:
|
||||
|
|
1117
hpcAC/hpcAC.pb.go
1117
hpcAC/hpcAC.pb.go
File diff suppressed because it is too large
Load Diff
|
@ -49,6 +49,8 @@ const (
|
|||
HpcAC_GetResourceSpec_FullMethodName = "/hpcAC.hpcAC/GetResourceSpec"
|
||||
HpcAC_GetFileList_FullMethodName = "/hpcAC.hpcAC/GetFileList"
|
||||
HpcAC_GetUserInfo_FullMethodName = "/hpcAC.hpcAC/GetUserInfo"
|
||||
HpcAC_GetMemberJobs_FullMethodName = "/hpcAC.hpcAC/GetMemberJobs"
|
||||
HpcAC_GetNodeResources_FullMethodName = "/hpcAC.hpcAC/GetNodeResources"
|
||||
)
|
||||
|
||||
// HpcACClient is the client API for HpcAC service.
|
||||
|
@ -105,6 +107,9 @@ type HpcACClient interface {
|
|||
GetFileList(ctx context.Context, in *GetFileListReq, opts ...grpc.CallOption) (*GetFileListResp, error)
|
||||
//用户资源
|
||||
GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts ...grpc.CallOption) (*GetUserInfoResp, error)
|
||||
GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error)
|
||||
//获取节点资源限额
|
||||
GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error)
|
||||
}
|
||||
|
||||
type hpcACClient struct {
|
||||
|
@ -385,6 +390,24 @@ func (c *hpcACClient) GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts
|
|||
return out, nil
|
||||
}
|
||||
|
||||
func (c *hpcACClient) GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error) {
|
||||
out := new(GetMemberJobsResp)
|
||||
err := c.cc.Invoke(ctx, HpcAC_GetMemberJobs_FullMethodName, in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *hpcACClient) GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error) {
|
||||
out := new(GetNodeResourcesResp)
|
||||
err := c.cc.Invoke(ctx, HpcAC_GetNodeResources_FullMethodName, in, out, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// HpcACServer is the server API for HpcAC service.
|
||||
// All implementations must embed UnimplementedHpcACServer
|
||||
// for forward compatibility
|
||||
|
@ -439,6 +462,9 @@ type HpcACServer interface {
|
|||
GetFileList(context.Context, *GetFileListReq) (*GetFileListResp, error)
|
||||
//用户资源
|
||||
GetUserInfo(context.Context, *GetUserInfoReq) (*GetUserInfoResp, error)
|
||||
GetMemberJobs(context.Context, *GetMemberJobsReq) (*GetMemberJobsResp, error)
|
||||
//获取节点资源限额
|
||||
GetNodeResources(context.Context, *GetNodeResourcesReq) (*GetNodeResourcesResp, error)
|
||||
mustEmbedUnimplementedHpcACServer()
|
||||
}
|
||||
|
||||
|
@ -536,6 +562,12 @@ func (UnimplementedHpcACServer) GetFileList(context.Context, *GetFileListReq) (*
|
|||
func (UnimplementedHpcACServer) GetUserInfo(context.Context, *GetUserInfoReq) (*GetUserInfoResp, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GetUserInfo not implemented")
|
||||
}
|
||||
func (UnimplementedHpcACServer) GetMemberJobs(context.Context, *GetMemberJobsReq) (*GetMemberJobsResp, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GetMemberJobs not implemented")
|
||||
}
|
||||
func (UnimplementedHpcACServer) GetNodeResources(context.Context, *GetNodeResourcesReq) (*GetNodeResourcesResp, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GetNodeResources not implemented")
|
||||
}
|
||||
func (UnimplementedHpcACServer) mustEmbedUnimplementedHpcACServer() {}
|
||||
|
||||
// UnsafeHpcACServer may be embedded to opt out of forward compatibility for this service.
|
||||
|
@ -1089,6 +1121,42 @@ func _HpcAC_GetUserInfo_Handler(srv interface{}, ctx context.Context, dec func(i
|
|||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _HpcAC_GetMemberJobs_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(GetMemberJobsReq)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(HpcACServer).GetMemberJobs(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: HpcAC_GetMemberJobs_FullMethodName,
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(HpcACServer).GetMemberJobs(ctx, req.(*GetMemberJobsReq))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _HpcAC_GetNodeResources_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(GetNodeResourcesReq)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(HpcACServer).GetNodeResources(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: HpcAC_GetNodeResources_FullMethodName,
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(HpcACServer).GetNodeResources(ctx, req.(*GetNodeResourcesReq))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
// HpcAC_ServiceDesc is the grpc.ServiceDesc for HpcAC service.
|
||||
// It's only intended for direct use with grpc.RegisterService,
|
||||
// and not to be introspected or modified (even as a copy)
|
||||
|
@ -1216,6 +1284,14 @@ var HpcAC_ServiceDesc = grpc.ServiceDesc{
|
|||
MethodName: "GetUserInfo",
|
||||
Handler: _HpcAC_GetUserInfo_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "GetMemberJobs",
|
||||
Handler: _HpcAC_GetMemberJobs_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "GetNodeResources",
|
||||
Handler: _HpcAC_GetNodeResources_Handler,
|
||||
},
|
||||
},
|
||||
Streams: []grpc.StreamDesc{},
|
||||
Metadata: "hpcAC.proto",
|
||||
|
|
|
@ -42,6 +42,12 @@ type (
|
|||
GetImageListAiReq = hpcAC.GetImageListAiReq
|
||||
GetImageListAiResp = hpcAC.GetImageListAiResp
|
||||
GetJobDetailResp = hpcAC.GetJobDetailResp
|
||||
GetMemberJobsData = hpcAC.GetMemberJobsData
|
||||
GetMemberJobsReq = hpcAC.GetMemberJobsReq
|
||||
GetMemberJobsResp = hpcAC.GetMemberJobsResp
|
||||
GetNodeResourcesData = hpcAC.GetNodeResourcesData
|
||||
GetNodeResourcesReq = hpcAC.GetNodeResourcesReq
|
||||
GetNodeResourcesResp = hpcAC.GetNodeResourcesResp
|
||||
GetPytorchTaskReq = hpcAC.GetPytorchTaskReq
|
||||
GetPytorchTaskResp = hpcAC.GetPytorchTaskResp
|
||||
GetPytorchTaskRespParams = hpcAC.GetPytorchTaskRespParams
|
||||
|
@ -155,6 +161,9 @@ type (
|
|||
GetFileList(ctx context.Context, in *GetFileListReq, opts ...grpc.CallOption) (*GetFileListResp, error)
|
||||
// 用户资源
|
||||
GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts ...grpc.CallOption) (*GetUserInfoResp, error)
|
||||
GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error)
|
||||
// 获取节点资源限额
|
||||
GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error)
|
||||
}
|
||||
|
||||
defaultHpcAC struct {
|
||||
|
@ -337,3 +346,14 @@ func (m *defaultHpcAC) GetUserInfo(ctx context.Context, in *GetUserInfoReq, opts
|
|||
client := hpcAC.NewHpcACClient(m.cli.Conn())
|
||||
return client.GetUserInfo(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (m *defaultHpcAC) GetMemberJobs(ctx context.Context, in *GetMemberJobsReq, opts ...grpc.CallOption) (*GetMemberJobsResp, error) {
|
||||
client := hpcAC.NewHpcACClient(m.cli.Conn())
|
||||
return client.GetMemberJobs(ctx, in, opts...)
|
||||
}
|
||||
|
||||
// 获取节点资源限额
|
||||
func (m *defaultHpcAC) GetNodeResources(ctx context.Context, in *GetNodeResourcesReq, opts ...grpc.CallOption) (*GetNodeResourcesResp, error) {
|
||||
client := hpcAC.NewHpcACClient(m.cli.Conn())
|
||||
return client.GetNodeResources(ctx, in, opts...)
|
||||
}
|
||||
|
|
|
@ -13,10 +13,12 @@ var (
|
|||
type AuthService struct {
|
||||
C *config.Config
|
||||
Token string //区域用户认证token
|
||||
ClusterId string
|
||||
AiCenterUrlPrefix string
|
||||
HpcCenterUrlPrefix string
|
||||
EFileUrlPrefix string
|
||||
JobManagerId int
|
||||
GroupId string
|
||||
}
|
||||
|
||||
func NewAuthService() *AuthService {
|
||||
|
@ -24,19 +26,21 @@ func NewAuthService() *AuthService {
|
|||
configFile := flag.String("c", "etc/hpcac.yaml", "the config file")
|
||||
conf.MustLoad(*configFile, &cfg)
|
||||
flag.Parse()
|
||||
token := getToken(&cfg)
|
||||
token, clusterId := getTokenAndClusterId(&cfg)
|
||||
return &AuthService{
|
||||
C: &cfg,
|
||||
Token: token,
|
||||
ClusterId: clusterId,
|
||||
AiCenterUrlPrefix: getAiCenterUrl(&cfg, token),
|
||||
HpcCenterUrlPrefix: getHpcCenterUrl(&cfg, token),
|
||||
EFileUrlPrefix: getEFileUrlPrefix(&cfg, token),
|
||||
JobManagerId: getJobManagerId(&cfg, token),
|
||||
GroupId: getGroupId(&cfg, token),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *AuthService) reGet() {
|
||||
a.Token = getToken(a.C)
|
||||
a.Token, a.ClusterId = getTokenAndClusterId(a.C)
|
||||
}
|
||||
|
||||
func (a *AuthService) valid() bool {
|
||||
|
@ -78,7 +82,7 @@ func EFileUrlPrefix() string {
|
|||
return as.EFileUrlPrefix
|
||||
}
|
||||
|
||||
func getToken(cfg *config.Config) string {
|
||||
func getTokenAndClusterId(cfg *config.Config) (string, string) {
|
||||
authUrl := cfg.AiConf.BaseUrlAi + cfg.AuthUrl
|
||||
var tr TokenResp
|
||||
req := GetRestyRequest(3)
|
||||
|
@ -90,16 +94,16 @@ func getToken(cfg *config.Config) string {
|
|||
Post(authUrl)
|
||||
|
||||
if err != nil || tr.Code != "0" {
|
||||
return ""
|
||||
return "", ""
|
||||
}
|
||||
|
||||
for _, datum := range tr.Data {
|
||||
if datum.ClusterId != "0" {
|
||||
return datum.Token
|
||||
return datum.Token, datum.ClusterId
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func getAiCenterUrl(cfg *config.Config, token string) string {
|
||||
|
@ -183,3 +187,24 @@ func getJobManagerId(cfg *config.Config, token string) int {
|
|||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func getGroupId(cfg *config.Config, token string) string {
|
||||
url := cfg.AcBaseUrl + cfg.UserConf.GetGroupMembers
|
||||
var gm GroupMembers
|
||||
req := GetRestyRequest(3)
|
||||
_, err := req.
|
||||
SetHeader("token", token).
|
||||
SetResult(&gm).
|
||||
Get(url)
|
||||
|
||||
if err != nil || gm.Code != "0" {
|
||||
return ""
|
||||
}
|
||||
|
||||
for _, datum := range gm.Data {
|
||||
// Todo multiple groups filtering
|
||||
return datum.GroupId
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
|
|
@ -155,3 +155,21 @@ type TokenState struct {
|
|||
Code string `json:"code"`
|
||||
Data string `json:"data"`
|
||||
}
|
||||
|
||||
// GroupMembers is the JSON body decoded from the group-members endpoint.
type GroupMembers struct {
	// Code is the status code of the response; getGroupId treats any value
	// other than "0" as a failure.
	Code string `json:"code"`
	// Msg is the message that accompanies Code.
	Msg string `json:"msg"`
	// Data lists the groups reported for the account, one entry per group.
	Data []struct {
		AccountId        string `json:"accountId"`
		AccountName      string `json:"accountName"`
		GroupDisplayName string `json:"groupDisplayName"`
		GroupId          string `json:"groupId"`
		GroupRole        string `json:"groupRole"`
		// GroupUsersInfos lists the users attached to the group.
		GroupUsersInfos []struct {
			UserId          string `json:"userId"`
			UserName        string `json:"userName"`
			ClusterId       string `json:"clusterId"`
			ClusterUserName string `json:"clusterUserName"`
		} `json:"groupUsersInfos"`
	} `json:"data"`
}
|
||||
|
|
|
@ -47,5 +47,12 @@ type FileConf struct {
|
|||
|
||||
// UserConf holds the endpoint paths of the Sugon user-resource APIs.
type UserConf struct {
	// GetUserInfo is the path of the user-info endpoint.
	GetUserInfo string
	// GetGroupMembers is the path of the group-members endpoint; getGroupId
	// appends it to AcBaseUrl.
	GetGroupMembers string
	// GetMemberJobs is the path of the member-jobs endpoint.
	GetMemberJobs string
}
|
||||
|
||||
// ContainerConf holds the endpoint paths of the Sugon container APIs.
type ContainerConf struct {
	// GetNodeResources is the path of the node-resources endpoint.
	GetNodeResources string
}
|
||||
|
|
|
@ -139,9 +139,12 @@ func (l *GetGeneralInfoLogic) GetGeneralInfo(in *hpcAC.ResourceReq) (*hpcAC.GiRe
|
|||
//获取节点资源限额
|
||||
resourceUrl := ai_prefix_url + cpConf.AiResourceUrl
|
||||
|
||||
resourceGroups := []string{
|
||||
resourceGroupResp.Data.Cpu[0],
|
||||
resourceGroupResp.Data.Dcu[0],
|
||||
resourceGroups := []string{}
|
||||
if len(resourceGroupResp.Data.Dcu) != 0 {
|
||||
resourceGroups = append(resourceGroups, resourceGroupResp.Data.Dcu[0])
|
||||
}
|
||||
if len(resourceGroupResp.Data.Cpu) != 0 {
|
||||
resourceGroups = append(resourceGroups, resourceGroupResp.Data.Cpu[0])
|
||||
}
|
||||
|
||||
var memorySize int32
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
package logic
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-ac/internal/svc"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type GetMemberJobsLogic struct {
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
logx.Logger
|
||||
}
|
||||
|
||||
func NewGetMemberJobsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetMemberJobsLogic {
|
||||
return &GetMemberJobsLogic{
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
Logger: logx.WithContext(ctx),
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetMemberJobsLogic) GetMemberJobs(in *hpcAC.GetMemberJobsReq) (*hpcAC.GetMemberJobsResp, error) {
|
||||
// todo: add your logic here and delete this line
|
||||
|
||||
return &hpcAC.GetMemberJobsResp{}, nil
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package logic
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-ac/internal/svc"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type GetNodeResourcesLogic struct {
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
logx.Logger
|
||||
}
|
||||
|
||||
func NewGetNodeResourcesLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetNodeResourcesLogic {
|
||||
return &GetNodeResourcesLogic{
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
Logger: logx.WithContext(ctx),
|
||||
}
|
||||
}
|
||||
|
||||
// 获取节点资源限额
|
||||
func (l *GetNodeResourcesLogic) GetNodeResources(in *hpcAC.GetNodeResourcesReq) (*hpcAC.GetNodeResourcesResp, error) {
|
||||
// todo: add your logic here and delete this line
|
||||
|
||||
return &hpcAC.GetNodeResourcesResp{}, nil
|
||||
}
|
|
@ -191,3 +191,14 @@ func (s *HpcACServer) GetUserInfo(ctx context.Context, in *hpcAC.GetUserInfoReq)
|
|||
l := logic.NewGetUserInfoLogic(ctx, s.svcCtx)
|
||||
return l.GetUserInfo(in)
|
||||
}
|
||||
|
||||
func (s *HpcACServer) GetMemberJobs(ctx context.Context, in *hpcAC.GetMemberJobsReq) (*hpcAC.GetMemberJobsResp, error) {
|
||||
l := logic.NewGetMemberJobsLogic(ctx, s.svcCtx)
|
||||
return l.GetMemberJobs(in)
|
||||
}
|
||||
|
||||
// 获取节点资源限额
|
||||
func (s *HpcACServer) GetNodeResources(ctx context.Context, in *hpcAC.GetNodeResourcesReq) (*hpcAC.GetNodeResourcesResp, error) {
|
||||
l := logic.NewGetNodeResourcesLogic(ctx, s.svcCtx)
|
||||
return l.GetNodeResources(in)
|
||||
}
|
||||
|
|
|
@ -893,6 +893,33 @@ message Children{
|
|||
}
|
||||
/******************曙光文件接口 End*************************/
|
||||
|
||||
/******************曙光容器 Start*************************/
|
||||
message GetNodeResourcesReq{
|
||||
string acceleratorType = 1;
|
||||
string resourceGroup = 2;
|
||||
}
|
||||
|
||||
message GetNodeResourcesResp{
|
||||
string code = 1;
|
||||
string msg = 2;
|
||||
GetNodeResourcesData data = 3;
|
||||
}
|
||||
|
||||
message GetNodeResourcesData{
|
||||
string id = 1;
|
||||
int64 cpuNumber = 2;
|
||||
int64 mluLimits = 3;
|
||||
int64 dcuLimits = 4;
|
||||
int64 nvLimits = 5;
|
||||
int64 gpuNumber = 6;
|
||||
int64 memorySize = 7;
|
||||
string resourceGroup = 8;
|
||||
string userName = 9;
|
||||
int64 nodeNumber = 10;
|
||||
string maxTime = 11;
|
||||
}
|
||||
/******************曙光容器 End*************************/
|
||||
|
||||
/******************用户资源 Start*************************/
|
||||
message GetUserInfoReq{
|
||||
}
|
||||
|
@ -915,6 +942,31 @@ message GetUserInfoData{
|
|||
string accountStatus = 9;
|
||||
string accountBalance = 10;
|
||||
}
|
||||
|
||||
message GetMemberJobsReq{
|
||||
int64 clusterId = 1;
|
||||
int64 groupId = 2;
|
||||
string clusterUserName = 3;
|
||||
}
|
||||
|
||||
message GetMemberJobsResp{
|
||||
string code = 1;
|
||||
string msg = 2;
|
||||
repeated GetMemberJobsData data = 3;
|
||||
}
|
||||
|
||||
message GetMemberJobsData{
|
||||
string id = 1;
|
||||
string name = 2;
|
||||
string status = 3;
|
||||
int64 nodeNumReq = 4;
|
||||
int64 cpuCore = 5;
|
||||
int64 procNumReq = 6;
|
||||
int64 gpuNum = 7;
|
||||
int64 gpuNumReq = 8;
|
||||
int64 dcuNumUsed = 9;
|
||||
int64 dcuNumReq = 10;
|
||||
}
|
||||
/******************用户资源 End*************************/
|
||||
|
||||
// HPC Services for AC
|
||||
|
@ -992,5 +1044,10 @@ service hpcAC {
|
|||
|
||||
//用户资源
|
||||
rpc GetUserInfo(GetUserInfoReq) returns (GetUserInfoResp);
|
||||
rpc GetMemberJobs(GetMemberJobsReq) returns (GetMemberJobsResp); //查询成员实时作业列表
|
||||
|
||||
//获取节点资源限额
|
||||
rpc GetNodeResources(GetNodeResourcesReq) returns (GetNodeResourcesResp);
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue