feat TrainingJobLogsPreview

This commit is contained in:
jagger 2024-05-15 08:52:24 +08:00
parent 15e13a7b57
commit 689bb339a9
9 changed files with 1072 additions and 760 deletions

View File

@ -118,6 +118,8 @@ type (
GetExportTasksOfDatasetResp = modelarts.GetExportTasksOfDatasetResp
GetNotebookStorageReq = modelarts.GetNotebookStorageReq
GetNotebookStorageResp = modelarts.GetNotebookStorageResp
GetTrainingJobLogsPreviewReq = modelarts.GetTrainingJobLogsPreviewReq
GetTrainingJobLogsPreviewResp = modelarts.GetTrainingJobLogsPreviewResp
GetVisualizationJobParam = modelarts.GetVisualizationJobParam
GetVisualizationJobReq = modelarts.GetVisualizationJobReq
GetVisualizationJobResp = modelarts.GetVisualizationJobResp

View File

@ -118,6 +118,8 @@ type (
GetExportTasksOfDatasetResp = modelarts.GetExportTasksOfDatasetResp
GetNotebookStorageReq = modelarts.GetNotebookStorageReq
GetNotebookStorageResp = modelarts.GetNotebookStorageResp
GetTrainingJobLogsPreviewReq = modelarts.GetTrainingJobLogsPreviewReq
GetTrainingJobLogsPreviewResp = modelarts.GetTrainingJobLogsPreviewResp
GetVisualizationJobParam = modelarts.GetVisualizationJobParam
GetVisualizationJobReq = modelarts.GetVisualizationJobReq
GetVisualizationJobResp = modelarts.GetVisualizationJobResp
@ -324,6 +326,8 @@ type (
GetTrainingJobFlavors(ctx context.Context, in *TrainingJobFlavorsReq, opts ...grpc.CallOption) (*TrainingJobFlavorsResp, error)
// GET ai-engines 查询作业引擎规格
GetAiEnginesList(ctx context.Context, in *ListAiEnginesReq, opts ...grpc.CallOption) (*ListAiEnginesResp, error)
// 查询训练作业指定任务的日志(预览)
GetTrainingJobLogsPreview(ctx context.Context, in *GetTrainingJobLogsPreviewReq, opts ...grpc.CallOption) (*GetTrainingJobLogsPreviewResp, error)
// export task
ExportTask(ctx context.Context, in *ExportTaskReq, opts ...grpc.CallOption) (*ExportTaskDataResp, error)
GetExportTasksOfDataset(ctx context.Context, in *GetExportTasksOfDatasetReq, opts ...grpc.CallOption) (*GetExportTasksOfDatasetResp, error)
@ -479,6 +483,12 @@ func (m *defaultModelArtsService) GetAiEnginesList(ctx context.Context, in *List
return client.GetAiEnginesList(ctx, in, opts...)
}
// GetTrainingJobLogsPreview queries the logs (preview) of a specified task of
// a training job by forwarding the call to a ModelArtsService gRPC client
// built on the connection held by m.cli.
func (m *defaultModelArtsService) GetTrainingJobLogsPreview(ctx context.Context, in *GetTrainingJobLogsPreviewReq, opts ...grpc.CallOption) (*GetTrainingJobLogsPreviewResp, error) {
	return modelarts.NewModelArtsServiceClient(m.cli.Conn()).GetTrainingJobLogsPreview(ctx, in, opts...)
}
// export task
func (m *defaultModelArtsService) ExportTask(ctx context.Context, in *ExportTaskReq, opts ...grpc.CallOption) (*ExportTaskDataResp, error) {
client := modelarts.NewModelArtsServiceClient(m.cli.Conn())

View File

@ -0,0 +1,4 @@
package initialize
// ModelArtsClient is declared with no fields.
// NOTE(review): nothing visible in this change uses the type; presumably a
// placeholder for client initialization state — confirm or remove.
type ModelArtsClient struct {
}

View File

@ -0,0 +1,51 @@
package modelartsservicelogic
import (
"context"
"fmt"
"gitlink.org.cn/JointCloud/pcm-modelarts/internal/util"
"k8s.io/apimachinery/pkg/util/json"
"gitlink.org.cn/JointCloud/pcm-modelarts/internal/svc"
"gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
"github.com/zeromicro/go-zero/core/logx"
)
// GetTrainingJobLogsPreviewLogic carries the per-call state used to serve the
// GetTrainingJobLogsPreview RPC.
type GetTrainingJobLogsPreviewLogic struct {
// ctx is the context handed to the constructor.
ctx context.Context
// svcCtx is the service context handed to the constructor.
svcCtx *svc.ServiceContext
// Logger is embedded so logging methods are available directly on the logic
// value; the constructor sets it to logx.WithContext(ctx).
logx.Logger
}
// NewGetTrainingJobLogsPreviewLogic builds the logic value for one call,
// storing ctx and svcCtx and attaching a logger derived from ctx.
func NewGetTrainingJobLogsPreviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetTrainingJobLogsPreviewLogic {
	logic := new(GetTrainingJobLogsPreviewLogic)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	logic.Logger = logx.WithContext(ctx)
	return logic
}
// GetTrainingJobLogsPreview queries the logs (preview) of a specified task of
// a training job.
//
// The platform named by in.Platform selects the endpoint and project ID used
// to build the request URL; the request's own project ID field is not read
// here. The body of the GET response is decoded as JSON into the response
// message.
//
// An error is returned when the platform configuration cannot be resolved,
// when the request fails, when no response body is returned, or when the body
// cannot be decoded. Previously a decode failure was ignored and an empty
// response was returned with a nil error.
func (l *GetTrainingJobLogsPreviewLogic) GetTrainingJobLogsPreview(in *modelarts.GetTrainingJobLogsPreviewReq) (*modelarts.GetTrainingJobLogsPreviewResp, error) {
	platform, err := util.GetModelArtsConfWithPlatform(in.Platform)
	if err != nil {
		return nil, err
	}

	// The path is appended directly to the endpoint, so the endpoint is
	// presumably configured with a trailing "/" — TODO confirm.
	url := fmt.Sprintf("%sv2/%s/training-jobs/%s/tasks/%s/logs/preview",
		platform.Endpoint,
		platform.ProjectId,
		in.TrainingJobId,
		in.TaskId)

	body, err := util.SendRequest("GET", url, nil, in.Platform)
	if err != nil {
		// Use the embedded logger so the entry carries the request context.
		l.Errorf("查询训练作业指定任务的日志(预览)失败,请求url: %s\n err%v", url, err)
		return nil, err
	}
	// body is dereferenced below; guard against a nil pointer so a missing
	// body surfaces as an error rather than a panic.
	if body == nil {
		return nil, fmt.Errorf("training job logs preview: no response body from %s", url)
	}

	var resp modelarts.GetTrainingJobLogsPreviewResp
	if err := json.Unmarshal(*body, &resp); err != nil {
		return nil, fmt.Errorf("decoding training job logs preview response from %s: %w", url, err)
	}
	return &resp, nil
}

View File

@ -136,6 +136,12 @@ func (s *ModelArtsServiceServer) GetAiEnginesList(ctx context.Context, in *model
return l.GetAiEnginesList(in)
}
// GetTrainingJobLogsPreview queries the logs (preview) of a specified task of
// a training job by delegating to a freshly constructed
// GetTrainingJobLogsPreviewLogic bound to ctx and the server's service context.
func (s *ModelArtsServiceServer) GetTrainingJobLogsPreview(ctx context.Context, in *modelarts.GetTrainingJobLogsPreviewReq) (*modelarts.GetTrainingJobLogsPreviewResp, error) {
	return modelartsservicelogic.NewGetTrainingJobLogsPreviewLogic(ctx, s.svcCtx).GetTrainingJobLogsPreview(in)
}
// export task
func (s *ModelArtsServiceServer) ExportTask(ctx context.Context, in *modelarts.ExportTaskReq) (*modelarts.ExportTaskDataResp, error) {
l := modelartsservicelogic.NewExportTaskLogic(ctx, s.svcCtx)

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.3.0
// - protoc v4.24.3
// - protoc v4.25.3
// source: pb/pcm-modelarts.proto
package modelarts
@ -38,6 +38,7 @@ const (
ModelArtsService_ShowAlgorithmByUuid_FullMethodName = "/modelarts.ModelArtsService/ShowAlgorithmByUuid"
ModelArtsService_GetTrainingJobFlavors_FullMethodName = "/modelarts.ModelArtsService/GetTrainingJobFlavors"
ModelArtsService_GetAiEnginesList_FullMethodName = "/modelarts.ModelArtsService/GetAiEnginesList"
ModelArtsService_GetTrainingJobLogsPreview_FullMethodName = "/modelarts.ModelArtsService/GetTrainingJobLogsPreview"
ModelArtsService_ExportTask_FullMethodName = "/modelarts.ModelArtsService/ExportTask"
ModelArtsService_GetExportTasksOfDataset_FullMethodName = "/modelarts.ModelArtsService/GetExportTasksOfDataset"
ModelArtsService_GetExportTaskStatusOfDataset_FullMethodName = "/modelarts.ModelArtsService/GetExportTaskStatusOfDataset"
@ -104,6 +105,8 @@ type ModelArtsServiceClient interface {
GetTrainingJobFlavors(ctx context.Context, in *TrainingJobFlavorsReq, opts ...grpc.CallOption) (*TrainingJobFlavorsResp, error)
// GET ai-engines 查询作业引擎规格
GetAiEnginesList(ctx context.Context, in *ListAiEnginesReq, opts ...grpc.CallOption) (*ListAiEnginesResp, error)
// 查询训练作业指定任务的日志(预览)
GetTrainingJobLogsPreview(ctx context.Context, in *GetTrainingJobLogsPreviewReq, opts ...grpc.CallOption) (*GetTrainingJobLogsPreviewResp, error)
//export task
ExportTask(ctx context.Context, in *ExportTaskReq, opts ...grpc.CallOption) (*ExportTaskDataResp, error)
GetExportTasksOfDataset(ctx context.Context, in *GetExportTasksOfDatasetReq, opts ...grpc.CallOption) (*GetExportTasksOfDatasetResp, error)
@ -313,6 +316,15 @@ func (c *modelArtsServiceClient) GetAiEnginesList(ctx context.Context, in *ListA
return out, nil
}
// GetTrainingJobLogsPreview invokes the GetTrainingJobLogsPreview method on
// the client connection and returns the decoded response, or nil and the
// error reported by Invoke.
func (c *modelArtsServiceClient) GetTrainingJobLogsPreview(ctx context.Context, in *GetTrainingJobLogsPreviewReq, opts ...grpc.CallOption) (*GetTrainingJobLogsPreviewResp, error) {
// out is filled in by Invoke.
out := new(GetTrainingJobLogsPreviewResp)
err := c.cc.Invoke(ctx, ModelArtsService_GetTrainingJobLogsPreview_FullMethodName, in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *modelArtsServiceClient) ExportTask(ctx context.Context, in *ExportTaskReq, opts ...grpc.CallOption) (*ExportTaskDataResp, error) {
out := new(ExportTaskDataResp)
err := c.cc.Invoke(ctx, ModelArtsService_ExportTask_FullMethodName, in, out, opts...)
@ -553,6 +565,8 @@ type ModelArtsServiceServer interface {
GetTrainingJobFlavors(context.Context, *TrainingJobFlavorsReq) (*TrainingJobFlavorsResp, error)
// GET ai-engines 查询作业引擎规格
GetAiEnginesList(context.Context, *ListAiEnginesReq) (*ListAiEnginesResp, error)
// 查询训练作业指定任务的日志(预览)
GetTrainingJobLogsPreview(context.Context, *GetTrainingJobLogsPreviewReq) (*GetTrainingJobLogsPreviewResp, error)
//export task
ExportTask(context.Context, *ExportTaskReq) (*ExportTaskDataResp, error)
GetExportTasksOfDataset(context.Context, *GetExportTasksOfDatasetReq) (*GetExportTasksOfDatasetResp, error)
@ -645,6 +659,9 @@ func (UnimplementedModelArtsServiceServer) GetTrainingJobFlavors(context.Context
// GetAiEnginesList is the default stub: it returns a nil response and a
// status error with code Unimplemented.
func (UnimplementedModelArtsServiceServer) GetAiEnginesList(context.Context, *ListAiEnginesReq) (*ListAiEnginesResp, error) {
return nil, status.Errorf(codes.Unimplemented, "method GetAiEnginesList not implemented")
}
// GetTrainingJobLogsPreview is the default stub: it returns a nil response
// and a status error with code Unimplemented.
func (UnimplementedModelArtsServiceServer) GetTrainingJobLogsPreview(context.Context, *GetTrainingJobLogsPreviewReq) (*GetTrainingJobLogsPreviewResp, error) {
return nil, status.Errorf(codes.Unimplemented, "method GetTrainingJobLogsPreview not implemented")
}
// ExportTask is the default stub: it returns a nil response and a status
// error with code Unimplemented.
func (UnimplementedModelArtsServiceServer) ExportTask(context.Context, *ExportTaskReq) (*ExportTaskDataResp, error) {
return nil, status.Errorf(codes.Unimplemented, "method ExportTask not implemented")
}
@ -1066,6 +1083,24 @@ func _ModelArtsService_GetAiEnginesList_Handler(srv interface{}, ctx context.Con
return interceptor(ctx, in, info, handler)
}
// _ModelArtsService_GetTrainingJobLogsPreview_Handler decodes the incoming
// request with dec and dispatches it to srv's GetTrainingJobLogsPreview,
// either directly or through interceptor when one is supplied.
func _ModelArtsService_GetTrainingJobLogsPreview_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(GetTrainingJobLogsPreviewReq)
// A decode failure is returned to the caller without invoking the service.
if err := dec(in); err != nil {
return nil, err
}
// No interceptor: call the service implementation directly.
if interceptor == nil {
return srv.(ModelArtsServiceServer).GetTrainingJobLogsPreview(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: ModelArtsService_GetTrainingJobLogsPreview_FullMethodName,
}
// handler wraps the service call so the interceptor can invoke it with the
// request it chooses to pass along.
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(ModelArtsServiceServer).GetTrainingJobLogsPreview(ctx, req.(*GetTrainingJobLogsPreviewReq))
}
return interceptor(ctx, in, info, handler)
}
func _ModelArtsService_ExportTask_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(ExportTaskReq)
if err := dec(in); err != nil {
@ -1545,6 +1580,10 @@ var ModelArtsService_ServiceDesc = grpc.ServiceDesc{
MethodName: "GetAiEnginesList",
Handler: _ModelArtsService_GetAiEnginesList_Handler,
},
{
MethodName: "GetTrainingJobLogsPreview",
Handler: _ModelArtsService_GetTrainingJobLogsPreview_Handler,
},
{
MethodName: "ExportTask",
Handler: _ModelArtsService_ExportTask_Handler,

View File

@ -2170,6 +2170,18 @@ message Schedule{
}
/******************Visualization Job End*************************/
// Request for GetTrainingJobLogsPreview: identifies the training job task
// whose log preview is wanted and the platform to query.
message GetTrainingJobLogsPreviewReq {
// Project ID.
// NOTE(review): the handler in this change builds the URL from the platform
// configuration's project ID rather than this field — confirm it is needed.
string project_id = 1;
// Training job ID; placed in the training-jobs segment of the request URL.
string training_job_id = 2;
// Task identifier; placed in the tasks segment of the request URL.
// NOTE(review): original comment was empty — confirm whether this is a task
// name or an ID.
string task_id = 3;
// Platform name used to look up the ModelArts configuration for the request.
string platform = 4;
}
// Response for GetTrainingJobLogsPreview.
message GetTrainingJobLogsPreviewResp {
// Log content.
// NOTE(review): the original comment was garbled; it appears to describe a
// size cap of n MB beyond which only the most recent n MB of the log is
// returned — confirm against the ModelArts API reference.
string content = 1;
// Size of the log content returned.
// NOTE(review): presumably in bytes with an upper limit of 5 MB (the
// original comment only retained "5") — TODO confirm.
int32 current_size =2;
// Size of the complete log — presumably in bytes; TODO confirm.
int32 full_size = 3;
}
// Slurm Services for Shuguang Branch
service ModelArtsService {
@ -2213,6 +2225,8 @@ service ModelArtsService {
rpc GetTrainingJobFlavors(TrainingJobFlavorsReq) returns (TrainingJobFlavorsResp);
// GET ai-engines
rpc GetAiEnginesList(ListAiEnginesReq) returns (ListAiEnginesResp);
// Query the logs (preview) of a specified task of a training job
rpc GetTrainingJobLogsPreview(GetTrainingJobLogsPreviewReq) returns(GetTrainingJobLogsPreviewResp);
//export task
rpc ExportTask(ExportTaskReq) returns (ExportTaskDataResp);

View File

@ -15,11 +15,11 @@ GRPC localhost:2002/modelarts.ModelArtsService/CreateTrainingJob
}
},
"algorithm": {
"code_dir": "/nudt-cloudream/mnt/flageval/eval-41/153/model/",
"boot_file": "/nudt-cloudream/mnt/flageval/eval-41/153/model/service.py",
"code_dir": "/000gezhecheng/1/",
"boot_file": "/000gezhecheng/1/2.py",
"engine": {
"engine_name": "Ascend-Powered-Engine",
"engine_version": "mindspore_2.1.0-cann_6.3.2-py_3.7-euler_2.8.3-aarch64",
"engine_version": "mindspore_2.1.0-cann_6.3.2-py_3.7-euler_2.8.3-aarch64-d910",
"image_url": ""
},
"parameters": [],
@ -37,7 +37,7 @@ GRPC localhost:2002/modelarts.ModelArtsService/CreateTrainingJob
"node_count": 1
},
"log_export_path": {
"obs_url": "/nudt-modelarts/nudt-modelarts-out/"
"obs_url": "/nudt-cloudream2/job-logs/"
}
}
}
@ -115,7 +115,7 @@ GRPC localhost:2002/modelarts.ModelArtsService/CreateTrainingJob
"node_count": 1
},
"log_export_path": {
"obs_url": "/nudt-modelarts/nudt-modelarts-out/"
"obs_url": "/nudt-cloudream2/job-logs/"
}
}
}