forked from JointCloud/pcm-coordinator
parent
9766e68075
commit
58263827d1
|
@ -1,137 +1,137 @@
|
|||
syntax = "v1"
|
||||
|
||||
info(
|
||||
title: "type title here"
|
||||
desc: "type desc here"
|
||||
author: "type author here"
|
||||
email: "type email here"
|
||||
version: "type version here"
|
||||
title: "type title here"
|
||||
desc: "type desc here"
|
||||
author: "type author here"
|
||||
email: "type email here"
|
||||
version: "type version here"
|
||||
)
|
||||
|
||||
type (
|
||||
commitHpcTaskReq {
|
||||
Name string `json:"name"` // paratera:jobName
|
||||
Description string `json:"description,optional"`
|
||||
tenantId int64 `json:"tenantId,optional"`
|
||||
TaskId int64 `json:"taskId,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
CardCount int64 `json:"cardCount,optional"`
|
||||
WorkDir string `json:"workDir,optional"` //paratera:workingDir
|
||||
WallTime string `json:"wallTime,optional"`
|
||||
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
|
||||
AppType string `json:"appType,optional"`
|
||||
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
|
||||
Queue string `json:"queue,optional"`
|
||||
NNode string `json:"nNode,optional"`
|
||||
SubmitType string `json:"submitType,optional"`
|
||||
StdOutFile string `json:"stdOutFile,optional"`
|
||||
StdErrFile string `json:"stdErrFile,optional"`
|
||||
StdInput string `json:"stdInput,optional"`
|
||||
Environment map[string]string `json:"environment,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
}
|
||||
commitHpcTaskReq {
|
||||
Name string `json:"name"` // paratera:jobName
|
||||
Description string `json:"description,optional"`
|
||||
TenantId int64 `json:"tenantId,optional"`
|
||||
TaskId int64 `json:"taskId,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
CardCount int64 `json:"cardCount,optional"`
|
||||
WorkDir string `json:"workDir,optional"` //paratera:workingDir
|
||||
WallTime string `json:"wallTime,optional"`
|
||||
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
|
||||
AppType string `json:"appType,optional"`
|
||||
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
|
||||
Queue string `json:"queue,optional"`
|
||||
NNode string `json:"nNode,optional"`
|
||||
SubmitType string `json:"submitType,optional"`
|
||||
StdOutFile string `json:"stdOutFile,optional"`
|
||||
StdErrFile string `json:"stdErrFile,optional"`
|
||||
StdInput string `json:"stdInput,optional"`
|
||||
Environment map[string]string `json:"environment,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
}
|
||||
|
||||
commitHpcTaskResp {
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
commitHpcTaskResp {
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcOverViewReq {
|
||||
}
|
||||
hpcOverViewResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCOverView `json:"data"`
|
||||
}
|
||||
HPCOverView {
|
||||
AdapterCount int32 `json:"adapterCount"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
hpcOverViewReq {
|
||||
}
|
||||
hpcOverViewResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCOverView `json:"data"`
|
||||
}
|
||||
HPCOverView {
|
||||
AdapterCount int32 `json:"adapterCount"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcAdapterSummaryReq {
|
||||
}
|
||||
hpcAdapterSummaryResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []HPCAdapterSummary `json:"data"`
|
||||
}
|
||||
HPCAdapterSummary {
|
||||
AdapterName string `json:"adapterName"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
hpcAdapterSummaryReq {
|
||||
}
|
||||
hpcAdapterSummaryResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []HPCAdapterSummary `json:"data"`
|
||||
}
|
||||
HPCAdapterSummary {
|
||||
AdapterName string `json:"adapterName"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcJobReq {
|
||||
}
|
||||
hpcJobResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []Job `json:"data"`
|
||||
}
|
||||
Job {
|
||||
JobName string `json:"jobName"`
|
||||
JobDesc string `json:"jobDesc"`
|
||||
SubmitTime string `json:"submitTime"`
|
||||
JobStatus string `json:"jobStatus"`
|
||||
AdapterName string `json:"adapterName"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
ClusterType string `json:"clusterType"`
|
||||
}
|
||||
hpcJobReq {
|
||||
}
|
||||
hpcJobResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []Job `json:"data"`
|
||||
}
|
||||
Job {
|
||||
JobName string `json:"jobName"`
|
||||
JobDesc string `json:"jobDesc"`
|
||||
SubmitTime string `json:"submitTime"`
|
||||
JobStatus string `json:"jobStatus"`
|
||||
AdapterName string `json:"adapterName"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
ClusterType string `json:"clusterType"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcResourceReq {
|
||||
}
|
||||
hpcResourceResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
HPCResource HPCResource `json:"hpcResource"`
|
||||
}
|
||||
HPCResource {
|
||||
GPUCardsTotal int32 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal int32 `json:"cpuCoresTotal"`
|
||||
RAMTotal int32 `json:"ramTotal"`
|
||||
GPUCardsUsed int32 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed int32 `json:"cpuCoresUsed"`
|
||||
RAMUsed int32 `json:"ramUsed"`
|
||||
GPURate float32 `json:"gpuRate"`
|
||||
CPURate float32 `json:"cpuRate"`
|
||||
RAMRate float32 `json:"ramRate"`
|
||||
}
|
||||
hpcResourceReq {
|
||||
}
|
||||
hpcResourceResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
HPCResource HPCResource `json:"hpcResource"`
|
||||
}
|
||||
HPCResource {
|
||||
GPUCardsTotal float64 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal float64 `json:"cpuCoresTotal"`
|
||||
RAMTotal float64 `json:"ramTotal"`
|
||||
GPUCardsUsed float64 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed float64 `json:"cpuCoresUsed"`
|
||||
RAMUsed float64 `json:"ramUsed"`
|
||||
GPURate float64 `json:"gpuRate"`
|
||||
CPURate float64 `json:"cpuRate"`
|
||||
RAMRate float64 `json:"ramRate"`
|
||||
}
|
||||
)
|
||||
|
||||
type QueueAssetsResp {
|
||||
QueueAssets []QueueAsset `json:"queueAsset"`
|
||||
QueueAssets []QueueAsset `json:"queueAsset"`
|
||||
}
|
||||
type QueueAsset {
|
||||
TenantName string `json:"tenantName"` //租户名称
|
||||
ParticipantId int64 `json:"participantId"`
|
||||
AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开
|
||||
QueNodes string `json:"queNodes"` //队列节点总数
|
||||
QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数
|
||||
QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数
|
||||
QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数
|
||||
QueChargeRate string `json:"queChargeRate,omitempty"` //费率
|
||||
QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数
|
||||
QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数
|
||||
QueueName string `json:"queueName,omitempty"` //队列名称
|
||||
QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数
|
||||
QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数
|
||||
QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数
|
||||
QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数
|
||||
QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间
|
||||
QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数
|
||||
QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数
|
||||
QueNcpus string `json:"queNcpus"` //队列cpu数
|
||||
TenantName string `json:"tenantName"` //租户名称
|
||||
ParticipantId int64 `json:"participantId"`
|
||||
AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开
|
||||
QueNodes string `json:"queNodes"` //队列节点总数
|
||||
QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数
|
||||
QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数
|
||||
QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数
|
||||
QueChargeRate string `json:"queChargeRate,omitempty"` //费率
|
||||
QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数
|
||||
QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数
|
||||
QueueName string `json:"queueName,omitempty"` //队列名称
|
||||
QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数
|
||||
QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数
|
||||
QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数
|
||||
QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数
|
||||
QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间
|
||||
QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数
|
||||
QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数
|
||||
QueNcpus string `json:"queNcpus"` //队列cpu数
|
||||
}
|
|
@ -2,7 +2,6 @@ package hpc
|
|||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
|
@ -25,18 +24,29 @@ func NewResourceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Resource
|
|||
|
||||
func (l *ResourceLogic) Resource(req *types.HpcResourceReq) (resp *types.HpcResourceResp, err error) {
|
||||
|
||||
l.svcCtx.DbEngin.Raw("SELECT th.NAME as job_name,t.description as job_desc,t.commit_time as submit_time,th.STATUS as job_status,ta.name as adapter_name,tc.name as cluster_name,tc.label as cluster_type FROM task_hpc th LEFT JOIN task t ON t.id = th.task_id JOIN t_cluster tc on th.cluster_id = tc.id JOIN t_adapter ta on tc.adapter_id = ta.id")
|
||||
type hpcResourceOV struct {
|
||||
CpuAvail float64 `json:"cpu_avail"`
|
||||
CpuTotal float64 `json:"cpu_total"`
|
||||
MemAvail float64 `json:"mem_avail"`
|
||||
MemTotal float64 `json:"mem_total"`
|
||||
DiskAvail float64 `json:"disk_avail"`
|
||||
DiskTotal float64 `json:"disk_total"`
|
||||
GpuAvail float64 `json:"gpu_avail"`
|
||||
GpuTotal float64 `json:"gpu_total"`
|
||||
}
|
||||
var hrov hpcResourceOV
|
||||
l.svcCtx.DbEngin.Raw("SELECT sum(cpu_avail) as cpu_avail,sum(cpu_total) as cpu_total,sum(mem_avail) as mem_avail,sum(mem_total) as mem_total,sum(disk_avail) as disk_avail,sum(disk_total) as disk_total,sum(gpu_avail) as gpu_avail,sum(gpu_total) as gpu_total FROM t_cluster_resource where cluster_type = 2").Scan(&hrov)
|
||||
|
||||
hpcResource := types.HPCResource{
|
||||
GPUCardsTotal: 0,
|
||||
CPUCoresTotal: 0,
|
||||
RAMTotal: 0,
|
||||
GPUCardsUsed: 0,
|
||||
CPUCoresUsed: 0,
|
||||
RAMUsed: 0,
|
||||
GPURate: 0,
|
||||
CPURate: 0,
|
||||
RAMRate: 0,
|
||||
GPUCardsTotal: hrov.GpuTotal,
|
||||
CPUCoresTotal: hrov.CpuTotal,
|
||||
RAMTotal: hrov.MemTotal,
|
||||
GPUCardsUsed: hrov.GpuTotal - hrov.GpuAvail,
|
||||
CPUCoresUsed: hrov.CpuTotal - hrov.CpuAvail,
|
||||
RAMUsed: hrov.MemTotal - hrov.MemAvail,
|
||||
GPURate: (hrov.GpuTotal - hrov.GpuAvail) / hrov.GpuTotal,
|
||||
CPURate: (hrov.CpuTotal - hrov.CpuAvail) / hrov.CpuTotal,
|
||||
RAMRate: (hrov.MemTotal - hrov.MemAvail) / hrov.MemTotal,
|
||||
}
|
||||
|
||||
resp = &types.HpcResourceResp{
|
||||
|
|
|
@ -937,15 +937,15 @@ type HpcResourceResp struct {
|
|||
}
|
||||
|
||||
type HPCResource struct {
|
||||
GPUCardsTotal int32 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal int32 `json:"cpuCoresTotal"`
|
||||
RAMTotal int32 `json:"ramTotal"`
|
||||
GPUCardsUsed int32 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed int32 `json:"cpuCoresUsed"`
|
||||
RAMUsed int32 `json:"ramUsed"`
|
||||
GPURate float32 `json:"gpuRate"`
|
||||
CPURate float32 `json:"cpuRate"`
|
||||
RAMRate float32 `json:"ramRate"`
|
||||
GPUCardsTotal float64 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal float64 `json:"cpuCoresTotal"`
|
||||
RAMTotal float64 `json:"ramTotal"`
|
||||
GPUCardsUsed float64 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed float64 `json:"cpuCoresUsed"`
|
||||
RAMUsed float64 `json:"ramUsed"`
|
||||
GPURate float64 `json:"gpuRate"`
|
||||
CPURate float64 `json:"cpuRate"`
|
||||
RAMRate float64 `json:"ramRate"`
|
||||
}
|
||||
|
||||
type QueueAssetsResp struct {
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
package models
|
||||
|
||||
import "github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
|
||||
// Compile-time check that *customTClusterResourceModel satisfies TClusterResourceModel.
var _ TClusterResourceModel = (*customTClusterResourceModel)(nil)
|
||||
|
||||
type (
|
||||
// TClusterResourceModel is an interface to be customized, add more methods here,
|
||||
// and implement the added methods in customTClusterResourceModel.
|
||||
TClusterResourceModel interface {
|
||||
tClusterResourceModel
|
||||
withSession(session sqlx.Session) TClusterResourceModel
|
||||
}
|
||||
|
||||
customTClusterResourceModel struct {
|
||||
*defaultTClusterResourceModel
|
||||
}
|
||||
)
|
||||
|
||||
// NewTClusterResourceModel returns a model for the database table.
|
||||
func NewTClusterResourceModel(conn sqlx.SqlConn) TClusterResourceModel {
|
||||
return &customTClusterResourceModel{
|
||||
defaultTClusterResourceModel: newTClusterResourceModel(conn),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *customTClusterResourceModel) withSession(session sqlx.Session) TClusterResourceModel {
|
||||
return NewTClusterResourceModel(sqlx.NewSqlConnFromSession(session))
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
// Code generated by goctl. DO NOT EDIT.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/stores/builder"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlc"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
"github.com/zeromicro/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
var (
|
||||
tClusterResourceFieldNames = builder.RawFieldNames(&TClusterResource{})
|
||||
tClusterResourceRows = strings.Join(tClusterResourceFieldNames, ",")
|
||||
tClusterResourceRowsExpectAutoSet = strings.Join(stringx.Remove(tClusterResourceFieldNames, "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
|
||||
tClusterResourceRowsWithPlaceHolder = strings.Join(stringx.Remove(tClusterResourceFieldNames, "`cluster_id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
|
||||
)
|
||||
|
||||
type (
|
||||
tClusterResourceModel interface {
|
||||
Insert(ctx context.Context, data *TClusterResource) (sql.Result, error)
|
||||
FindOne(ctx context.Context, clusterId int64) (*TClusterResource, error)
|
||||
Update(ctx context.Context, data *TClusterResource) error
|
||||
Delete(ctx context.Context, clusterId int64) error
|
||||
}
|
||||
|
||||
defaultTClusterResourceModel struct {
|
||||
conn sqlx.SqlConn
|
||||
table string
|
||||
}
|
||||
|
||||
TClusterResource struct {
|
||||
ClusterId int64 `db:"cluster_id"`
|
||||
ClusterName string `db:"cluster_name"`
|
||||
ClusterType int64 `db:"cluster_type"` // 类型0->容器,1->智算,2->超算,3-虚拟机
|
||||
CpuAvail float64 `db:"cpu_avail"`
|
||||
CpuTotal float64 `db:"cpu_total"`
|
||||
MemAvail float64 `db:"mem_avail"`
|
||||
MemTotal float64 `db:"mem_total"`
|
||||
DiskAvail float64 `db:"disk_avail"`
|
||||
DiskTotal float64 `db:"disk_total"`
|
||||
GpuAvail float64 `db:"gpu_avail"`
|
||||
GpuTotal float64 `db:"gpu_total"`
|
||||
}
|
||||
)
|
||||
|
||||
func newTClusterResourceModel(conn sqlx.SqlConn) *defaultTClusterResourceModel {
|
||||
return &defaultTClusterResourceModel{
|
||||
conn: conn,
|
||||
table: "`t_cluster_resource`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Delete(ctx context.Context, clusterId int64) error {
|
||||
query := fmt.Sprintf("delete from %s where `cluster_id` = ?", m.table)
|
||||
_, err := m.conn.ExecCtx(ctx, query, clusterId)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) FindOne(ctx context.Context, clusterId int64) (*TClusterResource, error) {
|
||||
query := fmt.Sprintf("select %s from %s where `cluster_id` = ? limit 1", tClusterResourceRows, m.table)
|
||||
var resp TClusterResource
|
||||
err := m.conn.QueryRowCtx(ctx, &resp, query, clusterId)
|
||||
switch err {
|
||||
case nil:
|
||||
return &resp, nil
|
||||
case sqlc.ErrNotFound:
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) {
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error {
|
||||
query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.ClusterId)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) tableName() string {
|
||||
return m.table
|
||||
}
|
Loading…
Reference in New Issue