96 lines
2.8 KiB
Go
96 lines
2.8 KiB
Go
package task
|
||
|
||
import (
|
||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||
schsdk "gitlink.org.cn/cloudream/common/sdks/scheduler"
|
||
exectsk "gitlink.org.cn/cloudream/scheduler/common/pkgs/mq/executor/task"
|
||
"gitlink.org.cn/cloudream/scheduler/common/utils"
|
||
"gitlink.org.cn/cloudream/scheduler/executor/internal/config"
|
||
"gitlink.org.cn/cloudream/scheduler/executor/internal/task/create_ecs"
|
||
)
|
||
|
||
type SchedulerModelFinetuning struct {
|
||
*exectsk.SchedulerModelFinetuning
|
||
}
|
||
|
||
func NewSchedulerModelFinetuning(info *exectsk.SchedulerModelFinetuning) *SchedulerModelFinetuning {
|
||
return &SchedulerModelFinetuning{info}
|
||
}
|
||
|
||
func (t *SchedulerModelFinetuning) Execute(task *Task, ctx TaskContext) {
|
||
log := logger.WithType[SchedulerModelFinetuning]("Task")
|
||
log.Debugf("begin")
|
||
defer log.Debugf("end")
|
||
|
||
err := t.do(task, ctx)
|
||
if err != nil {
|
||
log.Error(err)
|
||
return
|
||
}
|
||
|
||
log.Info("ScheduleCreateECS...")
|
||
}
|
||
|
||
func (t *SchedulerModelFinetuning) do(task *Task, ctx TaskContext) error {
|
||
|
||
// t.Envs添加新值
|
||
t.Envs = append(t.Envs, schsdk.KVPair{Key: "MountDir", Value: schsdk.MountDir})
|
||
// 设置环境变量
|
||
commands := utils.ConvertEnvsToCommand(t.Envs)
|
||
|
||
arr := utils.SplitCommands(t.CMD)
|
||
commands = append(commands, arr...)
|
||
|
||
factory := create_ecs.GetFactory(config.CloudName)
|
||
provider := factory.CreateProvider()
|
||
|
||
instanceID := t.InstanceID
|
||
// 如果没有指定实例ID,则创建一个(即预处理服务器与微调服务器分块)
|
||
if t.InstanceID == "" {
|
||
// 创建服务器
|
||
instID, ecsIP, err := provider.CreateServer()
|
||
if err != nil {
|
||
task.SendStatus(exectsk.NewSchedulerModelFinetuningStatus(err))
|
||
return err
|
||
}
|
||
instanceID = instID
|
||
logger.Info("create ECS success, instance id: " + instanceID + ", ip: " + ecsIP)
|
||
|
||
if t.ObjectStorage.MountType == schsdk.RcloneMount {
|
||
// 获取Rclone挂载命令
|
||
mountCommands := utils.GetRcloneCommands(t.ObjectStorage, t.UserID, schsdk.MountDir)
|
||
commands = append(commands, mountCommands...)
|
||
}
|
||
}
|
||
|
||
mountCommands := utils.GetRcloneCommands(t.ObjectStorage, t.UserID, schsdk.MountDir)
|
||
commands = append(commands, mountCommands...)
|
||
// 获取微调脚本执行命令
|
||
startScript := t.ModelResource.FinetuningShellPath
|
||
if t.ObjectStorage.MountType == schsdk.RcloneMount {
|
||
startScript = schsdk.MountDir + "/" + t.ModelResource.FinetuningShellPath
|
||
}
|
||
// 获取启动命令
|
||
commands = append(commands, utils.HandleCommand(startScript))
|
||
|
||
// 执行微调任务
|
||
_, err := provider.RunCommand(commands, instanceID, 2000)
|
||
// 执行结束后销毁服务器
|
||
_, err2 := provider.DeleteInstance(instanceID)
|
||
if err2 != nil {
|
||
task.SendStatus(exectsk.NewSchedulerModelFinetuningStatus(err))
|
||
return err2
|
||
}
|
||
if err != nil {
|
||
task.SendStatus(exectsk.NewSchedulerModelFinetuningStatus(err))
|
||
return err
|
||
}
|
||
|
||
task.SendStatus(exectsk.NewSchedulerModelFinetuningStatus(nil))
|
||
return nil
|
||
}
|
||
|
||
func init() {
|
||
Register(NewSchedulerModelFinetuning)
|
||
}
|