960 lines
30 KiB
Go
960 lines
30 KiB
Go
package ticktock
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"math"
|
||
"math/rand"
|
||
"sync"
|
||
|
||
"github.com/samber/lo"
|
||
"gitlink.org.cn/cloudream/common/pkgs/bitmap"
|
||
"gitlink.org.cn/cloudream/common/pkgs/ioswitch/exec"
|
||
"gitlink.org.cn/cloudream/common/pkgs/logger"
|
||
"gitlink.org.cn/cloudream/common/utils/lo2"
|
||
"gitlink.org.cn/cloudream/common/utils/math2"
|
||
"gitlink.org.cn/cloudream/common/utils/sort2"
|
||
"gitlink.org.cn/cloudream/jcs-pub/client/internal/db"
|
||
clitypes "gitlink.org.cn/cloudream/jcs-pub/client/types"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/consts"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/models/datamap"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/pkgs/distlock"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/pkgs/distlock/reqbuilder"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2/ops2"
|
||
"gitlink.org.cn/cloudream/jcs-pub/common/pkgs/ioswitch2/parser"
|
||
)
|
||
|
||
func (t *ChangeRedundancy) doRedundancyShrink(execCtx *changeRedundancyContext, pkg clitypes.PackageDetail, objs []clitypes.ObjectDetail, reen *distlock.Reentrant) ([]db.UpdatingObjectRedundancy, []datamap.SysEventBody, error) {
|
||
log := logger.WithType[ChangeRedundancy]("TickTock")
|
||
|
||
var readerStgIDs []clitypes.UserSpaceID
|
||
for _, space := range execCtx.allUserSpaces {
|
||
// TODO 可以考虑做成配置
|
||
if space.AccessAmount >= float64(pkg.ObjectCount/2) {
|
||
readerStgIDs = append(readerStgIDs, space.UserSpace.UserSpace.UserSpaceID)
|
||
}
|
||
}
|
||
|
||
// 只对ec和rep对象进行处理
|
||
var ecObjects []clitypes.ObjectDetail
|
||
var repObjects []clitypes.ObjectDetail
|
||
for _, obj := range objs {
|
||
if _, ok := obj.Object.Redundancy.(*clitypes.ECRedundancy); ok {
|
||
ecObjects = append(ecObjects, obj)
|
||
} else if _, ok := obj.Object.Redundancy.(*clitypes.RepRedundancy); ok {
|
||
repObjects = append(repObjects, obj)
|
||
}
|
||
}
|
||
|
||
planBld := exec.NewPlanBuilder()
|
||
planningStgIDs := make(map[clitypes.UserSpaceID]bool)
|
||
|
||
var sysEvents []datamap.SysEventBody
|
||
|
||
// 对于rep对象,统计出所有对象块分布最多的两个节点,用这两个节点代表所有rep对象块的分布,去进行退火算法
|
||
var repObjectsUpdating []db.UpdatingObjectRedundancy
|
||
repMostHubIDs := t.summaryRepObjectBlockNodes(repObjects)
|
||
solu := t.startAnnealing(execCtx, readerStgIDs, annealingObject{
|
||
totalBlockCount: 1,
|
||
minBlockCnt: 1,
|
||
pinnedAt: repMostHubIDs,
|
||
blocks: nil,
|
||
})
|
||
for _, obj := range repObjects {
|
||
repObjectsUpdating = append(repObjectsUpdating, t.makePlansForRepObject(execCtx, solu, obj, planBld, planningStgIDs))
|
||
sysEvents = append(sysEvents, t.generateSysEventForRepObject(solu, obj)...)
|
||
}
|
||
|
||
// 对于ec对象,则每个对象单独进行退火算法
|
||
var ecObjectsUpdating []db.UpdatingObjectRedundancy
|
||
for _, obj := range ecObjects {
|
||
ecRed := obj.Object.Redundancy.(*clitypes.ECRedundancy)
|
||
solu := t.startAnnealing(execCtx, readerStgIDs, annealingObject{
|
||
totalBlockCount: ecRed.N,
|
||
minBlockCnt: ecRed.K,
|
||
pinnedAt: obj.PinnedAt,
|
||
blocks: obj.Blocks,
|
||
})
|
||
ecObjectsUpdating = append(ecObjectsUpdating, t.makePlansForECObject(execCtx, solu, obj, planBld, planningStgIDs))
|
||
sysEvents = append(sysEvents, t.generateSysEventForECObject(solu, obj)...)
|
||
}
|
||
|
||
ioSwRets, err := t.executePlans(execCtx, planBld, planningStgIDs, reen)
|
||
if err != nil {
|
||
log.Warn(err.Error())
|
||
return nil, nil, fmt.Errorf("execute plans: %w", err)
|
||
}
|
||
|
||
// 根据按照方案进行调整的结果,填充更新元数据的命令
|
||
for i := range ecObjectsUpdating {
|
||
t.populateECObjectEntry(&ecObjectsUpdating[i], ecObjects[i], ioSwRets)
|
||
}
|
||
|
||
return append(repObjectsUpdating, ecObjectsUpdating...), sysEvents, nil
|
||
}
|
||
|
||
func (t *ChangeRedundancy) summaryRepObjectBlockNodes(objs []clitypes.ObjectDetail) []clitypes.UserSpaceID {
|
||
type stgBlocks struct {
|
||
UserSpaceID clitypes.UserSpaceID
|
||
Count int
|
||
}
|
||
|
||
stgBlocksMap := make(map[clitypes.UserSpaceID]*stgBlocks)
|
||
for _, obj := range objs {
|
||
cacheBlockStgs := make(map[clitypes.UserSpaceID]bool)
|
||
for _, block := range obj.Blocks {
|
||
if _, ok := stgBlocksMap[block.UserSpaceID]; !ok {
|
||
stgBlocksMap[block.UserSpaceID] = &stgBlocks{
|
||
UserSpaceID: block.UserSpaceID,
|
||
Count: 0,
|
||
}
|
||
}
|
||
stgBlocksMap[block.UserSpaceID].Count++
|
||
cacheBlockStgs[block.UserSpaceID] = true
|
||
}
|
||
|
||
for _, hubID := range obj.PinnedAt {
|
||
if cacheBlockStgs[hubID] {
|
||
continue
|
||
}
|
||
|
||
if _, ok := stgBlocksMap[hubID]; !ok {
|
||
stgBlocksMap[hubID] = &stgBlocks{
|
||
UserSpaceID: hubID,
|
||
Count: 0,
|
||
}
|
||
}
|
||
stgBlocksMap[hubID].Count++
|
||
}
|
||
}
|
||
|
||
stgs := lo.Values(stgBlocksMap)
|
||
sort2.Sort(stgs, func(left *stgBlocks, right *stgBlocks) int {
|
||
return right.Count - left.Count
|
||
})
|
||
|
||
// 只选出块数超过一半的节点,但要保证至少有两个节点
|
||
for i := 2; i < len(stgs); i++ {
|
||
if stgs[i].Count < len(objs)/2 {
|
||
stgs = stgs[:i]
|
||
break
|
||
}
|
||
}
|
||
|
||
return lo.Map(stgs, func(item *stgBlocks, idx int) clitypes.UserSpaceID { return item.UserSpaceID })
|
||
}
|
||
|
||
// annealingState carries everything one simulated-annealing run needs: the
// inputs, the lookup structures derived from them, the best plan found so far
// and the metrics of the most recently scored plan.
type annealingState struct {
	ctx *changeRedundancyContext
	// User spaces that are likely to access this object in the near future.
	readerStgIDs []clitypes.UserSpaceID
	// For every potential reader, the user spaces holding data sorted by their distance to it.
	stgsSortedByReader map[clitypes.UserSpaceID][]stgDist
	// The object being annealed.
	object annealingObject
	// Block distribution after sorting.
	blockList []objectBlock
	// Per user space, a bitmap of the blocks it holds.
	stgBlockBitmaps map[clitypes.UserSpaceID]*bitmap.Bitmap64
	// Combination tree of user spaces, used to speed up the disaster-tolerance computation.
	stgCombTree combinatorialTree

	// Highest score reached during the search.
	maxScore float64
	// Removal plan that produced the highest score.
	maxScoreRmBlocks []bool

	// Current removal plan.
	rmBlocks []bool
	// Index of the flag that was flipped in the previous plan to obtain the current one.
	inversedIndex int
	// Disaster tolerance of the most recently scored plan.
	lastDisasterTolerance float64
	// Redundancy (space cost) of the most recently scored plan.
	lastSpaceCost float64
	// Minimum access cost of the most recently scored plan.
	lastMinAccessCost float64
	// Score of the last accepted plan.
	lastScore float64
}
|
||
|
||
// objectBlock describes one block of the annealed object on one user space,
// either physically stored there or implied by a full copy of the object.
type objectBlock struct {
	Index       int
	UserSpaceID clitypes.UserSpaceID
	// The user space stores the actual data block.
	HasEntity bool
	// The user space holds the complete object and is therefore considered to
	// own every block; such implied blocks are called shadow blocks.
	HasShadow bool
	// Only set when the user space stores the actual data block.
	FileHash clitypes.FileHash
	// Size of the block.
	Size int64
}
|
||
|
||
// stgDist pairs a user space with its distance to some reader.
type stgDist struct {
	UserSpaceID clitypes.UserSpaceID
	Distance    float64
}
|
||
|
||
// combinatorialTree holds, in one flat slice laid out in depth-first order, a
// node for every combination of user spaces together with the union of the
// blocks held by that combination. User spaces are addressed through dense
// local IDs starting at 0.
type combinatorialTree struct {
	// Tree nodes; index 0 is the root, which represents the empty combination.
	nodes []combinatorialTreeNode
	// Block bitmap of each user space, keyed by local ID.
	blocksMaps map[int]bitmap.Bitmap64
	// Mapping from user space ID to local ID.
	stgIDToLocalStgID map[clitypes.UserSpaceID]int
	// Mapping from local ID back to user space ID.
	localStgIDToStgID []clitypes.UserSpaceID
}
|
||
|
||
// annealingObject is the description of an object handed to startAnnealing.
type annealingObject struct {
	// Number of blocks the object consists of (EC N; 1 for Rep objects).
	totalBlockCount int
	// Number of distinct blocks needed to read the object (EC K; 1 for Rep objects).
	minBlockCnt int
	// User spaces holding a complete copy of the object.
	pinnedAt []clitypes.UserSpaceID
	// Blocks that are physically stored.
	blocks []clitypes.ObjectBlock
}
|
||
|
||
// Actions a tree-traversal callback can return to steer the iteration.
const (
	// iterActionNone continues the traversal normally, descending into the children.
	iterActionNone = 0
	// iterActionSkip skips the children of the current node.
	iterActionSkip = 1
	// iterActionBreak aborts the whole traversal.
	iterActionBreak = 2
)
|
||
|
||
func newCombinatorialTree(stgBlocksMaps map[clitypes.UserSpaceID]*bitmap.Bitmap64) combinatorialTree {
|
||
tree := combinatorialTree{
|
||
blocksMaps: make(map[int]bitmap.Bitmap64),
|
||
stgIDToLocalStgID: make(map[clitypes.UserSpaceID]int),
|
||
}
|
||
|
||
tree.nodes = make([]combinatorialTreeNode, (1 << len(stgBlocksMaps)))
|
||
for id, mp := range stgBlocksMaps {
|
||
tree.stgIDToLocalStgID[id] = len(tree.localStgIDToStgID)
|
||
tree.blocksMaps[len(tree.localStgIDToStgID)] = *mp
|
||
tree.localStgIDToStgID = append(tree.localStgIDToStgID, id)
|
||
}
|
||
|
||
tree.nodes[0].localHubID = -1
|
||
index := 1
|
||
tree.initNode(0, &tree.nodes[0], &index)
|
||
|
||
return tree
|
||
}
|
||
|
||
func (t *combinatorialTree) initNode(minAvaiLocalHubID int, parent *combinatorialTreeNode, index *int) {
|
||
for i := minAvaiLocalHubID; i < len(t.stgIDToLocalStgID); i++ {
|
||
curIndex := *index
|
||
*index++
|
||
bitMp := t.blocksMaps[i]
|
||
bitMp.Or(&parent.blocksBitmap)
|
||
|
||
t.nodes[curIndex] = combinatorialTreeNode{
|
||
localHubID: i,
|
||
parent: parent,
|
||
blocksBitmap: bitMp,
|
||
}
|
||
t.initNode(i+1, &t.nodes[curIndex], index)
|
||
}
|
||
}
|
||
|
||
// 获得索引指定的节点所在的层
|
||
func (t *combinatorialTree) GetDepth(index int) int {
|
||
depth := 0
|
||
|
||
// 反复判断节点在哪个子树。从左到右,子树节点的数量呈现8 4 2的变化,由此可以得到每个子树的索引值的范围
|
||
subTreeCount := 1 << len(t.stgIDToLocalStgID)
|
||
for index > 0 {
|
||
if index < subTreeCount {
|
||
// 定位到一个子树后,深度+1,然后进入这个子树,使用同样的方法再进行定位。
|
||
// 进入子树后需要将索引值-1,因为要去掉子树的根节点
|
||
index--
|
||
depth++
|
||
} else {
|
||
// 如果索引值不在这个子树范围内,则将值减去子树的节点数量,
|
||
// 这样每一次都可以视为使用同样的逻辑对不同大小的树进行判断。
|
||
index -= subTreeCount
|
||
}
|
||
subTreeCount >>= 1
|
||
}
|
||
|
||
return depth
|
||
}
|
||
|
||
// 更新某一个算力中心节点的块分布位图,同时更新它对应组合树节点的所有子节点。
|
||
// 如果更新到某个节点时,已有K个块,那么就不会再更新它的子节点
|
||
func (t *combinatorialTree) UpdateBitmap(stgID clitypes.UserSpaceID, mp bitmap.Bitmap64, k int) {
|
||
t.blocksMaps[t.stgIDToLocalStgID[stgID]] = mp
|
||
// 首先定义两种遍历树节点时的移动方式:
|
||
// 1. 竖直移动(深度增加):从一个节点移动到它最左边的子节点。每移动一步,index+1
|
||
// 2. 水平移动:从一个节点移动到它右边的兄弟节点。每移动一步,根据它所在的深度,index+8,+4,+2
|
||
// LocalID从0开始,将其+1后得到移动步数steps。
|
||
// 将移动步数拆成多部分,分配到上述的两种移动方式上,并进行任意组合,且保证第一次为至少进行一次的竖直移动,移动之后的节点都会是同一个计算中心节点。
|
||
steps := t.stgIDToLocalStgID[stgID] + 1
|
||
for d := 1; d <= steps; d++ {
|
||
t.iterCombBits(len(t.stgIDToLocalStgID)-1, steps-d, 0, func(i int) {
|
||
index := d + i
|
||
node := &t.nodes[index]
|
||
|
||
newMp := t.blocksMaps[node.localHubID]
|
||
newMp.Or(&node.parent.blocksBitmap)
|
||
node.blocksBitmap = newMp
|
||
if newMp.Weight() >= k {
|
||
return
|
||
}
|
||
|
||
t.iterChildren(index, func(index, parentIndex, depth int) int {
|
||
curNode := &t.nodes[index]
|
||
parentNode := t.nodes[parentIndex]
|
||
|
||
newMp := t.blocksMaps[curNode.localHubID]
|
||
newMp.Or(&parentNode.blocksBitmap)
|
||
curNode.blocksBitmap = newMp
|
||
if newMp.Weight() >= k {
|
||
return iterActionSkip
|
||
}
|
||
|
||
return iterActionNone
|
||
})
|
||
})
|
||
}
|
||
}
|
||
|
||
// 遍历树,找到至少拥有K个块的树节点的最大深度
|
||
func (t *combinatorialTree) FindKBlocksMaxDepth(k int) int {
|
||
maxDepth := -1
|
||
t.iterChildren(0, func(index, parentIndex, depth int) int {
|
||
if t.nodes[index].blocksBitmap.Weight() >= k {
|
||
if maxDepth < depth {
|
||
maxDepth = depth
|
||
}
|
||
return iterActionSkip
|
||
}
|
||
// 如果到了叶子节点,还没有找到K个块,那就认为要满足K个块,至少需要再多一个节点,即深度+1。
|
||
// 由于遍历时采用的是深度优先的算法,因此遍历到这个叶子节点时,叶子节点再加一个节点的组合已经在前面搜索过,
|
||
// 所以用当前叶子节点深度+1来作为当前分支的结果就可以,即使当前情况下增加任意一个节点依然不够K块,
|
||
// 可以使用同样的思路去递推到当前叶子节点增加两个块的情况。
|
||
if t.nodes[index].localHubID == len(t.stgIDToLocalStgID)-1 {
|
||
if maxDepth < depth+1 {
|
||
maxDepth = depth + 1
|
||
}
|
||
}
|
||
|
||
return iterActionNone
|
||
})
|
||
|
||
if maxDepth == -1 || maxDepth > len(t.stgIDToLocalStgID) {
|
||
return len(t.stgIDToLocalStgID)
|
||
}
|
||
|
||
return maxDepth
|
||
}
|
||
|
||
func (t *combinatorialTree) iterCombBits(width int, count int, offset int, callback func(int)) {
|
||
if count == 0 {
|
||
callback(offset)
|
||
return
|
||
}
|
||
|
||
for b := width; b >= count; b-- {
|
||
t.iterCombBits(b-1, count-1, offset+(1<<b), callback)
|
||
}
|
||
}
|
||
|
||
func (t *combinatorialTree) iterChildren(index int, do func(index int, parentIndex int, depth int) int) {
|
||
curNode := &t.nodes[index]
|
||
childIndex := index + 1
|
||
curDepth := t.GetDepth(index)
|
||
|
||
childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
|
||
if childCounts == 0 {
|
||
return
|
||
}
|
||
|
||
childTreeNodeCnt := 1 << (childCounts - 1)
|
||
for c := 0; c < childCounts; c++ {
|
||
act := t.itering(childIndex, index, curDepth+1, do)
|
||
if act == iterActionBreak {
|
||
return
|
||
}
|
||
|
||
childIndex += childTreeNodeCnt
|
||
childTreeNodeCnt >>= 1
|
||
}
|
||
}
|
||
|
||
func (t *combinatorialTree) itering(index int, parentIndex int, depth int, do func(index int, parentIndex int, depth int) int) int {
|
||
act := do(index, parentIndex, depth)
|
||
if act == iterActionBreak {
|
||
return act
|
||
}
|
||
if act == iterActionSkip {
|
||
return iterActionNone
|
||
}
|
||
|
||
curNode := &t.nodes[index]
|
||
childIndex := index + 1
|
||
|
||
childCounts := len(t.stgIDToLocalStgID) - 1 - curNode.localHubID
|
||
if childCounts == 0 {
|
||
return iterActionNone
|
||
}
|
||
|
||
childTreeNodeCnt := 1 << (childCounts - 1)
|
||
for c := 0; c < childCounts; c++ {
|
||
act = t.itering(childIndex, index, depth+1, do)
|
||
if act == iterActionBreak {
|
||
return act
|
||
}
|
||
|
||
childIndex += childTreeNodeCnt
|
||
childTreeNodeCnt >>= 1
|
||
}
|
||
|
||
return iterActionNone
|
||
}
|
||
|
||
// combinatorialTreeNode is one node of the combinatorialTree: the combination
// formed by the user space localHubID together with all of its ancestors.
type combinatorialTreeNode struct {
	// Local ID of the user space added by this node (-1 for the root).
	localHubID int
	parent     *combinatorialTreeNode
	// Blocks held by the whole combination once this user space is included.
	blocksBitmap bitmap.Bitmap64
}
|
||
|
||
// annealingSolution is the result of one annealing run.
type annealingSolution struct {
	// Block distribution over all user spaces.
	blockList []objectBlock
	// Which blocks to remove; parallel to blockList.
	rmBlocks []bool
	// Disaster tolerance of this plan.
	disasterTolerance float64
	// Redundancy (space cost) of this plan.
	spaceCost float64
	// Minimum access cost of this plan.
	minAccessCost float64
}
|
||
|
||
func (t *ChangeRedundancy) startAnnealing(ctx *changeRedundancyContext, readerStgIDs []clitypes.UserSpaceID, object annealingObject) annealingSolution {
|
||
state := &annealingState{
|
||
ctx: ctx,
|
||
readerStgIDs: readerStgIDs,
|
||
stgsSortedByReader: make(map[clitypes.UserSpaceID][]stgDist),
|
||
object: object,
|
||
stgBlockBitmaps: make(map[clitypes.UserSpaceID]*bitmap.Bitmap64),
|
||
}
|
||
|
||
t.initBlockList(state)
|
||
if state.blockList == nil {
|
||
return annealingSolution{}
|
||
}
|
||
|
||
t.initNodeBlockBitmap(state)
|
||
|
||
t.sortNodeByReaderDistance(state)
|
||
|
||
state.rmBlocks = make([]bool, len(state.blockList))
|
||
state.inversedIndex = -1
|
||
state.stgCombTree = newCombinatorialTree(state.stgBlockBitmaps)
|
||
|
||
state.lastScore = t.calcScore(state)
|
||
state.maxScore = state.lastScore
|
||
state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
|
||
|
||
// 模拟退火算法的温度
|
||
curTemp := state.lastScore
|
||
// 结束温度
|
||
finalTemp := curTemp * 0.2
|
||
// 冷却率
|
||
coolingRate := 0.95
|
||
|
||
for curTemp > finalTemp {
|
||
state.inversedIndex = rand.Intn(len(state.rmBlocks))
|
||
block := state.blockList[state.inversedIndex]
|
||
state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
|
||
state.stgBlockBitmaps[block.UserSpaceID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
|
||
state.stgCombTree.UpdateBitmap(block.UserSpaceID, *state.stgBlockBitmaps[block.UserSpaceID], state.object.minBlockCnt)
|
||
|
||
curScore := t.calcScore(state)
|
||
|
||
dScore := curScore - state.lastScore
|
||
// 如果新方案比旧方案得分低,且没有要求强制接受新方案,那么就将变化改回去
|
||
if curScore == 0 || (dScore < 0 && !t.alwaysAccept(curTemp, dScore, coolingRate)) {
|
||
state.rmBlocks[state.inversedIndex] = !state.rmBlocks[state.inversedIndex]
|
||
state.stgBlockBitmaps[block.UserSpaceID].Set(block.Index, !state.rmBlocks[state.inversedIndex])
|
||
state.stgCombTree.UpdateBitmap(block.UserSpaceID, *state.stgBlockBitmaps[block.UserSpaceID], state.object.minBlockCnt)
|
||
// fmt.Printf("\n")
|
||
} else {
|
||
// fmt.Printf(" accept!\n")
|
||
state.lastScore = curScore
|
||
if state.maxScore < curScore {
|
||
state.maxScore = state.lastScore
|
||
state.maxScoreRmBlocks = lo2.ArrayClone(state.rmBlocks)
|
||
}
|
||
}
|
||
curTemp *= coolingRate
|
||
}
|
||
// fmt.Printf("final: %v\n", state.maxScoreRmBlocks)
|
||
return annealingSolution{
|
||
blockList: state.blockList,
|
||
rmBlocks: state.maxScoreRmBlocks,
|
||
disasterTolerance: state.lastDisasterTolerance,
|
||
spaceCost: state.lastSpaceCost,
|
||
minAccessCost: state.lastMinAccessCost,
|
||
}
|
||
}
|
||
|
||
func (t *ChangeRedundancy) initBlockList(ctx *annealingState) {
|
||
blocksMap := make(map[clitypes.UserSpaceID][]objectBlock)
|
||
|
||
// 先生成所有的影子块
|
||
for _, pinned := range ctx.object.pinnedAt {
|
||
blocks := make([]objectBlock, 0, ctx.object.totalBlockCount)
|
||
for i := 0; i < ctx.object.totalBlockCount; i++ {
|
||
blocks = append(blocks, objectBlock{
|
||
Index: i,
|
||
UserSpaceID: pinned,
|
||
HasShadow: true,
|
||
})
|
||
}
|
||
blocksMap[pinned] = blocks
|
||
}
|
||
|
||
// 再填充实际块
|
||
for _, b := range ctx.object.blocks {
|
||
blocks := blocksMap[b.UserSpaceID]
|
||
|
||
has := false
|
||
for i := range blocks {
|
||
if blocks[i].Index == b.Index {
|
||
blocks[i].HasEntity = true
|
||
blocks[i].FileHash = b.FileHash
|
||
has = true
|
||
break
|
||
}
|
||
}
|
||
|
||
if has {
|
||
continue
|
||
}
|
||
|
||
blocks = append(blocks, objectBlock{
|
||
Index: b.Index,
|
||
UserSpaceID: b.UserSpaceID,
|
||
HasEntity: true,
|
||
FileHash: b.FileHash,
|
||
Size: b.Size,
|
||
})
|
||
blocksMap[b.UserSpaceID] = blocks
|
||
}
|
||
|
||
var sortedBlocks []objectBlock
|
||
for _, bs := range blocksMap {
|
||
sortedBlocks = append(sortedBlocks, bs...)
|
||
}
|
||
sortedBlocks = sort2.Sort(sortedBlocks, func(left objectBlock, right objectBlock) int {
|
||
d := left.UserSpaceID - right.UserSpaceID
|
||
if d != 0 {
|
||
return int(d)
|
||
}
|
||
|
||
return left.Index - right.Index
|
||
})
|
||
|
||
ctx.blockList = sortedBlocks
|
||
}
|
||
|
||
func (t *ChangeRedundancy) initNodeBlockBitmap(state *annealingState) {
|
||
for _, b := range state.blockList {
|
||
mp, ok := state.stgBlockBitmaps[b.UserSpaceID]
|
||
if !ok {
|
||
nb := bitmap.Bitmap64(0)
|
||
mp = &nb
|
||
state.stgBlockBitmaps[b.UserSpaceID] = mp
|
||
}
|
||
mp.Set(b.Index, true)
|
||
}
|
||
}
|
||
|
||
func (t *ChangeRedundancy) sortNodeByReaderDistance(state *annealingState) {
|
||
for _, r := range state.readerStgIDs {
|
||
var nodeDists []stgDist
|
||
|
||
for n := range state.stgBlockBitmaps {
|
||
if r == n {
|
||
// 同节点时距离视为0.1
|
||
nodeDists = append(nodeDists, stgDist{
|
||
UserSpaceID: n,
|
||
Distance: consts.StorageDistanceSameStorage,
|
||
})
|
||
} else if state.ctx.allUserSpaces[r].UserSpace.MasterHub.LocationID == state.ctx.allUserSpaces[n].UserSpace.MasterHub.LocationID {
|
||
// 同地区时距离视为1
|
||
nodeDists = append(nodeDists, stgDist{
|
||
UserSpaceID: n,
|
||
Distance: consts.StorageDistanceSameLocation,
|
||
})
|
||
} else {
|
||
// 不同地区时距离视为5
|
||
nodeDists = append(nodeDists, stgDist{
|
||
UserSpaceID: n,
|
||
Distance: consts.StorageDistanceOther,
|
||
})
|
||
}
|
||
}
|
||
|
||
state.stgsSortedByReader[r] = sort2.Sort(nodeDists, func(left, right stgDist) int { return sort2.Cmp(left.Distance, right.Distance) })
|
||
}
|
||
}
|
||
|
||
func (t *ChangeRedundancy) calcScore(state *annealingState) float64 {
|
||
dt := t.calcDisasterTolerance(state)
|
||
ac := t.calcMinAccessCost(state)
|
||
sc := t.calcSpaceCost(state)
|
||
|
||
state.lastDisasterTolerance = dt
|
||
state.lastMinAccessCost = ac
|
||
state.lastSpaceCost = sc
|
||
|
||
dtSc := 1.0
|
||
if dt < 1 {
|
||
dtSc = 0
|
||
} else if dt >= 2 {
|
||
dtSc = 1.5
|
||
}
|
||
|
||
newSc := 0.0
|
||
if dt == 0 || ac == 0 {
|
||
newSc = 0
|
||
} else {
|
||
newSc = dtSc / (sc * ac)
|
||
}
|
||
|
||
// fmt.Printf("solu: %v, cur: %v, dt: %v, ac: %v, sc: %v \n", state.rmBlocks, newSc, dt, ac, sc)
|
||
return newSc
|
||
}
|
||
|
||
// 计算容灾度
|
||
func (t *ChangeRedundancy) calcDisasterTolerance(state *annealingState) float64 {
|
||
if state.inversedIndex != -1 {
|
||
node := state.blockList[state.inversedIndex]
|
||
state.stgCombTree.UpdateBitmap(node.UserSpaceID, *state.stgBlockBitmaps[node.UserSpaceID], state.object.minBlockCnt)
|
||
}
|
||
return float64(len(state.stgBlockBitmaps) - state.stgCombTree.FindKBlocksMaxDepth(state.object.minBlockCnt))
|
||
}
|
||
|
||
// 计算最小访问数据的代价
|
||
func (t *ChangeRedundancy) calcMinAccessCost(state *annealingState) float64 {
|
||
cost := math.MaxFloat64
|
||
for _, reader := range state.readerStgIDs {
|
||
tarNodes := state.stgsSortedByReader[reader]
|
||
gotBlocks := bitmap.Bitmap64(0)
|
||
thisCost := 0.0
|
||
|
||
for _, tar := range tarNodes {
|
||
tarNodeMp := state.stgBlockBitmaps[tar.UserSpaceID]
|
||
|
||
// 只需要从目的节点上获得缺少的块
|
||
curWeigth := gotBlocks.Weight()
|
||
// 下面的if会在拿到k个块之后跳出循环,所以or多了块也没关系
|
||
gotBlocks.Or(tarNodeMp)
|
||
// 但是算读取块的消耗时,不能多算,最多算读了k个块的消耗
|
||
willGetBlocks := math2.Min(gotBlocks.Weight()-curWeigth, state.object.minBlockCnt-curWeigth)
|
||
thisCost += float64(willGetBlocks) * float64(tar.Distance)
|
||
|
||
if gotBlocks.Weight() >= state.object.minBlockCnt {
|
||
break
|
||
}
|
||
}
|
||
if gotBlocks.Weight() >= state.object.minBlockCnt {
|
||
cost = math.Min(cost, thisCost)
|
||
}
|
||
}
|
||
|
||
return cost
|
||
}
|
||
|
||
// 计算冗余度
|
||
func (t *ChangeRedundancy) calcSpaceCost(ctx *annealingState) float64 {
|
||
blockCount := 0
|
||
for i, b := range ctx.blockList {
|
||
if ctx.rmBlocks[i] {
|
||
continue
|
||
}
|
||
|
||
if b.HasEntity {
|
||
blockCount++
|
||
}
|
||
if b.HasShadow {
|
||
blockCount++
|
||
}
|
||
}
|
||
// 所有算力中心上拥有的块的总数 / 一个对象被分成了几个块
|
||
return float64(blockCount) / float64(ctx.object.minBlockCnt)
|
||
}
|
||
|
||
// 如果新方案得分比旧方案小,那么在一定概率内也接受新方案
|
||
func (t *ChangeRedundancy) alwaysAccept(curTemp float64, dScore float64, coolingRate float64) bool {
|
||
v := math.Exp(dScore / curTemp / coolingRate)
|
||
// fmt.Printf(" -- chance: %v, temp: %v", v, curTemp)
|
||
return v > rand.Float64()
|
||
}
|
||
|
||
func (t *ChangeRedundancy) makePlansForRepObject(ctx *changeRedundancyContext, solu annealingSolution, obj clitypes.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[clitypes.UserSpaceID]bool) db.UpdatingObjectRedundancy {
|
||
entry := db.UpdatingObjectRedundancy{
|
||
ObjectID: obj.Object.ObjectID,
|
||
FileHash: obj.Object.FileHash,
|
||
Size: obj.Object.Size,
|
||
Redundancy: obj.Object.Redundancy,
|
||
}
|
||
|
||
ft := ioswitch2.NewFromTo()
|
||
|
||
fromStg := ctx.allUserSpaces[obj.Blocks[0].UserSpaceID].UserSpace
|
||
ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *fromStg.MasterHub, *fromStg, ioswitch2.RawStream()))
|
||
|
||
for i, f := range solu.rmBlocks {
|
||
hasCache := lo.ContainsBy(obj.Blocks, func(b clitypes.ObjectBlock) bool { return b.UserSpaceID == solu.blockList[i].UserSpaceID }) ||
|
||
lo.ContainsBy(obj.PinnedAt, func(n clitypes.UserSpaceID) bool { return n == solu.blockList[i].UserSpaceID })
|
||
willRm := f
|
||
|
||
if !willRm {
|
||
// 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
|
||
if !hasCache {
|
||
toStg := ctx.allUserSpaces[solu.blockList[i].UserSpaceID].UserSpace
|
||
ft.AddTo(ioswitch2.NewToShardStore(*toStg.MasterHub, *toStg, ioswitch2.RawStream(), fmt.Sprintf("%d.0", obj.Object.ObjectID)))
|
||
|
||
planningHubIDs[solu.blockList[i].UserSpaceID] = true
|
||
}
|
||
entry.Blocks = append(entry.Blocks, clitypes.ObjectBlock{
|
||
ObjectID: obj.Object.ObjectID,
|
||
Index: solu.blockList[i].Index,
|
||
UserSpaceID: solu.blockList[i].UserSpaceID,
|
||
FileHash: obj.Object.FileHash,
|
||
Size: solu.blockList[i].Size,
|
||
})
|
||
}
|
||
}
|
||
|
||
err := parser.Parse(ft, planBld)
|
||
if err != nil {
|
||
// TODO 错误处理
|
||
}
|
||
|
||
return entry
|
||
}
|
||
|
||
func (t *ChangeRedundancy) generateSysEventForRepObject(solu annealingSolution, obj clitypes.ObjectDetail) []datamap.SysEventBody {
|
||
var blockChgs []datamap.BlockChange
|
||
|
||
for i, f := range solu.rmBlocks {
|
||
hasCache := lo.ContainsBy(obj.Blocks, func(b clitypes.ObjectBlock) bool { return b.UserSpaceID == solu.blockList[i].UserSpaceID }) ||
|
||
lo.ContainsBy(obj.PinnedAt, func(n clitypes.UserSpaceID) bool { return n == solu.blockList[i].UserSpaceID })
|
||
willRm := f
|
||
|
||
if !willRm {
|
||
// 如果对象在退火后要保留副本的节点没有副本,则需要在这个节点创建副本
|
||
if !hasCache {
|
||
blockChgs = append(blockChgs, &datamap.BlockChangeClone{
|
||
BlockType: datamap.BlockTypeRaw,
|
||
SourceUserSpaceID: obj.Blocks[0].UserSpaceID,
|
||
TargetUserSpaceID: solu.blockList[i].UserSpaceID,
|
||
})
|
||
}
|
||
} else {
|
||
blockChgs = append(blockChgs, &datamap.BlockChangeDeleted{
|
||
Index: 0,
|
||
UserSpaceID: solu.blockList[i].UserSpaceID,
|
||
})
|
||
}
|
||
}
|
||
|
||
transEvt := &datamap.BodyBlockTransfer{
|
||
ObjectID: obj.Object.ObjectID,
|
||
PackageID: obj.Object.PackageID,
|
||
BlockChanges: blockChgs,
|
||
}
|
||
|
||
var blockDist []datamap.BlockDistributionObjectInfo
|
||
for i, f := range solu.rmBlocks {
|
||
if !f {
|
||
blockDist = append(blockDist, datamap.BlockDistributionObjectInfo{
|
||
BlockType: datamap.BlockTypeRaw,
|
||
Index: 0,
|
||
UserSpaceID: solu.blockList[i].UserSpaceID,
|
||
})
|
||
}
|
||
}
|
||
|
||
distEvt := &datamap.BodyBlockDistribution{
|
||
ObjectID: obj.Object.ObjectID,
|
||
PackageID: obj.Object.PackageID,
|
||
Path: obj.Object.Path,
|
||
Size: obj.Object.Size,
|
||
FileHash: obj.Object.FileHash,
|
||
FaultTolerance: solu.disasterTolerance,
|
||
Redundancy: solu.spaceCost,
|
||
AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
|
||
BlockDistribution: blockDist,
|
||
// TODO 不好计算传输量
|
||
}
|
||
|
||
return []datamap.SysEventBody{transEvt, distEvt}
|
||
}
|
||
|
||
func (t *ChangeRedundancy) makePlansForECObject(ctx *changeRedundancyContext, solu annealingSolution, obj clitypes.ObjectDetail, planBld *exec.PlanBuilder, planningHubIDs map[clitypes.UserSpaceID]bool) db.UpdatingObjectRedundancy {
|
||
entry := db.UpdatingObjectRedundancy{
|
||
ObjectID: obj.Object.ObjectID,
|
||
FileHash: obj.Object.FileHash,
|
||
Size: obj.Object.Size,
|
||
Redundancy: obj.Object.Redundancy,
|
||
}
|
||
|
||
reconstrct := make(map[clitypes.UserSpaceID]*[]int)
|
||
for i, f := range solu.rmBlocks {
|
||
block := solu.blockList[i]
|
||
if !f {
|
||
entry.Blocks = append(entry.Blocks, clitypes.ObjectBlock{
|
||
ObjectID: obj.Object.ObjectID,
|
||
Index: block.Index,
|
||
UserSpaceID: block.UserSpaceID,
|
||
FileHash: block.FileHash,
|
||
Size: block.Size,
|
||
})
|
||
|
||
// 如果这个块是影子块,那么就要从完整对象里重建这个块
|
||
if !block.HasEntity {
|
||
re, ok := reconstrct[block.UserSpaceID]
|
||
if !ok {
|
||
re = &[]int{}
|
||
reconstrct[block.UserSpaceID] = re
|
||
}
|
||
|
||
*re = append(*re, block.Index)
|
||
}
|
||
}
|
||
}
|
||
|
||
ecRed := obj.Object.Redundancy.(*clitypes.ECRedundancy)
|
||
|
||
for id, idxs := range reconstrct {
|
||
// 依次生成每个节点上的执行计划,因为如果放到一个计划里一起生成,不能保证每个节点上的块用的都是本节点上的副本
|
||
ft := ioswitch2.NewFromTo()
|
||
ft.ECParam = ecRed
|
||
ft.AddFrom(ioswitch2.NewFromShardstore(obj.Object.FileHash, *ctx.allUserSpaces[id].UserSpace.MasterHub, *ctx.allUserSpaces[id].UserSpace, ioswitch2.RawStream()))
|
||
|
||
for _, i := range *idxs {
|
||
ft.AddTo(ioswitch2.NewToShardStore(*ctx.allUserSpaces[id].UserSpace.MasterHub, *ctx.allUserSpaces[id].UserSpace, ioswitch2.ECStream(i), fmt.Sprintf("%d.%d", obj.Object.ObjectID, i)))
|
||
}
|
||
|
||
err := parser.Parse(ft, planBld)
|
||
if err != nil {
|
||
// TODO 错误处理
|
||
continue
|
||
}
|
||
|
||
planningHubIDs[id] = true
|
||
}
|
||
return entry
|
||
}
|
||
|
||
func (t *ChangeRedundancy) generateSysEventForECObject(solu annealingSolution, obj clitypes.ObjectDetail) []datamap.SysEventBody {
|
||
var blockChgs []datamap.BlockChange
|
||
|
||
reconstrct := make(map[clitypes.UserSpaceID]*[]int)
|
||
for i, f := range solu.rmBlocks {
|
||
block := solu.blockList[i]
|
||
if !f {
|
||
// 如果这个块是影子块,那么就要从完整对象里重建这个块
|
||
if !block.HasEntity {
|
||
re, ok := reconstrct[block.UserSpaceID]
|
||
if !ok {
|
||
re = &[]int{}
|
||
reconstrct[block.UserSpaceID] = re
|
||
}
|
||
|
||
*re = append(*re, block.Index)
|
||
}
|
||
} else {
|
||
blockChgs = append(blockChgs, &datamap.BlockChangeDeleted{
|
||
Index: block.Index,
|
||
UserSpaceID: block.UserSpaceID,
|
||
})
|
||
}
|
||
}
|
||
|
||
// 由于每一个需要被重建的块都是从同中心的副本里构建出来的,所以对于每一个中心都要产生一个BlockChangeEnDecode
|
||
for id, idxs := range reconstrct {
|
||
var tarBlocks []datamap.Block
|
||
for _, idx := range *idxs {
|
||
tarBlocks = append(tarBlocks, datamap.Block{
|
||
BlockType: datamap.BlockTypeEC,
|
||
Index: idx,
|
||
UserSpaceID: id,
|
||
})
|
||
}
|
||
blockChgs = append(blockChgs, &datamap.BlockChangeEnDecode{
|
||
SourceBlocks: []datamap.Block{{
|
||
BlockType: datamap.BlockTypeRaw,
|
||
Index: 0,
|
||
UserSpaceID: id, // 影子块的原始对象就在同一个节点上
|
||
}},
|
||
TargetBlocks: tarBlocks,
|
||
// 传输量为0
|
||
})
|
||
}
|
||
|
||
transEvt := &datamap.BodyBlockTransfer{
|
||
ObjectID: obj.Object.ObjectID,
|
||
PackageID: obj.Object.PackageID,
|
||
BlockChanges: blockChgs,
|
||
}
|
||
|
||
var blockDist []datamap.BlockDistributionObjectInfo
|
||
for i, f := range solu.rmBlocks {
|
||
if !f {
|
||
blockDist = append(blockDist, datamap.BlockDistributionObjectInfo{
|
||
BlockType: datamap.BlockTypeEC,
|
||
Index: solu.blockList[i].Index,
|
||
UserSpaceID: solu.blockList[i].UserSpaceID,
|
||
})
|
||
}
|
||
}
|
||
|
||
distEvt := &datamap.BodyBlockDistribution{
|
||
ObjectID: obj.Object.ObjectID,
|
||
PackageID: obj.Object.PackageID,
|
||
Path: obj.Object.Path,
|
||
Size: obj.Object.Size,
|
||
FileHash: obj.Object.FileHash,
|
||
FaultTolerance: solu.disasterTolerance,
|
||
Redundancy: solu.spaceCost,
|
||
AvgAccessCost: 0, // TODO 计算平均访问代价,从日常访问数据中统计
|
||
BlockDistribution: blockDist,
|
||
// TODO 不好计算传输量
|
||
}
|
||
|
||
return []datamap.SysEventBody{transEvt, distEvt}
|
||
}
|
||
|
||
func (t *ChangeRedundancy) executePlans(ctx *changeRedundancyContext, planBld *exec.PlanBuilder, planningSpaceIDs map[clitypes.UserSpaceID]bool, reen *distlock.Reentrant) (map[string]exec.VarValue, error) {
|
||
reqBlder := reqbuilder.NewBuilder()
|
||
for id, _ := range planningSpaceIDs {
|
||
reqBlder.Shard().Buzy(id)
|
||
}
|
||
err := reen.Lock(reqBlder.Build())
|
||
if err != nil {
|
||
return nil, fmt.Errorf("locking shard resources: %w", err)
|
||
}
|
||
|
||
wg := sync.WaitGroup{}
|
||
|
||
// 执行IO计划
|
||
var ioSwRets map[string]exec.VarValue
|
||
var ioSwErr error
|
||
wg.Add(1)
|
||
go func() {
|
||
defer wg.Done()
|
||
|
||
execCtx := exec.NewExecContext()
|
||
exec.SetValueByType(execCtx, ctx.ticktock.stgPool)
|
||
ret, err := planBld.Execute(execCtx).Wait(context.TODO())
|
||
if err != nil {
|
||
ioSwErr = fmt.Errorf("executing io switch plan: %w", err)
|
||
return
|
||
}
|
||
ioSwRets = ret
|
||
}()
|
||
|
||
wg.Wait()
|
||
|
||
if ioSwErr != nil {
|
||
return nil, ioSwErr
|
||
}
|
||
|
||
return ioSwRets, nil
|
||
}
|
||
|
||
func (t *ChangeRedundancy) populateECObjectEntry(entry *db.UpdatingObjectRedundancy, obj clitypes.ObjectDetail, ioRets map[string]exec.VarValue) {
|
||
for i := range entry.Blocks {
|
||
if entry.Blocks[i].FileHash != "" {
|
||
continue
|
||
}
|
||
|
||
key := fmt.Sprintf("%d.%d", obj.Object.ObjectID, entry.Blocks[i].Index)
|
||
// 不应该出现key不存在的情况
|
||
r := ioRets[key].(*ops2.ShardInfoValue)
|
||
entry.Blocks[i].FileHash = r.Hash
|
||
entry.Blocks[i].Size = r.Size
|
||
}
|
||
}
|