anolis-cloud-kernel/include/linux/kidled.h

440 lines
12 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MM_KIDLED_H
#define _LINUX_MM_KIDLED_H
#ifdef CONFIG_KIDLED
#include <linux/types.h>
#include <linux/mm.h>
#define KIDLED_VERSION "1.0"
struct mem_cgroup;
/*
 * kidled_scan_type enumerates the targets kidled can scan. By default
 * kidled scans pages only, but the target can be changed at any time
 * through a dedicated interface.
 *
 * SCAN_TARGET_PAGE and SCAN_TARGET_SLAB are used as bit positions when
 * the KIDLED_SCAN_* masks are built.
 */
enum kidled_scan_type {
SCAN_TARGET_PAGE = 0,
SCAN_TARGET_SLAB,
SCAN_TARGET_ALL
};
/*
 * Bitmask form of the scan targets; these are the values tested against
 * kidled_scan_control.scan_target and kidled_scan_target. Note that
 * KIDLED_SCAN_ALL is the OR of the page and slab bits, not
 * (1 << SCAN_TARGET_ALL).
 */
#define KIDLED_SCAN_PAGE (1 << SCAN_TARGET_PAGE)
#define KIDLED_SCAN_SLAB (1 << SCAN_TARGET_SLAB)
#define KIDLED_SCAN_ALL (KIDLED_SCAN_PAGE | KIDLED_SCAN_SLAB)
/*
 * We want to know more about a given idle page, e.g. whether it is page
 * cache, whether it sits on the active LRU list, and so on. KIDLE_<flag>
 * marks these attributes; five flags are defined:
 *
 * KIDLE_DIRTY   : page is dirty or not;
 * KIDLE_FILE    : page is page cache or not;
 * KIDLE_UNEVICT : page is unevictable or evictable;
 * KIDLE_ACTIVE  : page is on the active LRU list or not;
 * KIDLE_SLAB    : whether it belongs to a slab or not.
 *
 * Each KIDLE_<flag> occupies one bit position in an idle type.
 * There are 2^4 + 1 = 17 idle types in total (KIDLE_NR_TYPE).
 * NOTE(review): presumably the 16 combinations of the four page flags
 * plus KIDLE_SLAB on its own -- confirm against the accounting code.
 */
#define KIDLE_BASE 0
#define KIDLE_DIRTY (1 << 0)
#define KIDLE_FILE (1 << 1)
#define KIDLE_UNEVICT (1 << 2)
#define KIDLE_ACTIVE (1 << 3)
#define KIDLE_SLAB (1 << 4)
#define KIDLE_NR_TYPE 17
/*
 * Each page has an idle age: how long the page has stayed idle, counted
 * in scan periods. The idle age takes one byte, so the maximum age is
 * 255. Buckets are used for histogram sampling by idle age; e.g. the
 * bucket [5,15) covers pages whose idle age is >= 5 and < 15 scan
 * periods. Each bucket value is a split point on the idle-age axis. At
 * most NUM_KIDLED_BUCKETS sampling regions are supported.
 */
#define KIDLED_MAX_IDLE_AGE U8_MAX
#define NUM_KIDLED_BUCKETS 8
/*
 * Since it is not convenient to get immediate statistics for a memory
 * cgroup, a ping-pong buffer is used. One buffer, the 'stable buffer',
 * holds the stable statistics that are shown to readers. The other, the
 * 'unstable buffer', holds the statistics being updated by the scanning
 * threads. The two are switched when a scanning round finishes.
 */
#define KIDLED_STATS_NR_TYPE 2
/*
 * When the user does not want to account for a given instance (e.g. a
 * memory cgroup), the corresponding buckets are marked invalid. kidled
 * skips accounting when it encounters invalid buckets. Note that
 * scanning itself continues.
 *
 * When users install new buckets, the current statistics become
 * invalid. They cannot be reset immediately, for the reasons above, so
 * the reset happens at a safe point (i.e. when a round has finished).
 * The new buckets are stored in the stable stats' buckets, while the
 * unstable stats' buckets are marked invalid.
 *
 * This value must be greater than KIDLED_MAX_IDLE_AGE and may only be
 * used as the first bucket value, so that kidled_get_bucket() can
 * return quickly. Users should not use KIDLED_INVALID_BUCKET directly.
 */
#define KIDLED_INVALID_BUCKET (KIDLED_MAX_IDLE_AGE + 1)
/*
 * The high byte of a 16-bit slab age is the access marker for a scan
 * round: the mask selects that byte, the shift moves a value into it.
 */
#define KIDLED_SLAB_ACCESS_MASK 0xff00
#define KIDLED_SLAB_ACCESS_SHIFT 0x8
/*
 * Helpers for tagging a bucket array as invalid and for testing that tag.
 * Only the first element is written or examined; it is compared against
 * KIDLED_INVALID_BUCKET.
 *
 * The 'buckets' argument is parenthesized so that any pointer-valued
 * expression (e.g. "base + off") is subscripted as a whole, instead of
 * the [0] binding to the last operand of the expression.
 */
#define KIDLED_MARK_BUCKET_INVALID(buckets)	\
	((buckets)[0] = KIDLED_INVALID_BUCKET)
#define KIDLED_IS_BUCKET_INVALID(buckets)	\
	((buckets)[0] == KIDLED_INVALID_BUCKET)
/* Static key, declared default-false; is_kidled_enabled() tests it. */
DECLARE_STATIC_KEY_FALSE(kidled_enabled_key);
/*
 * Tell whether a slab age carries the access marker of the given round.
 *
 * @slab_age:    16-bit slab age; its high byte holds the marker.
 * @scan_rounds: round counter; only its low 8 bits take part.
 *
 * Returns true when the high byte of @slab_age equals the low byte of
 * @scan_rounds.
 */
static inline bool kidled_is_slab_scanned(unsigned short slab_age,
					  unsigned long scan_rounds)
{
	unsigned long marker = slab_age >> KIDLED_SLAB_ACCESS_SHIFT;
	unsigned long round_tag = scan_rounds & 0xff;

	return marker == round_tag;
}
/*
 * The number of idle pages is accounted per idle type and per bucket
 * for a given instance (e.g. one memory cgroup or one process...).
 */
struct idle_page_stats {
/* Bucket split values; buckets[0] is the slot the invalid-bucket macros use */
int buckets[NUM_KIDLED_BUCKETS];
/* Counters indexed as [idle type][bucket] */
unsigned long count[KIDLE_NR_TYPE][NUM_KIDLED_BUCKETS];
};
/*
 * Several parameters have to be passed to coldpgs when reclaiming free
 * slab objects. 'threshold' identifies the colder slab objects that
 * should be reclaimed; 'freeable' collects the objects to be freed.
 */
struct kidled_slab_param {
unsigned int threshold;
struct list_head *freeable;
};
/*
 * Duration is in seconds: how long kidled should take to finish one
 * round (best effort, no promise). The sequence number is increased
 * each time the user updates the sysfs file, so readers can detect
 * stale statistics by comparing sequence numbers even when the
 * duration stays the same. There is a rare race where the sequence
 * number wraps around to the previous value, so the duration is
 * checked as well. Statistics may still be stale when both seq and
 * duration match the previous values, which is considered acceptable
 * because at least the duration is the same.
 */
#define KIDLED_MAX_SCAN_DURATION U16_MAX /* max 65535 seconds */
struct kidled_scan_control {
/*
 * seq and duration overlay val, so the helpers in this header read
 * and replace both of them together through atomic operations on val.
 */
union {
atomic_t val;
struct {
u16 seq; /* inc when update */
u16 duration; /* in seconds */
};
};
unsigned int scan_target; /* decide how kidled to scan */
};
/* Global scan control word (seq + duration); accessed through its atomic .val */
extern struct kidled_scan_control kidled_scan_control;
/* Global scan target mask (KIDLED_SCAN_* bits) that the helpers below compare against */
extern unsigned int kidled_scan_target;
/* NOTE(review): presumably the number of scan rounds so far -- defined elsewhere, confirm */
extern unsigned long kidled_scan_rounds;
/* Operation flags for kidled_set_scan_control(): select which fields are updated */
#define KIDLED_OP_SET_DURATION (1 << 0)
#define KIDLED_OP_INC_SEQ (1 << 1)
/*
 * Return page->slab_age as a pointer to unsigned short, with bit 1
 * (0x2) of the stored value cleared. page_has_slab_age() tests that
 * same bit, so it acts as a tag kept inside the pointer value.
 */
static inline unsigned short *kidled_slab_age(struct page *page)
{
	unsigned long raw = (unsigned long)page->slab_age;

	raw &= ~0x2UL;
	return (unsigned short *)raw;
}
/*
 * With CONFIG_MEMCG_KMEM this is a real variable defined elsewhere;
 * without it the name expands to the constant 1.
 */
#ifdef CONFIG_MEMCG_KMEM
extern bool cgroup_memory_nokmem;
#else
#define cgroup_memory_nokmem 1
#endif
/*
 * Slab age storage and accounting hooks implemented outside this header.
 * NOTE(review): judging by the names, alloc/free manage the per-page
 * slab age storage reachable through page->slab_age -- confirm in the
 * implementation.
 */
extern int kidled_alloc_slab_age(struct page *page, struct kmem_cache *s, gfp_t flags);
extern void kidled_free_slab_age(struct page *page);
extern void kidled_mem_cgroup_account(struct page *page,
void *ptr, int age, unsigned long size);
/*
 * Account one slab object.
 *
 * Looks up the head page of @object with virt_to_head_page() and hands
 * the page, the object, @age and @size to kidled_mem_cgroup_account().
 * @size is an int here and is converted to that function's unsigned
 * long parameter.
 */
static inline void kidled_mem_cgroup_slab_account(void *object,
						  int age, int size)
{
	kidled_mem_cgroup_account(virt_to_head_page(object), object,
				  age, size);
}
static inline struct kidled_scan_control kidled_get_current_scan_control(void)
{
struct kidled_scan_control scan_control;
atomic_set(&scan_control.val, atomic_read(&kidled_scan_control.val));
scan_control.scan_target = kidled_scan_target;
return scan_control;
}
static inline unsigned int kidled_get_current_scan_duration(void)
{
struct kidled_scan_control scan_control =
kidled_get_current_scan_control();
return scan_control.duration;
}
/*
 * Reset a scan control to its initial state: control word zeroed
 * (seq == 0, duration == 0) and the scan target set to pages only.
 */
static inline void kidled_reset_scan_control(struct kidled_scan_control *p)
{
	/* Zeroing val clears both overlaid fields at once. */
	atomic_set(&p->val, 0);

	p->scan_target = KIDLED_SCAN_PAGE;
}
/*
 * Check @p against the global kidled_scan_control.
 *
 * Returns true when both control words (seq and duration together) are
 * identical. The scan target is not part of the comparison.
 */
static inline bool kidled_is_scan_period_equal(struct kidled_scan_control *p)
{
	int local_word = atomic_read(&p->val);
	int global_word = atomic_read(&kidled_scan_control.val);

	return local_word == global_word;
}
/* Tell whether the slab bit (KIDLED_SCAN_SLAB) is set in p->scan_target. */
static inline bool kidled_has_slab_target(struct kidled_scan_control *p)
{
	return (p->scan_target & KIDLED_SCAN_SLAB) != 0;
}
/* Tell whether the page bit (KIDLED_SCAN_PAGE) is set in p->scan_target. */
static inline bool kidled_has_page_target(struct kidled_scan_control *p)
{
	return (p->scan_target & KIDLED_SCAN_PAGE) != 0;
}
/*
 * Returns true only when the slab bit is set both in p->scan_target and
 * in the global kidled_scan_target.
 */
static inline bool kidled_has_slab_target_equal(struct kidled_scan_control *p)
{
	return kidled_has_slab_target(p) &&
	       (kidled_scan_target & KIDLED_SCAN_SLAB) != 0;
}
/* Tell whether p->scan_target is exactly the global kidled_scan_target. */
static inline bool
kidled_is_scan_target_equal(struct kidled_scan_control *p)
{
	const unsigned int global_target = kidled_scan_target;

	return global_target == p->scan_target;
}
/* Tell whether p->scan_target selects slab and nothing else. */
static inline bool
kidled_has_slab_target_only(struct kidled_scan_control *p)
{
	const unsigned int target = p->scan_target;

	return target == KIDLED_SCAN_SLAB;
}
/* Tell whether p->scan_target selects pages and nothing else. */
static inline bool
kidled_has_page_target_only(struct kidled_scan_control *p)
{
	const unsigned int target = p->scan_target;

	return target == KIDLED_SCAN_PAGE;
}
/*
 * Returns true only when the page bit is set both in p->scan_target and
 * in the global kidled_scan_target.
 */
static inline bool
kidled_has_page_target_equal(struct kidled_scan_control *p)
{
	return kidled_has_page_target(p) &&
	       (kidled_scan_target & KIDLED_SCAN_PAGE) != 0;
}
/*
 * Work out which targets @p still has but the global kidled_scan_target
 * has dropped.
 *
 * @p:             scan control to compare with the global target.
 * @page_disabled: set to true when @p has the page bit and the global
 *                 target does not.
 * @slab_disabled: set to true when @p has the slab bit and the global
 *                 target does not.
 *
 * The outputs are only ever set, never cleared, so the caller has to
 * initialize them.
 */
static inline void kidled_get_reset_type(struct kidled_scan_control *p,
					 bool *page_disabled,
					 bool *slab_disabled)
{
	if (kidled_has_page_target(p)) {
		if (!kidled_has_page_target_equal(p))
			*page_disabled = true;
	}

	if (kidled_has_slab_target(p)) {
		if (!kidled_has_slab_target_equal(p))
			*slab_disabled = true;
	}
}
/*
 * Update the global kidled_scan_control word with a compare-and-exchange.
 *
 * @op:       KIDLED_OP_* flags: KIDLED_OP_INC_SEQ bumps the sequence
 *            number, KIDLED_OP_SET_DURATION stores @duration.
 * @duration: new duration; only used with KIDLED_OP_SET_DURATION.
 * @orig:     the control value the caller expects to be current, or NULL.
 *
 * With @orig == NULL the expected value is re-read from the global on
 * every attempt and the exchange is repeated until it succeeds. With a
 * caller-supplied @orig exactly one attempt is made: the expected value
 * never changes, so retrying after a lost race would loop endlessly.
 *
 * Returns true if the global was updated, false if the single attempt
 * with a caller-supplied @orig failed.
 */
static inline bool kidled_set_scan_control(int op, u16 duration,
					   struct kidled_scan_control *orig)
{
	const bool retry = !orig;

	if (retry)
		orig = &kidled_scan_control;

	do {
		struct kidled_scan_control next;
		int expected = atomic_read(&orig->val);
		int desired;

		/* Start from the expected word, then patch the fields. */
		atomic_set(&next.val, expected);
		if (op & KIDLED_OP_SET_DURATION)
			next.duration = duration;
		if (op & KIDLED_OP_INC_SEQ)
			next.seq++;
		desired = atomic_read(&next.val);

		if (atomic_cmpxchg(&kidled_scan_control.val, expected,
				   desired) == expected)
			return true;
	} while (retry);

	return false;
}
/*
 * Store a new scan duration in the global scan control and bump its
 * sequence number in the same update. Passing NULL as the original
 * value makes kidled_set_scan_control() retry until it succeeds.
 */
static inline void kidled_set_scan_duration(u16 duration)
{
	const int op = KIDLED_OP_INC_SEQ | KIDLED_OP_SET_DURATION;

	kidled_set_scan_control(op, duration, NULL);
}
/*
 * Return whether the kidled_enabled_key static key is enabled. The
 * static_branch_unlikely() form hints that the disabled case is the
 * expected one.
 */
static inline bool is_kidled_enabled(void)
{
return static_branch_unlikely(&kidled_enabled_key);
}
/*
* Caller must specify the original scan period, avoid the race between
* the double operation and user's updates through sysfs interface.
*/
static inline bool
kidled_try_double_scan_control(struct kidled_scan_control orig)
{
u16 duration = orig.duration;
if (unlikely(duration == KIDLED_MAX_SCAN_DURATION))
return false;
duration <<= 1;
if (duration < orig.duration)
duration = KIDLED_MAX_SCAN_DURATION;
return kidled_set_scan_control(KIDLED_OP_INC_SEQ |
KIDLED_OP_SET_DURATION,
duration,
&orig);
}
/*
* Increase the sequence number while keep duration the same, it's used
* to start a new period immediately.
*/
static inline void kidled_inc_scan_seq(void)
{
kidled_set_scan_control(KIDLED_OP_INC_SEQ, 0, NULL);
}
/*
 * Tell whether bit 1 (0x2) is set in page->slab_age. kidled_slab_age()
 * masks out that same bit before using the value as a pointer.
 */
static inline bool page_has_slab_age(struct page *page)
{
	unsigned long raw = (unsigned long)page->slab_age;

	return (raw & 0x2UL) != 0;
}
/*
 * Accessors for the 16-bit age value of one slab object, implemented
 * outside this header. The inline helpers in this header treat the high
 * byte of the value as a scan-round marker (KIDLED_SLAB_ACCESS_MASK).
 */
extern unsigned short kidled_get_slab_age(void *object);
extern void kidled_set_slab_age(void *object, unsigned short age);
/*
 * Increase the age of a slab object by one unless the stored value has
 * already reached KIDLED_MAX_IDLE_AGE.
 *
 * Returns the value after the (possible) increment.
 *
 * NOTE(review): the comparison uses the whole 16-bit value, so a value
 * with the scan marker set in its high byte is never incremented --
 * presumably callers clear the marker first; confirm.
 */
static inline unsigned short kidled_inc_slab_age(void *object)
{
	unsigned short age = kidled_get_slab_age(object);

	if (age >= KIDLED_MAX_IDLE_AGE)
		return age;

	age++;
	kidled_set_slab_age(object, age);

	return age;
}
/*
 * Drop the scan marker of a slab object: read its age, clear the bits
 * covered by KIDLED_SLAB_ACCESS_MASK and store the result back.
 */
static inline void kidled_clear_slab_scanned(void *object)
{
	unsigned short age = kidled_get_slab_age(object);
	unsigned short cleared = age & ~KIDLED_SLAB_ACCESS_MASK;

	kidled_set_slab_age(object, cleared);
}
/*
 * Stamp a slab object with the marker of the given scan round: the low
 * 8 bits of @scan_rounds are ORed into the high byte of its age.
 *
 * NOTE(review): the marker is ORed in without clearing the previous
 * one, so callers presumably run kidled_clear_slab_scanned() first --
 * confirm.
 */
static inline void kidled_mark_slab_scanned(void *object,
					    unsigned long scan_rounds)
{
	unsigned short age = kidled_get_slab_age(object);
	unsigned long marker = (scan_rounds & 0xff) << KIDLED_SLAB_ACCESS_SHIFT;

	age |= marker;
	kidled_set_slab_age(object, age);
}
/* Default bucket split values, one per bucket slot; defined elsewhere. */
extern const int kidled_default_buckets[NUM_KIDLED_BUCKETS];
#ifdef CONFIG_MEMCG
/*
 * NOTE(review): presumably moves the idle statistics accounted for
 * 'page' from one memory cgroup to another -- implementation is not
 * visible here, confirm.
 */
void kidled_mem_cgroup_move_stats(struct mem_cgroup *from,
struct mem_cgroup *to,
struct page *page,
unsigned long size);
#endif /* CONFIG_MEMCG */
#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
/* Only declared when the page age is not kept in the page flags. */
void kidled_free_page_age(pg_data_t *pgdat);
#endif
#else /* !CONFIG_KIDLED */
#ifdef CONFIG_MEMCG
/* !CONFIG_KIDLED stub: there are no statistics to move, so do nothing. */
static inline void
kidled_mem_cgroup_move_stats(struct mem_cgroup *from, struct mem_cgroup *to,
			     struct page *page, unsigned long size)
{
}
#endif /* CONFIG_MEMCG */
/* !CONFIG_KIDLED stub: no slab age is tracked, always report 0. */
static inline unsigned short kidled_get_slab_age(void *object)
{
	(void)object;

	return 0;
}
/* !CONFIG_KIDLED stub: no slab age is tracked, the value is discarded. */
static inline void kidled_set_slab_age(void *object, unsigned short age)
{
	(void)object;
	(void)age;
}
/* !CONFIG_KIDLED stub: nothing to allocate, always returns 0. */
static inline int kidled_alloc_slab_age(struct page *page,
					struct kmem_cache *s, gfp_t flags)
{
	return 0;
}
/* !CONFIG_KIDLED stub: nothing was allocated, so nothing is freed. */
static inline void kidled_free_slab_age(struct page *page)
{
	(void)page;
}
/* !CONFIG_KIDLED stub: a page never has a slab age; always false. */
static inline bool page_has_slab_age(struct page *page)
{
	(void)page;

	return false;
}
/* !CONFIG_KIDLED stub: there is no scan period, report a duration of 0. */
static inline unsigned int kidled_get_current_scan_duration(void)
{
	const unsigned int no_duration = 0;

	return no_duration;
}
/* !CONFIG_KIDLED stub: kidled is compiled out, so it is never enabled. */
static inline bool is_kidled_enabled(void)
{
	const bool enabled = false;

	return enabled;
}
#endif /* CONFIG_KIDLED */
#endif /* _LINUX_MM_KIDLED_H */