anolis-cloud-kernel/include/linux/resctrl.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RESCTRL_H
#define _RESCTRL_H
#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pid.h>
#include <linux/resctrl_types.h>
#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL
#include <asm/resctrl.h>
#endif
#ifdef CONFIG_PROC_CPU_RESCTRL
int proc_resctrl_show(struct seq_file *m,
struct pid_namespace *ns,
struct pid *pid,
struct task_struct *tsk);
#endif
struct mon_config_info {
struct rdt_domain *d;
u32 evtid;
u32 mon_config;
};
#define INVALID_CONFIG_INDEX UINT_MAX
/**
* mon_event_config_index_get - get the hardware index for the
* configurable event
* @evtid: event id.
*
* Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
* 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
* INVALID_CONFIG_INDEX for invalid evtid
*/
static inline unsigned int mon_event_config_index_get(u32 evtid)
{
switch (evtid) {
case QOS_L3_MBM_TOTAL_EVENT_ID:
return 0;
case QOS_L3_MBM_LOCAL_EVENT_ID:
return 1;
default:
/* Should never reach here */
return INVALID_CONFIG_INDEX;
}
}
/* max value for struct rdt_domain's mbps_val */
#define MBA_MAX_MBPS U32_MAX
/*
* Resctrl uses an unsigned long as a closid bitmap. The maximum closid is the
* number of bits in an unsigned long.
*/
#define RESCTRL_MAX_CLOSID BITS_PER_LONG
/*
* Resctrl uses u32 to hold the user-space config. The maximum bitmap size is
* 32.
*/
#define RESCTRL_MAX_CBM 32
/* The format for packing fields into the u64 'id' exposed to user-space */
#define RESCTRL_ID_CLOSID GENMASK(31, 0)
#define RESCTRL_ID_RMID GENMASK(63, 32)
/*
* Value for XORing the id presented to user-space. This is to prevent
* user-space from depending on the layout, ensuring it is only used for passing
* back to kernel interfaces.
*/
extern u64 resctrl_id_obsfucation;
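/*
* Illustrative sketch, not part of this header: packing a closid/rmid pair
* into the obfuscated u64 id is the inverse of __resctrl_id_decode() below.
* The helper name is hypothetical.
*
*	static inline u64 example_resctrl_id_encode(u32 closid, u32 rmid)
*	{
*		u64 id = FIELD_PREP(RESCTRL_ID_CLOSID, closid) |
*			 FIELD_PREP(RESCTRL_ID_RMID, rmid);
*
*		return id ^ resctrl_id_obsfucation;
*	}
*/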
/**
* struct pseudo_lock_region - pseudo-lock region information
* @s: Resctrl schema for the resource to which this
* pseudo-locked region belongs
* @closid: The closid that this pseudo-locked region uses
* @d: RDT domain to which this pseudo-locked region
* belongs
* @cbm: bitmask of the pseudo-locked region
* @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
* completion
* @thread_done: variable used by waitqueue to test if pseudo-locking
* thread completed
* @cpu: core associated with the cache on which the setup code
* will be run
* @line_size: size of the cache lines
* @size: size of pseudo-locked region in bytes
* @kmem: the kernel memory associated with pseudo-locked region
* @minor: minor number of character device associated with this
* region
* @debugfs_dir: pointer to this region's directory in the debugfs
* filesystem
* @pm_reqs: Power management QoS requests related to this region
*/
struct pseudo_lock_region {
struct resctrl_schema *s;
u32 closid;
struct rdt_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
int thread_done;
int cpu;
unsigned int line_size;
unsigned int size;
void *kmem;
unsigned int minor;
struct dentry *debugfs_dir;
struct list_head pm_reqs;
};
/**
* struct resctrl_staged_config - parsed configuration to be applied
* @new_ctrl: new ctrl value to be loaded
* @have_new_ctrl: whether the user provided new_ctrl is valid
*/
struct resctrl_staged_config {
u32 new_ctrl;
bool have_new_ctrl;
};
/**
* struct rdt_domain - group of CPUs sharing a resctrl resource
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which CPUs share this resource
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
* @mbm_local: saved state for MBM local bandwidth
* @mbm_over: worker to periodically read MBM h/w counters
* @cqm_limbo: worker to periodically read CQM h/w counters
* @mbm_work_cpu: worker CPU for MBM h/w counters
* @cqm_work_cpu: worker CPU for CQM h/w counters
* @mbm_cntr_map: bitmap to track domain counter assignment
* @plr: pseudo-locked region (if any) associated with domain
* @staged_config: parsed configuration to be applied
* @mbps_val: When mba_sc is enabled, this holds the array of user
* specified control values for mba_sc in MBps, indexed
* by closid
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
struct delayed_work mbm_over;
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
unsigned long *mbm_cntr_map;
struct pseudo_lock_region *plr;
struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
u32 *mbps_val;
};
/**
* struct resctrl_cache - Cache allocation related data
* @cbm_len: Length of the cache bit mask
* @min_cbm_bits: Minimum number of consecutive bits to be set
* @shareable_bits: Bitmask of shareable resource with other
* executing entities
* @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid.
* @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
* @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
* level has CPU scope.
*/
struct resctrl_cache {
unsigned int cbm_len;
unsigned int min_cbm_bits;
unsigned int shareable_bits;
bool arch_has_sparse_bitmasks;
bool arch_has_empty_bitmaps;
bool arch_has_per_cpu_cfg;
};
/**
* enum membw_throttle_mode - System's memory bandwidth throttling mode
* @THREAD_THROTTLE_UNDEFINED: Not relevant to the system
* @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core
* always using smallest bandwidth percentage
* assigned to threads, aka "max throttling"
* @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
*/
enum membw_throttle_mode {
THREAD_THROTTLE_UNDEFINED = 0,
THREAD_THROTTLE_MAX,
THREAD_THROTTLE_PER_THREAD,
};
/**
* struct resctrl_membw - Memory bandwidth allocation related data
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
* @arch_needs_linear: True if we can't configure non-linear resources
* @throttle_mode: Bandwidth throttling mode when threads request
* different memory bandwidths
* @mba_sc: True if the MBA software controller (mba_sc) is enabled
* @hwdrc_mb: True if memory bandwidth HWDRC is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct resctrl_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
bool arch_needs_linear;
enum membw_throttle_mode throttle_mode;
bool mba_sc;
bool hwdrc_mb;
u32 *mb_map;
};
/**
* struct resctrl_mon - Monitoring related data
* @num_rmid: Number of RMIDs available
* @num_mbm_cntrs: Number of monitoring counters
* @mbm_cntr_assignable: Is the system capable of supporting monitor assignment?
* @evt_list: List of monitoring events
*/
struct resctrl_mon {
int num_rmid;
int num_mbm_cntrs;
bool mbm_cntr_assignable;
struct list_head evt_list;
};
/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
* @cache_level: Which cache level defines scope of this resource
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
* @mon: Monitoring related data
* @domains: RCU list of all domains for this resource
* @name: Name to use in "schemata" file.
* @data_width: Character width of data when displaying
* @default_ctrl: Specifies default cache cbm or memory B/W percent.
* @format_str: Per resource format string to show domain value
* @fflags: flags to choose base and info files
* @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth
* monitoring events can be configured.
* @cdp_capable: Is the CDP feature available on this resource
*/
struct rdt_resource {
int rid;
bool alloc_capable;
bool mon_capable;
int cache_level;
struct resctrl_cache cache;
struct resctrl_membw membw;
struct resctrl_mon mon;
struct list_head domains;
char *name;
int data_width;
u32 default_ctrl;
const char *format_str;
unsigned long fflags;
unsigned int mbm_cfg_mask;
bool cdp_capable;
};
/*
* Get the resource that exists at this level. If the level is not supported,
* a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES
* will return NULL.
*/
struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l);
/**
* struct resctrl_schema - configuration abilities of a resource presented to
* user-space
* @list: Member of resctrl_schema_all.
* @name: The name to use in the "schemata" file.
* @conf_type: Whether this schema is specific to code/data.
* @res: The resource structure exported by the architecture to describe
* the hardware that is configured by this schema.
* @num_closid: The number of closid that can be used with this schema. When
* features like CDP are enabled, this will be lower than the
* hardware supports for the resource.
*/
struct resctrl_schema {
struct list_head list;
char name[8];
enum resctrl_conf_type conf_type;
struct rdt_resource *res;
u32 num_closid;
};
/*
* Wait-queue for tasks waiting for a monitoring context to become available.
*/
extern struct wait_queue_head resctrl_mon_ctx_waiters;
struct resctrl_cpu_sync {
u32 closid;
u32 rmid;
};
/*
* Update and re-load this CPU's defaults. Called via IPI, takes a pointer to
* struct resctrl_cpu_sync, or NULL.
*/
void resctrl_arch_sync_cpu_defaults(void *info);
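/*
* Illustrative sketch, not part of this header: a caller might push new
* defaults to every online CPU over IPI with on_each_cpu() from
* <linux/smp.h>; NULL is also accepted, as noted above.
*
*	struct resctrl_cpu_sync sync = { .closid = 0, .rmid = 0 };
*
*	on_each_cpu(resctrl_arch_sync_cpu_defaults, &sync, 1);
*/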
/**
* __resctrl_id_decode() - unpack a known-good id that has been checked by
* resctrl_id_decode().
* @id: The value originally passed by user-space.
* @closid: Returned closid.
* @rmid: Returned rmid.
*
* Decodes the id field with no error checking. resctrl_id_decode() must have
* been used to check that the id produces values that are in range and
* allocated at the time of first use.
*/
static inline void __resctrl_id_decode(u64 id, u32 *closid, u32 *rmid)
{
id ^= resctrl_id_obsfucation;
*closid = FIELD_GET(RESCTRL_ID_CLOSID, id);
*rmid = FIELD_GET(RESCTRL_ID_RMID, id);
}
/**
* resctrl_id_decode() - unpack an id passed by user-space.
* @id: The value passed by user-space.
* @closid: Returned closid.
* @rmid: Returned rmid.
*
* Returns -EINVAL if @id doesn't correspond to an allocated control
* or monitor group. Returns 0 on success.
*
* Takes a mutex, call in process context.
*/
int resctrl_id_decode(u64 id, u32 *closid, u32 *rmid);
/* The number of closid supported by this resource regardless of CDP */
u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id);
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
/* For use by arch code that needs to remap resctrl's smaller CDP closid */
static inline u32 resctrl_get_config_index(u32 closid,
enum resctrl_conf_type type)
{
switch (type) {
default:
case CDP_NONE:
return closid;
case CDP_CODE:
return (closid * 2) + 1;
case CDP_DATA:
return (closid * 2);
}
}
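/*
* Illustrative example: with CDP enabled, each resctrl closid maps to a pair
* of adjacent hardware indices, e.g. closid 3 gives index 6 for CDP_DATA and
* index 7 for CDP_CODE; with CDP_NONE the index is the closid itself. Arch
* code might use the result to index its per-domain control array (the array
* name below is hypothetical):
*
*	u32 idx = resctrl_get_config_index(closid, CDP_DATA);
*
*	hw_dom_ctrl_val[idx] = cfg_val;
*/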
/*
* Caller must be in an RCU read-side critical section, or hold the
* cpuhp read lock to prevent the struct rdt_domain being freed.
*/
static inline struct rdt_domain *
resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;
list_for_each_entry_rcu(d, &r->domains, list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->cpu_mask))
return d;
}
return NULL;
}
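/*
* Illustrative sketch, not part of this header: a caller that doesn't hold
* the cpuhp read lock brackets the lookup in an RCU read-side critical
* section and only uses the domain inside it. example_use_domain() is a
* hypothetical placeholder.
*
*	rcu_read_lock();
*	d = resctrl_get_domain_from_cpu(cpu, r);
*	if (d)
*		example_use_domain(d);
*	rcu_read_unlock();
*/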
/*
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
*/
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val);
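/*
* Illustrative sketch, not part of this header: a caller running on another
* CPU could bounce the write over to the domain via smp_call_function_any(),
* assuming the architecture's implementation is safe to run from IPI context
* (not guaranteed by this header). The wrapper and argument struct below are
* hypothetical.
*
*	static void example_update_one_ipi(void *info)
*	{
*		struct example_update_args *args = info;
*
*		args->err = resctrl_arch_update_one(args->r, args->d,
*						    args->closid, args->type,
*						    args->cfg_val);
*	}
*
*	smp_call_function_any(&d->cpu_mask, example_update_one_ipi, &args, 1);
*/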
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type type);
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
int resctrl_online_cpu(unsigned int cpu);
void resctrl_offline_cpu(unsigned int cpu);
/**
* resctrl_arch_mon_ctx_alloc() - Allocate architecture specific resources needed
* to use the monitors. This might sleep.
* @r: resource that will be used to read the counter.
* @evtid: the event that will be read, e.g. L3 occupancy.
*
* Call from process context, this might sleep until a context becomes
* available.
*/
int resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid);
/**
* resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
* for this resource and domain.
* @r: resource that the counter should be read from.
* @d: domain that the counter should be read from.
* @closid: closid that matches the rmid. The counter may
* match traffic of both closid and rmid, or rmid only.
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
* @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
*
* Call from process context on a CPU that belongs to domain @d.
*
* Return:
* 0 on success, or -EIO, -EINVAL etc on error.
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
u64 *val, int arch_mon_ctx);
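/*
* Illustrative sketch, not part of this header: reading an MBM counter
* allocates a monitor context in process context, then performs the read on a
* CPU in @d. How the context is released afterwards is not described by this
* header, so that step is omitted.
*
*	int ctx, err;
*	u64 bytes;
*
*	ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_MBM_TOTAL_EVENT_ID);
*	err = resctrl_arch_rmid_read(r, d, closid, rmid,
*				     QOS_L3_MBM_TOTAL_EVENT_ID, &bytes, ctx);
*/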
/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
* and eventid.
* @r: The domain's resource.
* @d: The rmid's domain.
* @closid: The closid that matches the rmid. Counters may match both
* closid and rmid, or rmid only.
* @rmid: The rmid whose counter values should be reset.
* @eventid: The eventid whose counter values should be reset.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid,
enum resctrl_event_id eventid);
/**
* resctrl_arch_reset_rmid_all() - Reset all private state associated with
* all rmids and eventids.
* @r: The resctrl resource.
* @d: The domain for which all architectural counter state will
* be cleared.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
int resctrl_init(void);
void resctrl_exit(void);
#endif /* _RESCTRL_H */