推广

Linux源码 | EM能量模型

iseeyu2年前 (2024-08-15)推广114

本文地址:Linux源码 | EM能量模型
博客地址:https://hqber.com
个人原创作品,转载需联系我,必须标注署名、文章出处

本文是基于linux kernel 5.15.41

能量模型(EM)<kernel/power/energy_model.c | 源代码 | v5.15.41>框架是驱动程序与内核子系统之间的一种接口:驱动程序了解设备在不同性能层级下所消耗的功率,而内核子系统希望利用这些信息做出能量感知决策。EM框架管理着系统中各个设备提供的“性能域”,也就是频率与功率的映射表;相关的能量感知算法可以通过接口获取相应设备的“性能域”,进行性能成本估算。


Linux源码 | EM能量模型

EM能量模型debug节点:/sys/kernel/debug/energy_model

在当前内核版本中,EM框架的注册接口同时支持CPU设备和非CPU设备(见下文em_create_pd中的非CPU分支),本文以CPU device为例。CPU设备的em_perf_state中power<active_power | 源代码 | v5.15.41>和cost值计算公式如下:

  • power = capacitance(电容,dtsi配置:dynamic-power-coefficient) * voltage^2 * frequency
  • cost = max_frequency * power / frequency

1. energy_model结构

<include/linux/energy_model.h | 源代码 | v5.15.41>

/* One performance state (one row of a performance domain's table). */
struct em_perf_state {
unsigned long frequency; /* frequency reported by the driver's active_power() callback */
unsigned long power; /* power reported by the driver's active_power() callback */
unsigned long cost; /* precomputed fmax * em_scale_power(power) / frequency, fmax being the table's highest frequency */
};

/* Performance domain: the frequency/power table shared by a device (or a set of CPUs). */
struct em_perf_domain {
struct em_perf_state *table; /* array of performance states, built in strictly increasing frequency order */
int nr_perf_states; /* number of entries in 'table' */
int milliwatts; /* copy of the 'milliwatts' flag given at registration; presumably tells whether power is in milliwatts - confirm */
unsigned long cpus[]; /* cpumask storage; only allocated (cpumask_size() bytes) for CPU devices */
};

API函数接口

// Get the em_perf_domain attached to the given device
struct em_perf_domain *em_pd_get(struct device *dev);
// Get the em_perf_domain of the CPU device identified by a CPU id
struct em_perf_domain *em_cpu_get(int cpu);

// Register a device with the Energy Model; this interface is meant to be used by devices (drivers)
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
struct em_data_callback *cb, cpumask_t *span,
bool milliwatts);
// Unregister a device from the Energy Model; this interface is meant to be used by devices (drivers)
void em_dev_unregister_perf_domain(struct device *dev);

2. em_dev_register_perf_domain

将设备注册到EM能量模型,这个接口提供给设备驱动使用。

/*
 * em_dev_register_perf_domain() - register a device with the Energy Model.
 * @dev:        device to register; must be non-NULL
 * @nr_states:  number of performance states to create; must be non-zero
 * @cb:         driver callbacks used to build the table; must be non-NULL
 * @cpus:       CPUs covered by the domain; required when @dev is a CPU device
 * @milliwatts: flag stored as-is in the new domain's 'milliwatts' field
 *
 * Runs entirely under em_pd_mutex.
 *
 * Return: 0 on success; -EINVAL for a NULL @dev/@cb, a zero @nr_states, a
 * missing @cpus mask on a CPU device, or CPUs with differing capacities;
 * -EEXIST if @dev or one of the CPUs already has a performance domain;
 * otherwise the error returned by em_create_pd().
 */
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
struct em_data_callback *cb, cpumask_t *cpus,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
int cpu, ret;

if (!dev || !nr_states || !cb)
return -EINVAL;

/*
* Use a mutex to serialize the registration of performance domains and
* let the driver-defined callback functions sleep.
*/
mutex_lock(&em_pd_mutex);

if (dev->em_pd) {
ret = -EEXIST;
goto unlock;
}

// CPU-device check (helper not shown here; per the article it tests whether dev->bus is the virtual cpu_subsys bus)
if (_is_cpu_device(dev)) {
if (!cpus) {
dev_err(dev, "EM: invalid CPU mask\n");
ret = -EINVAL;
goto unlock;
}


for_each_cpu(cpu, cpus) {
// Look up the performance domain of this CPU; one must not exist yet
if (em_cpu_get(cpu)) {
dev_err(dev, "EM: exists for CPU%d\n", cpu);
ret = -EEXIST;
goto unlock;
}
/*
* All CPUs of a domain must have the same
* micro-architecture since they all share the same
* table.
*/
// Read this CPU's capacity and reject the mask if it differs from the previous CPU's
cap = arch_scale_cpu_capacity(cpu);
if (prev_cap && prev_cap != cap) {
dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
cpumask_pr_args(cpus));// format arguments that print the cpumask

ret = -EINVAL;
goto unlock;
}
prev_cap = cap;
}
}

// Allocate and populate the em_perf_domain
ret = em_create_pd(dev, nr_states, cb, cpus);
if (ret)
goto unlock;

dev->em_pd->milliwatts = milliwatts;

// Create the debug entry for this device's Energy Model (per the article: /sys/kernel/debug/energy_model)
em_debug_create_pd(dev);
dev_info(dev, "EM: created perf domain\n");

unlock:
mutex_unlock(&em_pd_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
/*
 * em_create_pd() - allocate a performance domain and attach it to @dev.
 *
 * For a CPU device the allocation is extended by cpumask_size() bytes so the
 * domain can carry a copy of @cpus, and the new domain is also attached to
 * every CPU device in @cpus. For any other device only the bare structure is
 * allocated and @cpus is not touched.
 *
 * Return: 0 on success, -ENOMEM if the domain cannot be allocated, or the
 * error returned by em_create_perf_table() (the domain is freed in that case).
 */
static int em_create_pd(struct device *dev, int nr_states,
			struct em_data_callback *cb, cpumask_t *cpus)
{
	const int for_cpu = _is_cpu_device(dev);
	size_t alloc_size = sizeof(struct em_perf_domain);
	struct em_perf_domain *pd;
	struct device *cpu_dev;
	int cpu, ret;

	/* CPU domains carry their span right behind the structure. */
	if (for_cpu)
		alloc_size += cpumask_size();

	pd = kzalloc(alloc_size, GFP_KERNEL);
	if (!pd)
		return -ENOMEM;

	if (for_cpu)
		cpumask_copy(em_span_cpus(pd), cpus);

	/* Build the frequency/power table and the per-state cost. */
	ret = em_create_perf_table(dev, pd, nr_states, cb);
	if (ret)
		goto err_free_pd;

	if (_is_cpu_device(dev)) {
		/*
		 * NOTE(review): the get_cpu_device() result is dereferenced
		 * without a NULL check, unlike in em_cpu_get() - confirm that
		 * every CPU in @cpus is guaranteed to have a device.
		 */
		for_each_cpu(cpu, cpus) {
			cpu_dev = get_cpu_device(cpu);
			cpu_dev->em_pd = pd;
		}
	}

	dev->em_pd = pd;

	return 0;

err_free_pd:
	kfree(pd);
	return ret;
}
/*
 * em_create_perf_table() - build the performance state table of @pd.
 *
 * Allocates @nr_states entries, fills frequency and power by repeatedly
 * calling cb->active_power(), then computes the cost of every state as
 * fmax * em_scale_power(power) / frequency, where fmax is the frequency of
 * the last (highest) state. On success the table and its size are stored
 * in @pd.
 *
 * Return: 0 on success, -ENOMEM if the table cannot be allocated, -EINVAL
 * if the callback fails (its own error code is only logged), if frequencies
 * are not strictly increasing, or if a power value is zero or above
 * EM_MAX_POWER. The table is freed on every -EINVAL path.
 */
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
int nr_states, struct em_data_callback *cb)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
int i, ret;
u64 fmax;

table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;

// Fill in the frequency/power pairs
/* Build the list of performance states for this performance domain */
// freq++ moves one past the previous state so the callback rounds up to the next one
for (i = 0, freq = 0; i < nr_states; i++, freq++) {
/*
* active_power() is a driver callback which ceils 'freq' to
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
// Ask the device's callback for the power and frequency of the next state
ret = cb->active_power(&power, &freq, dev);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
goto free_ps_table;
}

/*
* We expect the driver callback to increase the frequency for
* higher performance states.
*/
// Each new frequency must be strictly greater than the previous one
if (freq <= prev_freq) {
dev_err(dev, "EM: non-increasing freq: %lu\n",
freq);
goto free_ps_table;
}

/*
* The power returned by active_power() is expected to be
* positive and no larger than EM_MAX_POWER (described upstream
* as fitting into 16 bits).
*/
if (!power || power > EM_MAX_POWER) {
dev_err(dev, "EM: invalid power: %lu\n",
power);
goto free_ps_table;
}

table[i].power = power;
table[i].frequency = prev_freq = freq;
}

// cost = max_freq * power / frequency
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
// Walk from the highest state down; a state whose cost is not below the lowest cost seen so far is only reported via dev_dbg
for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res = em_scale_power(table[i].power);

table[i].cost = div64_u64(fmax * power_res,
table[i].frequency);
if (table[i].cost >= prev_cost) {
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
table[i].frequency);
} else {
prev_cost = table[i].cost;
}
}

pd->table = table;
pd->nr_perf_states = nr_states;

return 0;

free_ps_table:
kfree(table);
return -EINVAL;
}

3. em_pd_get

获取相应device的em_perf_domain结构,主要是给相关能量感知算法去调用,如:EAS、IPA。

/*
 * em_pd_get() - return the performance domain attached to @dev.
 *
 * Return: dev->em_pd, or NULL when @dev is NULL or an error pointer.
 */
struct em_perf_domain *em_pd_get(struct device *dev)
{
	return IS_ERR_OR_NULL(dev) ? NULL : dev->em_pd;
}

4. em_cpu_get

通过cpu id获取相应的cpu device的em_perf_domain结构,主要是给相关能量感知算法去调用,如:EAS、IPA。

/*
 * em_cpu_get() - return the performance domain of the CPU with id @cpu.
 *
 * Return: the domain reported by em_pd_get() for the CPU's device, or NULL
 * when get_cpu_device() finds no device for @cpu.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	if (dev)
		return em_pd_get(dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(em_cpu_get);

5. em_cpu_energy

估算性能域内所有CPU在预测的性能状态下消耗的能量(计算结果的单位实际是功率,但在一个调度周期内假定其恒定,因此可以当作能量值使用)

/*
 * em_cpu_energy() - estimate the energy consumed by the CPUs of a domain.
 * @pd:              performance domain to evaluate
 * @max_util:        utilization of the most utilized CPU of the domain
 * @sum_util:        sum of the utilization of all CPUs of the domain
 * @allowed_cpu_cap: upper bound applied to @max_util before it is turned
 *                   into a frequency (e.g. because of thermal capping)
 *
 * Return: ps->cost * @sum_util / scale_cpu for the selected performance
 * state, or 0 when @sum_util is 0.
 */
static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
				unsigned long max_util, unsigned long sum_util,
				unsigned long allowed_cpu_cap)
{
	struct em_perf_state *state;
	unsigned long capacity, target_freq;
	int first_cpu, idx;

	if (sum_util == 0)
		return 0;

	/*
	 * Predict the performance state the way schedutil would: turn the
	 * utilization of the busiest CPU into a frequency request. The real
	 * frequency may be capped lower, so the utilization is clamped to the
	 * allowed CPU capacity first.
	 */
	first_cpu = cpumask_first(to_cpumask(pd->cpus));
	/* Capacity of the first CPU of the domain. */
	capacity = arch_scale_cpu_capacity(first_cpu);
	/* The highest state provides the maximum frequency of the domain. */
	state = &pd->table[pd->nr_perf_states - 1];

	max_util = map_util_perf(max_util);
	max_util = min(max_util, allowed_cpu_cap);
	target_freq = map_util_freq(max_util, state->frequency, capacity);

	/*
	 * Pick the lowest performance state whose frequency is at or above
	 * the request; if none is, the last state of the table is kept.
	 */
	idx = 0;
	while (idx < pd->nr_perf_states) {
		state = &pd->table[idx];
		if (state->frequency >= target_freq)
			break;
		idx++;
	}

	/*
	 * Derivation of the returned value:
	 *
	 * (1) Capacity of a CPU running at state 'ps':
	 *       ps->cap = scale_cpu * ps->freq / cpu_max_freq
	 *
	 * (2) Ignoring idle-state costs (not available in the EM), the energy
	 *     of one CPU at that state is
	 *       cpu_nrg = ps->power * cpu_util / ps->cap
	 *     because 'cpu_util / ps->cap' is its share of busy time. Strictly
	 *     this is a power, but it is assumed constant over a scheduling
	 *     period and can therefore be handled as an energy.
	 *
	 * (3) Substituting (1) into (2):
	 *       cpu_nrg = (ps->power * cpu_max_freq / ps->freq)
	 *                 * (cpu_util / scale_cpu)
	 *     The first factor is static and stored as 'ps->cost'.
	 *
	 * (4) All CPUs of the domain share the same 'ps->cost' and the same
	 *     capacity, so the energy of the whole domain is
	 *       pd_nrg = ps->cost * \Sum cpu_util / scale_cpu
	 */
	return state->cost * sum_util / capacity;
}
本文地址:Linux源码 | EM能量模型
博客地址:https://hqber.com
个人原创作品,转载需联系我,必须标注署名、文章出处

扫描二维码推送至手机访问。

版权声明:本文由西安泽虎代运营发布,如需转载请注明出处。

转载请注明出处https://0291.com.cn/post/149013.html

相关文章

应用宝市场优化技巧策略 !

应用宝市场优化技巧策略 !

  作为第三方应用市场的带头者应用宝,其关键词覆盖并不算难。今天我们就来说说看关于应用宝的关键词覆盖小技巧。 一、关键词来源 首先还是来说说应用宝的关键词来源,在应用宝中,关键词权重大小比为:标题>副标题>应用标签和关键词>应用提供方=应用介绍...

日均百万GMV的直播间如何打造?(服饰行业)

日均百万GMV的直播间如何打造?(服饰行业)

  前言   2月4日,北京冬奥会开幕式拉开序幕,各国代表团逐一亮相,他们的入场服装让观众大饱眼福,这场盛会也成了大型服饰种草现场。   而运动品牌为何能不断突破自己,日均百万?   可能他们才是服装领域经久不衰的王牌...

抖音涨粉的方法全攻略,不看后悔一生

抖音涨粉的方法全攻略,不看后悔一生

音乐有不同的节拍和节奏,人体也有脉搏、呼吸等各种生理节奏。如果配合得好,音乐可以调节生理节奏。因为人体有明显的跟随音乐节奏的本能,音乐的速度也能带动身体动作的节奏。这是典型的说不想健康但是很诚实,所以我们可以用音乐来支配人的生理,然后影响人的情绪,从而达到上粉的目的。 1.账户位...

淘宝营业执照如何办理,淘宝注册营业执照地址(电商营业执照怎么办理)

淘宝营业执照如何办理,淘宝注册营业执照地址(电商营业执照怎么办理)

录淘宝小镇,点击在线电子注册,完善个人信息及资料,淘宝ID通过后,进入实人认证页面,用笔记本的摄像头拍照进行认证也可以。淘宝ID通过后,进入实人认证页面,签订市场主体告知承诺书,完善电子商务登记信息,然后提交审核。...

微信自媒体seo优化方式

微信自媒体seo优化方式

seo基础知识:养资源做自推广公司网站效果好不好?我们常常说站长要去做一些好的seo优化方式,seo优化方式也分为不同的情况,现在有的人就觉得,如果说是做seo优化,那可以养一些资源,做一些自媒体资源,然后慢慢的通过自媒体来推广自己的公司网站,这样的情况是否是好的呢?答案当然是肯定的,这种方式确实非...

手机网站SEO优化技术是什么。

手机网站SEO优化技术是什么。

随着当前移动终端趋势的不断发展,移动终端的用户流量已经逐渐超过PC终端,因此现在不仅要做PC终端网站的关键词排名,而且移动终端也应该逐渐引起关注。移动终端SEO网站的优化技术有哪些? 1、移动网站应该尽可能简单 ① 移动网站下载网页的速度比个人电脑网站慢得多,所以尽量减少网页的数量和大小。 ②...

现在,非常期待与您的又一次邂逅

我们努力让每一部企业宣传片和抖音短视频成为商业大片