linux qos 实现机制,linux的qos机制 - cgroup篇 (4)
下面來看各個子系統對cgroup的支持,第一篇先研究blkio子系統
blkio子系統支持三種類型的QoS控制:
blkio.weight, blkio.weight_device:這些是基于設備權重值的控制方式
blkio.throttle.read_bps_device,blkio.throttle.write_bps_device:這些是基于帶寬的控制方式
blkio.throttle.read_iops_device,blkio.throttle.write_iops_device:這些是基于iops的控制方式
其中基于權重的控制方式,必須依賴于CFQ調度器,而基于throttle的控制方式則只需要在通用塊層實現就可以了
1) 基于blkio的cgroup_subsys的定義如下:
/*
 * cgroup subsystem descriptor for the "blkio" controller.
 *
 * Wires the blkiocg_* callbacks into the cgroup core: creation and
 * destruction of a group, population of its control files, and the
 * two task-attach hooks.
 */
struct cgroup_subsys blkio_subsys = {
	.name			= "blkio",
#ifdef CONFIG_BLK_CGROUP
	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
	.subsys_id		= blkio_subsys_id,
#endif
	.use_id			= 1,
	.module			= THIS_MODULE,

	/* group lifecycle and control-file setup */
	.create			= blkiocg_create,
	.destroy		= blkiocg_destroy,
	.populate		= blkiocg_populate,

	/* task attach hooks */
	.can_attach_task	= blkiocg_can_attach_task,
	.attach_task		= blkiocg_attach_task,
};
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup):初始化一個blkio_cgroup結構,并初始化blkio_cgroup->policy_list, blkio_cgroup->blkg_list
blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup):略過
blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup):初始化好blkio_files里所有的blkio_policy_node對應的cgroup文件系統的文件
blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk):
/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
2) 基于blkio的policy的數據結構定義如下:
/*
 * One per-device rule configured through a blkio cgroup file.
 * Nodes are chained on a list via 'node' (blkio_cgroup.policy_list is
 * documented as a list of blkio_policy_node).
 */
struct blkio_policy_node {
/* List linkage for this rule */
struct list_head node;
/* Device number associated with this rule */
dev_t dev;
/* This node belongs to max bw policy or proportional weight policy */
enum blkio_policy_id plid;
/* cgroup file to which this rule belongs to */
int fileid;
/* Rule value; presumably only the member matching plid/fileid is meaningful */
union {
unsigned int weight;
/*
 * Rate read/write in terms of bytes per second
 * Whether this rate represents read or write is determined
 * by file type "fileid".
 */
u64 bps;
unsigned int iops;
} val;
};
/*
 * Callback table supplied by a blkio policy. Each member is a pointer to
 * a function of the like-named function type; a policy fills in only the
 * callbacks it implements (one unlink hook plus weight, bps and iops
 * update hooks are available).
 */
struct blkio_policy_ops {
/* Unlink-group callback */
blkio_unlink_group_fn *blkio_unlink_group_fn;
/* Weight update callback */
blkio_update_group_weight_fn *blkio_update_group_weight_fn;
/* Read/write bytes-per-second update callbacks */
blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
/* Read/write iops update callbacks */
blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
};
/*
 * Identifies which blkio policy a rule, control file or group belongs to.
 */
enum blkio_policy_id {
	/* Proportional Bandwidth division */
	BLKIO_POLICY_PROP	= 0,
	/* Throttling */
	BLKIO_POLICY_THROTL	= 1,
};
/*
 * Descriptor of one blkio policy: its callback table and its policy id,
 * plus a list linkage.
 */
struct blkio_policy_type {
/* List linkage; presumably chains the registered policy types — TODO confirm */
struct list_head list;
/* Callbacks implemented by this policy */
struct blkio_policy_ops ops;
/* Which policy this descriptor stands for */
enum blkio_policy_id plid;
};
blkio_policy_node,基本上可以看做一個cgroup文件系統下的一個配置文件對應一個blkio_policy_node,一個cgroup目錄的所有的policy_node都會被鏈在一個blkio_cgroup->policy_list的鏈表中
blkio_policy_type根據不同的blkio_policy_id有不同的blkio_policy_ops,blkio_policy_register在cfq_init,throtl_init時被調用,這兩個初始化函數分別對應基于權重的控制和基于閾值的控制,目前有兩個全局的blkio_policy_type的變量:
/*
 * Policy descriptor for the proportional-weight policy (BLKIO_POLICY_PROP).
 * Only the unlink and weight-update callbacks are provided; the remaining
 * ops members are left zero-initialized.
 */
static struct blkio_policy_type blkio_policy_cfq = {
	.plid = BLKIO_POLICY_PROP,
	.ops  = {
		.blkio_update_group_weight_fn	= cfq_update_blkio_group_weight,
		.blkio_unlink_group_fn		= cfq_unlink_blkio_group,
	},
};
以及
/*
 * Policy descriptor for the throttling policy (BLKIO_POLICY_THROTL).
 * Provides the unlink callback and the four bps/iops update callbacks;
 * the weight-update member is left zero-initialized.
 */
static struct blkio_policy_type blkio_policy_throtl = {
	.plid = BLKIO_POLICY_THROTL,
	.ops  = {
		/* bytes-per-second limits */
		.blkio_update_group_read_bps_fn   = throtl_update_blkio_group_read_bps,
		.blkio_update_group_write_bps_fn  = throtl_update_blkio_group_write_bps,
		/* io-operations-per-second limits */
		.blkio_update_group_read_iops_fn  = throtl_update_blkio_group_read_iops,
		.blkio_update_group_write_iops_fn = throtl_update_blkio_group_write_iops,
		/* group teardown */
		.blkio_unlink_group_fn            = throtl_unlink_blkio_group,
	},
};
3) 基于blkio的cgroup文件系統的數據結構如下:
/*
 * Control files of the blkio subsystem.
 *
 * Each entry's .private is built with BLKIOFILE_PRIVATE(policy id, file id),
 * combining the owning policy with the per-policy file identifier.
 * The per-device files use the string handlers blkiocg_file_read /
 * blkiocg_file_write with a 256-byte write limit; "weight" uses the u64
 * handlers instead.
 */
struct cftype blkio_files[] = {
/* Proportional-weight policy: per-device weight */
{
.name = "weight_device",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
BLKIO_PROP_weight_device),
.read_seq_string = blkiocg_file_read,
.write_string = blkiocg_file_write,
.max_write_len = 256,
},
/* Proportional-weight policy: group-wide weight (plain u64 file) */
{
.name = "weight",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
BLKIO_PROP_weight),
.read_u64 = blkiocg_file_read_u64,
.write_u64 = blkiocg_file_write_u64,
},
/* Throttle policy: per-device read bandwidth */
{
.name = "throttle.read_bps_device",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_bps_device),
.read_seq_string = blkiocg_file_read,
.write_string = blkiocg_file_write,
.max_write_len = 256,
},
/* Throttle policy: per-device write bandwidth */
{
.name = "throttle.write_bps_device",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_bps_device),
.read_seq_string = blkiocg_file_read,
.write_string = blkiocg_file_write,
.max_write_len = 256,
},
/* Throttle policy: per-device read iops */
{
.name = "throttle.read_iops_device",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_iops_device),
.read_seq_string = blkiocg_file_read,
.write_string = blkiocg_file_write,
.max_write_len = 256,
},
/* Throttle policy: per-device write iops */
{
.name = "throttle.write_iops_device",
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_iops_device),
.read_seq_string = blkiocg_file_read,
.write_string = blkiocg_file_write,
.max_write_len = 256,
},
/* NOTE(review): the excerpt stops here; any remaining entries and the closing brace of the array are not shown. */
基本上調用的都是blkiocg_file_read,blkiocg_file_write(weight文件除外,它使用的是blkiocg_file_read_u64,blkiocg_file_write_u64)
blkio_files中的struct cftype有個private成員變量,通過BLKIOFILE_PRIVATE宏來賦值,e.g.
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, BLKIO_PROP_weight_device)
之后可以通過BLKIOFILE_POLICY獲取其policy類型:BLKIO_POLICY_THROTL或者BLKIO_POLICY_PROP,通過BLKIOFILE_ATTR獲取其文件名,所有的配置文件都有如下定義:
/*
 * cgroup files owned by proportional weight policy.
 *
 * These identifiers are what gets passed as the second argument of
 * BLKIOFILE_PRIVATE() for BLKIO_POLICY_PROP files. Numbering starts at 1.
 */
enum blkcg_file_name_prop {
	BLKIO_PROP_weight		= 1,
	BLKIO_PROP_weight_device	= 2,
	BLKIO_PROP_io_service_bytes	= 3,
	BLKIO_PROP_io_serviced		= 4,
	BLKIO_PROP_time			= 5,
	BLKIO_PROP_sectors		= 6,
	BLKIO_PROP_unaccounted_time	= 7,
	BLKIO_PROP_io_service_time	= 8,
	BLKIO_PROP_io_wait_time		= 9,
	BLKIO_PROP_io_merged		= 10,
	BLKIO_PROP_io_queued		= 11,
	BLKIO_PROP_avg_queue_size	= 12,
	BLKIO_PROP_group_wait_time	= 13,
	BLKIO_PROP_idle_time		= 14,
	BLKIO_PROP_empty_time		= 15,
	BLKIO_PROP_dequeue		= 16,
};
/*
 * cgroup files owned by throttle policy.
 *
 * These identifiers are what gets passed as the second argument of
 * BLKIOFILE_PRIVATE() for BLKIO_POLICY_THROTL files. Numbering starts at 0.
 */
enum blkcg_file_name_throtl {
	BLKIO_THROTL_read_bps_device	= 0,
	BLKIO_THROTL_write_bps_device	= 1,
	BLKIO_THROTL_read_iops_device	= 2,
	BLKIO_THROTL_write_iops_device	= 3,
	BLKIO_THROTL_io_service_bytes	= 4,
	BLKIO_THROTL_io_serviced	= 5,
};
static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, struct seq_file *m):通過cftype得到POLICY_ID, POLICY_FILE_NAME,通過struct cgroup得到struct blkio_cgroup,然后調用blkio_read_policy_node_files,按照一定格式存到一個seq_file里面,可以參考blkio_print_policy_node函數
static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer):先調用blkio_policy_parse_and_set生成一個新的blkio_policy_node,下面的步驟就是先刪了已有的policy node,再把新的policy node插入到blkio_cgroup->policy_list里面,最后調用blkio_update_policy_node_blkg,該函數對blkio_cgroup下面的所有blkio_group,都調用blkio_update_blkg_policy,該函數會根據blkio_policy_node的plid, fileid,調用不同的 blkio_update_xxxxx函數,以weight為例,最終調用到blkio_update_group_weight,后者又調用cfq_update_blkio_group_weight(這是跟CFQ緊耦合的一個函數,這里不做介紹了)
4) 幾個關鍵的數據結構blkio_cgroup和blkio_group
/*
 * Per-cgroup state of the blkio controller. Embeds the generic
 * cgroup_subsys_state and carries the group's weight, a lock, and the
 * heads of two lists: blkio_group entries and per-device policy rules.
 */
struct blkio_cgroup {
/* Generic cgroup subsystem state embedded in this structure */
struct cgroup_subsys_state css;
/* Weight of this cgroup (presumably the value behind the "weight" file — TODO confirm) */
unsigned int weight;
/* NOTE(review): presumably protects the lists below — confirm against users */
spinlock_t lock;
/* Head of an hlist; blkio_group carries a matching hlist_node (blkcg_node) */
struct hlist_head blkg_list;
struct list_head policy_list; /* list of blkio_policy_node */
};
/*
 * One blkio group: created for a particular device and owned by a
 * particular policy, with its own statistics.
 */
struct blkio_group {
/* An rcu protected unique identifier for the group */
void *key;
/* hlist linkage (blkio_cgroup.blkg_list is an hlist head) */
struct hlist_node blkcg_node;
/* NOTE(review): presumably the id of the owning blkio cgroup — confirm */
unsigned short blkcg_id;
/* Store cgroup path */
char path[128];
/* The device MKDEV(major, minor), this group has been created for */
dev_t dev;
/* policy which owns this blk group */
enum blkio_policy_id plid;
/* Need to serialize the stats in the case of reset/update */
spinlock_t stats_lock;
struct blkio_group_stats stats;
/* Per cpu stats pointer */
struct blkio_group_stats_cpu __percpu *stats_cpu;
};
blkio_cgroup代表了一個cgroup,但是這個cgroup里的進程有可能會讀寫多個塊設備,所以通過一個cfq_data或者throtl_data的結構作為key(保存在blkio_group->key中),把多個blkio_group通過blkg_list這個hlist鏈表關聯到一個blkio_cgroup中。每個cfq_data或者throtl_data(根據policy的不同)實際上代表了一個塊設備
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的linux qos 实现机制,linux的qos机制 - cgroup篇 (4)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 增值税申报比对不通过的原因
- 下一篇: c语言中指针的类型,学习C语言中的指针类