rtnetlink组数量与设置
rtnetlink的组在文件include/uapi/linux/rtnetlink.h中定义(enum rtnetlink_groups),目前共有32个枚举值,除去第一个RTNLGRP_NONE,即31个组。
/* RTnetlink multicast groups */ enum rtnetlink_groups {RTNLGRP_NONE, #define RTNLGRP_NONE RTNLGRP_NONERTNLGRP_LINK, #define RTNLGRP_LINK RTNLGRP_LINK ...RTNLGRP_IPV4_MROUTE_R, #define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_RRTNLGRP_IPV6_MROUTE_R, #define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R__RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1)再看一下文件include/uapi/linux/netlink.h中定义的sockaddr_nl结构,其成员nl_groups为一个32位的变量。
struct sockaddr_nl {__kernel_sa_family_t nl_family; /* AF_NETLINK */unsigned short nl_pad; /* zero */__u32 nl_pid; /* port ID */__u32 nl_groups; /* multicast groups mask */ };这将导致在应用层进程group绑定的时候,group不能超过32个。参见以下的iproute2-5.9.0代码中的monitor功能代码,函数nl_mgrp会对group大于31的情况进行判断,因为当前最大的组RTNLGRP_IPV6_MROUTE_R值就是31。
int do_ipmonitor(int argc, char **argv) { int lnexthop = 0, nh_set = 1;char *file = NULL;unsigned int groups = 0;groups |= nl_mgrp(RTNLGRP_LINK);...groups |= nl_mgrp(RTNLGRP_MPLS_NETCONF);if (rtnl_open(&rth, groups) < 0)exit(1);以下nl_mgrp函数中固定写了31,而没有使用宏定义RTNLGRP_MAX。并且,提示信息显示,如果要监听超过31的组,需要使用setsockopt接口。
static inline __u32 nl_mgrp(__u32 group) { if (group > 31 ) {fprintf(stderr, "Use setsockopt for this group %d\n", group);exit(-1);} return group ? (1 << (group - 1)) : 0;如下iproute2使用rtnl_open子函数,bind函数使用的是sockaddr_nl结构的成员nl_groups,其与subscriptions都是32位的长度。
int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, int protocol) {rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);if (rth->fd < 0) {perror("Cannot open netlink socket");return -1;}...memset(&rth->local, 0, sizeof(rth->local));rth->local.nl_family = AF_NETLINK;rth->local.nl_groups = subscriptions;if (bind(rth->fd, (struct sockaddr *)&rth->local, sizeof(rth->local)) < 0) {perror("Cannot bind netlink socket");return -1;}内核netlink设置组
首先看一下内核5.0中套接口结构netlink_sock的定义,与组相关的有三个变量,其中,subscriptions表示监听组的数量;groups保存监听组的位图bitmap;变量ngroups表示的是目前groups数组中最大可保存的组数量。
struct netlink_sock {/* struct sock has to be the first member of netlink_sock */struct sock sk;...u32 subscriptions;u32 ngroups;unsigned long *groups;netlink函数netlink_bind如下,如果地址结构sockaddr_nl中的成员nl_groups有值,内核将动态分配其空间,参见以下函数netlink_realloc_groups。由此可见内核支持监听超过32个的groups。
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) {struct sock *sk = sock->sk;struct net *net = sock_net(sk);struct netlink_sock *nlk = nlk_sk(sk);struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;unsigned long groups = nladdr->nl_groups;/* Only superuser is allowed to listen multicasts */if (groups) {if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))return -EPERM;err = netlink_realloc_groups(sk);如果当前套接口的组数量少于long类型的位数,将套接口地址groups中超过netlink_sock结构中最大组位数的部分清零。感觉这里使用BITS_PER_LONG不合适,因为groups的值取自nladdr->nl_groups,后者只有32位。
if (nlk->ngroups < BITS_PER_LONG)groups &= (1UL << nlk->ngroups) - 1;套接口nlk的成员变量subscriptions表示监听的组数量,函数netlink_update_subscriptions更新其值,注意这里的运算: hweight32(groups) - hweight32(nlk->groups[0])的结果表示监听组的数量的变化,变化值加到原来的subscriptions上,就是新的监听组数量。因为nlk->groups数组的其它元素,比如nlk->groups[1]中也可能有监听组,所以使用差值计算。
最后,将套接口nlk->groups[0]即第一个元素的低32位清零,或上新的监听组位图groups。
从这里可以看出,并没有修改nlk->groups数组除首个元素的其它元素,由于nlk->groups元素类型为long,对于64位系统,也没有修改首个元素的高32位。
if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))goto unlock;netlink_unlock_table();netlink_table_grab();netlink_update_subscriptions(sk, nlk->subscriptions +hweight32(groups) -hweight32(nlk->groups[0]));nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;以下为netlink_realloc_groups函数,如果协议(例如NETLINK_ROUTE)指定的组数量(groups),大于套接口nlk目前的组数量(ngroups),需要对nlk的数组进行扩充,完成之后,将新扩充出来的空间进行清零操作。
static int netlink_realloc_groups(struct sock *sk) {struct netlink_sock *nlk = nlk_sk(sk);unsigned int groups;unsigned long *new_groups;...groups = nl_table[sk->sk_protocol].groups;if (!nl_table[sk->sk_protocol].registered) {err = -ENOENT;goto out_unlock;}if (nlk->ngroups >= groups)goto out_unlock;new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);if (new_groups == NULL) {err = -ENOMEM;goto out_unlock;}memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));nlk->groups = new_groups;nlk->ngroups = groups;对于rtnetlink,其在注册时,groups数量设置为最大值RTNLGRP_MAX,对应于目前内核的31。
static int __net_init rtnetlink_net_init(struct net *net) {struct sock *sk;struct netlink_kernel_cfg cfg = {.groups = RTNLGRP_MAX,.input = rtnetlink_rcv,.cb_mutex = &rtnl_mutex,.flags = NL_CFG_F_NONROOT_RECV,.bind = rtnetlink_bind,};sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);之前提到的套接口结构成员subscriptions变量,由函数netlink_update_subscriptions进行更新,同时,更新套接口的绑定链表。
static void netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions) {struct netlink_sock *nlk = nlk_sk(sk);if (nlk->subscriptions && !subscriptions)__sk_del_bind_node(sk);else if (!nlk->subscriptions && subscriptions)sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);nlk->subscriptions = subscriptions;netlink获取组信息
如下函数netlink_getname所示,nladdr->nl_groups的取值为groups[0]中的值,虽然groups[0]为long类型,但是nl_groups为32位,所以对于64位系统,只取得了低32位的值。
static int netlink_getname(struct socket *sock, struct sockaddr *addr, int peer) {struct sock *sk = sock->sk;struct netlink_sock *nlk = nlk_sk(sk);DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);nladdr->nl_family = AF_NETLINK;nladdr->nl_pad = 0;if (peer) {nladdr->nl_pid = nlk->dst_portid;nladdr->nl_groups = netlink_group_mask(nlk->dst_group);} else {nladdr->nl_pid = nlk->portid;netlink_lock_table();nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;setsockopt接口
除了以上netlink接口设置组,还可通过setsockopt进行组设置,后者不受32位的限制。如下netlink_setsockopt,设置选项为NETLINK_ADD_MEMBERSHIP。
static int netlink_setsockopt(struct socket *sock, int level, int optname,char __user *optval, unsigned int optlen) {struct sock *sk = sock->sk;struct netlink_sock *nlk = nlk_sk(sk);switch (optname) {case NETLINK_ADD_MEMBERSHIP:case NETLINK_DROP_MEMBERSHIP: {if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))return -EPERM;err = netlink_realloc_groups(sk);if (err) return err;if (!val || val - 1 >= nlk->ngroups)return -EINVAL;if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {err = nlk->netlink_bind(sock_net(sk), val);if (err) return err;}netlink_table_grab();netlink_update_socket_mc(nlk, val, optname == NETLINK_ADD_MEMBERSHIP);如下netlink_update_socket_mc函数,用于设置subscriptions数量,以及设置nlk->groups数组位。
static void netlink_update_socket_mc(struct netlink_sock *nlk,unsigned int group, int is_new) {int old, new = !!is_new, subscriptions;old = test_bit(group - 1, nlk->groups);subscriptions = nlk->subscriptions - old + new;if (new)__set_bit(group - 1, nlk->groups);else__clear_bit(group - 1, nlk->groups);netlink_update_subscriptions(&nlk->sk, subscriptions);netlink_update_listeners(&nlk->sk);在内核5.9.9版本中,看到有新的组定义:RTNLGRP_NEXTHOP和RTNLGRP_BRVLAN。
/* RTnetlink multicast groups */ enum rtnetlink_groups {...RTNLGRP_IPV6_MROUTE_R, #define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_RRTNLGRP_NEXTHOP, #define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOPRTNLGRP_BRVLAN, #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN相应的iproute2-5.9.0中,实现了函数rtnl_add_nl_group来设置新增加的组,这两个新增组使用netlink的bind接口不能下发。
int do_ipmonitor(int argc, char **argv) {...if (rtnl_open(&rth, groups) < 0)exit(1);if (lnexthop && rtnl_add_nl_group(&rth, RTNLGRP_NEXTHOP) < 0) {fprintf(stderr, "Failed to add nexthop group to list\n");exit(1);}int rtnl_add_nl_group(struct rtnl_handle *rth, unsigned int group) {return setsockopt(rth->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,&group, sizeof(group)); }内核版本 5.0
总结
以上是生活随笔为你收集整理的rtnetlink组数量与设置的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: 安卓界面UI设计的尺寸标注问题
- 下一篇: 图像增强系列之图像自动去暗角算法。