Netlink套接字是用以实现用户进程与内核通信的一种特殊的进程间通信(IPC)机制,也是网络应用程序与内核通信最常用的接口,用户态应用使用标准的socket API就可以使用netlink提供的强大功能。
Netlink是一种特殊的socket,它是Linux所特有的,类似于BSD系统中的AF_ROUTE,但功能远比它强大。目前Linux内核中有很多通过netlink与用户态应用通信的场景,包括:路由(NETLINK_ROUTE)、用户态socket协议(NETLINK_USERSOCK)、防火墙(NETLINK_FIREWALL)、Netfilter子系统(NETLINK_NETFILTER)、内核事件向用户态通知(NETLINK_KOBJECT_UEVENT)、通用netlink(NETLINK_GENERIC)等。
嵌入式进阶教程分门别类整理好了,看的时候十分方便,由于内容较多,这里就截取一部分图吧。
需要的朋友私信【内核】即可领取。
相对于ioctl、sysfs、proc的优势:
Netlink协议簇初始化
Netlink协议簇初始化代码位于net/netlink/af_netlink.c中。
core_initcall(netlink_proto_init);static int __init netlink_proto_init(void){int i; // 注册netlink协议int err = proto_register(&netlink_proto, 0);if (err != 0)goto out;#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)err = bpf_iter_register();if (err)goto out;#endifBUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb)); // 申请netlink table,每种协议类型一个nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);if (!nl_table)goto panic; // 初始化netlink tablefor (i = 0; i < MAX_LINKS; i++) { // 初始化哈希表if (rhashtable_init(&nl_table[i].hash, &netlink_rhashtable_params) < 0) {while (--i > 0)rhashtable_destroy(&nl_table[i].hash);kfree(nl_table);goto panic;}} // 初始化应用层使用的NETLINK_USERSOCK协议类型的netlink(用于应用层进程间通信)netlink_add_usersock_entry(); // 向内核注册协议处理函数,即将netlink的socket创建处理函数注册到内核中sock_register(&netlink_family_ops); // 向内核所有的网络命名空间注册”子系统“的初始化和注销函数,在网络命名空间创建和注销时会调用这里注册的初始化和注销函数register_pernet_subsys(&netlink_net_ops);register_pernet_subsys(&netlink_tap_net_ops);/* The netlink device handler may be needed early. */ // 注册各个消息类型,注册指定的函数指针(至少其中一个必须为非NULL),以便在收到指定协议族和消息类型的请求消息时调用。rtnetlink_init();out:return err;panic:panic("netlink_init: Cannot allocate nl_table
");}创建Netlink
static const struct net_proto_family netlink_family_ops = {.family = PF_NETLINK,.create = netlink_create,.owner= THIS_MODULE,/* for consistency 8) */};static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern){struct module *module = NULL;struct mutex *cb_mutex;struct netlink_sock *nlk;int (*bind)(struct net *net, int group);void (*unbind)(struct net *net, int group);int err = 0;sock->state = SS_UNCONNECTED; // 支持raw和dgram类型if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)return -ESOCKTNOSUPPORT; // 检查netlink协议类型,目前22个,最大支持32个if (protocol < 0 || protocol >= MAX_LINKS)return -EPROTONOSUPPORT;protocol = array_index_nospec(protocol, MAX_LINKS); // 锁表netlink_lock_table();#ifdef CONFIG_MODULES // netlink指定协议未注册,则加载模块并注册if (!nl_table[protocol].registered) {netlink_unlock_table();request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);netlink_lock_table();}#endif // 查找dodule\cb_mutex\bind\unbindif (nl_table[protocol].registered && try_module_get(nl_table[protocol].module))module = nl_table[protocol].module;elseerr = -EPROTONOSUPPORT;cb_mutex = nl_table[protocol].cb_mutex;bind = nl_table[protocol].bind;unbind = nl_table[protocol].unbind; // 释放锁netlink_unlock_table();if (err < 0)goto out;err = __netlink_create(net, sock, cb_mutex, protocol, kern);if (err < 0)goto out_module; // 增加netlink协议inuse计数器sock_prot_inuse_add(net, &netlink_proto, 1); // 继续初始化netlink_socknlk = nlk_sk(sock->sk);nlk->module = module;nlk->netlink_bind = bind;nlk->netlink_unbind = unbind;out:return err;out_module:module_put(module);goto out;}static int __netlink_create(struct net *net, struct socket *sock, struct mutex *cb_mutex, int protocol, int kern){struct sock *sk;struct netlink_sock *nlk; // 注册netlink socket处理函数sock->ops = &netlink_ops; // 创建内核sock对象sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);if (!sk)return -ENOMEM; // 使用sockt初始sksock_init_data(sock, sk); // sk转netlink_sock,并初始化netlink_socknlk = nlk_sk(sk);if (cb_mutex) {nlk->cb_mutex = 
cb_mutex;} else {nlk->cb_mutex = &nlk->cb_def_mutex;mutex_init(nlk->cb_mutex);lockdep_set_class_and_name(nlk->cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]);} // 初始化netlink_sock的等待队列init_waitqueue_head(&nlk->wait); // sk协议和析构sk->sk_destruct = netlink_sock_destruct;sk->sk_protocol = protocol;return 0;}static const struct proto_ops netlink_ops = {.family =PF_NETLINK,.owner =THIS_MODULE,.release =netlink_release,.bind =netlink_bind,.connect =netlink_connect,.socketpair =sock_no_socketpair,.accept =sock_no_accept,.getname =netlink_getname,.poll =datagram_poll,.ioctl =netlink_ioctl,.listen =sock_no_listen,.shutdown =sock_no_shutdown,.setsockopt =netlink_setsockopt,.getsockopt =netlink_getsockopt,.sendmsg =netlink_sendmsg,.recvmsg =netlink_recvmsg,.mmap =sock_no_mmap,.sendpage =sock_no_sendpage,};接收Netlink消息
从socket上接收数据包skb,并解析成netlink msg。
/* *As we do 4.4BSD message passing we use a 4.4BSD message passing *system, not 4.3. Thus msg_accrights(len) are now missing. They *belong in an obscure libc emulation or the bin. */struct msghdr {void*msg_name;/* ptr to socket address structure */intmsg_namelen;/* size of socket address structure */struct iov_itermsg_iter;/* data *//* * Ancillary data. msg_control_user is the user buffer used for the * recv* side when msg_control_is_user is set, msg_control is the kernel * buffer used for all other cases. */union {void*msg_control;void __user*msg_control_user;};boolmsg_control_is_user : 1;__kernel_size_tmsg_controllen;/* ancillary data buffer length */unsigned intmsg_flags;/* flags on received message */struct kiocb*msg_iocb;/* ptr to iocb for async requests */};static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags){struct scm_cookie scm; // 内核sock对象struct sock *sk = sock->sk; // netink_sock对象struct netlink_sock *nlk = nlk_sk(sk);int noblock = flags & MSG_DONTWAIT;size_t copied;struct sk_buff *skb, *data_skb;int err, ret;if (flags & MSG_OOB)return -EOPNOTSUPP;copied = 0; // 从sk上接收数据包skbskb = skb_recv_datagram(sk, flags, noblock, &err);if (skb == NULL)goto out;data_skb = skb;#ifdef CONFIG_COMPAT_NETLINK_MESSAGESif (unlikely(skb_shinfo(skb)->frag_list)) {/* * If this skb has a frag_list, then here that means that we * will have to use the frag_list skb's data for compat tasks * and the regular skb's data for normal (non-compat) tasks. * * If we need to send the compat skb, assign it to the * 'data_skb' variable so that it will be used below for data * copying. We keep 'skb' for everything else, including * freeing both later. 
*/if (flags & MSG_CMSG_COMPAT)data_skb = skb_shinfo(skb)->frag_list;}#endif/* Record the max length of recvmsg() calls for future allocations */nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, SKB_WITH_OVERHEAD(32768)); // 计算需要拷贝的长度copied = data_skb->len;if (len < copied) {msg->msg_flags |= MSG_TRUNC;copied = len;} // 从skb拷贝数据到msgerr = skb_copy_datagram_msg(data_skb, 0, msg, copied); // socket地址和参数if (msg->msg_name) {DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);addr->nl_family = AF_NETLINK;addr->nl_pad = 0;addr->nl_pid= NETLINK_CB(skb).portid;addr->nl_groups= netlink_group_mask(NETLINK_CB(skb).dst_group);msg->msg_namelen = sizeof(*addr);}if (nlk->flags & NETLINK_F_RECV_PKTINFO)netlink_cmsg_recv_pktinfo(msg, skb);if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)netlink_cmsg_listen_all_nsid(sk, msg, skb); // 初始化scm_coookiememset(&scm, 0, sizeof(scm));scm.creds = *NETLINK_CREDS(skb);if (flags & MSG_TRUNC)copied = data_skb->len; // 释放skbskb_free_datagram(sk, skb);if (nlk->cb_running && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {ret = netlink_dump(sk);if (ret) {sk->sk_err = -ret;sk_error_report(sk);}} // scm:Socket level control messages processing,校验,并读取scm_cookie(进程信息、文件描述符等)scm_recv(sock, msg, &scm, flags);out: // 唤醒sk处理netlink_rcv_wake(sk);return err ? : copied;}发送Netlink消息
将要发送的netlink msg构造成skb数据包,然后发送。
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len){struct sock *sk = sock->sk;struct netlink_sock *nlk = nlk_sk(sk);DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);u32 dst_portid;u32 dst_group;struct sk_buff *skb;int err;struct scm_cookie scm;u32 netlink_skb_flags = 0;if (msg->msg_flags & MSG_OOB)return -EOPNOTSUPP;if (len == 0) {pr_warn_once("Zero length message leads to an empty skb
");return -ENODATA;} // scm:Socket level control messages processing,校验msg,并初始化scm_cookie(进程信息、文件描述符等)err = scm_send(sock, msg, &scm, true);if (err < 0)return err; // socket地址if (msg->msg_namelen) {err = -EINVAL;if (msg->msg_namelen < sizeof(struct sockaddr_nl))goto out;if (addr->nl_family != AF_NETLINK)goto out;dst_portid = addr->nl_pid;dst_group = ffs(addr->nl_groups);err = -EPERM;if ((dst_group || dst_portid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))goto out;netlink_skb_flags |= NETLINK_SKB_DST;} else {dst_portid = nlk->dst_portid;dst_group = nlk->dst_group;}/* Paired with WRITE_ONCE() in netlink_insert() */if (!READ_ONCE(nlk->bound)) {err = netlink_autobind(sock);if (err)goto out;} else {/* Ensure nlk is hashed and visible. */smp_rmb();}err = -EMSGSIZE;if (len > sk->sk_sndbuf - 32)goto out;err = -ENOBUFS; // 申请skbskb = netlink_alloc_large_skb(len, dst_group);if (skb == NULL)goto out;NETLINK_CB(skb).portid= nlk->portid;NETLINK_CB(skb).dst_group = dst_group;NETLINK_CB(skb).creds= scm.creds;NETLINK_CB(skb).flags= netlink_skb_flags;err = -EFAULT; // 拷贝msg到skbif (memcpy_from_msg(skb_put(skb, len), msg, len)) {kfree_skb(skb);goto out;} // 发送skberr = security_netlink_send(sk, skb);if (err) {kfree_skb(skb);goto out;} // 广播if (dst_group) {refcount_inc(&skb->users);netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);} // 单播err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags & MSG_DONTWAIT);out:scm_destroy(&scm);return err;}导读-最新发表 - 内核技术中文网 - 构建全国最权威的内核技术交流分享论坛
Linux内核中Netlink机制分析 (含运行代码) - 论坛 - 内核技术中文网 - 构建全国最权威的内核技术交流分享论坛
| 留言与评论(共有 0 条评论) “” |