linux kernel 网络协议栈之xps特性详解
xps全称是Transmit Packet Steering,是rfs/rps的作者Tom Herbert提交的又一个patch,预计会在2.6.37进入内核。
这个patch主要是针对多队列网卡发送时的优化:当发送一个数据包的时候,它会根据当前cpu来选择对应的发送队列,而这个cpu map可以通过sysfs来设置(每个发送队列对应一个文件:/sys/class/net/<dev>/queues/tx-<n>/xps_cpus)。
1 |
|
/*
 * Set of TX queues associated with one CPU.
 * get_xps_queue() picks one entry of queues[] for each outgoing packet.
 */
struct xps_map {
// Number of valid entries in queues[]; get_xps_queue() uses it both for the
// 1:1 shortcut (len == 1) and to scale the flow hash into an index.
unsigned int len;
// NOTE(review): presumably the allocated capacity of queues[] — not used in
// the code shown here, confirm against the map update path.
unsigned int alloc_len;
// NOTE(review): presumably used to defer freeing of the map via RCU — confirm.
struct rcu_head rcu;
// TX queue indices usable from this CPU (trailing variable-length array).
u16 queues[0];
};
1 |
|
/*
 * Per-device container holding one xps_map pointer per CPU.
 */
struct xps_dev_maps {
// RCU head (not a lock).
// NOTE(review): presumably used to defer freeing of this structure — confirm.
struct rcu_head rcu;
// RCU-protected xps_map pointers, one slot per CPU; get_xps_queue() indexes
// this array with raw_smp_processor_id(). A slot may be NULL.
struct xps_map __rcu *cpu_map[0];
};
1 |
|
/* Excerpt of struct net_device: only the XPS-related member is shown. */
struct net_device {
…………………………..
#ifdef CONFIG_XPS
// RCU-protected pointer to all XPS maps of this device; NULL is treated by
// get_xps_queue() as "no XPS mapping configured".
struct xps_dev_maps __rcu *xps_maps;
#endif
……………………..
}
1 |
|
// Fragment of dev_pick_tx(): a device with exactly one active TX queue leaves
// nothing to choose, so queue 0 is always used.
if (dev->real_num_tx_queues == 1)
queue_index = 0;
1 |
|
// Fragment of dev_pick_tx(): when the driver provides its own
// ndo_select_queue hook, the driver chooses the queue.
else if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb);
// NOTE(review): dev_cap_txqueue() presumably forces the driver's answer into
// the valid queue range — its body is not shown here, confirm.
queue_index = dev_cap_txqueue(dev, queue_index);
1 |
|
/*
 * Select the TX queue of @dev on which @skb will be sent, record the choice
 * in the skb and return the matching netdev_queue.
 *
 * Only the final "else" branch is shown in this excerpt; the earlier
 * branches and part of the body are elided.
 */
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
………………………………
else {
struct sock *sk = skb->sk;
// NOTE(review): presumably the queue index previously cached on the socket,
// negative when none is cached — confirm against sk_tx_queue_get().
queue_index = sk_tx_queue_get(sk);
// Recompute the queue when there is no usable cached index (negative or
// beyond the active queue count) or when skb->ooo_okay is set.
if (queue_index < 0 || skb->ooo_okay ||
queue_index >= dev->real_num_tx_queues) {
// Previous index is remembered here; it is consumed only by the elided code.
int old_index = queue_index;
// Try the XPS (per-CPU map) selection first.
queue_index = get_xps_queue(dev, skb);
if (queue_index < 0)
// XPS gave no answer: fall back to the older hash-based selection.
queue_index = skb_tx_hash(dev, skb);
………………………………………………
}
}
// Store the chosen queue index in the skb.
skb_set_queue_mapping(skb, queue_index);
// Return the queue object for that index.
return netdev_get_tx_queue(dev, queue_index);
}
1 |
|
/*
 * Pick a TX queue for @skb from the XPS map of the CPU we are running on.
 *
 * Returns the selected queue index, or -1 when no usable mapping exists:
 * XPS compiled out, no maps on @dev, no map for this CPU, or the mapped
 * queue is not below dev->real_num_tx_queues.
 */
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *maps;
	struct xps_map *cpu_queues;
	int picked = -1;

	/* Both map pointers are only dereferenced inside this read-side section. */
	rcu_read_lock();

	maps = rcu_dereference(dev->xps_maps);
	if (!maps)
		goto out;

	/* Queue set configured for the current CPU. */
	cpu_queues = rcu_dereference(maps->cpu_map[raw_smp_processor_id()]);
	if (!cpu_queues)
		goto out;

	if (cpu_queues->len == 1) {
		/* Single entry: 1:1 CPU-to-queue mapping, no hashing needed. */
		picked = cpu_queues->queues[0];
	} else {
		u32 seed;
		u32 mixed;

		/*
		 * Prefer the socket's hash when the skb has a socket with a
		 * non-zero sk_hash; otherwise derive a value from the
		 * protocol and the receive hash.
		 */
		if (skb->sk && skb->sk->sk_hash)
			seed = skb->sk->sk_hash;
		else
			seed = (__force u16) skb->protocol ^ skb->rxhash;

		mixed = jhash_1word(seed, hashrnd);

		/* Scale the 32-bit hash into the range [0, len). */
		picked = cpu_queues->queues[((u64)mixed * cpu_queues->len) >> 32];
	}

	/* Reject a mapped queue that is outside the active TX queue range. */
	if (unlikely(picked >= dev->real_num_tx_queues))
		picked = -1;

out:
	rcu_read_unlock();
	return picked;
#else
	return -1;
#endif
}
```