From 84920c1420e2b4a4150e5bb45ee5a23ea4641523 Mon Sep 17 00:00:00 2001 From: Tony Zelenoff Date: Thu, 26 Jan 2012 22:28:58 +0000 Subject: net: Allow ipv6 proxies and arp proxies be shown with iproute2 Add ability to return neighbour proxies list to caller if it sent full ndmsg structure and has NTF_PROXY flag set. Before this patch (and before iproute2 patches): $ ip neigh add proxy 2001::1 dev eth0 $ ip -6 neigh show $ After it and with applied iproute2 patches: $ ip neigh add proxy 2001::1 dev eth0 $ ip -6 neigh show 2001::1 dev eth0 proxy $ Compatibility with old versions of iproute2 is not broken: the kernel checks the incoming structure size and works properly if the old structure is received. [v2] * changed comments style. * removed useless line with continue and curly bracket. * changed incoming message size check from "equal" to "greater than or equal". CC: davem@davemloft.net CC: kuznet@ms2.inr.ac.ru CC: netdev@vger.kernel.org CC: xemul@parallels.com Signed-off-by: Tony Zelenoff Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e287346e0934..f98ec444133a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2165,6 +2165,35 @@ nla_put_failure: return -EMSGSIZE; } +static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, + u32 pid, u32 seq, int type, unsigned int flags, + struct neigh_table *tbl) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = tbl->family; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = pn->flags | NTF_PROXY; + ndm->ndm_type = NDA_DST; + ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_state = NUD_NONE; + + NLA_PUT(skb, NDA_DST, tbl->key_len, pn->key); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + static void neigh_update_notify(struct neighbour *neigh) { call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); @@ -2214,23 +2243,78 @@ out: return rc; } +static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct pneigh_entry *n; + struct net *net = sock_net(skb->sk); + int rc, h, s_h = cb->args[3]; + int idx, s_idx = idx = cb->args[4]; + + read_lock_bh(&tbl->lock); + + for (h = 0; h <= PNEIGH_HASHMASK; h++) { + if (h < s_h) + continue; + if (h > s_h) + s_idx = 0; + for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { + if (dev_net(n->dev) != net) + continue; + if (idx < s_idx) + goto next; + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, tbl) <= 0) { + read_unlock_bh(&tbl->lock); + rc = -1; + goto out; + } + next: + idx++; + } + } + + read_unlock_bh(&tbl->lock); + rc = skb->len; +out: + 
cb->args[3] = h; + cb->args[4] = idx; + return rc; + +} + static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { struct neigh_table *tbl; int t, family, s_t; + int proxy = 0; + int err = 0; read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + + /* check for full ndmsg structure presence, family member is + * the same for both structures + */ + if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) && + ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY) + proxy = 1; + s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { + for (tbl = neigh_tables, t = 0; tbl && (err >= 0); + tbl = tbl->next, t++) { if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (neigh_dump_table(tbl, skb, cb) < 0) - break; + if (proxy) + err = pneigh_dump_table(tbl, skb, cb); + else + err = neigh_dump_table(tbl, skb, cb); } read_unlock(&neigh_tbl_lock); -- cgit v1.2.3 From e6ec26935aec629f03e76c67f3bbda68dd0155e2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 29 Jan 2012 15:50:43 +0000 Subject: netpoll: Convert printks to np_ and add pr_fmt Use a more current message logging style. Add pr_fmt to prefix dmesg output with "netpoll: " Add macros to print np->name. Signed-off-by: Joe Perches Reviewed-by: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 63 ++++++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 35 deletions(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 556b08298669..b5232743d5dc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -9,6 +9,8 @@ * Copyright (C) 2002 Red Hat, Inc. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -55,6 +57,13 @@ static void arp_reply(struct sk_buff *skb); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); +#define np_info(np, fmt, ...) \ + pr_info("%s: " fmt, np->name, ##__VA_ARGS__) +#define np_err(np, fmt, ...) \ + pr_err("%s: " fmt, np->name, ##__VA_ARGS__) +#define np_notice(np, fmt, ...) \ + pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) + static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = @@ -627,18 +636,12 @@ out: void netpoll_print_options(struct netpoll *np) { - printk(KERN_INFO "%s: local port %d\n", - np->name, np->local_port); - printk(KERN_INFO "%s: local IP %pI4\n", - np->name, &np->local_ip); - printk(KERN_INFO "%s: interface '%s'\n", - np->name, np->dev_name); - printk(KERN_INFO "%s: remote port %d\n", - np->name, np->remote_port); - printk(KERN_INFO "%s: remote IP %pI4\n", - np->name, &np->remote_ip); - printk(KERN_INFO "%s: remote ethernet address %pM\n", - np->name, np->remote_mac); + np_info(np, "local port %d\n", np->local_port); + np_info(np, "local IP %pI4\n", &np->local_ip); + np_info(np, "interface '%s'\n", np->dev_name); + np_info(np, "remote port %d\n", np->remote_port); + np_info(np, "remote IP %pI4\n", &np->remote_ip); + np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); @@ -680,8 +683,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) goto parse_failed; *delim = 0; if (*cur == ' ' || *cur == '\t') - printk(KERN_INFO "%s: warning: whitespace" - "is not allowed\n", np->name); + np_info(np, "warning: whitespace is not allowed\n"); np->remote_port = simple_strtol(cur, NULL, 10); cur = delim; } @@ -705,8 +707,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) return 0; parse_failed: - printk(KERN_INFO "%s: couldn't parse config at '%s'!\n", - np->name, cur); + np_info(np, "couldn't parse config at '%s'!\n", cur); 
return -1; } EXPORT_SYMBOL(netpoll_parse_options); @@ -721,8 +722,8 @@ int __netpoll_setup(struct netpoll *np) if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || !ndev->netdev_ops->ndo_poll_controller) { - printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", - np->name, np->dev_name); + np_err(np, "%s doesn't support polling, aborting\n", + np->dev_name); err = -ENOTSUPP; goto out; } @@ -785,14 +786,12 @@ int netpoll_setup(struct netpoll *np) if (np->dev_name) ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { - printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", - np->name, np->dev_name); + np_err(np, "%s doesn't exist, aborting\n", np->dev_name); return -ENODEV; } if (ndev->master) { - printk(KERN_ERR "%s: %s is a slave device, aborting.\n", - np->name, np->dev_name); + np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; } @@ -800,16 +799,14 @@ int netpoll_setup(struct netpoll *np) if (!netif_running(ndev)) { unsigned long atmost, atleast; - printk(KERN_INFO "%s: device %s not up yet, forcing it\n", - np->name, np->dev_name); + np_info(np, "device %s not up yet, forcing it\n", np->dev_name); rtnl_lock(); err = dev_open(ndev); rtnl_unlock(); if (err) { - printk(KERN_ERR "%s: failed to open %s\n", - np->name, ndev->name); + np_err(np, "failed to open %s\n", ndev->name); goto put; } @@ -817,9 +814,7 @@ int netpoll_setup(struct netpoll *np) atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { if (time_after(jiffies, atmost)) { - printk(KERN_NOTICE - "%s: timeout waiting for carrier\n", - np->name); + np_notice(np, "timeout waiting for carrier\n"); break; } msleep(1); @@ -831,9 +826,7 @@ int netpoll_setup(struct netpoll *np) */ if (time_before(jiffies, atleast)) { - printk(KERN_NOTICE "%s: carrier detect appears" - " untrustworthy, waiting 4 seconds\n", - np->name); + np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n"); msleep(4000); } } @@ -844,15 +837,15 @@ int 
netpoll_setup(struct netpoll *np) if (!in_dev || !in_dev->ifa_list) { rcu_read_unlock(); - printk(KERN_ERR "%s: no IP address for %s, aborting\n", - np->name, np->dev_name); + np_err(np, "no IP address for %s, aborting\n", + np->dev_name); err = -EDESTADDRREQ; goto put; } np->local_ip = in_dev->ifa_list->ifa_local; rcu_read_unlock(); - printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); + np_info(np, "local IP %pI4\n", &np->local_ip); } np->dev = ndev; -- cgit v1.2.3 From 6f7062457fc98e1fa22f74d8f386ed241213dec6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 29 Jan 2012 15:50:44 +0000 Subject: netpoll: Neaten MAX_SKB_SIZE macro Add the types in the packet layout order. Signed-off-by: Joe Perches Reviewed-by: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5232743d5dc..4ce473ea5dc0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,9 +47,11 @@ static atomic_t trapped; #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 -#define MAX_SKB_SIZE \ - (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ - sizeof(struct iphdr) + sizeof(struct ethhdr)) +#define MAX_SKB_SIZE \ + (sizeof(struct ethhdr) + \ + sizeof(struct iphdr) + \ + sizeof(struct udphdr) + \ + MAX_UDP_CHUNK) static void zap_completion_queue(void); static void arp_reply(struct sk_buff *skb); -- cgit v1.2.3 From 7b6cd1ce72176e21be15a0ac153bdaa5be1b208a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 1 Feb 2012 10:54:43 +0000 Subject: PATCH V2 net-next] net: dev: Convert printks to pr_ Use the current logging style. Coalesce formats where appropriate. Update grammar where appropriate. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/core/dev.c | 94 ++++++++++++++++++++++++---------------------------------- 1 file changed, 39 insertions(+), 55 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 115dee1d985d..f1249472e90e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -446,7 +446,7 @@ void __dev_remove_pack(struct packet_type *pt) } } - printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); + pr_warn("dev_remove_pack: %p not found\n", pt); out: spin_unlock(&ptype_lock); } @@ -1039,8 +1039,7 @@ rollback: memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", + pr_err("%s: name change rollback failed: %d\n", dev->name, ret); } } @@ -1139,9 +1138,8 @@ void dev_load(struct net *net, const char *name) no_module = request_module("netdev-%s", name); if (no_module && capable(CAP_SYS_MODULE)) { if (!request_module("%s", name)) - pr_err("Loading kernel module for a network device " -"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s " -"instead\n", name); + pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", + name); } } EXPORT_SYMBOL(dev_load); @@ -1655,10 +1653,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (skb_network_header(skb2) < skb2->data || skb2->network_header > skb2->tail) { if (net_ratelimit()) - printk(KERN_CRIT "protocol %04x is " - "buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); + pr_crit("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); skb_reset_network_header(skb2); } @@ -1691,9 +1688,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) /* If TC0 is invalidated disable TC mapping */ if (tc->offset + tc->count > txq) { - pr_warning("Number of in use tx queues changed " - "invalidating tc mappings. 
Priority " - "traffic classification disabled!\n"); + pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n"); dev->num_tc = 0; return; } @@ -1704,11 +1699,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) tc = &dev->tc_to_txq[q]; if (tc->offset + tc->count > txq) { - pr_warning("Number of in use tx queues " - "changed. Priority %i to tc " - "mapping %i is no longer valid " - "setting map to 0\n", - i, q); + pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n", + i, q); netdev_set_prio_tc_map(dev, i, 0); } } @@ -2014,8 +2006,7 @@ EXPORT_SYMBOL(skb_gso_segment); void netdev_rx_csum_fault(struct net_device *dev) { if (net_ratelimit()) { - printk(KERN_ERR "%s: hw csum failure.\n", - dev ? dev->name : ""); + pr_err("%s: hw csum failure\n", dev ? dev->name : ""); dump_stack(); } } @@ -2332,9 +2323,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { if (net_ratelimit()) { - pr_warning("%s selects TX queue %d, but " - "real number of TX queues is %d\n", - dev->name, queue_index, dev->real_num_tx_queues); + pr_warn("%s selects TX queue %d, but real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); } return 0; } @@ -2578,16 +2569,16 @@ int dev_queue_xmit(struct sk_buff *skb) } HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) - printk(KERN_CRIT "Virtual device %s asks to " - "queue packet!\n", dev->name); + pr_crit("Virtual device %s asks to queue packet!\n", + dev->name); } else { /* Recursion is detected! 
It is possible, * unfortunately */ recursion_alert: if (net_ratelimit()) - printk(KERN_CRIT "Dead loop on virtual device " - "%s, fix it urgently!\n", dev->name); + pr_crit("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); } } @@ -3069,8 +3060,8 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) if (unlikely(MAX_RED_LOOP < ttl++)) { if (net_ratelimit()) - pr_warning( "Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); + pr_warn("Redir loop detected Dropping packet (%d->%d)\n", + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -4491,16 +4482,15 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) dev->flags &= ~IFF_PROMISC; else { dev->promiscuity -= inc; - printk(KERN_WARNING "%s: promiscuity touches roof, " - "set promiscuity failed, promiscuity feature " - "of device might be broken.\n", dev->name); + pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n", + dev->name); return -EOVERFLOW; } } if (dev->flags != old_flags) { - printk(KERN_INFO "device %s %s promiscuous mode\n", - dev->name, (dev->flags & IFF_PROMISC) ? "entered" : - "left"); + pr_info("device %s %s promiscuous mode\n", + dev->name, + dev->flags & IFF_PROMISC ? "entered" : "left"); if (audit_enabled) { current_uid_gid(&uid, &gid); audit_log(current->audit_context, GFP_ATOMIC, @@ -4573,9 +4563,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) dev->flags &= ~IFF_ALLMULTI; else { dev->allmulti -= inc; - printk(KERN_WARNING "%s: allmulti touches roof, " - "set allmulti failed, allmulti feature of " - "device might be broken.\n", dev->name); + pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n", + dev->name); return -EOVERFLOW; } } @@ -5232,8 +5221,8 @@ static void rollback_registered_many(struct list_head *head) * devices and proceed with the remaining. 
*/ if (dev->reg_state == NETREG_UNINITIALIZED) { - pr_debug("unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); + pr_debug("unregister_netdevice: device %s/%p never was registered\n", + dev->name, dev); WARN_ON(1); list_del(&dev->unreg_list); @@ -5465,7 +5454,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); if (!rx) { - pr_err("netdev: Unable to allocate %u rx queues.\n", count); + pr_err("netdev: Unable to allocate %u rx queues\n", count); return -ENOMEM; } dev->_rx = rx; @@ -5499,8 +5488,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); if (!tx) { - pr_err("netdev: Unable to allocate %u tx queues.\n", - count); + pr_err("netdev: Unable to allocate %u tx queues\n", count); return -ENOMEM; } dev->_tx = tx; @@ -5759,10 +5747,8 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); if (time_after(jiffies, warning_time + 10 * HZ)) { - printk(KERN_EMERG "unregister_netdevice: " - "waiting for %s to become free. Usage " - "count = %d\n", - dev->name, refcnt); + pr_emerg("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", + dev->name, refcnt); warning_time = jiffies; } } @@ -5813,7 +5799,7 @@ void netdev_run_todo(void) list_del(&dev->todo_list); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { - printk(KERN_ERR "network todo '%s' but state %d\n", + pr_err("network todo '%s' but state %d\n", dev->name, dev->reg_state); dump_stack(); continue; @@ -5929,15 +5915,13 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); if (txqs < 1) { - pr_err("alloc_netdev: Unable to allocate device " - "with zero queues.\n"); + pr_err("alloc_netdev: Unable to allocate device with zero queues\n"); return NULL; } #ifdef CONFIG_RPS if (rxqs < 1) { - pr_err("alloc_netdev: Unable to allocate device " - "with zero RX queues.\n"); + pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); return NULL; } #endif @@ -5953,7 +5937,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { - printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); + pr_err("alloc_netdev: Unable to allocate device\n"); return NULL; } @@ -6486,8 +6470,8 @@ static void __net_exit default_device_exit(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { - printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", - __func__, dev->name, err); + pr_emerg("%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); BUG(); } } -- cgit v1.2.3 From 761b3ef50e1c2649cffbfa67a4dcb2dcdb7982ed Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 31 Jan 2012 13:47:36 +0800 Subject: cgroup: remove cgroup_subsys argument from callbacks The argument is not used at all, and it's not necessary, because a specific callback handler of course knows which subsys it belongs to. 
Now only ->populate() takes this argument, because the handlers of this callback always call cgroup_add_file()/cgroup_add_files(). So we reduce a few lines of code, though the shrinking of object size is minimal. 16 files changed, 113 insertions(+), 162 deletions(-) text data bss dec hex filename 5486240 656987 7039960 13183187 c928d3 vmlinux.o.orig 5486170 656987 7039960 13183117 c9288d vmlinux.o Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- Documentation/cgroups/cgroups.txt | 26 +++++++++------------ block/blk-cgroup.c | 22 +++++++----------- include/linux/cgroup.h | 29 ++++++++++------------- include/net/sock.h | 7 +++--- include/net/tcp_memcontrol.h | 2 +- kernel/cgroup.c | 43 +++++++++++++++++------------------ kernel/cgroup_freezer.c | 11 ++++----- kernel/cpuset.c | 16 ++++--------- kernel/events/core.c | 13 ++++------- kernel/sched/core.c | 20 +++++++--------- mm/memcontrol.c | 48 ++++++++++++++++----------------------- net/core/netprio_cgroup.c | 10 ++++---- net/core/sock.c | 6 ++--- net/ipv4/tcp_memcontrol.c | 2 +- net/sched/cls_cgroup.c | 10 ++++---- security/device_cgroup.c | 10 ++++---- 16 files changed, 113 insertions(+), 162 deletions(-) (limited to 'net/core') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index a7c96ae5557c..8e74980ab385 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -558,8 +558,7 @@ Each subsystem may export the following methods. The only mandatory methods are create/destroy. Any others that are null are presumed to be successful no-ops. -struct cgroup_subsys_state *create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +struct cgroup_subsys_state *create(struct cgroup *cgrp) (cgroup_mutex held by caller) Called to create a subsystem state object for a cgroup. The @@ -574,7 +573,7 @@ identified by the passed cgroup object having a NULL parent (since it's the root of the hierarchy) and may be an appropriate place for initialization code. 
-void destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +void destroy(struct cgroup *cgrp) (cgroup_mutex held by caller) The cgroup system is about to destroy the passed cgroup; the subsystem @@ -585,7 +584,7 @@ cgroup->parent is still valid. (Note - can also be called for a newly-created cgroup if an error occurs after this subsystem's create() method has been called for the new cgroup). -int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +int pre_destroy(struct cgroup *cgrp); Called before checking the reference count on each subsystem. This may be useful for subsystems which have some extra references even if @@ -593,8 +592,7 @@ there are not tasks in the cgroup. If pre_destroy() returns error code, rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. -int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called prior to moving one or more tasks into a cgroup; if the @@ -615,8 +613,7 @@ fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either attach() or cancel_attach() will be called in future. -void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called when a task attach operation has failed after can_attach() has succeeded. @@ -625,23 +622,22 @@ function, so that the subsystem can implement a rollback. If not, not necessary. This will be called only about subsystems whose can_attach() operation have succeeded. The parameters are identical to can_attach(). 
-void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +void attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. The parameters are identical to can_attach(). -void fork(struct cgroup_subsy *ss, struct task_struct *task) +void fork(struct task_struct *task) Called when a task is forked into a cgroup. -void exit(struct cgroup_subsys *ss, struct task_struct *task) +void exit(struct task_struct *task) Called during task exit. -int populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +int populate(struct cgroup *cgrp) (cgroup_mutex held by caller) Called after creation of a cgroup to allow a subsystem to populate @@ -651,7 +647,7 @@ include/linux/cgroup.h for details). Note that although this method can return an error code, the error code is currently not always handled well. -void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) +void post_clone(struct cgroup *cgrp) (cgroup_mutex held by caller) Called during cgroup_create() to do any parameter @@ -659,7 +655,7 @@ initialization which might be required before a task could attach. For example in cpusets, no task may attach before 'cpus' and 'mems' are set up. 
-void bind(struct cgroup_subsys *ss, struct cgroup *root) +void bind(struct cgroup *root) (cgroup_mutex and ss->hierarchy_mutex held by caller) Called when a cgroup subsystem is rebound to a different hierarchy diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fa8f26309444..1359d637831f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -28,13 +28,10 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); -static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, - struct cgroup *); -static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, - struct cgroup_taskset *); -static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, - struct cgroup_taskset *); -static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); +static struct cgroup_subsys_state *blkiocg_create(struct cgroup *); +static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *); +static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *); +static void blkiocg_destroy(struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); /* for encoding cft->private value on file */ @@ -1548,7 +1545,7 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ARRAY_SIZE(blkio_files)); } -static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +static void blkiocg_destroy(struct cgroup *cgroup) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); unsigned long flags; @@ -1598,8 +1595,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) kfree(blkcg); } -static struct cgroup_subsys_state * -blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) +static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup) { struct blkio_cgroup *blkcg; struct cgroup *parent = cgroup->parent; @@ -1628,8 +1624,7 @@ 
done: * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. */ -static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; @@ -1648,8 +1643,7 @@ static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return ret; } -static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7da3e745b74c..501adb1b2f43 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -452,23 +452,18 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); */ struct cgroup_subsys { - struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, - struct cgroup *cgrp); - int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); - void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task); - int (*populate)(struct cgroup_subsys *ss, - struct cgroup *cgrp); - void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); - void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); + int (*pre_destroy)(struct cgroup *cgrp); + 
void (*destroy)(struct cgroup *cgrp); + int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task); + int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp); + void (*post_clone)(struct cgroup *cgrp); + void (*bind)(struct cgroup *root); int subsys_id; int active; diff --git a/include/net/sock.h b/include/net/sock.h index bb972d254dff..705d1add19a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,7 +68,7 @@ struct cgroup; struct cgroup_subsys; #ifdef CONFIG_NET int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss); +void mem_cgroup_sockets_destroy(struct cgroup *cgrp); #else static inline int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) @@ -76,7 +76,7 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) return 0; } static inline -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +void mem_cgroup_sockets_destroy(struct cgroup *cgrp) { } #endif @@ -869,8 +869,7 @@ struct proto { */ int (*init_cgroup)(struct cgroup *cgrp, struct cgroup_subsys *ss); - void (*destroy_cgroup)(struct cgroup *cgrp, - struct cgroup_subsys *ss); + void (*destroy_cgroup)(struct cgroup *cgrp); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif }; diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 3512082fa909..48410ff25c9e 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@ -13,7 +13,7 @@ struct tcp_memcontrol { struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); 
-void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); +void tcp_destroy_cgroup(struct cgroup *cgrp); unsigned long long tcp_max_memory(const struct mem_cgroup *memcg); void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx); #endif /* _TCP_MEMCG_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 43a224f167b5..865d89a580c7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -818,7 +818,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) if (ss->pre_destroy) { - ret = ss->pre_destroy(ss, cgrp); + ret = ss->pre_destroy(cgrp); if (ret) break; } @@ -846,7 +846,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * Release the subsystem state objects. */ for_each_subsys(cgrp->root, ss) - ss->destroy(ss, cgrp); + ss->destroy(cgrp); cgrp->root->number_of_cgroups--; mutex_unlock(&cgroup_mutex); @@ -1015,7 +1015,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, list_move(&ss->sibling, &root->subsys_list); ss->root = root; if (ss->bind) - ss->bind(ss, cgrp); + ss->bind(cgrp); mutex_unlock(&ss->hierarchy_mutex); /* refcount was already taken, and we're keeping it */ } else if (bit & removed_bits) { @@ -1025,7 +1025,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, BUG_ON(cgrp->subsys[i]->cgroup != cgrp); mutex_lock(&ss->hierarchy_mutex); if (ss->bind) - ss->bind(ss, dummytop); + ss->bind(dummytop); dummytop->subsys[i]->cgroup = dummytop; cgrp->subsys[i] = NULL; subsys[i]->root = &rootnode; @@ -1908,7 +1908,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, &tset); + retval = ss->can_attach(cgrp, &tset); if (retval) { /* * Remember on which subsystem the can_attach() @@ -1932,7 +1932,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, &tset); + ss->attach(cgrp, 
&tset); } synchronize_rcu(); @@ -1954,7 +1954,7 @@ out: */ break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, &tset); + ss->cancel_attach(cgrp, &tset); } } return retval; @@ -2067,7 +2067,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, &tset); + retval = ss->can_attach(cgrp, &tset); if (retval) { failed_ss = ss; goto out_cancel_attach; @@ -2104,7 +2104,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, &tset); + ss->attach(cgrp, &tset); } /* @@ -2128,7 +2128,7 @@ out_cancel_attach: if (ss == failed_ss) break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, &tset); + ss->cancel_attach(cgrp, &tset); } } out_free_group_list: @@ -3756,7 +3756,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); for_each_subsys(root, ss) { - struct cgroup_subsys_state *css = ss->create(ss, cgrp); + struct cgroup_subsys_state *css = ss->create(cgrp); if (IS_ERR(css)) { err = PTR_ERR(css); @@ -3770,7 +3770,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, } /* At error, ->destroy() callback has to free assigned ID. 
*/ if (clone_children(parent) && ss->post_clone) - ss->post_clone(ss, cgrp); + ss->post_clone(cgrp); } cgroup_lock_hierarchy(root); @@ -3804,7 +3804,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, for_each_subsys(root, ss) { if (cgrp->subsys[ss->subsys_id]) - ss->destroy(ss, cgrp); + ss->destroy(cgrp); } mutex_unlock(&cgroup_mutex); @@ -4028,7 +4028,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) /* Create the top cgroup state for this subsystem */ list_add(&ss->sibling, &rootnode.subsys_list); ss->root = &rootnode; - css = ss->create(ss, dummytop); + css = ss->create(dummytop); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_cgroup_css(css, ss, dummytop); @@ -4117,7 +4117,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * no ss->create seems to need anything important in the ss struct, so * this can happen first (i.e. before the rootnode attachment). */ - css = ss->create(ss, dummytop); + css = ss->create(dummytop); if (IS_ERR(css)) { /* failure case - need to deassign the subsys[] slot. */ subsys[i] = NULL; @@ -4135,7 +4135,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) int ret = cgroup_init_idr(ss, css); if (ret) { dummytop->subsys[ss->subsys_id] = NULL; - ss->destroy(ss, dummytop); + ss->destroy(dummytop); subsys[i] = NULL; mutex_unlock(&cgroup_mutex); return ret; @@ -4233,7 +4233,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) * pointer to find their state. note that this also takes care of * freeing the css_id. 
*/ - ss->destroy(ss, dummytop); + ss->destroy(dummytop); dummytop->subsys[ss->subsys_id] = NULL; mutex_unlock(&cgroup_mutex); @@ -4509,7 +4509,7 @@ void cgroup_fork_callbacks(struct task_struct *child) for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss->fork) - ss->fork(ss, child); + ss->fork(child); } } } @@ -4611,7 +4611,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) struct cgroup *old_cgrp = rcu_dereference_raw(cg->subsys[i])->cgroup; struct cgroup *cgrp = task_cgroup(tsk, i); - ss->exit(ss, cgrp, old_cgrp, tsk); + ss->exit(cgrp, old_cgrp, tsk); } } } @@ -5066,8 +5066,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) } #ifdef CONFIG_CGROUP_DEBUG -static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, - struct cgroup *cont) +static struct cgroup_subsys_state *debug_create(struct cgroup *cont) { struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); @@ -5077,7 +5076,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, return css; } -static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +static void debug_destroy(struct cgroup *cont) { kfree(cont->subsys[debug_subsys_id]); } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fc0646b78a64..f86e93920b62 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -128,8 +128,7 @@ struct cgroup_subsys freezer_subsys; * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) * sighand->siglock */ -static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) { struct freezer *freezer; @@ -142,8 +141,7 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, return &freezer->css; } -static void freezer_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void 
freezer_destroy(struct cgroup *cgroup) { struct freezer *freezer = cgroup_freezer(cgroup); @@ -164,8 +162,7 @@ static bool is_task_frozen_enough(struct task_struct *task) * a write to that file racing against an attach, and hence the * can_attach() result will remain valid until the attach completes. */ -static int freezer_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, +static int freezer_can_attach(struct cgroup *new_cgroup, struct cgroup_taskset *tset) { struct freezer *freezer; @@ -185,7 +182,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, return 0; } -static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +static void freezer_fork(struct task_struct *task) { struct freezer *freezer; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a09ac2b9a661..5d575836dba6 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1399,8 +1399,7 @@ static nodemask_t cpuset_attach_nodemask_from; static nodemask_t cpuset_attach_nodemask_to; /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ -static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct cpuset *cs = cgroup_cs(cgrp); struct task_struct *task; @@ -1436,8 +1435,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return 0; } -static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct mm_struct *mm; struct task_struct *task; @@ -1833,8 +1831,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) * (and likewise for mems) to the new cgroup. Called with cgroup_mutex * held. 
*/ -static void cpuset_post_clone(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void cpuset_post_clone(struct cgroup *cgroup) { struct cgroup *parent, *child; struct cpuset *cs, *parent_cs; @@ -1857,13 +1854,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, /* * cpuset_create - create a cpuset - * ss: cpuset cgroup subsystem * cont: control group that the new cpuset will be part of */ -static struct cgroup_subsys_state *cpuset_create( - struct cgroup_subsys *ss, - struct cgroup *cont) +static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) { struct cpuset *cs; struct cpuset *parent; @@ -1902,7 +1896,7 @@ static struct cgroup_subsys_state *cpuset_create( * will call async_rebuild_sched_domains(). */ -static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +static void cpuset_destroy(struct cgroup *cont) { struct cpuset *cs = cgroup_cs(cont); diff --git a/kernel/events/core.c b/kernel/events/core.c index a8f4ac001a00..a5d1ee92b0d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6906,8 +6906,7 @@ unlock: device_initcall(perf_event_sysfs_init); #ifdef CONFIG_CGROUP_PERF -static struct cgroup_subsys_state *perf_cgroup_create( - struct cgroup_subsys *ss, struct cgroup *cont) +static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont) { struct perf_cgroup *jc; @@ -6924,8 +6923,7 @@ static struct cgroup_subsys_state *perf_cgroup_create( return &jc->css; } -static void perf_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void perf_cgroup_destroy(struct cgroup *cont) { struct perf_cgroup *jc; jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), @@ -6941,8 +6939,7 @@ static int __perf_cgroup_move(void *info) return 0; } -static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -6950,8 +6947,8 
@@ static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, task_function_call(task, __perf_cgroup_move, task); } -static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) +static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df00cb09263e..ff12f7216062 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7530,8 +7530,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) struct task_group, css); } -static struct cgroup_subsys_state * -cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) { struct task_group *tg, *parent; @@ -7548,15 +7547,14 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) return &tg->css; } -static void -cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpu_cgroup_destroy(struct cgroup *cgrp) { struct task_group *tg = cgroup_tg(cgrp); sched_destroy_group(tg); } -static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static int cpu_cgroup_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7574,7 +7572,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return 0; } -static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static void cpu_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7584,8 +7582,8 @@ static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } static void -cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) +cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup 
*old_cgrp, + struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. @@ -7935,8 +7933,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { */ /* create a new cpu accounting group */ -static struct cgroup_subsys_state *cpuacct_create( - struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) { struct cpuacct *ca; @@ -7966,8 +7963,7 @@ out: } /* destroy an existing cpu accounting group */ -static void -cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpuacct_destroy(struct cgroup *cgrp) { struct cpuacct *ca = cgroup_ca(cgrp); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3dbff4dcde35..ae2f0a8ab761 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4580,10 +4580,9 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) return mem_cgroup_sockets_init(cont, ss); }; -static void kmem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void kmem_cgroup_destroy(struct cgroup *cont) { - mem_cgroup_sockets_destroy(cont, ss); + mem_cgroup_sockets_destroy(cont); } #else static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) @@ -4591,8 +4590,7 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) return 0; } -static void kmem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void kmem_cgroup_destroy(struct cgroup *cont) { } #endif @@ -4884,7 +4882,7 @@ err_cleanup: } static struct cgroup_subsys_state * __ref -mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) +mem_cgroup_create(struct cgroup *cont) { struct mem_cgroup *memcg, *parent; long error = -ENOMEM; @@ -4946,20 +4944,18 @@ free_out: return ERR_PTR(error); } -static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static int mem_cgroup_pre_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); return 
mem_cgroup_force_empty(memcg, false); } -static void mem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void mem_cgroup_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - kmem_cgroup_destroy(ss, cont); + kmem_cgroup_destroy(cont); mem_cgroup_put(memcg); } @@ -5296,9 +5292,8 @@ static void mem_cgroup_clear_mc(void) mem_cgroup_end_move(from); } -static int mem_cgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { struct task_struct *p = cgroup_taskset_first(tset); int ret = 0; @@ -5336,9 +5331,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, return ret; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { mem_cgroup_clear_mc(); } @@ -5453,9 +5447,8 @@ retry: up_read(&mm->mmap_sem); } -static void mem_cgroup_move_task(struct cgroup_subsys *ss, - struct cgroup *cont, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup *cont, + struct cgroup_taskset *tset) { struct task_struct *p = cgroup_taskset_first(tset); struct mm_struct *mm = get_task_mm(p); @@ -5470,20 +5463,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ -static int mem_cgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { return 0; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { } -static void mem_cgroup_move_task(struct cgroup_subsys *ss, - struct 
cgroup *cont, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup *cont, + struct cgroup_taskset *tset) { } #endif diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3a9fd4826b75..22036ab732cf 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -23,9 +23,8 @@ #include #include -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp); -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); struct cgroup_subsys net_prio_subsys = { @@ -120,8 +119,7 @@ static void update_netdev_tables(void) rtnl_unlock(); } -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; int ret; @@ -145,7 +143,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, return &cs->css; } -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cgrp_destroy(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; struct net_device *dev; diff --git a/net/core/sock.c b/net/core/sock.c index 5c5af9988f94..688037cb3b6e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -160,19 +160,19 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) out: list_for_each_entry_continue_reverse(proto, &proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(cgrp); mutex_unlock(&proto_list_mutex); return ret; } -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +void mem_cgroup_sockets_destroy(struct cgroup *cgrp) { struct proto *proto; mutex_lock(&proto_list_mutex); list_for_each_entry_reverse(proto, 
&proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(cgrp); mutex_unlock(&proto_list_mutex); } #endif diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..e714c6834c90 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -94,7 +94,7 @@ create_files: } EXPORT_SYMBOL(tcp_init_cgroup); -void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +void tcp_destroy_cgroup(struct cgroup *cgrp) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct cg_proto *cg_proto; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index f84fdc3a7f27..1afaa284fcd7 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -22,9 +22,8 @@ #include #include -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp); -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); struct cgroup_subsys net_cls_subsys = { @@ -51,8 +50,7 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) struct cgroup_cls_state, css); } -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_cls_state *cs; @@ -66,7 +64,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, return &cs->css; } -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cgrp_destroy(struct cgroup *cgrp) { kfree(cgrp_cls_state(cgrp)); } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 8b5b5d8612c6..c43a3323feea 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -61,8 +61,8 @@ static inline struct dev_cgroup 
*task_devcgroup(struct task_struct *task) struct cgroup_subsys devices_subsys; -static int devcgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgrp, struct cgroup_taskset *set) +static int devcgroup_can_attach(struct cgroup *new_cgrp, + struct cgroup_taskset *set) { struct task_struct *task = cgroup_taskset_first(set); @@ -156,8 +156,7 @@ remove: /* * called from kernel/cgroup.c with cgroup_lock() held. */ -static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; struct cgroup *parent_cgroup; @@ -195,8 +194,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, return &dev_cgroup->css; } -static void devcgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void devcgroup_destroy(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup; struct dev_whitelist_item *wh, *tmp; -- cgit v1.2.3 From 43480aecb1f538d4f6dd8b2c5d2b71fb98659072 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Feb 2012 08:51:50 +0000 Subject: gro: more generic L2 header check Shlomo Pongratz reported GRO L2 header check was suited for Ethernet only, and failed on IB/ipoib traffic. He provided a patch faking a zeroed header to let GRO aggregates frames. Roland Dreier, Herbert Xu, and others suggested we change GRO L2 header check to be more generic, ie not assuming L2 header is 14 bytes, but taking into account hard_header_len. __napi_gro_receive() has special handling for the common case (Ethernet) to avoid a memcmp() call and use an inline optimized function instead. Signed-off-by: Eric Dumazet Reported-by: Shlomo Pongratz Cc: Roland Dreier Cc: Or Gerlitz Cc: Herbert Xu Tested-by: Sean Hefty Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index f1249472e90e..763a0eda7158 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3491,14 +3491,20 @@ static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; for (p = napi->gro_list; p; p = p->next) { unsigned long diffs; diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From de8261c2fa364397ed872fad1244d75364689168 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Feb 2012 04:09:20 +0000 Subject: gro: fix truesize underestimation skb_gro_receive() doesn't update truesize properly when adding one skb to frag_list. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da0c97f2fab4..f3a530780753 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2906,7 +2906,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) nskb->prev = p; nskb->data_len += p->len; - nskb->truesize += p->len; + nskb->truesize += p->truesize; nskb->len += p->len; *head = nskb; @@ -2916,6 +2916,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: + p->truesize += skb->truesize - len; if (offset > headlen) { unsigned int eat = offset - headlen; -- cgit v1.2.3 From 4934b0329f7150dcb5f90506860e2db32274c755 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:30:33 +0000 Subject: datagram: Factor out sk queue referencing This makes lines shorter and simplifies further patching. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/datagram.c b/net/core/datagram.c index 68bbf9f65cb0..6f54d0a17f8e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -180,18 +180,19 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, * However, this function was correct in any case. 
8) */ unsigned long cpu_flags; + struct sk_buff_head *queue = &sk->sk_receive_queue; - spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); - skb = skb_peek(&sk->sk_receive_queue); + spin_lock_irqsave(&queue->lock, cpu_flags); + skb = skb_peek(queue); if (skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { skb->peeked = 1; atomic_inc(&skb->users); } else - __skb_unlink(skb, &sk->sk_receive_queue); + __skb_unlink(skb, queue); } - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); + spin_unlock_irqrestore(&queue->lock, cpu_flags); if (skb) return skb; -- cgit v1.2.3 From 3f518bf745cbd6007d8069100fb9cb09e960c872 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:30:58 +0000 Subject: datagram: Add offset argument to __skb_recv_datagram This one is only considered for MSG_PEEK flag and the value pointed by it specifies where to start peeking bytes from. If the offset happens to point into the middle of the returned skb, the offset within this skb is put back to this very argument. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- net/core/datagram.c | 21 +++++++++++++-------- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b7317ff297f..f3cf43de3c2a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2046,7 +2046,7 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err); + int *peeked, int *off, int *err); extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); extern unsigned int datagram_poll(struct file *file, struct socket *sock, diff --git a/net/core/datagram.c b/net/core/datagram.c index 6f54d0a17f8e..d3cf12f62c8f 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -132,6 +132,8 @@ out_noerr: * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @off: an offset in bytes to peek skb from. Returns an offset + * within an skb where data actually starts * @peeked: returns non-zero if this packet has been seen before * @err: error code returned * @@ -158,7 +160,7 @@ out_noerr: * the standard around please. 
*/ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err) + int *peeked, int *off, int *err) { struct sk_buff *skb; long timeo; @@ -183,19 +185,22 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, struct sk_buff_head *queue = &sk->sk_receive_queue; spin_lock_irqsave(&queue->lock, cpu_flags); - skb = skb_peek(queue); - if (skb) { + skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { + if (*off >= skb->len) { + *off -= skb->len; + continue; + } skb->peeked = 1; atomic_inc(&skb->users); } else __skb_unlink(skb, queue); - } - spin_unlock_irqrestore(&queue->lock, cpu_flags); - if (skb) + spin_unlock_irqrestore(&queue->lock, cpu_flags); return skb; + } + spin_unlock_irqrestore(&queue->lock, cpu_flags); /* User doesn't want to wait */ error = -EAGAIN; @@ -215,10 +220,10 @@ EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) { - int peeked; + int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, err); + &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cd99f1a0f59f..7c41ab84e72e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1167,7 +1167,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); bool slow; @@ -1183,7 +1183,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8aebf8f90436..37b0699e95e5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -342,7 +342,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; @@ -359,7 +359,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; -- cgit v1.2.3 From ef64a54f6e558155b4f149bb10666b9e914b6c54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:31:34 +0000 Subject: sock: Introduce the SO_PEEK_OFF sock option This one specifies where to start MSG_PEEK-ing queue data from. When set to a negative value, it means that MSG_PEEK works as usual -- peeks from the head of the queue always. When some bytes are peeked from queue and the peeking offset is non negative it is moved forward so that the next peek will return next portion of data. When non-peeking recvmsg occurs and the peeking offset is non negative it is moved backward so that the next peek will still peek the proper data (i.e. the one that would have been picked if there were no non peeking recv in between). The offset is set using per-proto operation to let the protocol handle the locking issues and to check whether the peeking offset feature is supported by the protocol the socket belongs to. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 1 + arch/avr32/include/asm/socket.h | 1 + arch/cris/include/asm/socket.h | 1 + arch/frv/include/asm/socket.h | 1 + arch/h8300/include/asm/socket.h | 1 + arch/ia64/include/asm/socket.h | 1 + arch/m32r/include/asm/socket.h | 1 + arch/m68k/include/asm/socket.h | 1 + arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 1 + arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 1 + arch/s390/include/asm/socket.h | 1 + arch/sparc/include/asm/socket.h | 1 + arch/xtensa/include/asm/socket.h | 1 + include/asm-generic/socket.h | 1 + include/linux/net.h | 1 + include/net/sock.h | 25 +++++++++++++++++++++++++ net/core/sock.c | 13 +++++++++++++ 20 files changed, 56 insertions(+) (limited to 'net/core') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 082355f159e6..16449d330dae 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -71,6 +71,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index dec6f9afb3cf..d958c74e5260 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 247b88c760be..30078f98b3ab 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index e269264df7c4..048aba64600c 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -66,6 +66,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index ce80fdadcce5..7a361810f3cc 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -64,6 +64,7 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index cf1daab6f27e..e7bbfcee5b99 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 4b03664e3fb5..ced62de9d5a9 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -73,5 +73,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_IA64_SOCKET_H */ diff --git 
a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index e8b8c5bb053c..696cb4c7ca4e 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index d4708ce466e0..e8b41a6775f9 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index ad5c0a7a02a7..52104872e9e3 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -84,6 +84,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #ifdef __KERNEL__ diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 876356d78522..013fcc51698f 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -64,5 +64,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index d28c51b61067..f717c9bec16f 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -63,6 +63,7 @@ #define SO_WIFI_STATUS 0x4022 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 0x4023 /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 2fc2af8fbf59..fe1c0b478fd7 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -71,5 +71,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 67b5c1b14b51..581702fa1b0c 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -72,5 +72,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 8af1b64168b3..68e2e2746f6f 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -60,6 +60,7 @@ #define SO_WIFI_STATUS 0x0025 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 0x0026 /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index bb06968be227..74818b161362 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -75,5 +75,6 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index 49c1704173e7..d9aaac0c36d4 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -67,4 +67,5 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/net.h b/include/linux/net.h index b29923006b11..be60c7f5e145 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -206,6 +206,7 @@ struct proto_ops { int offset, size_t size, int flags); ssize_t 
(*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); + void (*set_peek_off)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..9c0553b9e451 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -357,6 +357,7 @@ struct sock { struct page *sk_sndmsg_page; struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; + __s32 sk_peek_off; int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; @@ -373,6 +374,30 @@ struct sock { void (*sk_destruct)(struct sock *sk); }; +static inline int sk_peek_offset(struct sock *sk, int flags) +{ + if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0)) + return sk->sk_peek_off; + else + return 0; +} + +static inline void sk_peek_offset_bwd(struct sock *sk, int val) +{ + if (sk->sk_peek_off >= 0) { + if (sk->sk_peek_off >= val) + sk->sk_peek_off -= val; + else + sk->sk_peek_off = 0; + } +} + +static inline void sk_peek_offset_fwd(struct sock *sk, int val) +{ + if (sk->sk_peek_off >= 0) + sk->sk_peek_off += val; +} + /* * Hashed lists helper routines */ diff --git a/net/core/sock.c b/net/core/sock.c index 02f8dfe320b7..19942d4bb6e6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -793,6 +793,12 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; + case SO_PEEK_OFF: + if (sock->ops->set_peek_off) + sock->ops->set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + break; default: ret = -ENOPROTOOPT; break; @@ -1018,6 +1024,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); break; + case SO_PEEK_OFF: + if (!sock->ops->set_peek_off) + return -EOPNOTSUPP; + + v.val = sk->sk_peek_off; + break; default: return -ENOPROTOOPT; } @@ -2092,6 +2104,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; + sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; 
sk->sk_peer_cred = NULL; -- cgit v1.2.3 From c5905afb0ee6550b42c49213da1c22d67316c194 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Feb 2012 08:31:31 +0100 Subject: static keys: Introduce 'struct static_key', static_key_true()/false() and static_key_slow_[inc|dec]() So here's a boot tested patch on top of Jason's series that does all the cleanups I talked about and turns jump labels into a more intuitive to use facility. It should also address the various misconceptions and confusions that surround jump labels. Typical usage scenarios: #include <linux/static_key.h> struct static_key key = STATIC_KEY_INIT_TRUE; if (static_key_false(&key)) do unlikely code else do likely code Or: if (static_key_true(&key)) do likely code else do unlikely code The static key is modified via: static_key_slow_inc(&key); ... static_key_slow_dec(&key); The 'slow' prefix makes it abundantly clear that this is an expensive operation. I've updated all in-kernel code to use this everywhere. Note that I (intentionally) have not pushed through the rename blindly through to the lowest levels: the actual jump-label patching arch facility should be named like that, so we want to decouple jump labels from the static-key facility a bit. On non-jump-label enabled architectures static keys default to likely()/unlikely() branches. 
Signed-off-by: Ingo Molnar Acked-by: Jason Baron Acked-by: Steven Rostedt Cc: a.p.zijlstra@chello.nl Cc: mathieu.desnoyers@efficios.com Cc: davem@davemloft.net Cc: ddaney.cavm@gmail.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20120222085809.GA26397@elte.hu Signed-off-by: Ingo Molnar --- arch/Kconfig | 29 ++++--- arch/ia64/include/asm/paravirt.h | 6 +- arch/ia64/kernel/paravirt.c | 4 +- arch/mips/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/jump_label.h | 2 +- arch/s390/include/asm/jump_label.h | 2 +- arch/sparc/include/asm/jump_label.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +- arch/x86/include/asm/paravirt.h | 6 +- arch/x86/kernel/kvm.c | 4 +- arch/x86/kernel/paravirt.c | 4 +- arch/x86/kvm/mmu_audit.c | 8 +- include/linux/jump_label.h | 139 ++++++++++++++++++++++++---------- include/linux/netdevice.h | 4 +- include/linux/netfilter.h | 6 +- include/linux/perf_event.h | 12 +-- include/linux/static_key.h | 1 + include/linux/tracepoint.h | 8 +- include/net/sock.h | 6 +- kernel/events/core.c | 16 ++-- kernel/jump_label.c | 128 ++++++++++++++++++------------- kernel/sched/core.c | 18 ++--- kernel/sched/fair.c | 8 +- kernel/sched/sched.h | 14 ++-- kernel/tracepoint.c | 20 ++--- net/core/dev.c | 24 +++--- net/core/net-sysfs.c | 4 +- net/core/sock.c | 4 +- net/core/sysctl_net_core.c | 4 +- net/ipv4/tcp_memcontrol.c | 6 +- net/netfilter/core.c | 6 +- 31 files changed, 298 insertions(+), 205 deletions(-) create mode 100644 include/linux/static_key.h (limited to 'net/core') diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be11..5b448a74d0f7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -47,18 +47,29 @@ config KPROBES If in doubt, say "N". 
config JUMP_LABEL - bool "Optimize trace point call sites" + bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL help + This option enables a transparent branch optimization that + makes certain almost-always-true or almost-always-false branch + conditions even cheaper to execute within the kernel. + + Certain performance-sensitive kernel code, such as trace points, + scheduler functionality, networking code and KVM have such + branches and include support for this optimization technique. + If it is detected that the compiler has support for "asm goto", - the kernel will compile trace point locations with just a - nop instruction. When trace points are enabled, the nop will - be converted to a jump to the trace function. This technique - lowers overhead and stress on the branch prediction of the - processor. - - On i386, options added to the compiler flags may increase - the size of the kernel slightly. + the kernel will compile such branches with just a nop + instruction. When the condition flag is toggled to true, the + nop will be converted to a jump instruction to execute the + conditional block of instructions. + + This technique lowers overhead and stress on the branch prediction + of the processor and generally makes the kernel faster. The update + of the condition is slower, but those are always very rare. + + ( On 32-bit x86, the necessary options added to the compiler + flags may increase the size of the kernel slightly. 
) config OPTPROBES def_bool y diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 32551d304cd7..b149b88ea795 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) pv_time_ops.init_missing_ticks_accounting(cpu); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline int paravirt_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 100868216c55..1b22f6de2932 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { * pv_time_ops * time operations */ -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static int ia64_native_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 1881b316ca45..4d6d77ed9b9d 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,7 +20,7 @@ #define WORD_INSN ".word" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\tnop\n\t" "nop\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 938986e412f1..ae098c438f00 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool 
arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2b5b67..6c32190dc73e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -13,7 +13,7 @@ #define ASM_ALIGN ".balign 4" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index fc73a82366f8..5080d16a832f 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,7 +7,7 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18ce6ead..3a16c1483b45 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -9,12 +9,12 @@ #define JUMP_LABEL_NOP_SIZE 5 -#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" +#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:" - JUMP_LABEL_INITIAL_NOP + STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a7d2db9a74fb..c0180fd372d2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -230,9 +230,9 @@ static inline unsigned long long 
paravirt_sched_clock(void) return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline u64 paravirt_steal_clock(int cpu) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -438,9 +438,9 @@ void __init kvm_guest_init(void) static __init int activate_jump_labels(void) { if (has_steal_clock) { - jump_label_inc(&paravirt_steal_enabled); + static_key_slow_inc(&paravirt_steal_enabled); if (steal_acc) - jump_label_inc(&paravirt_steal_rq_enabled); + static_key_slow_inc(&paravirt_steal_rq_enabled); } return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) __native_flush_tlb_single(addr); } -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) } static bool mmu_audit; -static struct jump_label_key mmu_audit_key; +static struct static_key mmu_audit_key; static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { @@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { - if (static_branch((&mmu_audit_key))) + if 
(static_key_false((&mmu_audit_key))) __kvm_mmu_audit(vcpu, point); } @@ -259,7 +259,7 @@ static void mmu_audit_enable(void) if (mmu_audit) return; - jump_label_inc(&mmu_audit_key); + static_key_slow_inc(&mmu_audit_key); mmu_audit = true; } @@ -268,7 +268,7 @@ static void mmu_audit_disable(void) if (!mmu_audit) return; - jump_label_dec(&mmu_audit_key); + static_key_slow_dec(&mmu_audit_key); mmu_audit = false; } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f7c69580fea7..2172da2d9bb4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -9,15 +9,15 @@ * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_branch(&key))" statement is a unconditional branch (which + * of a "if (static_key_false(&key))" statement is a unconditional branch (which * defaults to false - and the true block is placed out of line). * - * However at runtime we can change the 'static' branch target using - * jump_label_{inc,dec}(). These function as a 'reference' count on the key + * However at runtime we can change the branch target using + * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key * object and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the jump_label_{inc,dec}() functions + * Since this relies on modifying code the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional their runtime overhead * will be absolutely minimal, esp. 
in the default (off) case where the total @@ -26,12 +26,26 @@ * * When the control is directly exposed to userspace it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) - * cause significant performance degradation. Struct jump_label_key_deferred and - * jump_label_dec_deferred() provide for this. + * cause significant performance degradation. Struct static_key_deferred and + * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, it falls back to a simple * conditional branch. - */ + * + * struct static_key my_key = STATIC_KEY_INIT_TRUE; + * + * if (static_key_true(&my_key)) { + * } + * + * will result in the true case being in-line and starts the key with a single + * reference. Mixing static_key_true() and static_key_false() on the same key is not + * allowed. + * + * Not initializing the key (static data is initialized to 0s anyway) is the + * same as using STATIC_KEY_INIT_FALSE and static_key_false() is + * equivalent with static_branch(). 
+ * +*/ #include #include @@ -39,16 +53,17 @@ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) -struct jump_label_key { +struct static_key { atomic_t enabled; +/* Set lsb bit to 1 if branch is default true, 0 ot */ struct jump_entry *entries; #ifdef CONFIG_MODULES - struct jump_label_mod *next; + struct static_key_mod *next; #endif }; -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; unsigned long timeout; struct delayed_work work; }; @@ -66,13 +81,34 @@ struct module; #ifdef HAVE_JUMP_LABEL -#ifdef CONFIG_MODULES -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} -#else -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} -#endif +#define JUMP_LABEL_TRUE_BRANCH 1UL + +static +inline struct jump_entry *jump_label_get_entries(struct static_key *key) +{ + return (struct jump_entry *)((unsigned long)key->entries + & ~JUMP_LABEL_TRUE_BRANCH); +} + +static inline bool jump_label_get_branch_default(struct static_key *key) +{ + if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) + return true; + return false; +} + +static __always_inline bool static_key_false(struct static_key *key) +{ + return arch_static_branch(key); +} -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_true(struct static_key *key) +{ + return !static_key_false(key); +} + +/* Deprecated. Please use 'static_key_false() instead. 
*/ +static __always_inline bool static_branch(struct static_key *key) { return arch_static_branch(key); } @@ -88,21 +124,24 @@ extern void arch_jump_label_transform(struct jump_entry *entry, extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern int jump_label_text_reserved(void *start, void *end); -extern void jump_label_inc(struct jump_label_key *key); -extern void jump_label_dec(struct jump_label_key *key); -extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); -extern bool jump_label_enabled(struct jump_label_key *key); +extern void static_key_slow_inc(struct static_key *key); +extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern bool static_key_enabled(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -extern void jump_label_rate_limit(struct jump_label_key_deferred *key, - unsigned long rl); +extern void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) #else /* !HAVE_JUMP_LABEL */ #include -#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} - -struct jump_label_key { +struct static_key { atomic_t enabled; }; @@ -110,30 +149,45 @@ static __always_inline void jump_label_init(void) { } -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; }; -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_false(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static __always_inline bool static_key_true(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled))) + if 
(likely(atomic_read(&key->enabled)) > 0) return true; return false; } -static inline void jump_label_inc(struct jump_label_key *key) +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static inline void static_key_slow_inc(struct static_key *key) { atomic_inc(&key->enabled); } -static inline void jump_label_dec(struct jump_label_key *key) +static inline void static_key_slow_dec(struct static_key *key) { atomic_dec(&key->enabled); } -static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) +static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { - jump_label_dec(&key->key); + static_key_slow_dec(&key->key); } static inline int jump_label_text_reserved(void *start, void *end) @@ -144,9 +198,9 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline bool jump_label_enabled(struct jump_label_key *key) +static inline bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } static inline int jump_label_apply_nops(struct module *mod) @@ -154,13 +208,20 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, +static inline void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { } + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) + #endif /* HAVE_JUMP_LABEL */ -#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) -#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), 
}) +#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#define jump_label_enabled static_key_enabled #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..7dfaae7846ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -214,8 +214,8 @@ enum { #include #ifdef CONFIG_RPS -#include -extern struct jump_label_key rps_needed; +#include +extern struct static_key rps_needed; #endif struct neighbour; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d0..29734be334c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if defined(CONFIG_JUMP_LABEL) -#include -extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#include +extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) - return static_branch(&nf_hooks_needed[pf][hook]); + return static_key_false(&nf_hooks_needed[pf][hook]); return !list_empty(&nf_hooks[pf][hook]); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 412b790f5da6..0d21e6f1cf53 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -514,7 +514,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1038,7 +1038,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); @@ -1066,7 +1066,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 
addr) { struct pt_regs hot_regs; - if (static_branch(&perf_swevent_enabled[event_id])) { + if (static_key_false(&perf_swevent_enabled[event_id])) { if (!regs) { perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; @@ -1075,12 +1075,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) } } -extern struct jump_label_key_deferred perf_sched_events; +extern struct static_key_deferred perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); } @@ -1089,7 +1089,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); } diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 000000000000..27bd3f8a0857 --- /dev/null +++ b/include/linux/static_key.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index fc36da97ff7e..bd96ecd0e05c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include struct module; struct tracepoint; @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct jump_label_key key; + struct static_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -145,7 +145,7 @@ static inline void tracepoint_synchronize_unregister(void) extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (static_branch(&__tracepoint_##name.key)) \ + if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ 
TP_ARGS(data_args), \ @@ -188,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ static struct tracepoint * const __tracepoint_ptr_##name __used \ __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..dcde2d9268cd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include @@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #endif /* SOCK_REFCNT_DEBUG */ #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) -extern struct jump_label_key memcg_socket_limit_enabled; +extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) { return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); } -#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) +#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) #else #define mem_cgroup_sockets_enabled 0 static inline struct cg_proto *parent_cg_proto(struct proto *proto, diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c3b9de55f6b..5e0f8bb89b2b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -128,7 +128,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -struct jump_label_key_deferred perf_sched_events __read_mostly; +struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ 
-2769,7 +2769,7 @@ static void free_event(struct perf_event *event) if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -2780,7 +2780,7 @@ static void free_event(struct perf_event *event) put_callchain_buffers(); if (is_cgroup_event(event)) { atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); } } @@ -4982,7 +4982,7 @@ fail: return err; } -struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { @@ -4990,7 +4990,7 @@ static void sw_perf_event_destroy(struct perf_event *event) WARN_ON(event->parent); - jump_label_dec(&perf_swevent_enabled[event_id]); + static_key_slow_dec(&perf_swevent_enabled[event_id]); swevent_hlist_put(event); } @@ -5020,7 +5020,7 @@ static int perf_swevent_init(struct perf_event *event) if (err) return err; - jump_label_inc(&perf_swevent_enabled[event_id]); + static_key_slow_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; } @@ -5843,7 +5843,7 @@ done: if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -6081,7 +6081,7 @@ SYSCALL_DEFINE5(perf_event_open, * - that may need work on context switch */ atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); } /* diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 543782e7cdd2..bf9dcadbb53a 100644 --- a/kernel/jump_label.c +++ 
b/kernel/jump_label.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #ifdef HAVE_JUMP_LABEL @@ -29,10 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } -bool jump_label_enabled(struct jump_label_key *key) +bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } +EXPORT_SYMBOL_GPL(static_key_enabled); static int jump_label_cmp(const void *a, const void *b) { @@ -58,22 +59,26 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static void jump_label_update(struct jump_label_key *key, int enable); +static void jump_label_update(struct static_key *key, int enable); -void jump_label_inc(struct jump_label_key *key) +void static_key_slow_inc(struct static_key *key) { if (atomic_inc_not_zero(&key->enabled)) return; jump_label_lock(); - if (atomic_read(&key->enabled) == 0) - jump_label_update(key, JUMP_LABEL_ENABLE); + if (atomic_read(&key->enabled) == 0) { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_ENABLE); + else + jump_label_update(key, JUMP_LABEL_DISABLE); + } atomic_inc(&key->enabled); jump_label_unlock(); } -EXPORT_SYMBOL_GPL(jump_label_inc); +EXPORT_SYMBOL_GPL(static_key_slow_inc); -static void __jump_label_dec(struct jump_label_key *key, +static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { @@ -85,32 +90,35 @@ static void __jump_label_dec(struct jump_label_key *key, if (rate_limit) { atomic_inc(&key->enabled); schedule_delayed_work(work, rate_limit); - } else - jump_label_update(key, JUMP_LABEL_DISABLE); - + } else { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_DISABLE); + else + jump_label_update(key, JUMP_LABEL_ENABLE); + } jump_label_unlock(); } 
-EXPORT_SYMBOL_GPL(jump_label_dec); static void jump_label_update_timeout(struct work_struct *work) { - struct jump_label_key_deferred *key = - container_of(work, struct jump_label_key_deferred, work.work); - __jump_label_dec(&key->key, 0, NULL); + struct static_key_deferred *key = + container_of(work, struct static_key_deferred, work.work); + __static_key_slow_dec(&key->key, 0, NULL); } -void jump_label_dec(struct jump_label_key *key) +void static_key_slow_dec(struct static_key *key) { - __jump_label_dec(key, 0, NULL); + __static_key_slow_dec(key, 0, NULL); } +EXPORT_SYMBOL_GPL(static_key_slow_dec); -void jump_label_dec_deferred(struct jump_label_key_deferred *key) +void static_key_slow_dec_deferred(struct static_key_deferred *key) { - __jump_label_dec(&key->key, key->timeout, &key->work); + __static_key_slow_dec(&key->key, key->timeout, &key->work); } +EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); - -void jump_label_rate_limit(struct jump_label_key_deferred *key, +void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { key->timeout = rl; @@ -153,7 +161,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry arch_jump_label_transform(entry, type); } -static void __jump_label_update(struct jump_label_key *key, +static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, int enable) { @@ -170,27 +178,40 @@ static void __jump_label_update(struct jump_label_key *key, } } +static enum jump_label_type jump_label_type(struct static_key *key) +{ + bool true_branch = jump_label_get_branch_default(key); + bool state = static_key_enabled(key); + + if ((!true_branch && state) || (true_branch && !state)) + return JUMP_LABEL_ENABLE; + + return JUMP_LABEL_DISABLE; +} + void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; - struct jump_label_key *key = NULL; + struct static_key *key = NULL; 
struct jump_entry *iter; jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; + struct static_key *iterk; - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? - JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + iterk = (struct static_key *)(unsigned long)iter->key; + arch_jump_label_transform_static(iter, jump_label_type(iterk)); if (iterk == key) continue; key = iterk; - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. + */ + *((unsigned long *)&key->entries) += (unsigned long)iter; #ifdef CONFIG_MODULES key->next = NULL; #endif @@ -200,8 +221,8 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES -struct jump_label_mod { - struct jump_label_mod *next; +struct static_key_mod { + struct static_key_mod *next; struct jump_entry *entries; struct module *mod; }; @@ -221,9 +242,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end) start, end); } -static void __jump_label_mod_update(struct jump_label_key *key, int enable) +static void __jump_label_mod_update(struct static_key *key, int enable) { - struct jump_label_mod *mod = key->next; + struct static_key_mod *mod = key->next; while (mod) { struct module *m = mod->mod; @@ -254,11 +275,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; - - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? 
- JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); } } @@ -267,8 +284,8 @@ static int jump_label_add_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm; + struct static_key *key = NULL; + struct static_key_mod *jlm; /* if the module doesn't have jump label entries, just return */ if (iter_start == iter_stop) @@ -277,28 +294,30 @@ static int jump_label_add_module(struct module *mod) jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->key == (jump_label_t)(unsigned long)key) - continue; + struct static_key *iterk; - key = (struct jump_label_key *)(unsigned long)iter->key; + iterk = (struct static_key *)(unsigned long)iter->key; + if (iterk == key) + continue; + key = iterk; if (__module_address(iter->key) == mod) { - atomic_set(&key->enabled, 0); - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. 
+ */ + *((unsigned long *)&key->entries) += (unsigned long)iter; key->next = NULL; continue; } - - jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; - jlm->mod = mod; jlm->entries = iter; jlm->next = key->next; key->next = jlm; - if (jump_label_enabled(key)) + if (jump_label_type(key) == JUMP_LABEL_ENABLE) __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); } @@ -310,14 +329,14 @@ static void jump_label_del_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm, **prev; + struct static_key *key = NULL; + struct static_key_mod *jlm, **prev; for (iter = iter_start; iter < iter_stop; iter++) { if (iter->key == (jump_label_t)(unsigned long)key) continue; - key = (struct jump_label_key *)(unsigned long)iter->key; + key = (struct static_key *)(unsigned long)iter->key; if (__module_address(iter->key) == mod) continue; @@ -419,9 +438,10 @@ int jump_label_text_reserved(void *start, void *end) return ret; } -static void jump_label_update(struct jump_label_key *key, int enable) +static void jump_label_update(struct static_key *key, int enable) { - struct jump_entry *entry = key->entries, *stop = __stop___jump_table; + struct jump_entry *stop = __stop___jump_table; + struct jump_entry *entry = jump_label_get_entries(key); #ifdef CONFIG_MODULES struct module *mod = __module_address((unsigned long)key); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e053..112c6824476b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v) #ifdef HAVE_JUMP_LABEL -#define jump_label_key__true jump_label_key_enabled -#define jump_label_key__false jump_label_key_disabled +#define jump_label_key__true 
STATIC_KEY_INIT_TRUE +#define jump_label_key__false STATIC_KEY_INIT_FALSE #define SCHED_FEAT(name, enabled) \ jump_label_key__##enabled , -struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { +struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { #include "features.h" }; @@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (jump_label_enabled(&sched_feat_keys[i])) - jump_label_dec(&sched_feat_keys[i]); + if (static_key_enabled(&sched_feat_keys[i])) + static_key_slow_dec(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!jump_label_enabled(&sched_feat_keys[i])) - jump_label_inc(&sched_feat_keys[i]); + if (!static_key_enabled(&sched_feat_keys[i])) + static_key_slow_inc(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) delta -= irq_delta; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_branch((&paravirt_steal_rq_enabled))) { + if (static_key_false((&paravirt_steal_rq_enabled))) { u64 st; steal = paravirt_steal_clock(cpu_of(rq)); @@ -2756,7 +2756,7 @@ void account_idle_time(cputime_t cputime) static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT - if (static_branch(&paravirt_steal_enabled)) { + if (static_key_false(&paravirt_steal_enabled)) { u64 steal, st = 0; steal = paravirt_steal_clock(smp_processor_id()); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669d..423547ada38a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1399,20 +1399,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) #ifdef CONFIG_CFS_BANDWIDTH #ifdef HAVE_JUMP_LABEL -static struct jump_label_key __cfs_bandwidth_used; +static struct static_key __cfs_bandwidth_used; static inline bool cfs_bandwidth_used(void) { - return static_branch(&__cfs_bandwidth_used); + return static_key_false(&__cfs_bandwidth_used); } void 
account_cfs_bandwidth_used(int enabled, int was_enabled) { /* only need to count groups transitioning between enabled/!enabled */ if (enabled && !was_enabled) - jump_label_inc(&__cfs_bandwidth_used); + static_key_slow_inc(&__cfs_bandwidth_used); else if (!enabled && was_enabled) - jump_label_dec(&__cfs_bandwidth_used); + static_key_slow_dec(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..b4cd6d8ea150 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ #ifdef CONFIG_SCHED_DEBUG -# include +# include # define const_debug __read_mostly #else # define const_debug const @@ -630,18 +630,18 @@ enum { #undef SCHED_FEAT #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) -static __always_inline bool static_branch__true(struct jump_label_key *key) +static __always_inline bool static_branch__true(struct static_key *key) { - return likely(static_branch(key)); /* Not out of line branch. */ + return static_key_true(key); /* Not out of line branch. */ } -static __always_inline bool static_branch__false(struct jump_label_key *key) +static __always_inline bool static_branch__false(struct static_key *key) { - return unlikely(static_branch(key)); /* Out of line branch. */ + return static_key_false(key); /* Out of line branch. 
*/ } #define SCHED_FEAT(name, enabled) \ -static __always_inline bool static_branch_##name(struct jump_label_key *key) \ +static __always_inline bool static_branch_##name(struct static_key *key) \ { \ return static_branch__##enabled(key); \ } @@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ #undef SCHED_FEAT -extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; +extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f1539decd99d..d96ba22dabfa 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include extern struct tracepoint * const __start___tracepoints_ptrs[]; extern struct tracepoint * const __stop___tracepoints_ptrs[]; @@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !jump_label_enabled(&elem->key) && active) + if (elem->regfunc && !static_key_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) + else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. 
*/ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (active && !jump_label_enabled(&elem->key)) - jump_label_inc(&elem->key); - else if (!active && jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (active && !static_key_enabled(&elem->key)) + static_key_slow_inc(&elem->key); + else if (!active && static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); } /* @@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && jump_label_enabled(&elem->key)) + if (elem->unregfunc && static_key_enabled(&elem->key)) elem->unregfunc(); - if (jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } diff --git a/net/core/dev.c b/net/core/dev.c index 115dee1d985d..da7ce7f0e566 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,7 +134,7 @@ #include #include #include -#include +#include #include #include "net-sysfs.h" @@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -static struct jump_label_key netstamp_needed __read_mostly; +static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call jump_label_dec() from irq context +/* We are not allowed to call static_key_slow_dec() from irq context * If net_disable_timestamp() is called from irq context, defer the - * jump_label_dec() calls. + * static_key_slow_dec() calls. 
*/ static atomic_t netstamp_needed_deferred; #endif @@ -1457,12 +1457,12 @@ void net_enable_timestamp(void) if (deferred) { while (--deferred) - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); return; } #endif WARN_ON(in_interrupt()); - jump_label_inc(&netstamp_needed); + static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1474,19 +1474,19 @@ void net_disable_timestamp(void) return; } #endif - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); } EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp.tv64 = 0; - if (static_branch(&netstamp_needed)) + if (static_key_false(&netstamp_needed)) __net_timestamp(skb); } #define net_timestamp_check(COND, SKB) \ - if (static_branch(&netstamp_needed)) { \ + if (static_key_false(&netstamp_needed)) { \ if ((COND) && !(SKB)->tstamp.tv64) \ __net_timestamp(SKB); \ } \ @@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); -struct jump_label_key rps_needed __read_mostly; +struct static_key rps_needed __read_mostly; static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb) trace_netif_rx(skb); #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb) return NET_RX_SUCCESS; #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu, ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a1727cda03d7..495586232aa1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, 
spin_unlock(&rps_map_lock); if (map) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (old_map) { kfree_rcu(old_map, rcu); - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); } free_cpumask_var(mask); return len; diff --git a/net/core/sock.c b/net/core/sock.c index 3e81fd2e3c75..3a4e5817a2a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,7 +111,7 @@ #include #include #include -#include +#include #include #include @@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -struct jump_label_key memcg_socket_limit_enabled; +struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d05559d4d9cd..0c2850874254 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); if (sock_table) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (orig_sock_table) { - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..602fb305365f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); if (val != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); } EXPORT_SYMBOL(tcp_destroy_cgroup); @@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) net->ipv4.sysctl_tcp_mem[i]); if (val == 
RESOURCE_MAX && old_lim != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) - jump_label_inc(&memcg_socket_limit_enabled); + static_key_slow_inc(&memcg_socket_limit_enabled); return 0; } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b4e8ff05b301..e1b7e051332e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); #if defined(CONFIG_JUMP_LABEL) -struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg) list_add_rcu(&reg->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; } @@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) list_del_rcu(&reg->list); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); } -- cgit v1.2.3 From 36eabda3d094dae30a74350c6289c163349b744d Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 11 Feb 2012 15:39:14 +0000 Subject: net: Support RXFCS feature flag. When set on hardware that supports the feature, this causes the Ethernet FCS to be appended to the end of the skb. Useful for sniffing packets. 
Signed-off-by: Ben Greear Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- Documentation/networking/netdev-features.txt | 6 ++++++ include/linux/netdev_features.h | 2 ++ net/core/ethtool.c | 1 + 3 files changed, 9 insertions(+) (limited to 'net/core') diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index 4b1c0dcef84c..7d2781230d30 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -152,3 +152,9 @@ NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN headers. Some drivers set this because the cards can't handle the bigger MTU. [FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU VLANs. This may be not useful, though.] + +* rx-fcs + +This requests that the NIC append the Ethernet Frame Checksum (FCS) +to the end of the skb data. This allows sniffers and other tools to +read the CRC recorded by the NIC on receipt of the packet. 
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 77f5202977ce..d1331865f830 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -54,6 +54,7 @@ enum { NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ NETIF_F_NOCACHE_COPY_BIT, /* Use no-cache copyfromuser */ NETIF_F_LOOPBACK_BIT, /* Enable loopback */ + NETIF_F_RXFCS_BIT, /* Append FCS to skb pkt data */ /* * Add your fresh new feature above and remember to update @@ -98,6 +99,7 @@ enum { #define NETIF_F_TSO __NETIF_F(TSO) #define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) +#define NETIF_F_RXFCS __NETIF_F(RXFCS) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3f79db1b612a..080161924a0d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXCSUM_BIT] = "rx-checksum", [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", [NETIF_F_LOOPBACK_BIT] = "loopback", + [NETIF_F_RXFCS_BIT] = "rx-fcs", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -- cgit v1.2.3 From 3bdc0eba0b8b47797f4a76e377dd8360f317450f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 11 Feb 2012 15:39:30 +0000 Subject: net: Add framework to allow sending packets with customized CRC. This is useful for testing RX handling of frames with bad CRCs. Requires driver support to actually put the packet on the wire properly. 
Signed-off-by: Ben Greear Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- arch/alpha/include/asm/socket.h | 3 +++ arch/arm/include/asm/socket.h | 3 +++ arch/avr32/include/asm/socket.h | 3 +++ arch/cris/include/asm/socket.h | 3 +++ arch/frv/include/asm/socket.h | 3 +++ arch/h8300/include/asm/socket.h | 3 +++ arch/ia64/include/asm/socket.h | 3 +++ arch/m32r/include/asm/socket.h | 3 +++ arch/m68k/include/asm/socket.h | 3 +++ arch/mips/include/asm/socket.h | 3 +++ arch/mn10300/include/asm/socket.h | 3 +++ arch/parisc/include/asm/socket.h | 4 ++++ arch/powerpc/include/asm/socket.h | 3 +++ arch/s390/include/asm/socket.h | 3 +++ arch/sparc/include/asm/socket.h | 4 ++++ arch/xtensa/include/asm/socket.h | 3 +++ include/asm-generic/socket.h | 4 ++++ include/linux/if.h | 2 ++ include/linux/netdevice.h | 8 +++++++- include/linux/skbuff.h | 4 +++- include/net/sock.h | 4 ++++ net/core/skbuff.c | 1 + net/core/sock.c | 5 +++++ net/packet/af_packet.c | 32 ++++++++++++++++++++++++++++---- 24 files changed, 104 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 16449d330dae..dcb221a4b5be 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -73,6 +73,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
*/ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index d958c74e5260..6433cadb6ed4 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 30078f98b3ab..a473f8c6a9aa 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 048aba64600c..ae52825021af 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -68,6 +68,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 7a361810f3cc..a5b1d7dbb205 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -66,5 +66,8 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index e7bbfcee5b99..ec4554e7b04b 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h 
b/arch/ia64/include/asm/socket.h index ced62de9d5a9..41fc28a4a18a 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -75,4 +75,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index 696cb4c7ca4e..a15f40b52783 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index e8b41a6775f9..d1be684edf97 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 52104872e9e3..a2ed6fdad4e0 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -86,6 +86,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 013fcc51698f..820463a484b8 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index f717c9bec16f..1b52c2c31a7a 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -65,6 +65,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x4023 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x4024 + + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. 
*/ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index fe1c0b478fd7..3d5179bb122f 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -73,4 +73,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 581702fa1b0c..c91b720965c0 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -74,4 +74,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 68e2e2746f6f..bea1568ae4af 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -62,6 +62,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x0026 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x0027 + + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 74818b161362..e36c68184920 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -77,4 +77,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index d9aaac0c36d4..b1bea03274d5 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -68,4 +68,8 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS 
SO_WIFI_STATUS #define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/if.h b/include/linux/if.h index 06b6ef60c821..f995c663c493 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -80,6 +80,8 @@ * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IFF_TEAM_PORT 0x40000 /* device used as team port */ +#define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..f1b7d037c2c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1082,7 +1082,8 @@ struct net_device { const struct header_ops *header_ops; unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. + * See if.h for definitions. */ unsigned short gflags; unsigned short padded; /* How much padding added by alloc_netdev() */ @@ -2650,6 +2651,11 @@ static inline int netif_is_bond_slave(struct net_device *dev) return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } +static inline bool netif_supports_nofcs(struct net_device *dev) +{ + return dev->priv_flags & IFF_SUPP_NOFCS; +} + extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c11a44ea1bf4..06a4c0fd7bef 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -361,6 +361,7 @@ typedef unsigned char *sk_buff_data_t; * ports. 
* @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not + * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -459,7 +460,8 @@ struct sk_buff { __u8 l4_rxhash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - /* 10/12 bit hole (depending on ndisc_nodetype presence) */ + __u8 no_fcs:1; + /* 9/11 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #ifdef CONFIG_NET_DMA diff --git a/include/net/sock.h b/include/net/sock.h index 9c0553b9e451..ba761e7de252 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -615,6 +615,10 @@ enum sock_flags { SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ SOCK_WIFI_STATUS, /* push wifi status to userspace */ + SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS. + * Will use last 4 bytes of packet sent from + * user-space instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3a530780753..6eb656acdfe5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -592,6 +592,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; + new->no_fcs = old->no_fcs; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif diff --git a/net/core/sock.c b/net/core/sock.c index 19942d4bb6e6..55011cb691ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -799,6 +799,11 @@ set_rcvbuf: else ret = -EOPNOTSUPP; break; + + case SO_NOFCS: + sock_valbool_flag(sk, SOCK_NOFCS, valbool); + break; + default: ret = -ENOPROTOOPT; break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2dbb32b988c4..ae2d484416dd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1459,6 +1459,7 @@ static int 
packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct net_device *dev; __be16 proto = 0; int err; + int extra_len = 0; /* * Get and verify the address. @@ -1493,8 +1494,16 @@ retry: * raw protocol and you must do your own fragmentation at this level. */ + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { @@ -1526,7 +1535,7 @@ retry: goto retry; } - if (len > (dev->mtu + dev->hard_header_len)) { + if (len > (dev->mtu + dev->hard_header_len + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -1548,6 +1557,9 @@ retry: if (err < 0) goto out_unlock; + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -2209,6 +2221,7 @@ static int packet_snd(struct socket *sock, struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; int hlen, tlen; + int extra_len = 0; /* * Get and verify the address. 
@@ -2288,8 +2301,16 @@ static int packet_snd(struct socket *sock, } } + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2315,7 +2336,7 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; - if (!gso_type && (len > dev->mtu + reserve)) { + if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -2353,6 +2374,9 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + /* * Now send it */ -- cgit v1.2.3 From 5e0c03c8cd40e5c3b7ba624b8ba9a343de79ade1 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 11 Feb 2012 15:39:45 +0000 Subject: net: Support RX-ALL feature flag. This flag requests that network devices pass all received frames up the stack, even ones with errors such as invalid FCS (frame check sum). This will allow sniffers to see bad packets and perhaps give the user some idea how to fix the problem. Signed-off-by: Ben Greear Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- Documentation/networking/netdev-features.txt | 7 +++++++ include/linux/netdev_features.h | 2 ++ net/core/ethtool.c | 1 + 3 files changed, 10 insertions(+) (limited to 'net/core') diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index 7d2781230d30..4164f5c02e4b 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -158,3 +158,10 @@ VLANs. This may be not useful, though.] 
This requests that the NIC append the Ethernet Frame Checksum (FCS) to the end of the skb data. This allows sniffers and other tools to read the CRC recorded by the NIC on receipt of the packet. + +* rx-all + +This requests that the NIC receive all possible frames, including errored +frames (such as bad FCS, etc). This can be helpful when sniffing a link with +bad packets on it. Some NICs may receive more packets if also put into normal +PROMISC mode. diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d1331865f830..5ac32123035a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -55,6 +55,7 @@ enum { NETIF_F_NOCACHE_COPY_BIT, /* Use no-cache copyfromuser */ NETIF_F_LOOPBACK_BIT, /* Enable loopback */ NETIF_F_RXFCS_BIT, /* Append FCS to skb pkt data */ + NETIF_F_RXALL_BIT, /* Receive errored frames too */ /* * Add your fresh new feature above and remember to update @@ -100,6 +101,7 @@ enum { #define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) +#define NETIF_F_RXALL __NETIF_F(RXALL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 080161924a0d..6d6d7d25caaa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -74,6 +74,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", [NETIF_F_LOOPBACK_BIT] = "loopback", [NETIF_F_RXFCS_BIT] = "rx-fcs", + [NETIF_F_RXALL_BIT] = "rx-all", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -- cgit v1.2.3 From bc2f7996858db66f2d5b154aac10971655f72cad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Feb 2012 14:48:34 -0500 Subject: net: Add missing getsockopt for SO_NOFCS. Signed-off-by: David S. 
Miller --- net/core/sock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 55011cb691ad..216719cb5c7f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1035,6 +1035,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_peek_off; break; + case SO_NOFCS: + v.val = !!sock_flag(sk, SOCK_NOFCS); + break; default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 80d326fab534a5380e8f6e509a0b9076655a9670 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Feb 2012 14:30:15 +0000 Subject: netlink: add netlink_dump_control structure for netlink_dump_start() Davem considers that the argument list of this interface is getting out of control. This patch tries to address this issue following his proposal: struct netlink_dump_control c = { .dump = dump, .done = done, ... }; netlink_dump_start(..., &c); Suggested by David S. Miller. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- crypto/crypto_user.c | 10 +++++++--- drivers/infiniband/core/netlink.c | 10 +++++++--- include/linux/netlink.h | 10 +++++++--- net/core/rtnetlink.c | 9 +++++++-- net/ipv4/inet_diag.c | 18 ++++++++++++------ net/netfilter/ipset/ip_set_core.c | 10 +++++++--- net/netfilter/nf_conntrack_netlink.c | 18 ++++++++++++------ net/netfilter/nfnetlink_acct.c | 6 ++++-- net/netlink/af_netlink.c | 11 ++++------- net/netlink/genetlink.c | 9 +++++++-- net/unix/diag.c | 10 ++++++---- net/xfrm/xfrm_user.c | 9 +++++++-- 12 files changed, 87 insertions(+), 43 deletions(-) (limited to 'net/core') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 16f8693cc147..b6ac1387770c 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -389,9 +389,13 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) (nlh->nlmsg_flags & NLM_F_DUMP))) { if (link->dump == NULL) return -EINVAL; - - return netlink_dump_start(crypto_nlsk, skb, nlh, - link->dump, link->done, 0); + { 
+ struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(crypto_nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index d1c8196d15d7..396e29370304 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -147,9 +147,13 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (op < 0 || op >= client->nops || !client->cb_table[RDMA_NL_GET_OP(op)].dump) return -EINVAL; - return netlink_dump_start(nls, skb, nlh, - client->cb_table[op].dump, - NULL, 0); + + { + struct netlink_dump_control c = { + .dump = client->cb_table[op].dump, + }; + return netlink_dump_start(nls, skb, nlh, &c); + } } } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a390e9d54827..1f8c1a95f57c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -248,11 +248,15 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) #define NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) +struct netlink_dump_control { + int (*dump)(struct sk_buff *skb, struct netlink_callback *); + int (*done)(struct netlink_callback*); + u16 min_dump_alloc; +}; + extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, struct netlink_callback*), - int (*done)(struct netlink_callback*), - u16 min_dump_alloc); + struct netlink_dump_control *control); #define NL_NONROOT_RECV 0x1 diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd450027..7aef62e53113 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1981,8 +1981,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, - NULL, min_dump_alloc); + { + 
struct netlink_dump_control c = { + .dump = dumpit, + .min_dump_alloc = min_dump_alloc, + }; + err = netlink_dump_start(rtnl, skb, nlh, &c); + } rtnl_lock(); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index fcf281819cd4..8d25a1c557eb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -960,9 +960,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, nlh, - inet_diag_dump_compat, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + } } return inet_diag_get_exact_compat(skb, nlh); @@ -985,9 +988,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, h, - inet_diag_dump, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } } return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 32dbf0fa89db..e7f90e7082b4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1162,9 +1162,13 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - return netlink_dump_start(ctnl, skb, nlh, - ip_set_dump_start, - ip_set_dump_done, 0); + { + struct netlink_dump_control c = { + .dump = ip_set_dump_start, + .done = ip_set_dump_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } /* Add, del and test */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b033c0c9..61f7feb7932b 100644 --- 
a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -977,9 +977,13 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, - ctnetlink_done, 0); + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_table, + .done = ctnetlink_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) @@ -1850,9 +1854,11 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(ctnl, skb, nlh, - ctnetlink_exp_dump_table, - ctnetlink_exp_done, 0); + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 11ba013e47f6..3eb348bfc4fb 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -171,8 +171,10 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, char *acct_name; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, - NULL, 0); + struct netlink_dump_control c = { + .dump = nfnl_acct_dump, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); } if (!tb[NFACCT_NAME]) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d751e3d4b4b..ab74845876d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1736,10 +1736,7 @@ errout_skb: int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, - struct netlink_callback *), - int (*done)(struct netlink_callback *), - u16 min_dump_alloc) + struct netlink_dump_control 
*control) { struct netlink_callback *cb; struct sock *sk; @@ -1750,10 +1747,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; - cb->dump = dump; - cb->done = done; + cb->dump = control->dump; + cb->done = control->done; cb->nlh = nlh; - cb->min_dump_alloc = min_dump_alloc; + cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a1154717219e..9f40441d7a7d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -563,8 +563,13 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; genl_unlock(); - err = netlink_dump_start(net->genl_sock, skb, nlh, - ops->dumpit, ops->done, 0); + { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + err = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } genl_lock(); return err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 6b7697fd911b..4195555aea65 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -301,10 +301,12 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(sock_diag_nlsk, skb, h, - unix_diag_dump, NULL, 0); - else + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = unix_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } else return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 66b84fbf2746..7128dde0fe1a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2299,8 +2299,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, - link->dump, link->done, 0); + { + struct 
netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, -- cgit v1.2.3 From 48752f6513012a1b078da08b145d5c40a644f058 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 8 Feb 2012 00:45:00 +0000 Subject: rtnetlink: Fix VF IFLA policy Add VF spoof check to IFLA policy. The original patch I submitted to add the spoof checking feature to rtnl failed to add the proper policy rule that identifies the data type and len. This patch corrects that oversight. No bugs have been reported against this but it may cause some problem for the netlink message parsing that uses the policy table. CC: stable@vger.kernel.org Signed-off-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5cf39cd7da85..2be10181d583 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1132,6 +1132,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { .len = sizeof(struct ifla_vf_vlan) }, [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_spoofchk) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3 From 77a1abf54f4b003ad6e59c535045b2ad89fedfeb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2012 04:50:09 +0000 Subject: net: export netdev_stats_to_stats64 Some drivers use internal netdev stats member to store part of their stats, yet advertize ndo_get_stats64() to implement some 64bit fields. Allow them to use netdev_stats_to_stats64() helper to make the copy of netdev stats before they compute their 64bit counters. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b7d037c2c5..4d279c5287f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2557,6 +2557,8 @@ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); +extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; diff --git a/net/core/dev.c b/net/core/dev.c index 763a0eda7158..5ef3b65c3687 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5834,12 +5834,12 @@ void netdev_run_todo(void) /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ -static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, - const struct net_device_stats *netdev_stats) +void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats) { #if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*stats64)); + BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + memcpy(stats64, netdev_stats, sizeof(*stats64)); #else size_t i, n = sizeof(*stats64) / sizeof(u64); const unsigned long *src = (const unsigned long *)netdev_stats; @@ -5851,6 +5851,7 @@ static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, dst[i] = src[i]; #endif } +EXPORT_SYMBOL(netdev_stats_to_stats64); /** * dev_get_stats - get network device statistics -- cgit v1.2.3 From 95f050bf7f64be5168ae2e2c715bb0b0ded141d1 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 6 Mar 2012 16:12:15 -0500 Subject: net: Use bool for return value of dev_valid_name(). Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d279c5287f8..a89933bc4f2f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2122,7 +2122,7 @@ extern int netdev_rx_handler_register(struct net_device *dev, void *rx_handler_data); extern void netdev_rx_handler_unregister(struct net_device *dev); -extern int dev_valid_name(const char *name); +extern bool dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); diff --git a/net/core/dev.c b/net/core/dev.c index 5ef3b65c3687..0090809af7bd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -848,21 +848,21 @@ EXPORT_SYMBOL(dev_get_by_flags_rcu); * to allow sysfs to work. We also disallow any kind of * whitespace. 
*/ -int dev_valid_name(const char *name) +bool dev_valid_name(const char *name) { if (*name == '\0') - return 0; + return false; if (strlen(name) >= IFNAMSIZ) - return 0; + return false; if (!strcmp(name, ".") || !strcmp(name, "..")) - return 0; + return false; while (*name) { if (*name == '/' || isspace(*name)) - return 0; + return false; name++; } - return 1; + return true; } EXPORT_SYMBOL(dev_valid_name); -- cgit v1.2.3 From 43db362d3adda9e0a915ddb9a8d1a41186e19179 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Sun, 11 Mar 2012 12:51:50 +0000 Subject: net: get rid of some pointless casts to sockaddr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following 4 functions: move_addr_to_kernel move_addr_to_user verify_iovec verify_compat_iovec are always effectively called with a sockaddr_storage. Make this explicit by changing their signature. This removes a large number of casts from sockaddr_storage to sockaddr. Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. 
Miller --- include/linux/socket.h | 4 ++-- include/net/compat.h | 2 +- net/compat.c | 2 +- net/core/iovec.c | 2 +- net/socket.c | 36 ++++++++++++++---------------------- 5 files changed, 19 insertions(+), 27 deletions(-) (limited to 'net/core') diff --git a/include/linux/socket.h b/include/linux/socket.h index d0e77f607a79..da2d3e2543f3 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -326,11 +326,11 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); -extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); +extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); struct timespec; diff --git a/include/net/compat.h b/include/net/compat.h index 9ee75edcc295..a974ae92d182 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -41,7 +41,7 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); #endif /* defined(CONFIG_COMPAT) */ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); -extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); +extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr_storage *, int); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, unsigned, unsigned); diff --git a/net/compat.c b/net/compat.c index 
6def90e0a112..64b4515a64e6 100644 --- a/net/compat.c +++ b/net/compat.c @@ -79,7 +79,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) /* I've named the args so it is easy to tell whose space the pointers are in. */ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, - struct sockaddr *kern_address, int mode) + struct sockaddr_storage *kern_address, int mode) { int tot_len; diff --git a/net/core/iovec.c b/net/core/iovec.c index c40f27e7d208..7e7aeb01de45 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,7 +35,7 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode) { int size, ct, err; diff --git a/net/socket.c b/net/socket.c index 28a96af484b4..12a48d846223 100644 --- a/net/socket.c +++ b/net/socket.c @@ -181,7 +181,7 @@ static DEFINE_PER_CPU(int, sockets_in_use); * invalid addresses -EFAULT is returned. On a success 0 is returned. */ -int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) +int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr) { if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) return -EINVAL; @@ -209,7 +209,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) * specified. Zero is returned for a success. 
*/ -static int move_addr_to_user(struct sockaddr *kaddr, int klen, +static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, void __user *uaddr, int __user *ulen) { int err; @@ -1449,7 +1449,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); + err = move_addr_to_kernel(umyaddr, addrlen, &address); if (err >= 0) { err = security_socket_bind(sock, (struct sockaddr *)&address, @@ -1556,7 +1556,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user((struct sockaddr *)&address, + err = move_addr_to_user(&address, len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; @@ -1605,7 +1605,7 @@ SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); + err = move_addr_to_kernel(uservaddr, addrlen, &address); if (err < 0) goto out_put; @@ -1645,7 +1645,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); if (err) goto out_put; - err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); + err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1677,7 +1677,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1); if (!err) - err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, + err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -1716,7 +1716,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, 
size_t, len, msg.msg_controllen = 0; msg.msg_namelen = 0; if (addr) { - err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); + err = move_addr_to_kernel(addr, addr_len, &address); if (err < 0) goto out_put; msg.msg_name = (struct sockaddr *)&address; @@ -1779,7 +1779,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, err = sock_recvmsg(sock, &msg, size, flags); if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user((struct sockaddr *)&address, + err2 = move_addr_to_user(&address, msg.msg_namelen, addr, addr_len); if (err2 < 0) err = err2; @@ -1933,13 +1933,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(msg_sys, iov, - (struct sockaddr *)&address, - VERIFY_READ); + err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ); } else - err = verify_iovec(msg_sys, iov, - (struct sockaddr *)&address, - VERIFY_READ); + err = verify_iovec(msg_sys, iov, &address, VERIFY_READ); if (err < 0) goto out_freeiov; total_len = err; @@ -2143,13 +2139,9 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(msg_sys, iov, - (struct sockaddr *)&addr, - VERIFY_WRITE); + err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); } else - err = verify_iovec(msg_sys, iov, - (struct sockaddr *)&addr, - VERIFY_WRITE); + err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; @@ -2166,7 +2158,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, len = err; if (uaddr != NULL) { - err = move_addr_to_user((struct sockaddr *)&addr, + err = move_addr_to_user(&addr, msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) -- cgit v1.2.3