From 84fa7933a33f806bbbaae6775e87459b1ec584c0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 29 Aug 2006 16:44:56 -0700 Subject: [NET]: Replace CHECKSUM_HW by CHECKSUM_PARTIAL/CHECKSUM_COMPLETE Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose checksum still needs to be completed) and CHECKSUM_COMPLETE (for incoming packets, device supplied full checksum). Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d4a1ec3bded5..fc82f6f6e1c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,12 +1166,12 @@ EXPORT_SYMBOL(netif_device_attach); * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. */ -int skb_checksum_help(struct sk_buff *skb, int inward) +int skb_checksum_help(struct sk_buff *skb) { unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) + if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { @@ -1223,7 +1223,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -1444,11 +1444,11 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_PARTIAL && (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto out_kfree_skb; gso: -- cgit v1.2.3 From b6fe17d6cc5d570b72f8e4da351b593c5a680355 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:06:13 -0700 Subject: [NET] netdev: Check name length Some improvements to robust name interface. These API's are safe now by convention, but it is worth providing some safety checks against future bugs. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index fc82f6f6e1c1..14de297d024d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -640,6 +640,8 @@ int dev_valid_name(const char *name) { if (*name == '\0') return 0; + if (strlen(name) >= IFNAMSIZ) + return 0; if (!strcmp(name, ".") || !strcmp(name, "..")) return 0; @@ -3191,13 +3193,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, struct net_device *dev; int alloc_size; + BUG_ON(strlen(name) >= sizeof(dev->name)); + /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { - printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); return NULL; } -- cgit v1.2.3 From 85670cc1faa2e1472e4a423cbf0b5e3d55c5ba88 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Sep 2006 16:45:45 -0700 Subject: [NET_SCHED]: Fix fallout from dev->qdisc RCU change The move of qdisc destruction to a rcu callback broke locking in the entire qdisc layer by invalidating previously valid assumptions about the context in which changes to the qdisc tree occur. The two assumptions were: - since changes only happen in process context, read_lock doesn't need bottem half protection. Now invalid since destruction of inner qdiscs, classifiers, actions and estimators happens in the RCU callback unless they're manually deleted, resulting in dead-locks when read_lock in process context is interrupted by write_lock_bh in bottem half context. - since changes only happen under the RTNL, no additional locking is necessary for data not used during packet processing (f.e. u32_list). Again, since destruction now happens in the RCU callback, this assumption is not valid anymore, causing races while using this data, which can result in corruption or use-after-free. Instead of "fixing" this by disabling bottem halfs everywhere and adding new locks/refcounting, this patch makes these assumptions valid again by moving destruction back to process context. Since only the dev->qdisc pointer is protected by RCU, but ->enqueue and the qdisc tree are still protected by dev->qdisc_lock, destruction of the tree can be performed immediately and only the final free needs to happen in the rcu callback to make sure dev_queue_xmit doesn't access already freed memory. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 14 ++++++----- net/sched/cls_api.c | 4 +-- net/sched/sch_api.c | 16 ++++++------ net/sched/sch_generic.c | 66 ++++++++++++++++--------------------------------- 4 files changed, 39 insertions(+), 61 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 14de297d024d..4d891beab138 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1480,14 +1480,16 @@ gso: if (q->enqueue) { /* Grab device queue */ spin_lock(&dev->queue_lock); + q = dev->qdisc; + if (q->enqueue) { + rc = q->enqueue(skb, q); + qdisc_run(dev); + spin_unlock(&dev->queue_lock); - rc = q->enqueue(skb, q); - - qdisc_run(dev); - + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; + } spin_unlock(&dev->queue_lock); - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; } /* The device has no queue. Common case for software devices: diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7e14f14058e9..37a184021647 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -401,7 +401,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) return skb->len; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); if (!tcm->tcm_parent) q = dev->qdisc_sleeping; else @@ -458,7 +458,7 @@ errout: if (cl) cops->put(q, cl); out: - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); dev_put(dev); return skb->len; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a19eff12cf78..0b6489291140 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -195,14 +195,14 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { if (q->handle == handle) { - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); return q; } } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); return NULL; } @@ -837,7 +837,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx > s_idx) s_q_idx = 0; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); q_idx = 0; list_for_each_entry(q, &dev->qdisc_list, list) { if (q_idx < s_q_idx) { @@ -846,12 +846,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) } if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) { - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); goto done; } q_idx++; } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); } done: @@ -1074,7 +1074,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - read_lock_bh(&qdisc_tree_lock); + read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && @@ -1096,7 +1096,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) break; t++; } - read_unlock_bh(&qdisc_tree_lock); + read_unlock(&qdisc_tree_lock); cb->args[0] = t; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6f9151899795..88c6a99ce53c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -45,11 +45,10 @@ The idea is the following: - enqueue, dequeue are serialized via top level device spinlock dev->queue_lock. - - tree walking is protected by read_lock_bh(qdisc_tree_lock) + - tree walking is protected by read_lock(qdisc_tree_lock) and this lock is used only in process context. - - updates to tree are made under rtnl semaphore or - from softirq context (__qdisc_destroy rcu-callback) - hence this lock needs local bh disabling. + - updates to tree are made only under rtnl semaphore, + hence this lock may be made without local bh disabling. qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! */ @@ -57,14 +56,14 @@ DEFINE_RWLOCK(qdisc_tree_lock); void qdisc_lock_tree(struct net_device *dev) { - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); spin_lock_bh(&dev->queue_lock); } void qdisc_unlock_tree(struct net_device *dev) { spin_unlock_bh(&dev->queue_lock); - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } /* @@ -483,20 +482,6 @@ void qdisc_reset(struct Qdisc *qdisc) static void __qdisc_destroy(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); - struct Qdisc_ops *ops = qdisc->ops; - -#ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif - write_lock(&qdisc_tree_lock); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - write_unlock(&qdisc_tree_lock); - module_put(ops->owner); - - dev_put(qdisc->dev); kfree((char *) qdisc - qdisc->padded); } @@ -504,32 +489,23 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - struct list_head cql = LIST_HEAD_INIT(cql); - struct Qdisc *cq, *q, *n; + struct Qdisc_ops *ops = qdisc->ops; if (qdisc->flags & TCQ_F_BUILTIN || - !atomic_dec_and_test(&qdisc->refcnt)) + !atomic_dec_and_test(&qdisc->refcnt)) return; - if (!list_empty(&qdisc->list)) { - if (qdisc->ops->cl_ops == NULL) - list_del(&qdisc->list); - else - list_move(&qdisc->list, &cql); - } - - /* unlink inner qdiscs from dev->qdisc_list immediately */ - list_for_each_entry(cq, &cql, list) - list_for_each_entry_safe(q, n, &qdisc->dev->qdisc_list, list) - if (TC_H_MAJ(q->parent) == TC_H_MAJ(cq->handle)) { - if (q->ops->cl_ops == NULL) - list_del_init(&q->list); - else - list_move_tail(&q->list, &cql); - } - list_for_each_entry_safe(cq, n, &cql, list) - list_del_init(&cq->list); + list_del(&qdisc->list); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); +#endif + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + module_put(ops->owner); + dev_put(qdisc->dev); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -549,15 +525,15 @@ void dev_activate(struct net_device *dev) printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); list_add_tail(&qdisc->list, &dev->qdisc_list); - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } else { qdisc = &noqueue_qdisc; } - write_lock_bh(&qdisc_tree_lock); + write_lock(&qdisc_tree_lock); dev->qdisc_sleeping = qdisc; - write_unlock_bh(&qdisc_tree_lock); + write_unlock(&qdisc_tree_lock); } if (!netif_carrier_ok(dev)) -- cgit v1.2.3