From 85dfb745ee40232876663ae206cba35f24ab2a40 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 18 Feb 2013 16:24:20 +0100 Subject: af_key: initialize satype in key_notify_policy_flush() This field was left uninitialized. Some user daemons perform check against this field. Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9ef79851f297..d5a4a796f025 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2694,6 +2694,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; -- cgit v1.2.3 From 734907e82d21a75a514b80164185427a832a00c0 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 09:30:23 -0800 Subject: openvswitch: Fix ovs_vport_cmd_del return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. The fix is the same as in ovs_vport_cmd_set. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db343247..5e275b903f3c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1772,6 +1772,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto exit_unlock; + err = 0; ovs_dp_detach_port(vport); genl_notify(reply, genl_info_net(info), info->snd_portid, -- cgit v1.2.3 From cb7c5bdffb727a3d4dea5247d9d1d52238b01d90 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 13:18:01 -0800 Subject: openvswitch: Fix ovs_vport_cmd_new return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5e275b903f3c..a2cd3e6d03a2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1691,6 +1691,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { -- cgit v1.2.3 From a15ff76c955d17cf58313097e4a24124da022b1d Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 15 Feb 2013 11:07:43 -0800 Subject: openvswitch: Call genlmsg_end in queue_userspace_packet Without genlmsg_end the upcall message ends (according to nlmsg_len) after the struct ovs_header. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a2cd3e6d03a2..cae1062f94ba 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -395,6 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: -- cgit v1.2.3 From 17b682a04841233f827073b327c6533e478dfcd4 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Tue, 19 Feb 2013 11:10:30 -0800 Subject: openvswitch: Fix parsing invalid LLC/SNAP ethertypes Before this patch, if an LLC/SNAP packet with OUI 00:00:00 had an ethertype less than 1536 the flow key given to userspace in the upcall would contain the invalid ethertype (for example, 3). If userspace attempted to insert a kernel flow for this key it would be rejected by ovs_flow_from_nlattrs. This patch allows OVS to pass the OFTest pktact.DirectBadLlcPackets. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294cebc4f2..0c98d406124b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -484,7 +484,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= 1536) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, -- cgit v1.2.3 From 7b024082b2b279af58e24ebd46e81777723d58da Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 17:32:26 +0800 Subject: openvswitch: fix the calculation of checksum for vlan header In vlan_insert_tag(), we insert a 4-byte VLAN header _after_ mac header: memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); ... veth->h_vlan_proto = htons(ETH_P_8021Q); ... veth->h_vlan_TCI = htons(vlan_tci); so after it, we should recompute the checksum to include these 4 bytes. skb->data still points to the mac header, therefore VLAN header is at (2 * ETH_ALEN = 12) bytes after it, not (ETH_HLEN = 14) bytes. This can also be observed via tcpdump: 0x0000: ffff ffff ffff 5254 005d 6f6e 8100 000a 0x0010: 0806 0001 0800 0604 0001 5254 005d 6f6e 0x0020: c0a8 026e 0000 0000 0000 c0a8 0282 Similar for __pop_vlan_tci(), the vlan header we remove is the one overwritten in: memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); Therefore the VLAN_HLEN = 4 bytes after 2 * ETH_ALEN is the part we want to sub from checksum. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defeeba83..d4d5363c7ba7 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -115,7 +115,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); -- cgit v1.2.3 From d176ca2a48ff2b5d7becfacdcbd1d72c73bd22d1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 19:41:26 +0800 Subject: openvswitch: remove some useless comments These comments are useless in upstream kernel. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 3 +-- net/openvswitch/vport.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 670cbc3518de..2130d61c384a 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ + */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0bedbac4..6255e48e64c4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -326,8 +326,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { -- cgit v1.2.3 From 27a737ff7cb062fb9cbceba9b44d60aa74862bfa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Mar 2013 23:17:08 +0100 Subject: mac80211: always synchronize_net() during station removal If there are keys left during station removal, then a synchronize_net() will be done (for each key, I have a patch to address this for 3.10), otherwise it won't be done at all which causes issues because the station could be used for TX while it's being removed from the driver -- that might confuse the driver. Fix this by always doing synchronize_net() if no key was present any more. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a79ce820cb50..238a0cca320e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -766,6 +766,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta) struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; + bool have_key = false; might_sleep(); @@ -793,12 +794,19 @@ int __must_check __sta_info_destroy(struct sta_info *sta) list_del_rcu(&sta->list); mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) + for (i = 0; i < NUM_DEFAULT_KEYS; i++) { __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); - if (sta->ptk) + have_key = true; + } + if (sta->ptk) { __ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); + have_key = true; + } mutex_unlock(&local->key_mtx); + if (!have_key) + synchronize_net(); + sta->dead = true; local->num_sta--; -- cgit v1.2.3 From 021fcdc13acbab78589325ae2db0b384b4ee7222 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 7 Mar 2013 11:08:29 +0200 Subject: cfg80211: fix inconsistency in trace for rdev_set_mac_acl There is NETDEV_ENTRY that was incorrectly assigned as WIPHY_ASSIGN, fix it. Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- net/wireless/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b7a531380e19..6847d043edea 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1778,7 +1778,7 @@ TRACE_EVENT(rdev_set_mac_acl, ), TP_fast_assign( WIPHY_ASSIGN; - WIPHY_ASSIGN; + NETDEV_ASSIGN; __entry->acl_policy = params->acl_policy; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", -- cgit v1.2.3 From 1345ee6a6d90813f972379fad8b75f17026fc8b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Mar 2013 10:31:05 +0100 Subject: cfg80211: fix potential BSS memory leak and update In the odd case that while updating information from a beacon, a BSS was found that is part of a hidden group, we drop the new information. In this case, however, we leak the IE buffer from the update, and erroneously update the entry's timestamp so it will never time out. Fix both these issues. Cc: Larry Finger Signed-off-by: Johannes Berg --- net/wireless/scan.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 674aadca0079..e93bd31d23bb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -698,11 +698,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR); if (found) { - found->pub.beacon_interval = tmp->pub.beacon_interval; - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - /* Update IEs */ if (rcu_access_pointer(tmp->pub.proberesp_ies)) { const struct cfg80211_bss_ies *old; @@ -723,6 +718,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (found->pub.hidden_beacon_bss && !list_empty(&found->hidden_list)) { + const struct cfg80211_bss_ies *f; + /* * The found BSS struct is one of the probe * response members of a group, but we're @@ -732,6 +729,10 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, * SSID to showing it, which is confusing so * drop this information. */ + + f = rcu_access_pointer(tmp->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, + rcu_head); goto drop; } @@ -761,6 +762,11 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } + + found->pub.beacon_interval = tmp->pub.beacon_interval; + found->pub.signal = tmp->pub.signal; + found->pub.capability = tmp->pub.capability; + found->ts = tmp->ts; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; -- cgit v1.2.3 From b141e811a0763bb107af4cd99d456193ccdb8053 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 11:04:45 +0100 Subject: NFC: llcp: Decrease socket ack log when accepting a connection This is really difficult to test with real NFC devices, but without this fix an LLCP server will eventually refuse new connections. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5332751943a9..5c7cdf3f2a83 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -278,6 +278,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } -- cgit v1.2.3 From 3536da06db0baa675f32de608c0a4c0f5ef0e9ff Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 15:40:04 +0100 Subject: NFC: llcp: Clean local timers and works when removing a device Whenever an adapter is removed we must clean all the local structures, especially the timers and scheduled work. Otherwise those asynchronous threads will eventually try to access the freed nfc_dev pointer if an LLCP link is up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 7f8266dd14cb..77e1d97b3996 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -142,20 +142,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, listen); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local, false); kfree(local); } @@ -1427,6 +1432,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local, false); + nfc_llcp_local_put(local); } -- cgit v1.2.3 From e6a3a4bb856a6fba551b43376c80f45836132710 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 16:33:30 +0100 Subject: NFC: llcp: Clean raw sockets from nfc_llcp_socket_release Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 77e1d97b3996..8a35423ead54 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -133,6 +133,35 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) } write_unlock(&local->sockets.lock); + + /* + * If we want to keep the listening sockets alive, + * we don't touch the RAW ones. + */ + if (listen == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + sk->sk_state = LLCP_CLOSED; + + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sock_orphan(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) -- cgit v1.2.3 From 3bbc0ceb7ac2bf694d31362eea2c71a680e5deeb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 17:01:06 +0100 Subject: NFC: llcp: Report error to pending sockets when a device is removed Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 8a35423ead54..b530afadd76c 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -68,7 +68,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, + int err) { struct sock *sk; struct hlist_node *tmp; @@ -100,7 +101,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); @@ -123,7 +127,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -150,8 +157,9 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_socket_purge(llcp_sock); + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; - sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -173,7 +181,7 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - nfc_llcp_socket_release(local, listen); + nfc_llcp_socket_release(local, listen, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); @@ -1382,7 +1390,7 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) return; /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, -- cgit v1.2.3 From 07e5a5f5ab7474589c15fc5d88e4f7fc43979530 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Mar 2013 13:22:05 +0100 Subject: mac80211: fix crash with P2P Device returning action frames If a P2P Device interface receives an unhandled action frame, we attempt to return it. This crashes because it doesn't have a channel context. Fix the crash by using status->band and properly mark the return frame as an off-channel frame. Reported-by: Ilan Peer Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bb73ed2d20b9..c6844ad080be 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2675,7 +2675,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) memset(nskb->cb, 0, sizeof(nskb->cb)); - ieee80211_tx_skb(rx->sdata, nskb); + if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb); + + info->flags = IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK | + IEEE80211_TX_CTL_NO_CCK_RATE; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = + local->hw.offchannel_tx_hw_queue; + } + + __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; -- cgit v1.2.3 From b47506d91259c29b9c75c404737eb6525556f9b4 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Mon, 4 Mar 2013 10:39:49 +0800 Subject: batman-adv: verify tt len does not exceed packet len batadv_iv_ogm_process() accesses the packet using the tt_num_changes attribute regardless of the real packet len (assuming the length check was done before). Therefore a length check is needed to avoid reading random memory. Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a0b253ecadaf..a5bb0a769eb9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1288,7 +1288,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1299,8 +1300,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; -- cgit v1.2.3 From 4660c7f498c07c43173142ea95145e9dac5a6d14 Mon Sep 17 00:00:00 2001 From: David Ward Date: Mon, 11 Mar 2013 10:43:39 +0000 Subject: net/ipv4: Ensure that location of timestamp option is stored This is needed in order to detect if the timestamp option appears more than once in a packet, to remove the option if the packet is fragmented, etc. My previous change neglected to store the option location when the router addresses were prespecified and Pointer > Length. But now the option location is also stored when Flag is an unrecognized value, to ensure these option handling behaviors are still performed. Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 310a3647c83d..ec7264514a82 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -429,12 +426,12 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { -- cgit v1.2.3 From 3da889b616164bde76a37350cf28e0d17a94e979 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 11 Mar 2013 13:52:17 +0000 Subject: bridge: reserve space for IFLA_BRPORT_FAST_LEAVE The bridge multicast fast leave feature was added sufficient space was not reserved in the netlink message. This means the flag may be lost in netlink events and results of queries. Found by observation while looking up some netlink stuff for discussion with Vlad. Problem introduced by commit c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Author: David S. Miller Date: Wed Dec 5 16:24:45 2012 -0500 bridge: implement multicast fast leave Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 27aa3ee517ce..db12a0fcfe50 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -29,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } -- cgit v1.2.3 From c80a8512ee3a8e1f7c3704140ea55f21dc6bd651 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Mar 2013 20:30:44 +0000 Subject: net/core: move vlan_depth out of while loop in skb_network_protocol() [ Bug added added in commit 05e8ef4ab2d8087d (net: factor out skb_mac_gso_segment() from skb_gso_segment() ) ] move vlan_depth out of while loop, or else vlan_depth always is ETH_HLEN, can not be increased, and lead to infinite loop when frame has two vlan headers. Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index dffbef70cd31..d540ced1f6c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2219,9 +2219,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; while (type == htons(ETH_P_8021Q)) { - int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) -- cgit v1.2.3 From f2815633504b442ca0b0605c16bf3d88a3a0fcea Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Mar 2013 15:53:23 +0000 Subject: sctp: Use correct sideffect command in duplicate cookie handling When SCTP is done processing a duplicate cookie chunk, it tries to delete a newly created association. For that, it has to set the right association for the side-effect processing to work. However, when it uses the SCTP_CMD_NEW_ASOC command, that performs more work then really needed (like hashing the associationa and assigning it an id) and there is no point to do that only to delete the association as a next step. In fact, it also creates an impossible condition where an association may be found by the getsockopt() call, and that association is empty. This causes a crash in some sctp getsockopts. The solution is rather simple. We simply use SCTP_CMD_SET_ASOC command that doesn't have all the overhead and does exactly what we need. Reported-by: Karl Heiss Tested-by: Karl Heiss CC: Neil Horman Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5131fcfedb03..de1a0138317f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter -- cgit v1.2.3 From 2317f449af30073cfa6ec8352e4a65a89e357bdd Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Thu, 7 Mar 2013 21:39:37 +0000 Subject: sctp: don't break the loop while meeting the active_path so as to find the matched transport sctp_assoc_lookup_tsn() function searchs which transport a certain TSN was sent on, if not found in the active_path transport, then go search all the other transports in the peer's transport_addr_list, however, we should continue to the next entry rather than break the loop when meet the active_path transport. Signed-off-by: Xufeng Zhang Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 43cd0dd9149d..d2709e2b7be6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1079,7 +1079,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { -- cgit v1.2.3 From 16fad69cfe4adbbfa813de516757b87bcae36d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2013 05:40:32 +0000 Subject: tcp: fix skb_availroom() Chrome OS team reported a crash on a Pixel ChromeBook in TCP stack : https://code.google.com/p/chromium/issues/detail?id=182056 commit a21d45726acac (tcp: avoid order-1 allocations on wifi and tx path) did a poor choice adding an 'avail_size' field to skb, while what we really needed was a 'reserved_tailroom' one. It would have avoided commit 22b4a4f22da (tcp: fix retransmit of partially acked frames) and this commit. Crash occurs because skb_split() is not aware of the 'avail_size' management (and should not be aware) Signed-off-by: Eric Dumazet Reported-by: Mukesh Agrawal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a7..6f2bb860e051 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -500,7 +500,7 @@ struct sk_buff { union { __u32 mark; __u32 dropcount; - __u32 avail_size; + __u32 reserved_tailroom; }; sk_buff_data_t inner_transport_header; @@ -1447,7 +1447,10 @@ static inline int skb_tailroom(const struct sk_buff *skb) */ static inline int skb_availroom(const struct sk_buff *skb) { - return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; + if (skb_is_nonlinear(skb)) + return 0; + + return skb->end - skb->tail - skb->reserved_tailroom; } /** diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854fcae24..e22020790709 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -775,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..817fbb396bc8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; -- cgit v1.2.3 From eb20ff9c91ddcb2d55c1849a87d3db85af5e88a9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Mar 2013 19:46:20 -0300 Subject: Bluetooth: Fix not closing SCO sockets in the BT_CONNECT2 state With deferred setup for SCO, it is possible that userspace closes the socket when it is in the BT_CONNECT2 state, after the Connect Request is received but before the Accept Synchonous Connection is sent. If this happens the following crash was observed, when the connection is terminated: [ +0.000003] hci_sync_conn_complete_evt: hci0 status 0x10 [ +0.000005] sco_connect_cfm: hcon ffff88003d1bd800 bdaddr 40:98:4e:32:d7:39 status 16 [ +0.000003] sco_conn_del: hcon ffff88003d1bd800 conn ffff88003cc8e300, err 110 [ +0.000015] BUG: unable to handle kernel NULL pointer dereference at 0000000000000199 [ +0.000906] IP: [] __lock_acquire+0xed/0xe82 [ +0.000000] PGD 3d21f067 PUD 3d291067 PMD 0 [ +0.000000] Oops: 0002 [#1] SMP [ +0.000000] Modules linked in: rfcomm bnep btusb bluetooth [ +0.000000] CPU 0 [ +0.000000] Pid: 1481, comm: kworker/u:2H Not tainted 3.9.0-rc1-25019-gad82cdd #1 Bochs Bochs [ +0.000000] RIP: 0010:[] [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP: 0018:ffff88003c3c19d8 EFLAGS: 00010002 [ +0.000000] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000000000 [ +0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003d1be868 [ +0.000000] RBP: ffff88003c3c1a98 R08: 0000000000000002 R09: 0000000000000000 [ +0.000000] R10: ffff88003d1be868 R11: ffff88003e20b000 R12: 0000000000000002 [ +0.000000] R13: ffff88003aaa8000 R14: 000000000000006e R15: ffff88003d1be850 [ +0.000000] FS: 0000000000000000(0000) GS:ffff88003e200000(0000) knlGS:0000000000000000 [ +0.000000] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ +0.000000] CR2: 0000000000000199 CR3: 000000003c1cb000 CR4: 00000000000006b0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000000] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ +0.000000] Process kworker/u:2H (pid: 1481, threadinfo ffff88003c3c0000, task ffff88003aaa8000) [ +0.000000] Stack: [ +0.000000] ffffffff81b16342 0000000000000000 0000000000000000 ffff88003d1be868 [ +0.000000] ffffffff00000000 00018c0c7863e367 000000003c3c1a28 ffffffff8101efbd [ +0.000000] 0000000000000000 ffff88003e3d2400 ffff88003c3c1a38 ffffffff81007c7a [ +0.000000] Call Trace: [ +0.000000] [] ? kvm_clock_read+0x34/0x3b [ +0.000000] [] ? paravirt_sched_clock+0x9/0xd [ +0.000000] [] ? sched_clock+0x9/0xb [ +0.000000] [] ? sched_clock_local+0x12/0x75 [ +0.000000] [] lock_acquire+0x93/0xb1 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? lock_release_holdtime.part.22+0x4e/0x55 [ +0.000000] [] _raw_spin_lock+0x40/0x74 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? _raw_spin_unlock+0x23/0x36 [ +0.000000] [] spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] sco_conn_del+0x76/0xbb [bluetooth] [ +0.000000] [] sco_connect_cfm+0x2da/0x2e9 [bluetooth] [ +0.000000] [] hci_proto_connect_cfm+0x38/0x65 [bluetooth] [ +0.000000] [] hci_sync_conn_complete_evt.isra.79+0x11a/0x13e [bluetooth] [ +0.000000] [] hci_event_packet+0x153b/0x239d [bluetooth] [ +0.000000] [] ? _raw_spin_unlock_irqrestore+0x48/0x5c [ +0.000000] [] hci_rx_work+0xf3/0x2e3 [bluetooth] [ +0.000000] [] process_one_work+0x1dc/0x30b [ +0.000000] [] ? process_one_work+0x172/0x30b [ +0.000000] [] ? spin_lock_irq+0x9/0xb [ +0.000000] [] worker_thread+0x123/0x1d2 [ +0.000000] [] ? manage_workers+0x240/0x240 [ +0.000000] [] kthread+0x9d/0xa5 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] [] ret_from_fork+0x7c/0xb0 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] Code: d7 44 89 8d 50 ff ff ff 4c 89 95 58 ff ff ff e8 44 fc ff ff 44 8b 8d 50 ff ff ff 48 85 c0 4c 8b 95 58 ff ff ff 0f 84 7a 04 00 00 ff 80 98 01 00 00 83 3d 25 41 a7 00 00 45 8b b5 e8 05 00 00 [ +0.000000] RIP [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP [ +0.000000] CR2: 0000000000000199 [ +0.000000] ---[ end trace e73cd3b52352dd34 ]--- Cc: stable@vger.kernel.org [3.8] Signed-off-by: Vinicius Costa Gomes Tested-by: Frederic Dalleau Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e39..aaf1957bc4fe 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -361,6 +361,7 @@ static void __sco_sock_close(struct sock *sk) sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.2.3 From 0d98da5d845e0d0293055913ce65c9904b3b902a Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 7 Mar 2013 17:20:46 +0000 Subject: netfilter: nf_conntrack: register pernet subsystem before register L4 proto In (c296bb4 netfilter: nf_conntrack: refactor l4proto support for netns) the l4proto gre/dccp/udplite/sctp registration happened before the pernet subsystem, which is wrong. Register pernet subsystem before register L4proto since after register L4proto, init_conntrack may try to access the resources which allocated in register_pernet_subsys. Reported-by: Alexey Dobriyan Cc: Alexey Dobriyan Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_gre.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_sctp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_udplite.c | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 432f95780003..ba65b2041eb4 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -969,6 +969,10 @@ static int __init nf_conntrack_proto_dccp_init(void) { int ret; + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&dccp_proto4); if (ret < 0) goto out_dccp4; @@ -977,16 +981,12 @@ static int __init nf_conntrack_proto_dccp_init(void) if (ret < 0) goto out_dccp6; - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&dccp_proto6); out_dccp6: nf_ct_l4proto_unregister(&dccp_proto4); out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bd7d01d9c7e7..155ce9f8a0db 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -420,18 +420,18 @@ static int __init nf_ct_proto_gre_init(void) { int ret; - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 480f616d5936..ec83536def9a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -888,6 +888,10 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret < 0) goto out_sctp4; @@ -896,16 +900,12 @@ static int __init nf_conntrack_proto_sctp_init(void) if (ret < 0) goto out_sctp6; - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); out_sctp6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 157489581c31..ca969f6273f7 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void) { int ret; + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); if (ret < 0) goto out_udplite4; @@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void) if (ret < 0) goto out_udplite6; - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); out_udplite6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: return ret; } -- cgit v1.2.3 From bae99f7a1d372374aaf9ed8910f3b825da995b36 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2013 06:03:18 +0000 Subject: netfilter: nfnetlink_queue: fix incorrect initialization of copy range field 2^16 = 0xffff, not 0xfffff (note the extra 'f'). Not dangerous since you adjust it to min_t(data_len, skb->len) just after on. Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 858fd52c1040..1cb48540f86a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -112,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); -- cgit v1.2.3 From a82783c91d5dce680dbd290ebf301a520b0e72a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Mar 2013 20:11:01 +0000 Subject: netfilter: ip6t_NPT: restrict to mangle table As the translation is stateless, using it in nat table doesn't work (only initial packet is translated). filter table OUTPUT works but won't re-route the packet after translation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc1405a18..33608c610276 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -114,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, -- cgit v1.2.3 From 8c6216d7f118a128678270824b6a1286a63863ca Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Wed, 13 Mar 2013 02:37:49 +0000 Subject: Revert "ip_gre: make ipgre_tunnel_xmit() not parse network header as IP unconditionally" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 412ed94744d16806fbec3bd250fd94e71cde5a1f. The commit is wrong as tiph points to the outer IPv4 header which is installed at ipgre_header() and not the inner one which is protocol dependant. This commit broke succesfully opennhrp which use PF_PACKET socket with ETH_P_NHRP protocol. Additionally ssl_addr is set to the link-layer IPv4 address. This address is written by ipgre_header() to the skb earlier, and this is the IPv4 header tiph should point to - regardless of the inner protocol payload. Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e674ec5..91d66dbde9c0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -798,10 +798,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - if (skb->protocol == htons(ETH_P_IP)) - tiph = (const struct iphdr *)skb->data; - else - tiph = &tunnel->parms.iph; + tiph = (const struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; -- cgit v1.2.3 From a5b8db91442fce9c9713fcd656c3698f1adde1d6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Mar 2013 04:18:58 +0000 Subject: rtnetlink: Mask the rta_type when range checking Range/validity checks on rta_type in rtnetlink_rcv_msg() do not account for flags that may be set. This causes the function to return -EINVAL when flags are set on the type (for example NLA_F_NESTED). Signed-off-by: Vlad Yasevich Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a585d45cc9d9..5fb8d7e47294 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2621,7 +2621,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; + unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; if (flavor) { if (flavor > rta_max[sz_idx]) return -EINVAL; -- cgit v1.2.3 From 3d84fa98aca7f05f7010022bc45acb1b50326332 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 15 Mar 2013 06:39:12 +0000 Subject: bridge: Add support for setting BR_ROOT_BLOCK flag. Most of the support was already there. The only thing that was missing was the call to set the flag. Add this call. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index db12a0fcfe50..299fc5f40a26 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -330,6 +330,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); -- cgit v1.2.3 From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Mar 2013 01:03:33 -0700 Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids. Don't allow spoofing pids over unix domain sockets in the corner cases where a user has created a user namespace but has not yet created a pid namespace. Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- net/core/scm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e3..2dc6cdaaae8a 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || -- cgit v1.2.3 From 0d4f0608619de59fd8169dd8e72aadc28d80e715 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Mar 2013 07:01:28 +0000 Subject: tcp: dont handle MTU reduction on LISTEN socket When an ICMP ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG) message finds a LISTEN socket, and this socket is currently owned by the user, we set TCP_MTU_REDUCED_DEFERRED flag in listener tsq_flags. This is bad because if we clone the parent before it had a chance to clear the flag, the child inherits the tsq_flags value, and next tcp_release_cb() on the child will decrement sk_refcnt. Result is that we might free a live TCP socket, as reported by Dormando. IPv4: Attempt to release TCP socket in state 1 Fix this issue by testing sk_state against TCP_LISTEN early, so that we set TCP_MTU_REDUCED_DEFERRED on appropriate sockets (not a LISTEN one) This bug was introduced in commit 563d34d05786 (tcp: dont drop MTU reduction indications) Reported-by: dormando Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv6/tcp_ipv6.c | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec457310f..d09203c63264 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). - */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df5..f6d629fd6aee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -389,6 +389,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); -- cgit v1.2.3 From cf2e39429c245245db889fffdfbdf3f889a6cb22 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:06 +0200 Subject: ipvs: fix sctp chunk length order Fix wrong but non-fatal access to chunk length. sch->length should be in network order, next chunk should be aligned to 4 bytes. Problem noticed in sparse output. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f27688..cd1d7298f7ba 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } -- cgit v1.2.3 From 0c12582fbcdea0cbb0dfd224e1c5f9a8428ffa18 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:04 +0200 Subject: ipvs: add backup_only flag to avoid loops Dmitry Akindinov is reporting for a problem where SYNs are looping between the master and backup server when the backup server is used as real server in DR mode and has IPVS rules to function as director. Even when the backup function is enabled we continue to forward traffic and schedule new connections when the current master is using the backup server as real server. While this is not a problem for NAT, for DR and TUN method the backup server can not determine if a request comes from client or from director. To avoid such loops add new sysctl flag backup_only. It can be needed for DR/TUN setups that do not need backup and director function at the same time. When the backup function is enabled we stop any forwarding and pass the traffic to the local stack (real server mode). The flag disables the director function when the backup function is enabled. For setups that enable backup function for some virtual services and director function for other virtual services there should be another more complex solution to support DR/TUN mode, may be to assign per-virtual service syncid value, so that we can differentiate the requests. Reported-by: Dmitry Akindinov Tested-by: German Myzovsky Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- Documentation/networking/ipvs-sysctl.txt | 7 +++++++ include/net/ip_vs.h | 12 ++++++++++++ net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index f2a2488f1bf3..9573d0c48c6e 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -15,6 +15,13 @@ amemthresh - INTEGER enabled and the variable is automatically set to 2, otherwise the strategy is disabled and the variable is set to 1. +backup_only - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + If set, disable the director function while the server is + in backup mode to avoid packet loops for DR/TUN methods. + conntrack - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d392..fce8e6b66d55 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -976,6 +976,7 @@ struct netns_ipvs { int sysctl_sync_retries; int sysctl_nat_icmp_send; int sysctl_pmtu_disc; + int sysctl_backup_only; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1067,6 +1068,12 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return ipvs->sysctl_pmtu_disc; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return ipvs->sync_state & IP_VS_STATE_BACKUP && + ipvs->sysctl_backup_only; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1114,6 +1121,11 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a59..18b4bc55fa3d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1577,7 +1577,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1655,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1815,13 +1815,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1837,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1846,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c68198bf9128..9e2d1cccd1eb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); -- cgit v1.2.3 From bf93ad72cd8cfabe66a7b3d66236a1266d357189 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:05 +0200 Subject: ipvs: remove extra rcu lock In 3.7 we added code that uses ipv4_update_pmtu but after commit c5ae7d4192 (ipv4: must use rcu protection while calling fib_lookup) the RCU lock is not needed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 18b4bc55fa3d..61f49d241712 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; -- cgit v1.2.3 From 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 15 Mar 2013 11:32:30 +0000 Subject: inet: limit length of fragment queue hash table bucket lists This patch introduces a constant limit of the fragment queue hash table bucket list lengths. Currently the limit 128 is choosen somewhat arbitrary and just ensures that we can fill up the fragment cache with empty packets up to the default ip_frag_high_thresh limits. It should just protect from list iteration eating considerable amounts of cpu. If we reach the maximum length in one hash bucket a warning is printed. This is implemented on the caller side of inet_frag_find to distinguish between the different users of inet_fragment.c. I dropped the out of memory warning in the ipv4 fragment lookup path, because we already get a warning by the slab allocator. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 9 +++++++++ net/ipv4/inet_fragment.c | 20 +++++++++++++++++++- net/ipv4/ip_fragment.c | 11 ++++------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------ net/ipv6/reassembly.c | 8 ++++++-- 5 files changed, 44 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 76c3fe5ecc2e..0a1dcc2fa2f5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,6 +43,13 @@ struct inet_frag_queue { #define INETFRAGS_HASHSZ 64 +/* averaged: + * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / + * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or + * struct frag_queue)) + */ +#define INETFRAGS_MAXDEPTH 128 + struct inet_frags { struct hlist_head hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and @@ -76,6 +83,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 245ae078a07f..f4fd23de9b13 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,6 +21,7 @@ #include #include +#include #include static void inet_frag_secret_rebuild(unsigned long dummy) @@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; + int depth = 0; hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { @@ -284,9 +286,25 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, read_unlock(&f->lock); return q; } + depth++; } read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6d30acb600c..a6445b843ef4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -292,14 +292,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54087e96d7b8..6700069949dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include #include #include @@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c6a77290c6e..196ab9347ad1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include #include #include @@ -185,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } -- cgit v1.2.3 From 3dd6664fac7e6041bfc8756ae9e8c78f59108cd9 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 13:09:59 +0000 Subject: netfilter: remove unused "config IP_NF_QUEUE" Kconfig symbol IP_NF_QUEUE is unused since commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 ("netfilter: remove ip_queue support"). Let's remove it too. Signed-off-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ce2d43e1f09f..0d755c50994b 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3 From bec964ed3b2549086e1fdfbf7f1ce8c22f89baa4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 20 Mar 2013 10:50:50 +0100 Subject: NFC: llcp: Detach socket from process context only when releasing the socket Calling sock_orphan when e.g. the NFC adapter is removed can lead to kernel crashes when e.g. a connection less client is sleeping on the Rx workqueue, waiting for data to show up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 6 ------ net/nfc/llcp/sock.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index b530afadd76c..336813ff8cd5 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -107,8 +107,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); - - sock_orphan(accept_sk); } if (listen == true) { @@ -134,8 +132,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } @@ -164,8 +160,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, bh_unlock_sock(sk); - sock_orphan(sk); - sk_del_node_init(sk); } diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5c7cdf3f2a83..e488e440186a 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -464,8 +464,6 @@ static int llcp_sock_release(struct socket *sock) nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); - - sock_orphan(accept_sk); } } -- cgit v1.2.3 From 0017c0b57500606aab894cdb02fdf3380ddd60ee Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 11 Mar 2013 10:32:16 +0100 Subject: xfrm: Fix replay notification for esn. We may miscalculate the sequence number difference from the last time we send a notification if a sequence number wrap occured in the meantime. We fix this by adding a separate replay notify function for esn. Here we take the high bits of the sequence number into account to calculate the difference. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 35754cc8a9e5..a3906737f49e 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -334,6 +334,72 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) +{ + u32 seq_diff, oseq_diff; + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (!x->replay_maxdiff) + break; + + if (replay_esn->seq_hi == preplay_esn->seq_hi) + seq_diff = replay_esn->seq - preplay_esn->seq; + else + seq_diff = UINT_MAX - preplay_esn->seq + + replay_esn->seq; + + if (replay_esn->oseq_hi == preplay_esn->oseq_hi) + oseq_diff = replay_esn->oseq - preplay_esn->oseq; + else + oseq_diff = UINT_MAX - preplay_esn->oseq + + replay_esn->oseq; + + if (seq_diff < x->replay_maxdiff && + oseq_diff < x->replay_maxdiff) { + + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; @@ -510,7 +576,7 @@ static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_bmp, + .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; -- cgit v1.2.3 From b3155155440190de78fb501953136a6c5b82e327 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 20 Mar 2013 16:00:16 +0100 Subject: NFC: llcp: Remove possible double call to kfree_skb kfree_skb was called twice when the socket receive queue is full Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 336813ff8cd5..ee25f25f0cd6 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -821,7 +821,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } nfc_llcp_sock_put(llcp_sock); @@ -1022,7 +1021,6 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, skb_get(skb); } else { pr_err("Receive queue is full\n"); - kfree_skb(skb); } } -- cgit v1.2.3 From f1e79e208076ffe7bad97158275f1c572c04f5c7 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 19 Mar 2013 01:47:27 +0000 Subject: genetlink: trigger BUG_ON if a group name is too long Trigger BUG_ON if a group name is longer than GENL_NAMSIZ. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6f4105..5a55be3f17a5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); -- cgit v1.2.3 From 44046a593eb770dbecdabf1c82bcd252f2a8337b Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:12 +0000 Subject: udp: add encap_destroy callback Users of udp encapsulation currently have an encap_rcv callback which they can use to hook into the udp receive path. In situations where a encapsulation user allocates resources associated with a udp encap socket, it may be convenient to be able to also hook the proto .destroy operation. For example, if an encap user holds a reference to the udp socket, the destroy hook might be used to relinquish this reference. This patch adds a socket destroy hook into udp, which is set and enabled in the same way as the existing encap_rcv hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 1 + net/ipv4/udp.c | 7 +++++++ net/ipv6/udp.c | 8 ++++++++ 3 files changed, 16 insertions(+) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index 9d81de123c90..42278bbf7a88 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -68,6 +68,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963c..0a073a263720 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 599e1ba6d1ce..d8e5e852fc7a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1285,10 +1285,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.3 From 9980d001cec86c3c75f3a6008ddb73c397ea3b3e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:13 +0000 Subject: l2tp: add udp encap socket destroy handler L2TP sessions hold a reference to the tunnel socket to prevent it going away while sessions are still active. However, since tunnel destruction is handled by the sock sk_destruct callback there is a catch-22: a tunnel with sessions cannot be deleted since each session holds a reference to the tunnel socket. If userspace closes a managed tunnel socket, or dies, the tunnel will persist and it will be neccessary to individually delete the sessions using netlink commands. This is ugly. To prevent this occuring, this patch leverages the udp encapsulation socket destroy callback to gain early notification when the tunnel socket is closed. This allows us to safely close the sessions running in the tunnel, dropping the tunnel socket references in the process. The tunnel socket is then destroyed as normal, and the tunnel resources deallocated in sk_destruct. While we're at it, ensure that l2tp_tunnel_closeall correctly drops session references to allow the sessions to be deleted rather than leaking. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d36875f3427e..ee726a752292 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1282,6 +1282,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* No longer an encapsulation socket. See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; @@ -1360,6 +1361,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1373,6 +1376,16 @@ again: write_unlock_bh(&tunnel->hlist_lock); } +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} + /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. */ @@ -1668,6 +1681,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); -- cgit v1.2.3 From e34f4c7050e5471b6d4fb25380713937fc837514 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:14 +0000 Subject: l2tp: export l2tp_tunnel_closeall l2tp_core internally uses l2tp_tunnel_closeall to close all sessions in a tunnel when a UDP-encapsulation socket is destroyed. We need to do something similar for IP-encapsulation sockets. Export l2tp_tunnel_closeall as a GPL symbol to enable l2tp_ip and l2tp_ip6 to call it from their .destroy handlers. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ee726a752292..287e327342d1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -114,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -1312,7 +1311,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. */ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1375,6 +1374,7 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8eb8f1d47f3a..b0861f68a10b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -240,6 +240,7 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); extern int l2tp_session_delete(struct l2tp_session *session); -- cgit v1.2.3 From 936063175afd895913a5e9db77e1a0ef43ea44ea Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:15 +0000 Subject: l2tp: close sessions in ip socket destroy callback l2tp_core hooks UDP's .destroy handler to gain advance warning of a tunnel socket being closed from userspace. We need to do the same thing for IP-encapsulation sockets. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 6 ++++++ net/l2tp/l2tp_ip6.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7f41b7051269..571db8dd2292 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 41f2f8126ebc..c74f5a91ff6a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.3 From 2b551c6e7d5bca2c78c216b15ef675653d4f459a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:16 +0000 Subject: l2tp: close sessions before initiating tunnel delete When a user deletes a tunnel using netlink, all the sessions in the tunnel should also be deleted. Since running sessions will pin the tunnel socket with the references they hold, have the l2tp_tunnel_delete close all sessions in a tunnel before finally closing the tunnel socket. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 287e327342d1..0dd50c079f29 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_closeall(tunnel); return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -- cgit v1.2.3 From 8abbbe8ff572fd84d1b98eb9acf30611a97cf72e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:17 +0000 Subject: l2tp: take a reference for kernel sockets in l2tp_tunnel_sock_lookup When looking up the tunnel socket in struct l2tp_tunnel, hold a reference whether the socket was created by the kernel or by userspace. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0dd50c079f29..45373fee38c5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -191,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -209,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); -- cgit v1.2.3 From 02d13ed5f94af38c37d1abd53462fe48d78bcc9d Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:18 +0000 Subject: l2tp: don't BUG_ON sk_socket being NULL It is valid for an existing struct sock object to have a NULL sk_socket pointer, so don't BUG_ON in l2tp_tunnel_del_work if that should occur. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 45373fee38c5..e841ef2a68a5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1412,19 +1412,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work) return; sock = sk->sk_socket; - BUG_ON(!sock); - /* If the tunnel socket was created directly by the kernel, use the - * sk_* API to release the socket now. Otherwise go through the - * inet_* layer to shut the socket down, and let userspace close it. + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. * In either case the tunnel resources are freed in the socket * destructor when the tunnel socket goes away. */ - if (sock->file == NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); } else { - inet_shutdown(sock, 2); + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); } l2tp_tunnel_sock_put(sk); -- cgit v1.2.3 From 48f72f92b31431c40279b0fba6c5588e07e67d95 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:19 +0000 Subject: l2tp: add session reorder queue purge function to core If an l2tp session is deleted, it is necessary to delete skbs in-flight on the session's reorder queue before taking it down. Rather than having each pseudowire implementation reaching into the l2tp_session struct to handle this itself, provide a function in l2tp_core to purge the session queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 17 +++++++++++++++++ net/l2tp/l2tp_core.h | 1 + 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e841ef2a68a5..69c316dd02dc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -829,6 +829,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index b0861f68a10b..d40713d105fc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -246,6 +246,7 @@ extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunne extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); -- cgit v1.2.3 From 4c6e2fd35460208596fa099ee0750a4b0438aa5c Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:20 +0000 Subject: l2tp: purge session reorder queue on delete Add calls to l2tp_session_queue_purge as a part of l2tp_tunnel_closeall and l2tp_session_delete. Pseudowire implementations which are deleted only via. l2tp_core l2tp_session_delete calls can dispense with their own code for flushing the reorder queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 69c316dd02dc..c00f31b8cc04 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1373,6 +1373,8 @@ again: synchronize_rcu(); } + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); @@ -1813,6 +1815,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_free); */ int l2tp_session_delete(struct l2tp_session *session) { + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); -- cgit v1.2.3 From cf2f5c886a209377daefd5d2ba0bcd49c3887813 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:21 +0000 Subject: l2tp: push all ppp pseudowire shutdown through .release handler If userspace deletes a ppp pseudowire using the netlink API, either by directly deleting the session or by deleting the tunnel that contains the session, we need to tear down the corresponding pppox channel. Rather than trying to manage two pppox unbind codepaths, switch the netlink and l2tp_core session_close handlers to close via. the l2tp_ppp socket .release handler. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 53 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6a53371dba1f..7e3e16aefcb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -525,16 +508,12 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -880,18 +859,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support. @@ -1839,7 +1806,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ -- cgit v1.2.3 From 7b7c0719cd7afee725b920d75ec6a500b76107e6 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:22 +0000 Subject: l2tp: avoid deadlock in l2tp stats update l2tp's u64_stats writers were incorrectly synchronised, making it possible to deadlock a 64bit machine running a 32bit kernel simply by sending the l2tp code netlink commands while passing data through l2tp sessions. Previous discussion on netdev determined that alternative solutions such as spinlock writer synchronisation or per-cpu data would bring unjustified overhead, given that most users interested in high volume traffic will likely be running 64bit kernels on 64bit hardware. As such, this patch replaces l2tp's use of u64_stats with atomic_long_t, thereby avoiding the deadlock. Ref: http://marc.info/?l=linux-netdev&m=134029167910731&w=2 http://marc.info/?l=linux-netdev&m=134079868111131&w=2 Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 +++++++++++++------------------------------------ net/l2tp/l2tp_core.h | 19 ++++++------- net/l2tp/l2tp_debugfs.c | 28 +++++++++--------- net/l2tp/l2tp_netlink.c | 72 ++++++++++++++++++----------------------------- net/l2tp/l2tp_ppp.c | 46 +++++++++++++++--------------- 5 files changed, 93 insertions(+), 147 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c00f31b8cc04..97d30ac67c88 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -385,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -404,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -451,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -459,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -624,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -641,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -712,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -733,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -789,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -817,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -861,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -950,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1080,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1109,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d40713d105fc..519b013f8b31 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index c3813bc84552..072d7202e182 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c1bab22db85e..0825ff26e113 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7e3e16aefcb5..9d0eb8c13530 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -260,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. */ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -992,14 +992,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1633,14 +1633,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1675,14 +1675,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); -- cgit v1.2.3 From f6e16b299bacaa71c6604a784f2d088a966f8c23 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:23 +0000 Subject: l2tp: unhash l2tp sessions on delete, not on free If we postpone unhashing of l2tp sessions until the structure is freed, we risk: 1. further packets arriving and getting queued while the pseudowire is being closed down 2. the recv path hitting "scheduling while atomic" errors in the case that recv drops the last reference to a session and calls l2tp_session_free while in atomic context As such, l2tp sessions should be unhashed from l2tp_core data structures early in the teardown process prior to calling pseudowire close. For pseudowires like l2tp_ppp which have multiple shutdown codepaths, provide an unhash hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 ++++++++++++++++++++++++---------------------------- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ppp.c | 12 +++------ 3 files changed, 38 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 97d30ac67c88..8aecf5df6656 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1316,26 +1316,12 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } - + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) @@ -1732,64 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via. l2tp_session_delete and a pseudowire-specific session_close + * callback. + */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; - /* Delete the session from the hash */ + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel. - */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); } - - kfree(session); - - return; } -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->ref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 519b013f8b31..485a490fd990 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -242,6 +242,7 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 9d0eb8c13530..637a341c1e2d 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -466,19 +466,12 @@ static void pppol2tp_session_close(struct l2tp_session *session) */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -509,6 +502,7 @@ static int pppol2tp_release(struct socket *sock) /* Purge any queued data */ if (session != NULL) { + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); sock_put(sk); } -- cgit v1.2.3 From 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:29 +0000 Subject: flow_keys: include thoff into flow_keys for later usage In skb_flow_dissect(), we perform a dissection of a skbuff. Since we're doing the work here anyway, also store thoff for a later usage, e.g. in the BPF filter. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/flow_keys.h | 1 + net/core/flow_dissector.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 80461c1ae9ef..bb8271d487b7 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -9,6 +9,7 @@ struct flow_keys { __be32 ports; __be16 port16[2]; }; + u16 thoff; u8 ip_proto; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9d4c7201400d..e187bf06d673 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -140,6 +140,8 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); -- cgit v1.2.3 From 283951f95b067877ca5ea77afaa212bb1e0507b5 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Tue, 19 Mar 2013 08:19:29 +0000 Subject: ipconfig: Fix newline handling in log message. When using ipconfig the logs currently look like: Single name server: [ 3.467270] IP-Config: Complete: [ 3.470613] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.480670] host=infigo-1, domain=, nis-domain=(none) [ 3.486166] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.492910] nameserver0=172.16.42.1[ 3.496853] ALSA device list: Three name servers: [ 3.496949] IP-Config: Complete: [ 3.500293] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.510367] host=infigo-1, domain=, nis-domain=(none) [ 3.515864] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.522635] nameserver0=172.16.42.1, nameserver1=172.16.42.100 [ 3.529149] , nameserver2=172.16.42.200 Fix newline handling for these cases Signed-off-by: Martin Fuzzey Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 98cbc6877019..bf6c5cf31aed 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.2.3 From 896ee0eee6261e30c3623be931c3f621428947df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Mar 2013 05:19:24 +0000 Subject: net/irda: add missing error path release_sock call This makes sure that release_sock is called for all error conditions in irda_getsockopt. Signed-off-by: Kees Cook Reported-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/irda/af_irda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d07e3a626446..d28e7f014cc6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2583,8 +2583,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; -- cgit v1.2.3 From ce1eadda6badef9e4e3460097ede674fca47383d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2013 20:26:57 +0100 Subject: cfg80211: fix wdev tracing crash Arend reported a crash in tracing if the driver returns an ERR_PTR() value from the add_virtual_intf() callback. This is due to the tracing then still attempting to dereference the "pointer", fix this by using IS_ERR_OR_NULL(). Reported-by: Arend van Spriel Tested-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6847d043edea..7586de77a2f8 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -27,7 +27,8 @@ #define WIPHY_PR_ARG __entry->wiphy_name #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) +#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \ + ? wdev->identifier : 0) #define WDEV_PR_FMT "wdev(%u)" #define WDEV_PR_ARG (__entry->id) -- cgit v1.2.3 From 8b305780ed0c49a49c6bd58a4372fd6b22a5a71e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Mar 2013 22:25:17 +0100 Subject: mac80211: fix virtual monitor interface locking The virtual monitor interface has a locking issue, it calls into the channel context code with the iflist mutex held which isn't allowed since it is usually acquired the other way around. The mutex is still required for the interface iteration, but need not be held across the channel calls. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index baaa8608e52d..3bfe2612c8c2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -349,21 +349,19 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata) static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return 0; - mutex_lock(&local->iflist_mtx); + ASSERT_RTNL(); if (local->monitor_sdata) - goto out_unlock; + return 0; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL); - if (!sdata) { - ret = -ENOMEM; - goto out_unlock; - } + if (!sdata) + return -ENOMEM; /* set up data */ sdata->local = local; @@ -377,13 +375,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (WARN_ON(ret)) { /* ok .. stupid driver, it asked for this! */ kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_check_queues(sdata); if (ret) { kfree(sdata); - goto out_unlock; + return ret; } ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef, @@ -391,13 +389,14 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) if (ret) { drv_remove_interface(local, sdata); kfree(sdata); - goto out_unlock; + return ret; } + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); - out_unlock: mutex_unlock(&local->iflist_mtx); - return ret; + + return 0; } static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) @@ -407,14 +406,20 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)) return; + ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); sdata = rcu_dereference_protected(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx)); - if (!sdata) - goto out_unlock; + if (!sdata) { + mutex_unlock(&local->iflist_mtx); + return; + } rcu_assign_pointer(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + synchronize_net(); ieee80211_vif_release_channel(sdata); @@ -422,8 +427,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) drv_remove_interface(local, sdata); kfree(sdata); - out_unlock: - mutex_unlock(&local->iflist_mtx); } /* -- cgit v1.2.3 From f4541d60a449afd40448b06496dcd510f505928e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Mar 2013 17:36:09 +0000 Subject: tcp: preserve ACK clocking in TSO A long standing problem with TSO is the fact that tcp_tso_should_defer() rearms the deferred timer, while it should not. Current code leads to following bad bursty behavior : 20:11:24.484333 IP A > B: . 297161:316921(19760) ack 1 win 119 20:11:24.484337 IP B > A: . ack 263721 win 1117 20:11:24.485086 IP B > A: . ack 265241 win 1117 20:11:24.485925 IP B > A: . ack 266761 win 1117 20:11:24.486759 IP B > A: . ack 268281 win 1117 20:11:24.487594 IP B > A: . ack 269801 win 1117 20:11:24.488430 IP B > A: . ack 271321 win 1117 20:11:24.489267 IP B > A: . ack 272841 win 1117 20:11:24.490104 IP B > A: . ack 274361 win 1117 20:11:24.490939 IP B > A: . ack 275881 win 1117 20:11:24.491775 IP B > A: . ack 277401 win 1117 20:11:24.491784 IP A > B: . 316921:332881(15960) ack 1 win 119 20:11:24.492620 IP B > A: . ack 278921 win 1117 20:11:24.493448 IP B > A: . ack 280441 win 1117 20:11:24.494286 IP B > A: . ack 281961 win 1117 20:11:24.495122 IP B > A: . ack 283481 win 1117 20:11:24.495958 IP B > A: . ack 285001 win 1117 20:11:24.496791 IP B > A: . ack 286521 win 1117 20:11:24.497628 IP B > A: . ack 288041 win 1117 20:11:24.498459 IP B > A: . ack 289561 win 1117 20:11:24.499296 IP B > A: . ack 291081 win 1117 20:11:24.500133 IP B > A: . ack 292601 win 1117 20:11:24.500970 IP B > A: . ack 294121 win 1117 20:11:24.501388 IP B > A: . ack 295641 win 1117 20:11:24.501398 IP A > B: . 332881:351881(19000) ack 1 win 119 While the expected behavior is more like : 20:19:49.259620 IP A > B: . 197601:202161(4560) ack 1 win 119 20:19:49.260446 IP B > A: . ack 154281 win 1212 20:19:49.261282 IP B > A: . ack 155801 win 1212 20:19:49.262125 IP B > A: . ack 157321 win 1212 20:19:49.262136 IP A > B: . 202161:206721(4560) ack 1 win 119 20:19:49.262958 IP B > A: . ack 158841 win 1212 20:19:49.263795 IP B > A: . ack 160361 win 1212 20:19:49.264628 IP B > A: . ack 161881 win 1212 20:19:49.264637 IP A > B: . 206721:211281(4560) ack 1 win 119 20:19:49.265465 IP B > A: . ack 163401 win 1212 20:19:49.265886 IP B > A: . ack 164921 win 1212 20:19:49.266722 IP B > A: . ack 166441 win 1212 20:19:49.266732 IP A > B: . 211281:215841(4560) ack 1 win 119 20:19:49.267559 IP B > A: . ack 167961 win 1212 20:19:49.268394 IP B > A: . ack 169481 win 1212 20:19:49.269232 IP B > A: . ack 171001 win 1212 20:19:49.269241 IP A > B: . 215841:221161(5320) ack 1 win 119 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Van Jacobson Cc: Neal Cardwell Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 817fbb396bc8..5d0b4387cba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1809,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; -- cgit v1.2.3 From f9f475292dbb0e7035fb6661d1524761ea0888d9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Mar 2013 15:04:07 +0100 Subject: cfg80211: always check for scan end on P2P device If a P2P device wdev is removed while it has a scan, then the scan completion might crash later as it is already freed by that time. To avoid the crash always check the scan completion when the P2P device is being removed for some reason. If the driver already canceled it, don't want and free it, otherwise warn and leak it to avoid later crashes. In order to do this, locking needs to be changed away from the rdev mutex (which can't always be guaranteed). For now, use the sched_scan_mtx instead, I'll rename it to just scan_mtx in a later patch. Signed-off-by: Johannes Berg --- net/wireless/core.c | 64 ++++++++++++++++++++++++++++++++++++------------- net/wireless/core.h | 3 +++ net/wireless/nl80211.c | 52 +++++++++++++++++++++------------------- net/wireless/scan.c | 8 ++++--- net/wireless/sme.c | 6 +++-- net/wireless/wext-sme.c | 6 +++++ 6 files changed, 92 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 922002105062..11743d48cbc2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -212,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) rdev_rfkill_poll(rdev); } +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + lockdep_assert_held(&rdev->devlist_mtx); + lockdep_assert_held(&rdev->sched_scan_mtx); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) + return; + + if (!wdev->p2p_started) + return; + + rdev_stop_p2p_device(rdev, wdev); + wdev->p2p_started = false; + + rdev->opencount--; + + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + bool busy = work_busy(&rdev->scan_done_wk); + + /* + * If the work isn't pending or running (in which case it would + * be waiting for the lock we hold) the driver didn't properly + * cancel the scan when the interface was removed. In this case + * warn and leak the scan request object to not crash later. + */ + WARN_ON(!busy); + + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !busy); + } +} + static int cfg80211_rfkill_set_block(void *data, bool blocked) { struct cfg80211_registered_device *rdev = data; @@ -221,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; rtnl_lock(); - mutex_lock(&rdev->devlist_mtx); + + /* read-only iteration need not hold the devlist_mtx */ list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { @@ -231,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + /* but this requires it */ + mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); + mutex_unlock(&rdev->devlist_mtx); break; default: break; } } - mutex_unlock(&rdev->devlist_mtx); rtnl_unlock(); return 0; @@ -745,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work) wdev = container_of(work, struct wireless_dev, cleanup_work); rdev = wiphy_to_dev(wdev->wiphy); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } - cfg80211_unlock_rdev(rdev); - - mutex_lock(&rdev->sched_scan_mtx); - if (WARN_ON(rdev->sched_scan_req && rdev->sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); @@ -781,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) return; mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - if (!wdev->p2p_started) - break; - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - rdev->opencount--; + cfg80211_stop_p2p_device(rdev, wdev); break; default: WARN_ON_ONCE(1); break; } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -937,6 +965,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -968,6 +997,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index 3aec0e429d8a..5845c2b37aa8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -503,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d44ab216c0ec..58e13a8c95f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4702,14 +4702,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req) - return -EBUSY; + mutex_lock(&rdev->sched_scan_mtx); + if (rdev->scan_req) { + err = -EBUSY; + goto unlock; + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); - if (!n_channels) - return -EINVAL; + if (!n_channels) { + err = -EINVAL; + goto unlock; + } } else { enum ieee80211_band band; n_channels = 0; @@ -4723,23 +4728,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) - return -EINVAL; + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto unlock; + } if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) - return -EINVAL; + if (ie_len > wiphy->max_scan_ie_len) { + err = -EINVAL; + goto unlock; + } request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) - return -ENOMEM; + if (!request) { + err = -ENOMEM; + goto unlock; + } if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -4876,6 +4887,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } + unlock: + mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -7749,20 +7762,9 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - if (!wdev->p2p_started) - return 0; - - rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } + mutex_lock(&rdev->sched_scan_mtx); + cfg80211_stop_p2p_device(rdev, wdev); + mutex_unlock(&rdev->sched_scan_mtx); return 0; } @@ -8486,7 +8488,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (WARN_ON(!req)) return 0; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e93bd31d23bb..fd99ea495b7e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - ASSERT_RDEV_LOCK(rdev); + lockdep_assert_held(&rdev->sched_scan_mtx); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->sched_scan_mtx); ___cfg80211_scan_done(rdev, false); - cfg80211_unlock_rdev(rdev); + mutex_unlock(&rdev->sched_scan_mtx); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -1062,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); + mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1168,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: + mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); cfg80211_unlock_rdev(rdev); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f432bd3755b1..09d994d192ff 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -85,6 +85,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); + lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -320,11 +321,9 @@ void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); wdev_lock(wdev); __cfg80211_sme_scan_done(dev); wdev_unlock(wdev); - mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx); } void cfg80211_sme_rx_auth(struct net_device *dev, @@ -924,9 +923,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int err; mutex_lock(&rdev->devlist_mtx); + /* might request scan - scan_mtx -> wdev_mtx dependency */ + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fb9622f6d99c..e79cb5c0655a 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; @@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); return err; -- cgit v1.2.3 From 370bd005937c0e00f9104a602f9fe1dd6b21b54b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 19 Mar 2013 17:50:50 -0700 Subject: mac80211: Don't restart sta-timer if not associated. I found another crash when deleting lots of virtual stations in a congested environment. I think the problem is that the ieee80211_mlme_notify_scan_completed could call ieee80211_restart_sta_timer for a stopped interface that was about to be deleted. With the following patch I am unable to reproduce the crash. Signed-off-by: Ben Greear [move check, also make the same change in mesh] Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 ++- net/mac80211/mlme.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 29ce2aa87e7b..4749b3858695 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1060,7 +1060,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif)) + if (ieee80211_vif_is_mesh(&sdata->vif) && + ieee80211_sdata_running(sdata)) ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 141577412d84..82cc30318a86 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3608,8 +3608,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) /* Restart STA timers */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_restart_sta_timer(sdata); + } rcu_read_unlock(); } -- cgit v1.2.3 From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2013 14:38:28 +0000 Subject: net: remove a WARN_ON() in net_enable_timestamp() The WARN_ON(in_interrupt()) in net_enable_timestamp() can get false positive, in socket clone path, run from softirq context : [ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80() [ 3641.668811] Call Trace: [ 3641.671254] [] warn_slowpath_common+0x87/0xc0 [ 3641.677871] [] warn_slowpath_null+0x1a/0x20 [ 3641.683683] [] net_enable_timestamp+0x7b/0x80 [ 3641.689668] [] sk_clone_lock+0x425/0x450 [ 3641.695222] [] inet_csk_clone_lock+0x16/0x170 [ 3641.701213] [] tcp_create_openreq_child+0x29/0x820 [ 3641.707663] [] ? ipt_do_table+0x222/0x670 [ 3641.713354] [] tcp_v4_syn_recv_sock+0xab/0x3d0 [ 3641.719425] [] tcp_check_req+0x3da/0x530 [ 3641.724979] [] ? inet_hashinfo_init+0x60/0x80 [ 3641.730964] [] ? tcp_v4_rcv+0x79f/0xbe0 [ 3641.736430] [] tcp_v4_do_rcv+0x38d/0x4f0 [ 3641.741985] [] tcp_v4_rcv+0xa7a/0xbe0 Its safe at this point because the parent socket owns a reference on the netstamp_needed, so we cant have a 0 -> 1 transition, which requires to lock a mutex. Instead of refining the check, lets remove it, as all known callers are safe. If it ever changes in the future, static_key_slow_inc() will complain anyway. Reported-by: Laurent Chavey Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d540ced1f6c6..b13e5c766c11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1545,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); -- cgit v1.2.3 From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2013 19:14:07 +0000 Subject: 8021q: fix a potential use-after-free vlan_vid_del() could possibly free ->vlan_info after a RCU grace period, however, we may still refer to the freed memory area by 'grp' pointer. Found by code inspection. This patch moves vlan_vid_del() as behind as possible. Cc: Patrick McHardy Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a18714469bf7..85addcd9372b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -- cgit v1.2.3 From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Sat, 23 Mar 2013 02:27:50 +0000 Subject: bridge: fix crash when set mac address of br interface When I tried to set mac address of a bridge interface to a mac address which already learned on this bridge, I got system hang. The cause is straight forward: function br_fdb_change_mac_address calls fdb_insert with NULL source nbp. Then an fdb lookup is performed. If an fdb entry is found and it's local, it's OK. But if it's not local, source is dereferenced for printk without NULL check. Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c91c0f0..bab338e6270d 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } -- cgit v1.2.3 From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 24 Mar 2013 10:42:25 +0000 Subject: tcp: undo spurious timeout after SACK reneging On SACK reneging the sender immediately retransmits and forces a timeout but disables Eifel (undo). If the (buggy) receiver does not drop any packet this can trigger a false slow-start retransmit storm driven by the ACKs of the original packets. This can be detected with undo and TCP timestamps. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..3bd55bad230a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } -- cgit v1.2.3 From 799ef90c55e692e096d8bd9e5871b95264b1e9ba Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 20 Mar 2013 22:59:11 +0100 Subject: xfrm: Fix esn sequence number diff calculation in xfrm_replay_notify_esn() Commit 0017c0b "xfrm: Fix replay notification for esn." is off by one for the sequence number wrapped case as UINT_MAX is 0xffffffff, not 0x100000000. ;) Just calculate the diff like done everywhere else in the file. Signed-off-by: Mathias Krause Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index a3906737f49e..8dafe6d3c6e4 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -359,14 +359,12 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event) if (replay_esn->seq_hi == preplay_esn->seq_hi) seq_diff = replay_esn->seq - preplay_esn->seq; else - seq_diff = UINT_MAX - preplay_esn->seq - + replay_esn->seq; + seq_diff = ~preplay_esn->seq + replay_esn->seq + 1; if (replay_esn->oseq_hi == preplay_esn->oseq_hi) oseq_diff = replay_esn->oseq - preplay_esn->oseq; else - oseq_diff = UINT_MAX - preplay_esn->oseq - + replay_esn->oseq; + oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1; if (seq_diff < x->replay_maxdiff && oseq_diff < x->replay_maxdiff) { -- cgit v1.2.3 From 3fbd45ca8d1c98f3c2582ef8bc70ade42f70947b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Mar 2013 11:51:14 +0100 Subject: mac80211: fix remain-on-channel cancel crash If a ROC item is canceled just as it expires, the work struct may be scheduled while it is running (and waiting for the mutex). This results in it being run after being freed, which obviously crashes. To fix this don't free it when aborting is requested but instead mark it as "to be freed", which makes the work a no-op and allows freeing it outside. Cc: stable@vger.kernel.org [3.6+] Reported-by: Jouni Malinen Tested-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 ++++-- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/offchannel.c | 23 +++++++++++++++++------ 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb306814576a..a6893602f87a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2582,7 +2582,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, list_del(&dep->list); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); return 0; } @@ -2622,7 +2622,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, ieee80211_start_next_roc(local); mutex_unlock(&local->mtx); - ieee80211_roc_notify_destroy(found); + ieee80211_roc_notify_destroy(found, true); } else { /* work may be pending so use it all the time */ found->abort = true; @@ -2632,6 +2632,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, /* work will clean up etc */ flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); } return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 388580a1bada..7bdefd901f9d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -309,6 +309,7 @@ struct ieee80211_roc_work { struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; + bool to_be_freed; unsigned long hw_start_time; @@ -1347,7 +1348,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); void ieee80211_sw_roc_work(struct work_struct *work); void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cc79b4a2e821..430bd254e496 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -297,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) { struct ieee80211_roc_work *dep, *tmp; + if (WARN_ON(roc->to_be_freed)) + return; + /* was never transmitted */ if (roc->frame) { cfg80211_mgmt_tx_status(&roc->sdata->wdev, @@ -316,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep); + ieee80211_roc_notify_destroy(dep, true); - kfree(roc); + if (free) + kfree(roc); + else + roc->to_be_freed = true; } void ieee80211_sw_roc_work(struct work_struct *work) @@ -331,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work) mutex_lock(&local->mtx); + if (roc->to_be_freed) + goto out_unlock; + if (roc->abort) goto finish; @@ -370,7 +379,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, !roc->abort); if (started) { drv_flush(local, false); @@ -410,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work) list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); @@ -460,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) list_for_each_entry_safe(roc, tmp, &tmp_list, list) { if (local->ops->remain_on_channel) { list_del(&roc->list); - ieee80211_roc_notify_destroy(roc); + ieee80211_roc_notify_destroy(roc, true); } else { ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); /* work will clean up etc */ flush_delayed_work(&roc->work); + WARN_ON(!roc->to_be_freed); + kfree(roc); } } -- cgit v1.2.3 From 558724a5b2a73ad0c7638e21e8dffc419d267b6c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 01:28:18 +0000 Subject: netfilter: nfnetlink_queue: fix error return code in nfnetlink_queue_init() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 1cb48540f86a..42680b2baa11 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -1062,8 +1062,10 @@ static int __init nfnetlink_queue_init(void) #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) + proc_net_netfilter, &nfqnl_file_ops)) { + status = -ENOMEM; goto cleanup_subsys; + } #endif register_netdevice_notifier(&nfqnl_dev_notifier); -- cgit v1.2.3 From deadcfc3324410726cd6a663fb4fc46be595abe7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 23 Mar 2013 16:57:59 +0100 Subject: netfilter: nfnetlink_acct: return -EINVAL if object name is empty If user-space tries to create accounting object with an empty name, then return -EINVAL. Reported-by: Michael Zintakis Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_acct.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 589d686f0b4c..dc3fd5d44464 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); + if (strlen(acct_name) == 0) + return -EINVAL; list_for_each_entry(nfacct, &nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) -- cgit v1.2.3 From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Mar 2013 11:23:40 -0400 Subject: SUNRPC: Add barriers to ensure read ordering in rpc_wake_up_task_queue_locked We need to be careful when testing task->tk_waitqueue in rpc_wake_up_task_queue_locked, because it can be changed while we are holding the queue->lock. By adding appropriate memory barriers, we can ensure that it is safe to test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* -- cgit v1.2.3 From 382a103b2b528a3085cde4ac56fc69d92a828b72 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Mar 2013 22:30:09 +0100 Subject: mac80211: fix idle handling sequence Corey Richardson reported that my idle handling cleanup (commit fd0f979a1b, "mac80211: simplify idle handling") broke ath9k_htc. The reason appears to be that it wants to go out of idle before switching channels. To fix it, reimplement that sequence. Reported-by: Corey Richardson Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 17 ++++++++++++++--- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 78c0d90dd641..931be419ab5a 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -63,6 +63,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; + u32 changed; int err; lockdep_assert_held(&local->chanctx_mtx); @@ -76,6 +77,13 @@ ieee80211_new_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; + /* acquire mutex to prevent idle from changing */ + mutex_lock(&local->mtx); + /* turn idle off *before* setting channel -- some drivers need that */ + changed = ieee80211_idle_off(local); + if (changed) + ieee80211_hw_config(local, changed); + if (!local->use_chanctx) { local->_oper_channel_type = cfg80211_get_chandef_type(chandef); @@ -85,14 +93,17 @@ ieee80211_new_chanctx(struct ieee80211_local *local, err = drv_add_chanctx(local, ctx); if (err) { kfree(ctx); - return ERR_PTR(err); + ctx = ERR_PTR(err); + + ieee80211_recalc_idle(local); + goto out; } } + /* and keep the mutex held until the new chanctx is on the list */ list_add_rcu(&ctx->list, &local->chanctx_list); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); + out: mutex_unlock(&local->mtx); return ctx; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7bdefd901f9d..5672533a0832 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1362,6 +1362,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); +u32 ieee80211_idle_off(struct ieee80211_local *local); void ieee80211_recalc_idle(struct ieee80211_local *local); void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3bfe2612c8c2..58150f877ec3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -static u32 ieee80211_idle_off(struct ieee80211_local *local) +u32 ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; -- cgit v1.2.3 From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163ec..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.2.3 From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Tue, 26 Mar 2013 01:52:45 +0800 Subject: ipv6: fix bad free of addrconf_init_net Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e615f902..26512250e095 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.2.3 From 39a352a5b5896403ad4ce842a9bc3845a01c02cd Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 26 Mar 2013 14:35:57 +0100 Subject: NFC: llcp: Keep the connected socket parent pointer alive And avoid decreasing the ack log twice when dequeueing connected LLCP sockets. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index e488e440186a..8f025746f337 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -270,7 +270,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } if (sk->sk_state == LLCP_CONNECTED || !newsock) { - nfc_llcp_accept_unlink(sk); + list_del_init(&lsk->accept_queue); + sock_put(sk); + if (newsock) sock_graft(sk, newsock); -- cgit v1.2.3 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL SCM_SCREDENTIALS should apply to write() syscalls only either source or destination socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong, and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Origionally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..971282b6f6a3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.3 From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99d..982141c15200 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -77,15 +77,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f7661..c929d9c1c4b6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.3 From a9341512c372fcc628dabc619898d910a06c54bc Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 26 Mar 2013 15:48:38 -0700 Subject: openvswitch: Preallocate reply skb in ovs_vport_cmd_set(). Allocation of the Netlink notification skb can potentially fail after changing vport configuration. In general, we try to avoid this by undoing any change we made but that is difficult for existing objects. This avoids the problem by preallocating the buffer (which is fixed size). Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a4b724708a1a..6980c3e6f066 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1593,10 +1593,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, return ERR_PTR(-ENOMEM); retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } + BUG_ON(retval < 0); + return skb; } @@ -1726,24 +1724,32 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) err = -EINVAL; + reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!reply) { + err = -ENOMEM; + goto exit_unlock; + } + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_unlock; + goto exit_free; + if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + rtnl_unlock(); + return 0; + +exit_free: + kfree_skb(reply); exit_unlock: rtnl_unlock(); return err; -- cgit v1.2.3 From d3e1101c9b75574e68380b5cb10c9395fd8855de Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Wed, 27 Mar 2013 20:41:17 +0800 Subject: openvswitch: correct an invalid BUG_ON table->count is uint32_t Signed-off-by: Hong Zhiguo Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e4215c73d..67a2b783fe70 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -795,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; - BUG_ON(table->count < 0); } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ -- cgit v1.2.3 From 5389090b59f7f72a30e25f5fd1fc560340543970 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 27 Mar 2013 03:57:10 +0000 Subject: netfilter: nf_conntrack: fix error return code Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in function nf_conntrack_standalone_init(). Signed-off-by: Wei Yongjun Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce401fd1c..fedee3943661 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -568,6 +568,7 @@ static int __init nf_conntrack_standalone_init(void) register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) { pr_err("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; goto out_sysctl; } #endif -- cgit v1.2.3 From fcca143d696092110ae1e361866576804fe887f3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 27 Mar 2013 03:22:45 +0000 Subject: rtnetlink: fix error return code in rtnl_link_fill() Fix to return a negative error code from the error handling case instead of 0(possible overwrite to 0 by ops->fill_xstats call), as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5fb8d7e47294..b65441da74ab 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) } if (ops->fill_info) { data = nla_nest_start(skb, IFLA_INFO_DATA); - if (data == NULL) + if (data == NULL) { + err = -EMSGSIZE; goto err_cancel_link; + } err = ops->fill_info(skb, dev); if (err < 0) goto err_cancel_data; -- cgit v1.2.3 From ea872d7712528ad991bdabb63515bc00ee10993e Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Wed, 27 Mar 2013 05:41:59 +0000 Subject: sch: add missing u64 in psched_ratecfg_precompute() It seems that commit commit 292f1c7ff6cc10516076ceeea45ed11833bb71c7 Author: Jiri Pirko Date: Tue Feb 12 00:12:03 2013 +0000 sch: make htb_rate_cfg and functions around that generic adds little regression. Before: # tc qdisc add dev eth0 root handle 1: htb default ffff # tc class add dev eth0 classid 1:ffff htb rate 5Gbit # tc -s class show dev eth0 class htb 1:ffff root prio 0 rate 5000Mbit ceil 5000Mbit burst 625b cburst 625b Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) rate 0bit 0pps backlog 0b 0p requeues 0 lended: 0 borrowed: 0 giants: 0 tokens: 31 ctokens: 31 After: # tc qdisc add dev eth0 root handle 1: htb default ffff # tc class add dev eth0 classid 1:ffff htb rate 5Gbit # tc -s class show dev eth0 class htb 1:ffff root prio 0 rate 1544Mbit ceil 1544Mbit burst 625b cburst 625b Sent 5073 bytes 41 pkt (dropped 0, overlimits 0 requeues 0) rate 1976bit 2pps backlog 0b 0p requeues 0 lended: 41 borrowed: 0 giants: 0 tokens: 1802 ctokens: 1802 This probably due to lost u64 cast of rate parameter in psched_ratecfg_precompute() (net/sched/sch_generic.c). Signed-off-by: Sergey Popovich Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ffad48109a22..eac7e0ee23c1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -904,7 +904,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) u64 mult; int shift; - r->rate_bps = rate << 3; + r->rate_bps = (u64)rate << 3; r->shift = 0; r->mult = 1; /* -- cgit v1.2.3 From 1c4a154e5253687c51123956dfcee9e9dfa8542d Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 26 Mar 2013 08:13:34 +0000 Subject: ipv6: don't accept node local multicast traffic from the wire Erik Hugne's errata proposal (Errata ID: 3480) to RFC4291 has been verified: http://www.rfc-editor.org/errata_search.php?eid=3480 We have to check for pkt_type and loopback flag because either the packets are allowed to travel over the loopback interface (in which case pkt_type is PACKET_HOST and IFF_LOOPBACK flag is set) or they travel over a non-loopback interface back to us (in which case PACKET_TYPE is PACKET_LOOPBACK and IFF_LOOPBACK flag is not set). Cc: Erik Hugne Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e33fe0ab2568..2bab2aa59745 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -118,6 +118,18 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* RFC4291 Errata ID: 3480 + * Interface-Local scope spans only a single interface on a + * node and is useful only for loopback transmission of + * multicast. Packets with interface-local scope received + * from another node must be discarded. + */ + if (!(skb->pkt_type == PACKET_LOOPBACK || + dev->flags & IFF_LOOPBACK) && + ipv6_addr_is_multicast(&hdr->daddr) && + IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) + goto err; + /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope * field contains the reserved value 0; if such a packet is received, it -- cgit v1.2.3 From 50eab0503a7579ada512e4968738b7c9737cf36e Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 27 Mar 2013 23:42:41 +0000 Subject: net: fix the use of this_cpu_ptr flush_tasklet is not percpu var, and percpu is percpu var, and this_cpu_ptr(&info->cache->percpu->flush_tasklet) is not equal to &this_cpu_ptr(info->cache->percpu)->flush_tasklet 1f743b076(use this_cpu_ptr per-cpu helper) introduced this bug. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/core/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index c56ea6f7f6c7..2bfd081c59f7 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -328,7 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) struct flow_flush_info *info = data; struct tasklet_struct *tasklet; - tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } -- cgit v1.2.3 From a561cf7edf9863198bfccecfc5cfe26d951ebd20 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 27 Mar 2013 23:13:26 +0000 Subject: net: core: Remove redundant call to 'nf_reset' in 'dev_forward_skb' 'nf_reset' is called just prior calling 'netif_rx'. No need to call it twice. Reported-by: Igor Michailov Signed-off-by: Shmulik Ladkani Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b13e5c766c11..6591440cc03d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1624,7 +1624,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_orphan(skb); - nf_reset(skb); if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); -- cgit v1.2.3 From cd68ddd4c29ab523440299f24ff2417fe7a0dca6 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 28 Mar 2013 13:52:00 +0000 Subject: net: fq_codel: Fix off-by-one error Currently, we hold a max of sch->limit -1 number of packets instead of sch->limit packets. Fix this off-by-one error. Signed-off-by: Vijay Subramanian Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4e606fcb2534..55786283a3df 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - if (++sch->q.qlen < sch->limit) + if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; q->drop_overlimit++; -- cgit v1.2.3 From 00cfec37484761a44a3b6f4675a54caa618210ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Mar 2013 03:01:22 +0000 Subject: net: add a synchronize_net() in netdev_rx_handler_unregister() commit 35d48903e97819 (bonding: fix rx_handler locking) added a race in bonding driver, reported by Steven Rostedt who did a very good diagnosis : I'm currently debugging a crash in an old 3.0-rt kernel that one of our customers is seeing. The bug happens with a stress test that loads and unloads the bonding module in a loop (I don't know all the details as I'm not the one that is directly interacting with the customer). But the bug looks to be something that may still be present and possibly present in mainline too. It will just be much harder to trigger it in mainline. In -rt, interrupts are threads, and can schedule in and out just like any other thread. Note, mainline now supports interrupt threads so this may be easily reproducible in mainline as well. I don't have the ability to tell the customer to try mainline or other kernels, so my hands are somewhat tied to what I can do. But according to a core dump, I tracked down that the eth irq thread crashed in bond_handle_frame() here: slave = bond_slave_get_rcu(skb->dev); bond = slave->bond; <--- BUG the slave returned was NULL and accessing slave->bond caused a NULL pointer dereference. Looking at the code that unregisters the handler: void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } Which is basically: dev->rx_handler = NULL; dev->rx_handler_data = NULL; And looking at __netif_receive_skb() we have: rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } switch (rx_handler(&skb)) { My question to all of you is, what stops this interrupt from happening while the bonding module is unloading? What happens if the interrupt triggers and we have this: CPU0 CPU1 ---- ---- rx_handler = skb->dev->rx_handler netdev_rx_handler_unregister() { dev->rx_handler = NULL; dev->rx_handler_data = NULL; rx_handler() bond_handle_frame() { slave = skb->dev->rx_handler; bond = slave->bond; <-- NULL pointer dereference!!! What protection am I missing in the bond release handler that would prevent the above from happening? We can fix bug this in two ways. First is adding a test in bond_handle_frame() and others to check if rx_handler_data is NULL. A second way is adding a synchronize_net() in netdev_rx_handler_unregister() to make sure that a rcu protected reader has the guarantee to see a non NULL rx_handler_data. The second way is better as it avoids an extra test in fast path. Reported-by: Steven Rostedt Signed-off-by: Eric Dumazet Cc: Jiri Pirko Cc: Paul E. McKenney Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6591440cc03d..13e6447f0398 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3313,6 +3313,7 @@ int netdev_rx_handler_register(struct net_device *dev, if (dev->rx_handler) return -EBUSY; + /* Note: rx_handler_data must be set before rx_handler */ rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); rcu_assign_pointer(dev->rx_handler, rx_handler); @@ -3333,6 +3334,11 @@ void netdev_rx_handler_unregister(struct net_device *dev) ASSERT_RTNL(); RCU_INIT_POINTER(dev->rx_handler, NULL); + /* a reader seeing a non NULL rx_handler in a rcu_read_lock() + * section has a guarantee to see a non NULL rx_handler_data + * as well. + */ + synchronize_net(); RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -- cgit v1.2.3 From 90e0970f8788cef2c8f5183af5a98f4f94600faf Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Sat, 30 Mar 2013 21:20:14 +0400 Subject: cfg80211: sched_scan_mtx lock in cfg80211_conn_work() Introduced in f9f475292dbb0e7035fb6661d1524761ea0888d9 ("cfg80211: always check for scan end on P2P device") cfg80211_conn_scan() which requires sched_scan_mtx to be held can be called from cfg80211_conn_work(). Without this we are hitting multiple warnings like the following: WARNING: at net/wireless/sme.c:88 cfg80211_conn_scan+0x1dc/0x3a0 [cfg80211]() Hardware name: 0578A21 Modules linked in: ... Pid: 620, comm: kworker/3:1 Not tainted 3.9.0-rc4-next-20130328+ #326 Call Trace: [] warn_slowpath_common+0x72/0xa0 [] warn_slowpath_null+0x22/0x30 [] cfg80211_conn_scan+0x1dc/0x3a0 [cfg80211] [] cfg80211_conn_do_work+0x94/0x380 [cfg80211] [] cfg80211_conn_work+0xa2/0x130 [cfg80211] [] process_one_work+0x198/0x450 Signed-off-by: Artem Savkov Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 09d994d192ff..482c70e70127 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -224,6 +224,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_lock(); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); + mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); @@ -248,6 +249,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } + mutex_unlock(&rdev->sched_scan_mtx); mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); rtnl_unlock(); -- cgit v1.2.3 From f0f6ee1f70c4eaab9d52cf7d255df4bd89f8d1c2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 1 Apr 2013 03:01:32 +0000 Subject: cbq: incorrect processing of high limits currently cbq works incorrectly for limits > 10% real link bandwidth, and practically does not work for limits > 50% real link bandwidth. Below are results of experiments taken on 1 Gbit link In shaper | Actual Result -----------+--------------- 100M | 108 Mbps 200M | 244 Mbps 300M | 412 Mbps 500M | 893 Mbps This happen because of q->now changes incorrectly in cbq_dequeue(): when it is called before real end of packet transmitting, L2T is greater than real time delay, q_now gets an extra boost but never compensate it. To fix this problem we prevent change of q->now until its synchronization with real time. Signed-off-by: Vasily Averin Reviewed-by: Alexey Kuznetsov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 13aa47aa2ffb..1bc210ffcba2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch) cbq_update(q); if ((incr -= incr2) < 0) incr = 0; + q->now += incr; + } else { + if (now > q->now) + q->now = now; } - q->now += incr; q->now_rt = now; for (;;) { -- cgit v1.2.3 From 25fb6ca4ed9cad72f14f61629b68dc03c0d9713f Mon Sep 17 00:00:00 2001 From: Balakumaran Kannan Date: Tue, 2 Apr 2013 16:15:05 +0530 Subject: net IPv6 : Fix broken IPv6 routing table after loopback down-up IPv6 Routing table becomes broken once we do ifdown, ifup of the loopback(lo) interface. After down-up, routes of other interface's IPv6 addresses through 'lo' are lost. IPv6 addresses assigned to all interfaces are routed through 'lo' for internal communication. Once 'lo' is down, those routing entries are removed from routing table. But those removed entries are not being re-created properly when 'lo' is brought up. So IPv6 addresses of other interfaces becomes unreachable from the same machine. Also this breaks communication with other machines because of NDISC packet processing failure. This patch fixes this issue by reading all interface's IPv6 addresses and adding them to IPv6 routing table while bringing up 'lo'. ==Testing== Before applying the patch: $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ sudo ifdown lo $ sudo ifup lo $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ After applying the patch: $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ sudo ifdown lo $ sudo ifup lo $ route -A inet6 Kernel IPv6 routing table Destination Next Hop Flag Met Ref Use If 2000::20/128 :: U 256 0 0 eth0 fe80::/64 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo ::1/128 :: Un 0 1 0 lo 2000::20/128 :: Un 0 1 0 lo fe80::xxxx:xxxx:xxxx:xxxx/128 :: Un 0 1 0 lo ff00::/8 :: U 256 0 0 eth0 ::/0 :: !n -1 1 1 lo $ Signed-off-by: Balakumaran Kannan Signed-off-by: Maruthi Thotad Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 26512250e095..a459c4f5b769 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2529,6 +2529,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; + struct net_device *sp_dev; + struct inet6_ifaddr *sp_ifa; + struct rt6_info *sp_rt; /* ::1 */ @@ -2540,6 +2543,30 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); + + /* Add routes to other interface's IPv6 addresses */ + for_each_netdev(dev_net(dev), sp_dev) { + if (!strcmp(sp_dev->name, dev->name)) + continue; + + idev = __in6_dev_get(sp_dev); + if (!idev) + continue; + + read_lock_bh(&idev->lock); + list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { + + if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) + continue; + + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + + /* Failure cases are ignored */ + if (!IS_ERR(sp_rt)) + ip6_ins_rt(sp_rt); + } + read_unlock_bh(&idev->lock); + } } static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) -- cgit v1.2.3 From 990454b5a48babde44a23c0f22bae5523f4fdf13 Mon Sep 17 00:00:00 2001 From: Reilly Grant Date: Mon, 1 Apr 2013 11:41:52 -0700 Subject: VSOCK: Handle changes to the VMCI context ID. The VMCI context ID of a virtual machine may change at any time. There is a VMCI event which signals this but datagrams may be processed before this is handled. It is therefore necessary to be flexible about the destination context ID of any datagrams received. (It can be assumed to be correct because it is provided by the hypervisor.) The context ID on existing sockets should be updated to reflect how the hypervisor is currently referring to the system. Signed-off-by: Reilly Grant Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- net/vmw_vsock/vmci_transport.c | 31 ++++++++++++++++++++----------- net/vmw_vsock/vsock_addr.c | 10 ---------- net/vmw_vsock/vsock_addr.h | 2 -- 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ca511c4f388a..d8079daf1bde 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -207,7 +207,7 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) struct vsock_sock *vsk; list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) - if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + if (addr->svm_port == vsk->local_addr.svm_port) return sk_vsock(vsk); return NULL; @@ -220,8 +220,8 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, list_for_each_entry(vsk, vsock_connected_sockets(src, dst), connected_table) { - if (vsock_addr_equals_addr(src, &vsk->remote_addr) - && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) && + dst->svm_port == vsk->local_addr.svm_port) { return sk_vsock(vsk); } } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index a70ace83a153..1f6508e249ae 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -464,19 +464,16 @@ static struct sock *vmci_transport_get_pending( struct vsock_sock *vlistener; struct vsock_sock *vpending; struct sock *pending; + struct sockaddr_vm src; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); vlistener = vsock_sk(listener); list_for_each_entry(vpending, &vlistener->pending_links, pending_links) { - struct sockaddr_vm src; - struct sockaddr_vm dst; - - vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); - vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pkt->dst_port == vpending->local_addr.svm_port) { pending = sk_vsock(vpending); sock_hold(pending); goto found; @@ -739,10 +736,15 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ bh_lock_sock(sk); - if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) - vmci_trans(vsk)->notify_ops->handle_notify_pkt( - sk, pkt, true, &dst, &src, - &bh_process_pkt); + if (!sock_owned_by_user(sk)) { + /* The local context ID may be out of date, update it. */ + vsk->local_addr.svm_cid = dst.svm_cid; + + if (sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + } bh_unlock_sock(sk); @@ -902,6 +904,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) lock_sock(sk); + /* The local context ID may be out of date. */ + vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; + switch (sk->sk_state) { case SS_LISTEN: vmci_transport_recv_listen(sk, pkt); @@ -958,6 +963,10 @@ static int vmci_transport_recv_listen(struct sock *sk, pending = vmci_transport_get_pending(sk, pkt); if (pending) { lock_sock(pending); + + /* The local context ID may be out of date. */ + vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; + switch (pending->sk_state) { case SS_CONNECTING: err = vmci_transport_recv_connecting_server(sk, diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c index b7df1aea7c59..ec2611b4ea0e 100644 --- a/net/vmw_vsock/vsock_addr.c +++ b/net/vmw_vsock/vsock_addr.c @@ -64,16 +64,6 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, } EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other) -{ - return (addr->svm_cid == VMADDR_CID_ANY || - other->svm_cid == VMADDR_CID_ANY || - addr->svm_cid == other->svm_cid) && - addr->svm_port == other->svm_port; -} -EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); - int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr) { diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h index cdfbcefdf843..9ccd5316eac0 100644 --- a/net/vmw_vsock/vsock_addr.h +++ b/net/vmw_vsock/vsock_addr.h @@ -24,8 +24,6 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr); void vsock_addr_unbind(struct sockaddr_vm *addr); bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, const struct sockaddr_vm *other); -bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, - const struct sockaddr_vm *other); int vsock_addr_cast(const struct sockaddr *addr, size_t len, struct sockaddr_vm **out_addr); -- cgit v1.2.3 From 906b1c394d0906a154fbdc904ca506bceb515756 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 30 Mar 2013 10:23:12 +0000 Subject: netfilter: ip6t_NPT: Fix translation for non-multiple of 32 prefix lengths The bitmask used for the prefix mangling was being calculated incorrectly, leading to the wrong part of the address being replaced when the prefix length wasn't a multiple of 32. Signed-off-by: Matthias Schiffer Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 33608c610276..cb631143721c 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -57,7 +57,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, if (pfx_len - i >= 32) mask = 0; else - mask = htonl(~((1 << (pfx_len - i)) - 1)); + mask = htonl((1 << (i - pfx_len + 32)) - 1); idx = i / 32; addr->s6_addr32[idx] &= mask; -- cgit v1.2.3 From 4543fbefe6e06a9e40d9f2b28d688393a299f079 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 2 Apr 2013 17:10:07 -0400 Subject: net: count hw_addr syncs so that unsync works properly. A few drivers use dev_uc_sync/unsync to synchronize the address lists from master down to slave/lower devices. In some cases (bond/team) a single address list is synched down to multiple devices. At the time of unsync, we have a leak in these lower devices, because "synced" is treated as a boolean and the address will not be unsynced for anything after the first device/call. Treat "synced" as a count (same as refcount) and allow all unsync calls to work. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev_addr_lists.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8bfa95600e48..6151e903eef0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -210,9 +210,9 @@ struct netdev_hw_addr { #define NETDEV_HW_ADDR_T_SLAVE 3 #define NETDEV_HW_ADDR_T_UNICAST 4 #define NETDEV_HW_ADDR_T_MULTICAST 5 - bool synced; bool global_use; int refcount; + int synced; struct rcu_head rcu_head; }; diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bd2eb9d3e369..abdc9e6ef33e 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -37,7 +37,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->type = addr_type; ha->refcount = 1; ha->global_use = global; - ha->synced = false; + ha->synced = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; @@ -165,7 +165,7 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, addr_len, ha->type); if (err) break; - ha->synced = true; + ha->synced++; ha->refcount++; } else if (ha->refcount == 1) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); @@ -186,7 +186,7 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, if (ha->synced) { __hw_addr_del(to_list, ha->addr, addr_len, ha->type); - ha->synced = false; + ha->synced--; __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } -- cgit v1.2.3 From 25da0e3e9d3fb2b522bc2a598076735850310eb1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:13:35 +0000 Subject: Revert "af_unix: dont send SCM_CREDENTIAL when dest socket is NULL" This reverts commit 14134f6584212d585b310ce95428014b653dfaf6. The problem that the above patch was meant to address is that af_unix messages are not being coallesced because we are sending unnecesarry credentials. Not sending credentials in maybe_add_creds totally breaks unconnected unix domain sockets that wish to send credentails to other sockets. In practice this break some versions of udev because they receive a message and the sending uid is bogus so they drop the message. Reported-by: Sven Joachim Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a3..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - (other->sk_socket && - test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.3 From 0e82e7f6dfeec1013339612f74abc2cdd29d43d2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:14:47 +0000 Subject: af_unix: If we don't care about credentials coallesce all messages It was reported that the following LSB test case failed https://lsbbugs.linuxfoundation.org/attachment.cgi?id=2144 because we were not coallescing unix stream messages when the application was expecting us to. The problem was that the first send was before the socket was accepted and thus sock->sk_socket was NULL in maybe_add_creds, and the second send after the socket was accepted had a non-NULL value for sk->socket and thus we could tell the credentials were not needed so we did not bother. The unnecessary credentials on the first message cause unix_stream_recvmsg to start verifying that all messages had the same credentials before coallescing and then the coallescing failed because the second message had no credentials. Ignoring credentials when we don't care in unix_stream_recvmsg fixes a long standing pessimization which would fail to coallesce messages when reading from a unix stream socket if the senders were different even if we did not care about their credentials. I have tested this and verified that the in the LSB test case mentioned above that the messages do coallesce now, while the were failing to coallesce without this change. Reported-by: Karel Srot Reported-by: Ding Tianhong Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..2db702d82e7d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1993,7 +1993,7 @@ again: if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; -- cgit v1.2.3 From 34e2ed34a035de07277cca817fe8264324398141 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 08:33:00 +0000 Subject: net: ipv4: notify when address lifetime changes if userspace changes lifetime of address, send netlink notification and call notifier. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f678507bc829..96083b7a436b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -802,8 +802,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; - - set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); + ifa = ifa_existing; + set_ifa_lifetime(ifa, valid_lft, prefered_lft); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); + blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } -- cgit v1.2.3 From 124dff01afbdbff251f0385beca84ba1b9adda68 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 5 Apr 2013 20:42:05 +0200 Subject: netfilter: don't reset nf_trace in nf_reset() Commit 130549fe ("netfilter: reset nf_trace in nf_reset") added code to reset nf_trace in nf_reset(). This is wrong and unnecessary. nf_reset() is used in the following cases: - when passing packets up the the socket layer, at which point we want to release all netfilter references that might keep modules pinned while the packet is queued. nf_trace doesn't matter anymore at this point. - when encapsulating or decapsulating IPsec packets. We want to continue tracing these packets after IPsec processing. - when passing packets through virtual network devices. Only devices on that encapsulate in IPv4/v6 matter since otherwise nf_trace is not used anymore. Its not entirely clear whether those packets should be traced after that, however we've always done that. - when passing packets through virtual network devices that make the packet cross network namespace boundaries. This is the only cases where we clearly want to reset nf_trace and is also what the original patch intended to fix. Add a new function nf_reset_trace() and use it in dev_forward_skb() to fix this properly. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ net/core/dev.c | 1 + 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72b396751de7..b8292d8cc9fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2641,6 +2641,10 @@ static inline void nf_reset(struct sk_buff *skb) nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; #endif +} + +static inline void nf_reset_trace(struct sk_buff *skb) +{ #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) skb->nf_trace = 0; #endif diff --git a/net/core/dev.c b/net/core/dev.c index 13e6447f0398..e7d68ed8aafe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1639,6 +1639,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb->mark = 0; secpath_reset(skb); nf_reset(skb); + nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit v1.2.3 From a58e0be6f6b3eb2079b0b8fedc9df6fa86869f1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Mar 2013 12:52:59 -0400 Subject: SUNRPC: Remove extra xprt_put() While testing error cases where rpc_new_client() fails, I saw some oopses. If rpc_new_client() fails, it already invokes xprt_put(). Thus __rpc_clone_client() does not need to invoke it again. Introduced by commit 1b63a751 "SUNRPC: Refactor rpc_clone_client()" Fri Sep 14, 2012. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..9df26b785dc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -512,7 +512,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new = rpc_new_client(args, xprt); if (IS_ERR(new)) { err = PTR_ERR(new); - goto out_put; + goto out_err; } atomic_inc(&clnt->cl_count); @@ -525,8 +525,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_chatty = clnt->cl_chatty; return new; -out_put: - xprt_put(xprt); out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); -- cgit v1.2.3 From f05c124a70a4953a66acbd6d6c601ea1eb5d0fa7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 14:13:21 -0400 Subject: SUNRPC: Fix a potential memory leak in rpc_new_client If the call to rpciod_up() fails, we currently leak a reference to the struct rpc_xprt. As part of the fix, we also remove the redundant check for xprt!=NULL. This is already taken care of by the callers. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9df26b785dc7..d5f35f15af98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -304,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru err = rpciod_up(); if (err) goto out_no_rpciod; - err = -EINVAL; - if (!xprt) - goto out_no_xprt; + err = -EINVAL; if (args->version >= program->nrvers) goto out_err; version = program->version[args->version]; @@ -382,10 +380,9 @@ out_no_principal: out_no_stats: kfree(clnt); out_err: - xprt_put(xprt); -out_no_xprt: rpciod_down(); out_no_rpciod: + xprt_put(xprt); return ERR_PTR(err); } -- cgit v1.2.3 From 3a7b21eaf4fb3c971bdb47a98f570550ddfe4471 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 5 Apr 2013 08:13:30 +0000 Subject: netfilter: nf_ct_sip: don't drop packets with offsets pointing outside the packet Some Cisco phones create huge messages that are spread over multiple packets. After calculating the offset of the SIP body, it is validated to be within the packet and the packet is dropped otherwise. This breaks operation of these phones. Since connection tracking is supposed to be passive, just let those packets pass unmodified and untracked. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0e7d423324c3..e0c4373b4747 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1593,10 +1593,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, end += strlen("\r\n\r\n") + clen; msglen = origlen = end - dptr; - if (msglen > datalen) { - nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); - return NF_DROP; - } + if (msglen > datalen) + return NF_ACCEPT; ret = process_sip_msg(skb, ct, protoff, dataoff, &dptr, &msglen); -- cgit v1.2.3 From 50a75a8914539c5dcd441c5f54d237a666a426fd Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Sun, 7 Apr 2013 04:53:15 +0000 Subject: ipv6/tcp: Stop processing ICMPv6 redirect messages Tetja Rediske found that if the host receives an ICMPv6 redirect message after sending a SYN+ACK, the connection will be reset. He bisected it down to 093d04d (ipv6: Change skb->data before using icmpv6_notify() to propagate redirect), but the origin of the bug comes from ec18d9a26 (ipv6: Add redirect support to all protocol icmp error handlers.). The bug simply did not trigger prior to 093d04d, because skb->data did not point to the inner IP header and thus icmpv6_notify did not call the correct err_handler. This patch adds the missing "goto out;" in tcp_v6_err. After receiving an ICMPv6 Redirect, we should not continue processing the ICMP in tcp_v6_err, as this may trigger the removal of request-socks or setting sk_err(_soft). Reported-by: Tetja Rediske Signed-off-by: Christoph Paasch Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f6d629fd6aee..46a5be85be87 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -386,6 +386,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { -- cgit v1.2.3 From 9b3e617f3df53822345a8573b6d358f6b9e5ed87 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:47 +0000 Subject: atm: update msg_namelen in vcc_recvmsg() The current code does not fill the msg_name member in case it is set. It also does not set the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix that by simply setting msg_namelen to 0 as obviously nobody cared about vcc_recvmsg() not filling the msg_name in case it was set. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/atm/common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 7b491006eaf4..737bef59ce89 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; + msg->msg_namelen = 0; + if (sock->state != SS_CONNECTED) return -ENOTCONN; -- cgit v1.2.3 From ef3313e84acbf349caecae942ab3ab731471f1a1 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:48 +0000 Subject: ax25: fix info leak via msg_name in ax25_recvmsg() When msg_namelen is non-zero the sockaddr info gets filled out, as requested, but the code fails to initialize the padding bytes of struct sockaddr_ax25 inserted by the compiler for alignment. Additionally the msg_namelen value is updated to sizeof(struct full_sockaddr_ax25) but is not always filled up to this size. Both issues lead to the fact that the code will leak uninitialized kernel stack bytes in net/socket.c. Fix both issues by initializing the memory with memset(0). Cc: Ralf Baechle Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7b11f8bc5071..e277e38f736b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1642,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, ax25_address src; const unsigned char *mac = skb_mac_header(skb); + memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, &digi, NULL, NULL); sax->sax25_family = AF_AX25; -- cgit v1.2.3 From 4683f42fde3977bdb4e8a09622788cc8b5313778 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:49 +0000 Subject: Bluetooth: fix possible info leak in bt_sock_recvmsg() In case the socket is already shutting down, bt_sock_recvmsg() returns with 0 without updating msg_namelen leading to net/socket.c leaking the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix this by moving the msg_namelen assignment in front of the shutdown test. Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/bluetooth/af_bluetooth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b35a78..0d1b08cc76e1 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -230,6 +230,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -237,8 +239,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From e11e0455c0d7d3d62276a0c55d9dfbc16779d691 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:50 +0000 Subject: Bluetooth: RFCOMM - Fix missing msg_namelen update in rfcomm_sock_recvmsg() If RFCOMM_DEFER_SETUP is set in the flags, rfcomm_sock_recvmsg() returns early with 0 without updating the possibly set msg_namelen member. This, in turn, leads to a 128 byte kernel stack leak in net/socket.c. Fix this by updating msg_namelen in this case. For all other cases it will be handled in bt_sock_stream_recvmsg(). Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c23bae86263b..7c9224bcce17 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -608,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); + msg->msg_namelen = 0; return 0; } -- cgit v1.2.3 From c8c499175f7d295ef867335bceb9a76a2c3cdc38 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:51 +0000 Subject: Bluetooth: SCO - Fix missing msg_namelen update in sco_sock_recvmsg() If the socket is in state BT_CONNECT2 and BT_SK_DEFER_SETUP is set in the flags, sco_sock_recvmsg() returns early with 0 without updating the possibly set msg_namelen member. This, in turn, leads to a 128 byte kernel stack leak in net/socket.c. Fix this by updating msg_namelen in this case. For all other cases it will be handled in bt_sock_recvmsg(). Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index fad0302bdb32..fb6192c9812e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -665,6 +665,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { hci_conn_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; + msg->msg_namelen = 0; release_sock(sk); return 0; -- cgit v1.2.3 From 2d6fbfe733f35c6b355c216644e08e149c61b271 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:52 +0000 Subject: caif: Fix missing msg_namelen update in caif_seqpkt_recvmsg() The current code does not fill the msg_name member in case it is set. It also does not set the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix that by simply setting msg_namelen to 0 as obviously nobody cared about caif_seqpkt_recvmsg() not filling the msg_name in case it was set. Cc: Sjur Braendeland Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 095259f83902..ff2ff3ce6965 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; + m->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; -- cgit v1.2.3 From 5ae94c0d2f0bed41d6718be743985d61b7f5c47d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:53 +0000 Subject: irda: Fix missing msg_namelen update in irda_recvmsg_dgram() The current code does not fill the msg_name member in case it is set. It also does not set the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix that by simply setting msg_namelen to 0 as obviously nobody cared about irda_recvmsg_dgram() not filling the msg_name in case it was set. Cc: Samuel Ortiz Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/irda/af_irda.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d28e7f014cc6..e493b3397ae3 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1386,6 +1386,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + msg->msg_namelen = 0; + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) -- cgit v1.2.3 From a5598bd9c087dc0efc250a5221e5d0e6f584ee88 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:54 +0000 Subject: iucv: Fix missing msg_namelen update in iucv_sock_recvmsg() The current code does not fill the msg_name member in case it is set. It also does not set the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix that by simply setting msg_namelen to 0 as obviously nobody cared about iucv_sock_recvmsg() not filling the msg_name in case it was set. Cc: Ursula Braun Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe4284..bf6935820001 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1328,6 +1328,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb, *rskb, *cskb; int err = 0; + msg->msg_namelen = 0; + if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && -- cgit v1.2.3 From b860d3cc62877fad02863e2a08efff69a19382d2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:55 +0000 Subject: l2tp: fix info leak in l2tp_ip6_recvmsg() The L2TP code for IPv6 fails to initialize the l2tp_conn_id member of struct sockaddr_l2tpip6 and therefore leaks four bytes kernel stack in l2tp_ip6_recvmsg() in case msg_name is set. Initialize l2tp_conn_id with 0 to avoid the info leak. Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index c74f5a91ff6a..b8a6039314e8 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -690,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, lsa->l2tp_addr = ipv6_hdr(skb)->saddr; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = IP6CB(skb)->iif; } -- cgit v1.2.3 From c77a4b9cffb6215a15196ec499490d116dfad181 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:56 +0000 Subject: llc: Fix missing msg_namelen update in llc_ui_recvmsg() For stream sockets the code misses to update the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. The msg_namelen update is also missing for datagram sockets in case the socket is shutting down during receive. Fix both issues by setting msg_namelen to 0 early. It will be updated later if we're going to fill the msg_name member. Cc: Arnaldo Carvalho de Melo Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/llc/af_llc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 88709882c464..48aaa89253e0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, int target; /* Read at least this many bytes */ long timeo; + msg->msg_namelen = 0; + lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) -- cgit v1.2.3 From 3ce5efad47b62c57a4f5c54248347085a750ce0e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:57 +0000 Subject: netrom: fix info leak via msg_name in nr_recvmsg() In case msg_name is set the sockaddr info gets filled out, as requested, but the code fails to initialize the padding bytes of struct sockaddr_ax25 inserted by the compiler for alignment. Also the sax25_ndigis member does not get assigned, leaking four more bytes. Both issues lead to the fact that the code will leak uninitialized kernel stack bytes in net/socket.c. Fix both issues by initializing the memory with memset(0). Cc: Ralf Baechle Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d1fa1d9ffd2e..7fcb307dea47 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,6 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { + memset(sax, 0, sizeof(sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); -- cgit v1.2.3 From d26d6504f23e803824e8ebd14e52d4fc0a0b09cb Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:58 +0000 Subject: NFC: llcp: fix info leaks via msg_name in llcp_sock_recvmsg() The code in llcp_sock_recvmsg() does not initialize all the members of struct sockaddr_nfc_llcp when filling the sockaddr info. Nor does it initialize the padding bytes of the structure inserted by the compiler for alignment. Also, if the socket is in state LLCP_CLOSED or is shutting down during receive the msg_namelen member is not updated to 0 while otherwise returning with 0, i.e. "success". The msg_namelen update is also missing for stream and seqpacket sockets which don't fill the sockaddr info. Both issues lead to the fact that the code will leak uninitialized kernel stack bytes in net/socket.c. Fix the first issue by initializing the memory used for sockaddr info with memset(0). Fix the second one by setting msg_namelen to 0 early. It will be updated later if we're going to fill the msg_name member. Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/nfc/llcp/sock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 8f025746f337..6c94447ec414 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -646,6 +646,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && @@ -691,6 +693,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap); + memset(sockaddr, 0, sizeof(*sockaddr)); sockaddr->sa_family = AF_NFC; sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP; sockaddr->dsap = ui_cb->dsap; -- cgit v1.2.3 From 4a184233f21645cf0b719366210ed445d1024d72 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:51:59 +0000 Subject: rose: fix info leak via msg_name in rose_recvmsg() The code in rose_recvmsg() does not initialize all of the members of struct sockaddr_rose/full_sockaddr_rose when filling the sockaddr info. Nor does it initialize the padding bytes of the structure inserted by the compiler for alignment. This will lead to leaking uninitialized kernel stack bytes in net/socket.c. Fix the issue by initializing the memory used for sockaddr info with memset(0). Cc: Ralf Baechle Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/rose/af_rose.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf68e6e4054a..9c8347451597 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (srose != NULL) { + memset(srose, 0, msg->msg_namelen); srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; -- cgit v1.2.3 From 60085c3d009b0df252547adb336d1ccca5ce52ec Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:52:00 +0000 Subject: tipc: fix info leaks via msg_name in recv_msg/recv_stream The code in set_orig_addr() does not initialize all of the members of struct sockaddr_tipc when filling the sockaddr info -- namely the union is only partly filled. This will make recv_msg() and recv_stream() -- the only users of this function -- leak kernel stack memory as the msg_name member is a local variable in net/socket.c. Additionally to that both recv_msg() and recv_stream() fail to update the msg_namelen member to 0 while otherwise returning with 0, i.e. "success". This is the case for, e.g., non-blocking sockets. This will lead to a 128 byte kernel stack leak in net/socket.c. Fix the first issue by initializing the memory of the union with memset(0). Fix the second one by setting msg_namelen to 0 early as it will be updated later if we're going to fill the msg_name member. Cc: Jon Maloy Cc: Allan Stephens Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/tipc/socket.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a9622b6cd916..515ce38e4f4c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -790,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) if (addr) { addr->family = AF_TIPC; addr->addrtype = TIPC_ADDR_ID; + memset(&addr->addr, 0, sizeof(addr->addr)); addr->addr.id.ref = msg_origport(msg); addr->addr.id.node = msg_orignode(msg); addr->addr.name.domain = 0; /* could leave uninitialized */ @@ -904,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1013,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } + /* will be updated in set_orig_addr() if needed */ + m->msg_namelen = 0; + target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -- cgit v1.2.3 From 680d04e0ba7e926233e3b9cee59125ce181f66ba Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:52:01 +0000 Subject: VSOCK: vmci - fix possible info leak in vmci_transport_dgram_dequeue() In case we received no data on the call to skb_recv_datagram(), i.e. skb->data is NULL, vmci_transport_dgram_dequeue() will return with 0 without updating msg_namelen leading to net/socket.c leaking the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Fix this by moving the already existing msg_namelen assignment a few lines above. Cc: Andy King Cc: Dmitry Torokhov Cc: George Zhang Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 1f6508e249ae..5e04d3d96285 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1736,6 +1736,8 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; + msg->msg_namelen = 0; + /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); @@ -1768,7 +1770,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (err) goto out; - msg->msg_namelen = 0; if (msg->msg_name) { struct sockaddr_vm *vm_addr; -- cgit v1.2.3 From d5e0d0f607a7a029c6563a0470d88255c89a8d11 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 7 Apr 2013 01:52:02 +0000 Subject: VSOCK: Fix missing msg_namelen update in vsock_stream_recvmsg() The code misses to update the msg_namelen member to 0 and therefore makes net/socket.c leak the local, uninitialized sockaddr_storage variable to userland -- 128 bytes of kernel stack memory. Cc: Andy King Cc: Dmitry Torokhov Cc: George Zhang Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d8079daf1bde..7f93e2a42d7a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1670,6 +1670,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; + msg->msg_namelen = 0; + lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { -- cgit v1.2.3 From 62a40a15554d6924a58b3e9f8756e0d683dc9c0c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Apr 2013 11:52:34 +0200 Subject: mac80211: fix LED in idle handling feng xiangjun reports that my commit 382a103b2b528a3085cde4ac56fc69d92a828b72 Author: Johannes Berg Date: Fri Mar 22 22:30:09 2013 +0100 mac80211: fix idle handling sequence broke the wireless status LED. The reason is that we now call ieee80211_idle_off() when the channel context is assigned, and that doesn't recalculate the LED state. Fix this by making that function a wrapper around most of idle recalculation while forcing active. Reported-by: feng xiangjun Tested-by: feng xiangjun Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 58150f877ec3..9ed49ad0380f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -u32 ieee80211_idle_off(struct ieee80211_local *local) +static u32 __ieee80211_idle_off(struct ieee80211_local *local) { if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; @@ -87,7 +87,7 @@ u32 ieee80211_idle_off(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -static u32 ieee80211_idle_on(struct ieee80211_local *local) +static u32 __ieee80211_idle_on(struct ieee80211_local *local) { if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; @@ -98,16 +98,18 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) return IEEE80211_CONF_CHANGE_IDLE; } -void ieee80211_recalc_idle(struct ieee80211_local *local) +static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, + bool force_active) { bool working = false, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; struct ieee80211_roc_work *roc; - u32 change; lockdep_assert_held(&local->mtx); - active = !list_empty(&local->chanctx_list) || local->monitors; + active = force_active || + !list_empty(&local->chanctx_list) || + local->monitors; if (!local->ops->remain_on_channel) { list_for_each_entry(roc, &local->roc_list, list) { @@ -132,9 +134,18 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); if (working || scanning || active) - change = ieee80211_idle_off(local); - else - change = ieee80211_idle_on(local); + return __ieee80211_idle_off(local); + return __ieee80211_idle_on(local); +} + +u32 ieee80211_idle_off(struct ieee80211_local *local) +{ + return __ieee80211_recalc_idle(local, true); +} + +void ieee80211_recalc_idle(struct ieee80211_local *local) +{ + u32 change = __ieee80211_recalc_idle(local, false); if (change) ieee80211_hw_config(local, change); } -- cgit v1.2.3 From 05a324b9c50c3edbe0ce48ee3e37b210859ef1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:38 +0000 Subject: net: ipv4: reset check_lifetime_work after changing lifetime This will result in calling check_lifetime in nearest opportunity and that function will adjust next time to call check_lifetime correctly. Without this, check_lifetime is called in time computed by previous run, not affecting modified lifetime. Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 96083b7a436b..00386e02e708 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -804,6 +804,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg return -EEXIST; ifa = ifa_existing; set_ifa_lifetime(ifa, valid_lft, prefered_lft); + cancel_delayed_work(&check_lifetime_work); + schedule_delayed_work(&check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } -- cgit v1.2.3 From c988d1e8cbf722e34ee6124b8b89d47fec655b51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Apr 2013 23:39:39 +0000 Subject: net: ipv4: fix schedule while atomic bug in check_lifetime() move might_sleep operations out of the rcu_read_lock() section. Also fix iterating over ifa_dev->ifa_list Introduced by: commit 5c766d642bcaf "ipv4: introduce address lifetime" Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 00386e02e708..c6287cd978c2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; + struct hlist_node *n; int i; now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + bool change_needed = false; + + rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; @@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work) if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && age >= ifa->ifa_valid_lft) { - struct in_ifaddr **ifap ; - - rtnl_lock(); - for (ifap = &ifa->ifa_dev->ifa_list; - *ifap != NULL; ifap = &ifa->ifa_next) { - if (*ifap == ifa) - inet_del_ifa(ifa->ifa_dev, - ifap, 1); - } - rtnl_unlock(); + change_needed = true; } else if (ifa->ifa_preferred_lft == INFINITY_LIFE_TIME) { continue; @@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work) next = ifa->ifa_tstamp + ifa->ifa_valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { - ifa->ifa_flags |= IFA_F_DEPRECATED; - rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); - } + if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + change_needed = true; } else if (time_before(ifa->ifa_tstamp + ifa->ifa_preferred_lft * HZ, next)) { @@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work) ifa->ifa_preferred_lft * HZ; } } + rcu_read_unlock(); + if (!change_needed) + continue; + rtnl_lock(); + hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + unsigned long age; + + if (ifa->ifa_flags & IFA_F_PERMANENT) + continue; + + /* We try to batch several events at once. */ + age = (now - ifa->ifa_tstamp + + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + + if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && + age >= ifa->ifa_valid_lft) { + struct in_ifaddr **ifap; + + for (ifap = &ifa->ifa_dev->ifa_list; + *ifap != NULL; ifap = &(*ifap)->ifa_next) { + if (*ifap == ifa) { + inet_del_ifa(ifa->ifa_dev, + ifap, 1); + break; + } + } + } else if (ifa->ifa_preferred_lft != + INFINITY_LIFE_TIME && + age >= ifa->ifa_preferred_lft && + !(ifa->ifa_flags & IFA_F_DEPRECATED)) { + ifa->ifa_flags |= IFA_F_DEPRECATED; + rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); + } + } + rtnl_unlock(); } - rcu_read_unlock(); next_sec = round_jiffies_up(next); next_sched = next; -- cgit v1.2.3 From 88c5b5ce5cb57af6ca2a7cf4d5715fa320448ff9 Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Mon, 8 Apr 2013 05:45:26 +0000 Subject: rtnetlink: Call nlmsg_parse() with correct header length Signed-off-by: Michael Riesch Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Jiri Benc Cc: "Theodore Ts'o" Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rustad Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b65441da74ab..23854b51a259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1072,7 +1072,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) @@ -1922,7 +1922,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; - if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, + if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); -- cgit v1.2.3 From f9c41a62bba3f3f7ef3541b2a025e3371bcbba97 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sun, 7 Apr 2013 22:19:26 +0000 Subject: af_iucv: fix recvmsg by replacing skb_pull() function When receiving data messages, the "BUG_ON(skb->len < skb->data_len)" in the skb_pull() function triggers a kernel panic. Replace the skb_pull logic by a per skb offset as advised by Eric Dumazet. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Reviewed-by: Hendrik Brueckner Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 8 ++++++++ net/iucv/af_iucv.c | 34 ++++++++++++++++------------------ 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index cc7c19732389..714cc9a54a4c 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -130,6 +130,14 @@ struct iucv_sock { enum iucv_tx_notify n); }; +struct iucv_skb_cb { + u32 class; /* target class of message */ + u32 tag; /* tag associated with message */ + u32 offset; /* offset for skb receival */ +}; + +#define IUCV_SKB_CB(__skb) ((struct iucv_skb_cb *)&((__skb)->cb[0])) + /* iucv socket options (SOL_IUCV) */ #define SO_IPRMDATA_MSG 0x0080 /* send/recv IPRM_DATA msgs */ #define SO_MSGLIMIT 0x1000 /* get/set IUCV MSGLIMIT */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index bf6935820001..206ce6db2c36 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] = #define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class)) -/* macros to set/get socket control buffer at correct offset */ -#define CB_TAG(skb) ((skb)->cb) /* iucv message tag */ -#define CB_TAG_LEN (sizeof(((struct iucv_message *) 0)->tag)) -#define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ -#define CB_TRGCLS_LEN (TRGCLS_SIZE) - #define __iucv_sock_wait(sk, condition, timeo, ret) \ do { \ DEFINE_WAIT(__wait); \ @@ -1141,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + IUCV_SKB_CB(skb)->tag = txmsg.tag; if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_sent); @@ -1224,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len) return -ENOMEM; /* copy target class to control buffer of new skb */ - memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN); + IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class; /* copy data fragment */ memcpy(nskb->data, skb->data + copied, size); @@ -1256,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, /* store msg target class in the second 4 bytes of skb ctrl buffer */ /* Note: the first 4 bytes are reserved for msg tag */ - memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = msg->class; /* check for special IPRM messages (e.g. iucv_sock_shutdown) */ if ((msg->flags & IUCV_IPRMDATA) && len > 7) { @@ -1292,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } } + IUCV_SKB_CB(skb)->offset = 0; if (sock_queue_rcv_skb(sk, skb)) skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); } @@ -1327,6 +1322,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned int copied, rlen; struct sk_buff *skb, *rskb, *cskb; int err = 0; + u32 offset; msg->msg_namelen = 0; @@ -1348,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - rlen = skb->len; /* real length of skb */ + offset = IUCV_SKB_CB(skb)->offset; + rlen = skb->len - offset; /* real length of skb */ copied = min_t(unsigned int, rlen, len); if (!rlen) sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN; cskb = skb; - if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { + if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1372,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, * get the trgcls from the control buffer of the skb due to * fragmentation of original iucv message. */ err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, - CB_TRGCLS_LEN, CB_TRGCLS(skb)); + sizeof(IUCV_SKB_CB(skb)->class), + (void *)&IUCV_SKB_CB(skb)->class); if (err) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); @@ -1384,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* SOCK_STREAM: re-queue skb if it contains unreceived data */ if (sk->sk_type == SOCK_STREAM) { - skb_pull(skb, copied); - if (skb->len) { - skb_queue_head(&sk->sk_receive_queue, skb); + if (copied < rlen) { + IUCV_SKB_CB(skb)->offset = offset + copied; goto done; } } @@ -1405,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, spin_lock_bh(&iucv->message_q.lock); rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { + IUCV_SKB_CB(rskb)->offset = 0; if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv->backlog_skb_q, rskb); @@ -1832,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) { + if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } @@ -2093,6 +2091,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); + IUCV_SKB_CB(skb)->offset = 0; spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { if (sock_queue_rcv_skb(sk, skb)) { @@ -2197,8 +2196,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, /* fall through and receive zero length data */ case 0: /* plain data frame */ - memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, - CB_TRGCLS_LEN); + IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; err = afiucv_hs_callback_rx(sk, skb); break; default: -- cgit v1.2.3 From c802d759623acbd6e1ee9fbdabae89159a513913 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 10:07:19 +0800 Subject: netrom: fix invalid use of sizeof in nr_recvmsg() sizeof() when applied to a pointer typed expression gives the size of the pointer, not that of the pointed data. Introduced by commit 3ce5ef(netrom: fix info leak via msg_name in nr_recvmsg) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7fcb307dea47..103bd704b5fc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1173,7 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, } if (sax != NULL) { - memset(sax, 0, sizeof(sax)); + memset(sax, 0, sizeof(*sax)); sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); -- cgit v1.2.3 From 3480a2125923e4b7a56d79efc76743089bf273fc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Apr 2013 14:16:04 +0800 Subject: can: gw: use kmem_cache_free() instead of kfree() Memory allocated by kmem_cache_alloc() should be freed using kmem_cache_free(), not kfree(). Cc: linux-stable # >= v3.2 Signed-off-by: Wei Yongjun Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc5ebea..117814a7e73c 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -466,7 +466,7 @@ static int cgw_notifier(struct notifier_block *nb, if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } } @@ -864,7 +864,7 @@ static void cgw_remove_all_jobs(void) hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); } } @@ -920,7 +920,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) hlist_del(&gwj->list); cgw_unregister_filter(gwj); - kfree(gwj); + kmem_cache_free(cgw_cache, gwj); err = 0; break; } -- cgit v1.2.3 From ca10b9e9a8ca7342ee07065289cbe74ac128c169 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Apr 2013 17:58:11 +0000 Subject: selinux: add a skb_owned_by() hook Commit 90ba9b1986b5ac (tcp: tcp_make_synack() can use alloc_skb()) broke certain SELinux/NetLabel configurations by no longer correctly assigning the sock to the outgoing SYNACK packet. Cost of atomic operations on the LISTEN socket is quite big, and we would like it to happen only if really needed. This patch introduces a new security_ops->skb_owned_by() method, that is a void operation unless selinux is active. Reported-by: Miroslav Vadkerti Diagnosed-by: Paul Moore Signed-off-by: Eric Dumazet Cc: "David S. Miller" Cc: linux-security-module@vger.kernel.org Acked-by: James Morris Tested-by: Paul Moore Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 8 ++++++++ net/ipv4/tcp_output.c | 1 + security/capability.c | 6 ++++++ security/security.c | 5 +++++ security/selinux/hooks.c | 7 +++++++ 5 files changed, 27 insertions(+) (limited to 'net') diff --git a/include/linux/security.h b/include/linux/security.h index eee7478cda70..6c3a78ace051 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1638,6 +1638,7 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); + void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2588,6 +2589,8 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); + #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2779,6 +2782,11 @@ static inline int security_tun_dev_open(void *security) { return 0; } + +static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d0b4387cba6..b44cf81d8178 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2709,6 +2709,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, MAX_TCP_HEADER); skb_dst_set(skb, dst); + security_skb_owned_by(skb, sk); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) diff --git a/security/capability.c b/security/capability.c index 579775088967..6783c3e6c88e 100644 --- a/security/capability.c +++ b/security/capability.c @@ -737,6 +737,11 @@ static int cap_tun_dev_open(void *security) { return 0; } + +static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1071,6 +1076,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, tun_dev_open); set_to_cap_if_null(ops, tun_dev_attach_queue); set_to_cap_if_null(ops, tun_dev_attach); + set_to_cap_if_null(ops, skb_owned_by); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/security.c b/security/security.c index 7b88c6aeaed4..03f248b84e9f 100644 --- a/security/security.c +++ b/security/security.c @@ -1290,6 +1290,11 @@ int security_tun_dev_open(void *security) } EXPORT_SYMBOL(security_tun_dev_open); +void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + security_ops->skb_owned_by(skb, sk); +} + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fa28c88900c..7171a957b933 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -51,6 +51,7 @@ #include #include #include /* for local_port_range[] */ +#include #include /* struct or_callable used in sock_rcv_skb */ #include #include @@ -4363,6 +4364,11 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); } +static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk) +{ + skb_set_owner_w(skb, sk); +} + static int selinux_secmark_relabel_packet(u32 sid) { const struct task_security_struct *__tsec; @@ -5664,6 +5670,7 @@ static struct security_operations selinux_ops = { .tun_dev_attach_queue = selinux_tun_dev_attach_queue, .tun_dev_attach = selinux_tun_dev_attach, .tun_dev_open = selinux_tun_dev_open, + .skb_owned_by = selinux_skb_owned_by, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, -- cgit v1.2.3 From 02f815cb6d3f57914228be84df9613ee5a01c2e6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:19 +0000 Subject: netfilter: ipset: list:set: fix reference counter update The last element can be replaced or pushed off and in both cases the reference counter must be updated. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 8371c2bac2e4..09c744aa8982 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -174,9 +174,13 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, { const struct set_elem *e = list_set_elem(map, i); - if (i == map->size - 1 && e->id != IPSET_INVALID_ID) - /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + if (e->id != IPSET_INVALID_ID) { + const struct set_elem *x = list_set_elem(map, map->size - 1); + + /* Last element replaced or pushed off */ + if (x->id != IPSET_INVALID_ID) + ip_set_put_byindex(x->id); + } if (with_timeout(map->timeout)) list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else -- cgit v1.2.3 From 6eb4c7e96e19fd2c38a103472048fc0e0e0a3ec3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 9 Apr 2013 08:57:20 +0000 Subject: netfilter: ipset: hash:*net*: nomatch flag not excluded on set resize If a resize is triggered the nomatch flag is not excluded at hashing, which leads to the element missed at lookup in the resized set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_ahash.h | 30 +++++++++++++++++++++------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 18 +++++++++++++++++ net/netfilter/ipset/ip_set_hash_net.c | 22 ++++++++++++++++++-- net/netfilter/ipset/ip_set_hash_netiface.c | 22 ++++++++++++++++++-- net/netfilter/ipset/ip_set_hash_netport.c | 18 +++++++++++++++++ 5 files changed, 99 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h index 01d25e6fc792..0214c4c146fa 100644 --- a/include/linux/netfilter/ipset/ip_set_ahash.h +++ b/include/linux/netfilter/ipset/ip_set_ahash.h @@ -291,6 +291,7 @@ ip_set_hash_destroy(struct ip_set *set) #define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist) #define type_pf_data_next TOKEN(TYPE, PF, _data_next) #define type_pf_data_flags TOKEN(TYPE, PF, _data_flags) +#define type_pf_data_reset_flags TOKEN(TYPE, PF, _data_reset_flags) #ifdef IP_SET_HASH_WITH_NETS #define type_pf_data_match TOKEN(TYPE, PF, _data_match) #else @@ -385,9 +386,9 @@ type_pf_resize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; retry: @@ -412,9 +413,16 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_add(m, data, AHASH_MAX(h), 0); + ret = type_pf_elem_add(m, data, AHASH_MAX(h), flags); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -836,9 +844,9 @@ type_pf_tresize(struct ip_set *set, bool retried) struct ip_set_hash *h = set->data; struct htable *t, *orig = h->table; u8 htable_bits = orig->htable_bits; - const struct type_pf_elem *data; + struct type_pf_elem *data; struct hbucket *n, *m; - u32 i, j; + u32 i, j, flags = 0; int ret; /* Try to cleanup once */ @@ -873,10 +881,17 @@ retry: n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { data = ahash_tdata(n, j); +#ifdef IP_SET_HASH_WITH_NETS + flags = 0; + type_pf_data_reset_flags(data, &flags); +#endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0, - ip_set_timeout_get(type_pf_data_timeout(data))); + ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), flags, + ip_set_timeout_get(type_pf_data_timeout(data))); if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS + type_pf_data_flags(data, flags); +#endif read_unlock_bh(&set->lock); ahash_destroy(t); if (ret == -EAGAIN) @@ -1187,6 +1202,7 @@ type_pf_gc_init(struct ip_set *set) #undef type_pf_data_tlist #undef type_pf_data_next #undef type_pf_data_flags +#undef type_pf_data_reset_flags #undef type_pf_data_match #undef type_pf_elem diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f2627226a087..10a30b4fc7db 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -104,6 +104,15 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) { @@ -414,6 +423,15 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 4b677cf6bf7d..d6a59154d710 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -87,7 +87,16 @@ hash_net4_data_copy(struct hash_net4_elem *dst, static inline void hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -308,7 +317,16 @@ hash_net6_data_copy(struct hash_net6_elem *dst, static inline void hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 6ba985f1c96f..f2b0a3c30130 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -198,7 +198,16 @@ hash_netiface4_data_copy(struct hash_netiface4_elem *dst, static inline void hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } } static inline int @@ -494,7 +503,7 @@ hash_netiface6_data_copy(struct hash_netiface6_elem *dst, static inline void hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) { - dst->nomatch = flags & IPSET_FLAG_NOMATCH; + dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } static inline int @@ -503,6 +512,15 @@ hash_netiface6_data_match(const struct hash_netiface6_elem *elem) return elem->nomatch ? -ENOTEMPTY : 1; } +static inline void +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline void hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) { diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index af20c0c5ced2..349deb672a2d 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -104,6 +104,15 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport4_data_match(const struct hash_netport4_elem *elem) { @@ -375,6 +384,15 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); } +static inline void +hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) +{ + if (dst->nomatch) { + *flags = IPSET_FLAG_NOMATCH; + dst->nomatch = 0; + } +} + static inline int hash_netport6_data_match(const struct hash_netport6_elem *elem) { -- cgit v1.2.3 From 7b119dc06d871405fc7c3e9a73a6c987409ba639 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Apr 2013 21:38:36 +0200 Subject: mac80211: fix cfg80211 interaction on auth/assoc request If authentication (or association with FT) is requested by userspace, mac80211 currently doesn't tell cfg80211 that it disconnected from the AP. That leaves inconsistent state: cfg80211 thinks it's connected while mac80211 thinks it's not. Typically this won't last long, as soon as mac80211 reports the new association to cfg80211 the old one goes away. If, however, the new authentication or association doesn't succeed, then cfg80211 will forever think the old one still exists and will refuse attempts to authenticate or associate with the AP it thinks it's connected to. Anders reported that this leads to it taking a very long time to reconnect to a network, or never even succeeding. I tested this with an AP hacked to never respond to auth frames, and one that works, and with just those two the system never recovers because one won't work and cfg80211 thinks it's connected to the other so refuses connections to it. To fix this, simply make mac80211 tell cfg80211 when it is no longer connected to the old AP, while authenticating or associating to a new one. Cc: stable@vger.kernel.org Reported-by: Anders Kaseorg Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82cc30318a86..346ad4cfb013 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3964,8 +3964,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* prep auth_data so we don't go into idle on disassoc */ ifmgd->auth_data = auth_data; - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4025,8 +4033,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) - ieee80211_set_disassoc(sdata, 0, 0, false, NULL); + if (ifmgd->associated) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, + WLAN_REASON_UNSPECIFIED, + false, frame_buf); + + __cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); + } if (ifmgd->auth_data && !ifmgd->auth_data->done) { err = -EBUSY; -- cgit v1.2.3 From d66954a066158781ccf9c13c91d0316970fe57b6 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 11 Apr 2013 08:55:07 +0000 Subject: tcp: incoming connections might use wrong route under synflood There is a bug in cookie_v4_check (net/ipv4/syncookies.c): flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); Here we do not respect sk->sk_bound_dev_if, therefore wrong dst_entry may be taken. This dst_entry is used by new socket (get_cookie_sock -> tcp_v4_syn_recv_sock), so its packets may take the wrong path. Signed-off-by: Dmitry Popov Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377fb11c..397e0f69435f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -349,8 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), - RT_SCOPE_UNIVERSE, IPPROTO_TCP, + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, ireq->loc_addr, th->source, th->dest); -- cgit v1.2.3 From 50bceae9bd3569d56744882f3012734d48a1d413 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 11 Apr 2013 10:57:18 +0000 Subject: tcp: Reallocate headroom if it would overflow csum_start If a TCP retransmission gets partially ACKed and collapsed multiple times it is possible for the headroom to grow beyond 64K which will overflow the 16bit skb->csum_start which is based on the start of the headroom. It has been observed rarely in the wild with IPoIB due to the 64K MTU. Verify if the acking and collapsing resulted in a headroom exceeding what csum_start can cover and reallocate the headroom if so. A big thank you to Jim Foraker and the team at LLNL for helping out with the investigation and testing. Reported-by: Jim Foraker Signed-off-by: Thomas Graf Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b44cf81d8178..509912a5ff98 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2388,8 +2388,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - /* make sure skb->data is aligned on arches that require it */ - if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + /* make sure skb->data is aligned on arches that require it + * and check if ack-trimming & collapsing extended the headroom + * beyond what csum_start can cover. + */ + if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || + skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -- cgit v1.2.3 From c2d421e171868586939c328dfb91bab840fe4c49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Apr 2013 04:22:39 +0000 Subject: netfilter: nf_nat: fix race when unloading protocol modules following oops was reported: RIP: 0010:[] [] nf_nat_cleanup_conntrack+0x42/0x70 [nf_nat] RSP: 0018:ffff880202c63d40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8801ac7bec28 RCX: ffff8801d0eedbe0 RDX: dead000000200200 RSI: 0000000000000011 RDI: ffffffffa03265b8 [..] Call Trace: [..] [] destroy_conntrack+0xbd/0x110 [nf_conntrack] Happens when a conntrack timeout expires right after first part of the nat cleanup has completed (bysrc hash removal), but before part 2 has completed (re-initialization of nat area). [ destroy callback tries to delete bysrc again ] Patrick suggested to just remove the affected conntracks -- the connections won't work properly anyway without nat transformation. So, lets do that. Reported-by: CAI Qian Cc: Patrick McHardy Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 8d5769c6d16e..ad24be070e53 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -467,33 +467,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; - bool hash; }; -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int nf_nat_proto_clean(struct nf_conn *i, void *data) +/* kill conntracks with affected NAT section */ +static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; struct nf_conn_nat *nat = nfct_nat(i); if (!nat) return 0; - if (!(i->status & IPS_SRC_NAT_DONE)) - return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; - if (clean->hash) { - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); - } else { - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | - IPS_SEQ_ADJUST); - } - return 0; + return i->status & IPS_NAT_MASK ? 1 : 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) @@ -505,16 +494,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - - /* Step 2 - clean NAT section */ - clean.hash = false; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -526,16 +507,9 @@ static void nf_nat_l3proto_clean(u8 l3proto) struct net *net; rtnl_lock(); - /* Step 1 - remove from bysource hash */ - clean.hash = true; - for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); - synchronize_rcu(); - /* Step 2 - clean NAT section */ - clean.hash = false; for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); rtnl_unlock(); } @@ -773,7 +747,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } -- cgit v1.2.3 From fb745e9a037895321781d066aa24757ceabf9df9 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 11 Apr 2013 13:47:15 +0000 Subject: net/802/mrp: fix possible race condition when calling mrp_pdu_queue() (Adapted from a very similar change to net/802/garp.c by Cong Wang.) mrp_pdu_queue() should ways be called with the applicant spin lock. mrp_uninit_applicant() only holds the rtnl lock which is not enough; a race is possible because mrp_rcv() is called in BH context: mrp_rcv() |->mrp_pdu_parse_msg() |->mrp_pdu_parse_vecattr() |->mrp_pdu_parse_vecattr_event() |-> mrp_attr_event() |-> mrp_pdu_append_vecattr_event() |-> mrp_pdu_queue() Cc: Cong Wang Cc: Eric Dumazet Signed-off-by: David Ward Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/802/mrp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/802/mrp.c b/net/802/mrp.c index a4cc3229952a..e085bcc754f6 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -870,8 +870,12 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + + spin_lock(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); mrp_pdu_queue(app); + spin_unlock(&app->lock); + mrp_queue_xmit(app); dev_mc_del(dev, appl->group_address); -- cgit v1.2.3 From f88c91ddba958e9a5dd4a5ee8c52a0faa790f586 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 14 Apr 2013 23:18:43 +0800 Subject: ipv6: statically link register_inet6addr_notifier() Tomas reported the following build error: net/built-in.o: In function `ieee80211_unregister_hw': (.text+0x10f0e1): undefined reference to `unregister_inet6addr_notifier' net/built-in.o: In function `ieee80211_register_hw': (.text+0x10f610): undefined reference to `register_inet6addr_notifier' make: *** [vmlinux] Error 1 when built IPv6 as a module. So we have to statically link these symbols. Reported-by: Tomas Melin Cc: Tomas Melin Cc: "David S. Miller" Cc: YOSHIFUJI Hidaki Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 24 +++--------------------- net/ipv6/addrconf_core.c | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 40be2a0d8ae1..84a6440f1f19 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -199,6 +199,7 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +extern int inet6addr_notifier_call_chain(unsigned long val, void *v); extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, struct ipv6_devconf *devconf); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a459c4f5b769..dae802c0af7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -168,8 +168,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev); -static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); - static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, @@ -837,7 +835,7 @@ out2: rcu_read_unlock_bh(); if (likely(err == 0)) - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); else { kfree(ifa); ifa = ERR_PTR(err); @@ -927,7 +925,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_DELADDR, ifp); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); /* * Purge or update corresponding prefix @@ -2988,7 +2986,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); } in6_ifa_put(ifa); @@ -4869,22 +4867,6 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -/* - * Device notifier - */ - -int register_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(register_inet6addr_notifier); - -int unregister_inet6addr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&inet6addr_chain, nb); -} -EXPORT_SYMBOL(unregister_inet6addr_notifier); - static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index d051e5f4bf34..72104562c864 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr) } EXPORT_SYMBOL(__ipv6_addr_type); +static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); + +int register_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(register_inet6addr_notifier); + +int unregister_inet6addr_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&inet6addr_chain, nb); +} +EXPORT_SYMBOL(unregister_inet6addr_notifier); + +int inet6addr_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&inet6addr_chain, val, v); +} +EXPORT_SYMBOL(inet6addr_notifier_call_chain); -- cgit v1.2.3 From 8f3359bdc83f1abb1989e0817ab0e0b18667c828 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 13 Apr 2013 14:06:07 +0000 Subject: bridge: make user modified path cost sticky Keep a STP port path cost value if it was set by a user. Don't replace it with the link-speed based path cost whenever the link goes down and comes back up. Reported-by: Roopa Prabhu Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 3 ++- net/bridge/br_private.h | 1 + net/bridge/br_stp_if.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b91431c6b..459dab22b3f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -67,7 +67,8 @@ void br_port_carrier_check(struct net_bridge_port *p) struct net_device *dev = p->dev; struct net_bridge *br = p->br; - if (netif_running(dev) && netif_oper_up(dev)) + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) p->path_cost = port_cost(dev); if (!netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3cbf5beb3d4b..d2c043a857b6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -156,6 +156,7 @@ struct net_bridge_port #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 0bdb4ebd362b..d45e760141bb 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) path_cost > BR_MAX_PATH_COST) return -ERANGE; + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); -- cgit v1.2.3 From 06848c10f720cbc20e3b784c0df24930b7304b93 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 13 Apr 2013 15:49:03 +0000 Subject: esp4: fix error return code in esp_output() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3b4f0cd2e63e..4cfe34d4cc96 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) /* skb is pure payload to encrypt */ - err = -ENOMEM; - esp = x->data; aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); -- cgit v1.2.3 From 97599dc792b45b1669c3cdb9a4b365aad0232f65 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Apr 2013 12:55:41 +0000 Subject: net: drop dst before queueing fragments Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path) added a bug in IP defragmentation handling, as non refcounted dst could escape an RCU protected section. Commit 64f3b9e203bd068 (net: ip_expire() must revalidate route) fixed the case of timeouts, but not the general problem. Tom Parkin noticed crashes in UDP stack and provided a patch, but further analysis permitted us to pinpoint the root cause. Before queueing a packet into a frag list, we must drop its dst, as this dst has limited lifetime (RCU protected) When/if a packet is finally reassembled, we use the dst of the very last skb, still protected by RCU and valid, as the dst of the reassembled packet. Use same logic in IPv6, as there is no need to hold dst references. Reported-by: Tom Parkin Tested-by: Tom Parkin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 14 ++++++++++---- net/ipv6/reassembly.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b843ef4..52c273ea05c3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -248,8 +248,7 @@ static void ip_expire(unsigned long arg) if (!head->dev) goto out_rcu_unlock; - /* skb dst is stale, drop it, and perform route lookup again */ - skb_dst_drop(head); + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); @@ -523,9 +522,16 @@ found: qp->q.max_size = skb->len + ihl; if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + qp->q.meat == qp->q.len) { + unsigned long orefdst = skb->_skb_refdst; + skb->_skb_refdst = 0UL; + err = ip_frag_reasm(qp, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); inet_frag_lru_move(&qp->q); return -EINPROGRESS; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab9347ad1..0ba10e53a629 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -330,9 +330,17 @@ found: } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + skb_dst_drop(skb); inet_frag_lru_move(&fq->q); return -1; -- cgit v1.2.3 From fe8a93b95145c66adf196eea4a919dfe0b7c57db Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 3 Apr 2013 19:10:26 +0200 Subject: batman-adv: make is_my_mac() check for the current mesh only On a multi-mesh node (a node running more than one batman-adv virtual interface) batadv_is_my_mac() has to check MAC addresses of hard interfaces belonging to the current mesh only. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/main.c | 5 ++++- net/batman-adv/main.h | 2 +- net/batman-adv/routing.c | 38 ++++++++++++++++++++------------------ net/batman-adv/translation-table.c | 2 +- net/batman-adv/vis.c | 4 ++-- 5 files changed, 28 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0488d70c8c35..fa563e497c48 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -169,7 +169,7 @@ void batadv_mesh_free(struct net_device *soft_iface) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -int batadv_is_my_mac(const uint8_t *addr) +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -178,6 +178,9 @@ int batadv_is_my_mac(const uint8_t *addr) if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ced08b936a96..d40910dfc8ea 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -162,7 +162,7 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -int batadv_is_my_mac(const uint8_t *addr); +int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_hard_iface * batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5ee21cebbbb0..319f2906c71a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -402,7 +402,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, goto out; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) goto out; icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; @@ -416,7 +416,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, } /* packet for me */ - if (batadv_is_my_mac(icmp_packet->dst)) + if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); /* TTL exceeded */ @@ -548,7 +548,8 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } -static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) +static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; @@ -567,7 +568,7 @@ static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) return -1; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return -1; return 0; @@ -582,7 +583,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) char tt_flag; size_t packet_size; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; /* I could need to modify it */ @@ -614,7 +615,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - if (batadv_is_my_mac(tt_query->dst)) { + if (batadv_is_my_mac(bat_priv, tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ @@ -657,14 +658,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, + sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - if (!batadv_is_my_mac(roam_adv_packet->dst)) + if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) return batadv_route_unicast_packet(skb, recv_if); /* check if it is a backbone gateway. we don't accept @@ -967,7 +969,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, * last time) the packet had an updated information or not */ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - if (!batadv_is_my_mac(unicast_packet->dest)) { + if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) { orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); /* if it is not possible to find the orig_node representing the @@ -1044,14 +1046,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { if (is4addr) { batadv_dat_inc_counter(bat_priv, unicast_4addr_packet->subtype); @@ -1088,7 +1090,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct sk_buff *new_skb = NULL; int ret; - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) @@ -1097,7 +1099,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_frag_packet *)skb->data; /* packet for me */ - if (batadv_is_my_mac(unicast_packet->dest)) { + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) @@ -1151,13 +1153,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, goto out; /* ignore broadcasts sent by myself */ - if (batadv_is_my_mac(ethhdr->h_source)) + if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto out; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ - if (batadv_is_my_mac(bcast_packet->orig)) + if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; if (bcast_packet->header.ttl < 2) @@ -1243,14 +1245,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return NET_RX_DROP; /* ignore own packets */ - if (batadv_is_my_mac(vis_packet->vis_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) return NET_RX_DROP; - if (batadv_is_my_mac(vis_packet->sender_orig)) + if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) return NET_RX_DROP; switch (vis_packet->vis_type) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a66a021a60..7abee19567e9 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1953,7 +1953,7 @@ out: bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tt_query_packet *tt_request) { - if (batadv_is_my_mac(tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, tt_request->dst)) { /* don't answer backbone gws! */ if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) return true; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c053244b97bd..6a1e646be96d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -477,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, /* Are we the target for this VIS packet? */ if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(vis_packet->target_orig)) + batadv_is_my_mac(bat_priv, vis_packet->target_orig)) are_target = 1; spin_lock_bh(&bat_priv->vis.hash_lock); @@ -496,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, batadv_send_list_add(bat_priv, info); /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(packet->target_orig)) { + } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { batadv_send_list_add(bat_priv, info); } -- cgit v1.2.3 From 5add189a125e6b497e31bffdaaed8145ec6d4984 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Apr 2013 14:30:54 +0200 Subject: netfilter: ipset: bitmap:ip,mac: fix listing with timeout The type when timeout support was enabled, could not list all elements, just the first ones which could fit into one netlink message: it just did not continue listing after the first message. Reported-by: Yoann JUET Signed-off-by: Jozsef Kadlecsik Tested-by: Yoann JUET Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 0f92dc24cb89..d7df6ac2c6f1 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -339,7 +339,11 @@ bitmap_ipmac_tlist(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); ipset_nest_end(skb, atd); - return -EMSGSIZE; + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; } static int -- cgit v1.2.3 From f83a7ea2075ca896f2dbf07672bac9cf3682ff74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Apr 2013 22:45:24 +0000 Subject: netfilter: xt_rpfilter: skip locally generated broadcast/multicast, too Alex Efros reported rpfilter module doesn't match following packets: IN=br.qemu SRC=192.168.2.1 DST=192.168.2.255 [ .. ] (netfilter bugzilla #814). Problem is that network stack arranges for the locally generated broadcasts to appear on the interface they were sent out, so the IFF_LOOPBACK check doesn't trigger. As -m rpfilter is restricted to PREROUTING, we can check for existing rtable instead, it catches locally-generated broad/multicast case, too. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 8 +++++++- net/ipv6/netfilter/ip6t_rpfilter.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c30130062cd6..c49dcd0284a0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, return dev_match; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info; @@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) info = par->matchinfo; invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5060d54199ab..e0983f3648a6 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, return ret; } +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *) skb_dst(skb); + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; @@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; - if (par->in->flags & IFF_LOOPBACK) + if (rpfilter_is_local(skb)) return true ^ invert; iph = ipv6_hdr(skb); -- cgit v1.2.3 From 12fb3dd9dc3c64ba7d64cec977cca9b5fb7b1d4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Apr 2013 07:19:48 +0000 Subject: tcp: call tcp_replace_ts_recent() from tcp_ack() commit bd090dfc634d (tcp: tcp_replace_ts_recent() should not be called from tcp_validate_incoming()) introduced a TS ecr bug in slow path processing. 1 A > B P. 1:10001(10000) ack 1 2 B < A . 1:1(0) ack 1 win 257 3 A > B . 1:1001(1000) ack 1 win 227 4 A > B . 1001:2001(1000) ack 1 win 227 (ecr 200 should be ecr 300 in packets 3 & 4) Problem is tcp_ack() can trigger send of new packets (retransmits), reflecting the prior TSval, instead of the TSval contained in the currently processed incoming packet. Fix this by calling tcp_replace_ts_recent() from tcp_ack() after the checks, but before the actions. Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 64 +++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bd55bad230a..13b9c08fc158 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -113,6 +113,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3564,6 +3565,27 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +static void tcp_store_ts_recent(struct tcp_sock *tp) +{ + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); +} + +static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) +{ + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3607,6 +3629,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + if (flag & FLAG_UPDATE_TS_RECENT) + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3927,27 +3955,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif -static inline void tcp_store_ts_recent(struct tcp_sock *tp) -{ - tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = get_seconds(); -} - -static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) -{ - if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { - /* PAWS bug workaround wrt. ACK frames, the PAWS discard - * extra check below makes sure this can only happen - * for pure ACK frames. -DaveM - * - * Not only, also it occurs for expired timestamps. - */ - - if (tcp_paws_check(&tp->rx_opt, 0)) - tcp_store_ts_recent(tp); - } -} - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) @@ -5543,14 +5550,9 @@ slow_path: return 0; step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ @@ -5986,7 +5988,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: @@ -6137,11 +6140,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - /* ts_recent update must be made after we are sure that the packet - * is in window. - */ - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - /* step 6: check the URG bit */ tcp_urg(sk, skb, th); -- cgit v1.2.3 From e15465e1808542743627f13d1c0cbb7eacc82b83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Apr 2013 21:10:38 +0000 Subject: irda: small read past the end of array in debug code The "reason" can come from skb->data[] and it hasn't been capped so it can be from 0-255 instead of just 0-6. For example in irlmp_state_dtr() the code does: reason = skb->data[3]; ... irlmp_disconnect_indication(self, reason, skb); Also LMREASON has a couple other values which don't have entries in the irlmp_reasons[] array. And 0xff is a valid reason as well which means "unknown". So far as I can see we don't actually care about "reason" except for in the debug code. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/irda/irlmp.h | 3 ++- net/irda/iriap.c | 3 ++- net/irda/irlmp.c | 10 +++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index f74109144d3f..f132924cc9da 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -256,7 +256,8 @@ static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) return (self && self->lap) ? self->lap->daddr : 0; } -extern const char *irlmp_reasons[]; +const char *irlmp_reason_str(LM_REASON reason); + extern int sysctl_discovery_timeout; extern int sysctl_discovery_slots; extern int sysctl_discovery; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 29340a9a6fb9..e1b37f5a2691 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap, { struct iriap_cb *self; - IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); self = instance; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 6115a44c0a24..1064621da6f6 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -66,8 +66,15 @@ const char *irlmp_reasons[] = { "LM_LAP_RESET", "LM_INIT_DISCONNECT", "ERROR, NOT USED", + "UNKNOWN", }; +const char *irlmp_reason_str(LM_REASON reason) +{ + reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); + return irlmp_reasons[reason]; +} + /* * Function irlmp_init (void) * @@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, { struct lsap_cb *lsap; - IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); + IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__, + irlmp_reason_str(reason), reason); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); -- cgit v1.2.3 From cb95ec6261a205bde6451b972fbd6e1581608cae Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Apr 2013 16:49:10 +0000 Subject: pkt_sched: fix error return code in fw_change_attrs() Fix to return -EINVAL when tb[TCA_FW_MASK] is set and head->mask != 0xFFFFFFFF instead of 0 (ifdef CONFIG_NET_CLS_IND and tb[TCA_FW_INDEV]), as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 1135d8227f9b..9b97172db84a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -204,7 +204,6 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (err < 0) return err; - err = -EINVAL; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); tcf_bind_filter(tp, &f->res, base); @@ -218,6 +217,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + err = -EINVAL; if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) -- cgit v1.2.3 From c846ad9b880ece01bb4d8d07ba917734edf0324f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 19 Apr 2013 10:45:52 +0000 Subject: net: rate-limit warn-bad-offload splats. If one does do something unfortunate and allow a bad offload bug into the kernel, this the skb_warn_bad_offload can effectively live-lock the system, filling the logs with the same error over and over. Add rate limitation to this so that box remains otherwise functional in this case. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e7d68ed8aafe..b24ab0e98eb4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,6 +2148,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb) struct net_device *dev = skb->dev; const char *driver = ""; + if (!net_ratelimit()) + return; + if (dev && dev->dev.parent) driver = dev_driver_string(dev->dev.parent); -- cgit v1.2.3