From 4282d60689d4f21b40692029080440cc58e8a17d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Jan 2015 11:36:55 -0500 Subject: tracefs: Add new tracefs file system Add a separate file system to handle the tracing directory. Currently it is part of debugfs, but that is starting to show its limits. One thing is that in order to access the tracing infrastructure, you need to mount debugfs. As that includes debugging from all sorts of sub systems in the kernel, it is not considered advisable to mount such an all encompassing debugging system. Having the tracing system in its own file systems gives access to the tracing sub system without needing to include all other systems. Another problem with tracing using the debugfs system is that the instances use mkdir to create sub buffers. debugfs does not support mkdir from userspace so to implement it, special hacks were used. By controlling the file system that the tracing infrastructure uses, this can be properly done without hacks. Signed-off-by: Steven Rostedt --- include/uapi/linux/magic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 7d664ea85ebd..7b1425a6b370 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -58,6 +58,8 @@ #define STACK_END_MAGIC 0x57AC6E9D +#define TRACEFS_MAGIC 0x74726163 + #define V9FS_MAGIC 0x01021997 #define BDEVFS_MAGIC 0x62646576 -- cgit v1.2.3 From cd67cd5eb25ae9a7bafbfd3d52d4c05e1d80af3b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 6 Feb 2015 09:44:44 +0200 Subject: ipvs: use 64-bit rates in stats IPVS stats are limited to 2^(32-10) conns/s and packets/s, 2^(32-5) bytes/s. It is time to use 64 bits: * Change all conn/packet kernel counters to 64-bit and update them in u64_stats_update_{begin,end} section * In kernel use struct ip_vs_kstats instead of the user-space struct ip_vs_stats_user and use new func ip_vs_export_stats_user to export it to sockopt users to preserve compatibility with 32-bit values * Rename cpu counters "ustats" to "cnt" * To netlink users provide additionally 64-bit stats: IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64. Old stats remain for old binaries. * We can use ip_vs_copy_stats in ip_vs_stats_percpu_show Thanks to Chris Caputo for providing initial patch for ip_vs_est.c Signed-off-by: Chris Caputo Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 50 ++++++++---- include/uapi/linux/ip_vs.h | 7 +- net/netfilter/ipvs/ip_vs_core.c | 36 +++++---- net/netfilter/ipvs/ip_vs_ctl.c | 174 ++++++++++++++++++++++++++-------------- net/netfilter/ipvs/ip_vs_est.c | 102 ++++++++++++----------- 5 files changed, 226 insertions(+), 143 deletions(-) (limited to 'include/uapi') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 615b20b58545..a627fe690c19 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -365,15 +365,15 @@ struct ip_vs_seq { /* counters per cpu */ struct ip_vs_counters { - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ + __u64 conns; /* connections scheduled */ + __u64 inpkts; /* incoming packets */ + __u64 outpkts; /* outgoing packets */ __u64 inbytes; /* incoming bytes */ __u64 outbytes; /* outgoing bytes */ }; /* Stats per cpu */ struct ip_vs_cpu_stats { - struct ip_vs_counters ustats; + struct ip_vs_counters cnt; struct u64_stats_sync syncp; }; @@ -383,23 +383,40 @@ struct ip_vs_estimator { u64 last_inbytes; u64 last_outbytes; - u32 last_conns; - u32 last_inpkts; - u32 last_outpkts; - - u32 cps; - u32 inpps; - u32 outpps; - u32 inbps; - u32 outbps; + u64 last_conns; + u64 last_inpkts; + u64 last_outpkts; + + u64 cps; + u64 inpps; + u64 outpps; + u64 inbps; + u64 outbps; +}; + +/* + * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user + */ +struct ip_vs_kstats { + u64 conns; /* connections scheduled */ + u64 inpkts; /* incoming packets */ + u64 outpkts; /* outgoing packets */ + u64 inbytes; /* incoming bytes */ + u64 outbytes; /* outgoing bytes */ + + u64 cps; /* current connection rate */ + u64 inpps; /* current in packet rate */ + u64 outpps; /* current out packet rate */ + u64 inbps; /* current in byte rate */ + u64 outbps; /* current out byte rate */ }; struct ip_vs_stats { - struct ip_vs_stats_user ustats; /* statistics */ + struct ip_vs_kstats kstats; /* kernel statistics */ struct ip_vs_estimator est; /* estimator */ struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */ spinlock_t lock; /* spin lock */ - struct ip_vs_stats_user ustats0; /* reset values */ + struct ip_vs_kstats kstats0; /* reset values */ }; struct dst_entry; @@ -1388,8 +1405,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); -void ip_vs_read_estimator(struct ip_vs_stats_user *dst, - struct ip_vs_stats *stats); +void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); /* Various IPVS packet transmitters (from ip_vs_xmit.c) */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index cabe95d5b461..3199243f2028 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -358,6 +358,8 @@ enum { IPVS_SVC_ATTR_PE_NAME, /* name of ct retriever */ + IPVS_SVC_ATTR_STATS64, /* nested attribute for service stats */ + __IPVS_SVC_ATTR_MAX, }; @@ -387,6 +389,8 @@ enum { IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */ + IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */ + __IPVS_DEST_ATTR_MAX, }; @@ -410,7 +414,8 @@ enum { /* * Attributes used to describe service or destination entry statistics * - * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS + * Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS, + * IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64. */ enum { IPVS_STATS_ATTR_UNSPEC = 0, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 990decba1fe4..c9470c86308f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.inbytes += skb->len; + s->cnt.inpkts++; + s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); - s->ustats.outbytes += skb->len; + s->cnt.outpkts++; + s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); } } @@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(svc->stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats.cpustats); - s->ustats.conns++; + u64_stats_update_begin(&s->syncp); + s->cnt.conns++; + u64_stats_update_end(&s->syncp); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e55759056361..6fd60059faf0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net) } static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) { -#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c spin_lock_bh(&src->lock); @@ -746,6 +746,21 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) spin_unlock_bh(&src->lock); } +static void +ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) +{ + dst->conns = (u32)src->conns; + dst->inpkts = (u32)src->inpkts; + dst->outpkts = (u32)src->outpkts; + dst->inbytes = src->inbytes; + dst->outbytes = src->outbytes; + dst->cps = (u32)src->cps; + dst->inpps = (u32)src->inpps; + dst->outpps = (u32)src->outpps; + dst->inbps = (u32)src->inbps; + dst->outbps = (u32)src->outbps; +} + static void ip_vs_zero_stats(struct ip_vs_stats *stats) { @@ -753,7 +768,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) /* get current counters as zero point, rates are zeroed */ -#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c +#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c IP_VS_ZERO_STATS_COUNTER(conns); IP_VS_ZERO_STATS_COUNTER(inpkts); @@ -2044,7 +2059,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats_user show; + struct ip_vs_kstats show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2053,17 +2068,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, - show.inpkts, show.outpkts, - (unsigned long long) show.inbytes, - (unsigned long long) show.outbytes); - -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)show.conns, + (unsigned long long)show.inpkts, + (unsigned long long)show.outpkts, + (unsigned long long)show.inbytes, + (unsigned long long)show.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, "%8X %8X %8X %16X %16X\n", - show.cps, show.inpps, show.outpps, - show.inbps, show.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", + (unsigned long long)show.cps, + (unsigned long long)show.inpps, + (unsigned long long)show.outpps, + (unsigned long long)show.inbps, + (unsigned long long)show.outbps); return 0; } @@ -2086,7 +2106,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; - struct ip_vs_stats_user rates; + struct ip_vs_kstats kstats; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2098,41 +2118,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) for_each_possible_cpu(i) { struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; do { start = u64_stats_fetch_begin_irq(&u->syncp); - inbytes = u->ustats.inbytes; - outbytes = u->ustats.outbytes; + conns = u->cnt.conns; + inpkts = u->cnt.inpkts; + outpkts = u->cnt.outpkts; + inbytes = u->cnt.inbytes; + outbytes = u->cnt.outbytes; } while (u64_stats_fetch_retry_irq(&u->syncp, start)); - seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, u->ustats.inpkts, - u->ustats.outpkts, (__u64)inbytes, - (__u64)outbytes); + seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", + i, (u64)conns, (u64)inpkts, + (u64)outpkts, (u64)inbytes, + (u64)outbytes); } - spin_lock_bh(&tot_stats->lock); - - seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", - tot_stats->ustats.conns, tot_stats->ustats.inpkts, - tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); - - ip_vs_read_estimator(&rates, tot_stats); + ip_vs_copy_stats(&kstats, tot_stats); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", + (unsigned long long)kstats.conns, + (unsigned long long)kstats.inpkts, + (unsigned long long)kstats.outpkts, + (unsigned long long)kstats.inbytes, + (unsigned long long)kstats.outbytes); -/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ +/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, - " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq, " %8X %8X %8X %16X %16X\n", - rates.cps, - rates.inpps, - rates.outpps, - rates.inbps, - rates.outbps); + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", + kstats.cps, + kstats.inpps, + kstats.outpps, + kstats.inbps, + kstats.outbps); return 0; } @@ -2400,6 +2420,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; + struct ip_vs_kstats kstats; sched = rcu_dereference_protected(src->scheduler, 1); dst->protocol = src->protocol; @@ -2411,7 +2432,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; dst->num_dests = src->num_dests; - ip_vs_copy_stats(&dst->stats, &src->stats); + ip_vs_copy_stats(&kstats, &src->stats); + ip_vs_export_stats_user(&dst->stats, &kstats); } static inline int @@ -2485,6 +2507,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, int count = 0; struct ip_vs_dest *dest; struct ip_vs_dest_entry entry; + struct ip_vs_kstats kstats; memset(&entry, 0, sizeof(entry)); list_for_each_entry(dest, &svc->destinations, n_list) { @@ -2506,7 +2529,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, entry.activeconns = atomic_read(&dest->activeconns); entry.inactconns = atomic_read(&dest->inactconns); entry.persistconns = atomic_read(&dest->persistconns); - ip_vs_copy_stats(&entry.stats, &dest->stats); + ip_vs_copy_stats(&kstats, &dest->stats); + ip_vs_export_stats_user(&entry.stats, &kstats); if (copy_to_user(&uptr->entrytable[count], &entry, sizeof(entry))) { ret = -EFAULT; @@ -2798,25 +2822,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { }; static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, - struct ip_vs_stats *stats) + struct ip_vs_kstats *kstats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + + if (!nl_stats) + return -EMSGSIZE; + + if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || + nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || + nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) + goto nla_put_failure; + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, + struct ip_vs_kstats *kstats) { - struct ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) return -EMSGSIZE; - ip_vs_copy_stats(&ustats, stats); - - if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) || - nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) || - nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) || - nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) || - nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) || - nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps)) + if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) || + nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) || + nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) || + nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps)) goto nla_put_failure; nla_nest_end(skb, nl_stats); @@ -2835,6 +2885,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct nlattr *nl_service; struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; + struct ip_vs_kstats kstats; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2860,7 +2911,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + ip_vs_copy_stats(&kstats, &svc->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_service); @@ -3032,6 +3086,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) { struct nlattr *nl_dest; + struct ip_vs_kstats kstats; nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); if (!nl_dest) @@ -3054,7 +3109,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) atomic_read(&dest->persistconns)) || nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) goto nla_put_failure; - if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + ip_vs_copy_stats(&kstats, &dest->stats); + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) + goto nla_put_failure; + if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) goto nla_put_failure; nla_nest_end(skb, nl_dest); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 1425e9a924c4..ef0eb0a8d552 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -45,17 +45,19 @@ NOTES. - * The stored value for average bps is scaled by 2^5, so that maximal - rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10. + * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. - * A lot code is taken from net/sched/estimator.c + * Netlink users can see 64-bit values but sockopt users are restricted + to 32-bit values for conns, packets, bps, cps and pps. + + * A lot of code is taken from net/core/gen_estimator.c */ /* * Make a summary from each cpu */ -static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, +static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, struct ip_vs_cpu_stats __percpu *stats) { int i; @@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, for_each_possible_cpu(i) { struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); unsigned int start; - __u64 inbytes, outbytes; + u64 conns, inpkts, outpkts, inbytes, outbytes; + if (add) { - sum->conns += s->ustats.conns; - sum->inpkts += s->ustats.inpkts; - sum->outpkts += s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - inbytes = s->ustats.inbytes; - outbytes = s->ustats.outbytes; + conns = s->cnt.conns; + inpkts = s->cnt.inpkts; + outpkts = s->cnt.outpkts; + inbytes = s->cnt.inbytes; + outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); + sum->conns += conns; + sum->inpkts += inpkts; + sum->outpkts += outpkts; sum->inbytes += inbytes; sum->outbytes += outbytes; } else { add = true; - sum->conns = s->ustats.conns; - sum->inpkts = s->ustats.inpkts; - sum->outpkts = s->ustats.outpkts; do { start = u64_stats_fetch_begin(&s->syncp); - sum->inbytes = s->ustats.inbytes; - sum->outbytes = s->ustats.outbytes; + sum->conns = s->cnt.conns; + sum->inpkts = s->cnt.inpkts; + sum->outpkts = s->cnt.outpkts; + sum->inbytes = s->cnt.inbytes; + sum->outbytes = s->cnt.outbytes; } while (u64_stats_fetch_retry(&s->syncp, start)); } } @@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; struct ip_vs_stats *s; - u32 n_conns; - u32 n_inpkts, n_outpkts; - u64 n_inbytes, n_outbytes; - u32 rate; + u64 rate; struct net *net = (struct net *)arg; struct netns_ipvs *ipvs; @@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg) s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); - n_conns = s->ustats.conns; - n_inpkts = s->ustats.inpkts; - n_outpkts = s->ustats.outpkts; - n_inbytes = s->ustats.inbytes; - n_outbytes = s->ustats.outbytes; + ip_vs_read_cpu_stats(&s->kstats, s->cpustats); /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns) << 9; - e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps) >> 2; - - rate = (n_inpkts - e->last_inpkts) << 9; - e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps) >> 2; - - rate = (n_outpkts - e->last_outpkts) << 9; - e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps) >> 2; - - rate = (n_inbytes - e->last_inbytes) << 4; - e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps) >> 2; - - rate = (n_outbytes - e->last_outbytes) << 4; - e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps) >> 2; + rate = (s->kstats.conns - e->last_conns) << 9; + e->last_conns = s->kstats.conns; + e->cps += ((s64)rate - (s64)e->cps) >> 2; + + rate = (s->kstats.inpkts - e->last_inpkts) << 9; + e->last_inpkts = s->kstats.inpkts; + e->inpps += ((s64)rate - (s64)e->inpps) >> 2; + + rate = (s->kstats.outpkts - e->last_outpkts) << 9; + e->last_outpkts = s->kstats.outpkts; + e->outpps += ((s64)rate - (s64)e->outpps) >> 2; + + /* scaled by 2^5, but divided 2 seconds */ + rate = (s->kstats.inbytes - e->last_inbytes) << 4; + e->last_inbytes = s->kstats.inbytes; + e->inbps += ((s64)rate - (s64)e->inbps) >> 2; + + rate = (s->kstats.outbytes - e->last_outbytes) << 4; + e->last_outbytes = s->kstats.outbytes; + e->outbps += ((s64)rate - (s64)e->outbps) >> 2; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); @@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - struct ip_vs_stats_user *u = &stats->ustats; + struct ip_vs_kstats *k = &stats->kstats; /* reset counters, caller must hold the stats->lock lock */ - est->last_inbytes = u->inbytes; - est->last_outbytes = u->outbytes; - est->last_conns = u->conns; - est->last_inpkts = u->inpkts; - est->last_outpkts = u->outpkts; + est->last_inbytes = k->inbytes; + est->last_outbytes = k->outbytes; + est->last_conns = k->conns; + est->last_inpkts = k->inpkts; + est->last_outpkts = k->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; @@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) } /* Get decoded rates */ -void ip_vs_read_estimator(struct ip_vs_stats_user *dst, - struct ip_vs_stats *stats) +void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) { struct ip_vs_estimator *e = &stats->est; -- cgit v1.2.3 From e3eb3250d84ef97b766312345774367b6a310db8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 5 Feb 2015 14:41:52 +0000 Subject: drm: add support for tiled/compressed/etc modifier in addfb2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In DRM/KMS we are lacking a good way to deal with tiled/compressed formats. Especially in the case of dmabuf/prime buffer sharing, where we cannot always rely on under-the-hood flags passed to driver specific gem-create ioctl to pass around these extra flags. The proposal is to add a per-plane format modifier. This allows to, if necessary, use different tiling patters for sub-sampled planes, etc. The format modifiers are added at the end of the ioctl struct, so for legacy userspace it will be zero padded. v1: original v1.5: increase modifier to 64b v2: Incorporate review comments from the big thread, plus a few more. - Add a getcap so that userspace doesn't have to jump through hoops. - Allow modifiers only when a flag is set. That way drivers know when they're dealing with old userspace and need to fish out e.g. tiling from other information. - After rolling out checks for ->modifier to all drivers I've decided that this is way too fragile and needs an explicit opt-in flag. So do that instead. - Add a define (just for documentation really) for the "NONE" modifier. Imo we don't need to add mask #defines since drivers really should only do exact matches against values defined with fourcc_mod_code. - Drop the Samsung tiling modifier on Rob's request since he's not yet sure whether that one is accurate. v3: - Also add a new ->modifier[] array to struct drm_framebuffer and fill it in drm_helper_mode_fill_fb_struct. Requested by Tvrkto Uruslin. - Remove TODO in comment and add code comment that modifiers should be properly documented, requested by Rob. Cc: Rob Clark Cc: Tvrtko Ursulin Cc: Laurent Pinchart Cc: Daniel Stone Cc: Ville Syrjälä Cc: Michel Dänzer Signed-off-by: Rob Clark (v1.5) Reviewed-by: Rob Clark Reviewed-by: Daniel Stone Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 14 +++++++++++++- drivers/gpu/drm/drm_crtc_helper.c | 1 + drivers/gpu/drm/drm_ioctl.c | 3 +++ include/drm/drm_crtc.h | 4 ++++ include/uapi/drm/drm.h | 1 + include/uapi/drm/drm_fourcc.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/drm/drm_mode.h | 9 +++++++++ 7 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ad2934ba0bd2..b15d720eda4c 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3314,6 +3314,12 @@ static int framebuffer_check(const struct drm_mode_fb_cmd2 *r) DRM_DEBUG_KMS("bad pitch %u for plane %d\n", r->pitches[i], i); return -EINVAL; } + + if (r->modifier[i] && !(r->flags & DRM_MODE_FB_MODIFIERS)) { + DRM_DEBUG_KMS("bad fb modifier %llu for plane %d\n", + r->modifier[i], i); + return -EINVAL; + } } return 0; @@ -3327,7 +3333,7 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, struct drm_framebuffer *fb; int ret; - if (r->flags & ~DRM_MODE_FB_INTERLACED) { + if (r->flags & ~(DRM_MODE_FB_INTERLACED | DRM_MODE_FB_MODIFIERS)) { DRM_DEBUG_KMS("bad framebuffer flags 0x%08x\n", r->flags); return ERR_PTR(-EINVAL); } @@ -3343,6 +3349,12 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, return ERR_PTR(-EINVAL); } + if (r->flags & DRM_MODE_FB_MODIFIERS && + !dev->mode_config.allow_fb_modifiers) { + DRM_DEBUG_KMS("driver does not support fb modifiers\n"); + return ERR_PTR(-EINVAL); + } + ret = framebuffer_check(r); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index b1979e7bdc88..3053aab968f9 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -837,6 +837,7 @@ void drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, for (i = 0; i < 4; i++) { fb->pitches[i] = mode_cmd->pitches[i]; fb->offsets[i] = mode_cmd->offsets[i]; + fb->modifier[i] = mode_cmd->modifier[i]; } drm_fb_get_bpp_depth(mode_cmd->pixel_format, &fb->depth, &fb->bits_per_pixel); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 3785d66721f2..a6d773a61c2d 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -321,6 +321,9 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ else req->value = 64; break; + case DRM_CAP_ADDFB2_MODIFIERS: + req->value = dev->mode_config.allow_fb_modifiers; + break; default: return -EINVAL; } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 0ebd9286b332..cee54c45eba9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -202,6 +202,7 @@ struct drm_framebuffer { const struct drm_framebuffer_funcs *funcs; unsigned int pitches[4]; unsigned int offsets[4]; + uint64_t modifier[4]; unsigned int width; unsigned int height; /* depth can be 15 or 16 */ @@ -1152,6 +1153,9 @@ struct drm_mode_config { /* whether async page flip is supported or not */ bool async_page_flip; + /* whether the driver supports fb modifiers */ + bool allow_fb_modifiers; + /* cursor size */ uint32_t cursor_width, cursor_height; }; diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 01b2d6d0e355..ff6ef62d084b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -630,6 +630,7 @@ struct drm_gem_open { */ #define DRM_CAP_CURSOR_WIDTH 0x8 #define DRM_CAP_CURSOR_HEIGHT 0x9 +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 646ae5f39f42..622109677747 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -132,4 +132,36 @@ #define DRM_FORMAT_YUV444 fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */ #define DRM_FORMAT_YVU444 fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */ + +/* + * Format Modifiers: + * + * Format modifiers describe, typically, a re-ordering or modification + * of the data in a plane of an FB. This can be used to express tiled/ + * swizzled formats, or compression, or a combination of the two. + * + * The upper 8 bits of the format modifier are a vendor-id as assigned + * below. The lower 56 bits are assigned as vendor sees fit. + */ + +/* Vendor Ids: */ +#define DRM_FORMAT_MOD_NONE 0 +#define DRM_FORMAT_MOD_VENDOR_INTEL 0x01 +#define DRM_FORMAT_MOD_VENDOR_AMD 0x02 +#define DRM_FORMAT_MOD_VENDOR_NV 0x03 +#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 +#define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 +/* add more to the end as needed */ + +#define fourcc_mod_code(vendor, val) \ + ((((u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffL)) + +/* + * Format Modifier tokens: + * + * When adding a new token please document the layout with a code comment, + * similar to the fourcc codes above. drm_fourcc.h is considered the + * authoritative source for all of these. + */ + #endif /* DRM_FOURCC_H */ diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index ca788e01dab2..dbeba949462a 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -336,6 +336,7 @@ struct drm_mode_fb_cmd { }; #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ +#define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ struct drm_mode_fb_cmd2 { __u32 fb_id; @@ -356,10 +357,18 @@ struct drm_mode_fb_cmd2 { * So it would consist of Y as offsets[0] and UV as * offsets[1]. Note that offsets[0] will generally * be 0 (but this is not required). + * + * To accommodate tiled, compressed, etc formats, a per-plane + * modifier can be specified. The default value of zero + * indicates "native" format as specified by the fourcc. + * Vendor specific modifier token. This allows, for example, + * different tiling/swizzling pattern on different planes. + * See discussion above of DRM_FORMAT_MOD_xxx. */ __u32 handles[4]; __u32 pitches[4]; /* pitch for each plane */ __u32 offsets[4]; /* offset of each plane */ + __u64 modifier[4]; /* ie, tiling, compressed (per plane) */ }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -- cgit v1.2.3 From 93b81f5102a7cd270a305c2741b17c8d44bb0629 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Feb 2015 17:16:05 +0000 Subject: drm/i915: Add tiled framebuffer modifiers To be used from the new addfb2 extension. v2: - Drop Intel-specific untiled modfier. - Move to drm_fourcc.h. - Document layouts a bit and denote them as platform-specific and not useable for cross-driver sharing. - Add Y-tiling for completeness. - Drop special docstring markers to avoid confusing kerneldoc. v3: Give Y-tiling a unique idea, noticed by Tvrtko. Signed-off-by: Tvrtko Ursulin (v1) Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + include/uapi/drm/drm_fourcc.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3da3dc527315..99b25928df2f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -31,6 +31,7 @@ #define _I915_DRV_H_ #include +#include #include "i915_reg.h" #include "intel_bios.h" diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 622109677747..4837c3d2319a 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -164,4 +164,35 @@ * authoritative source for all of these. */ +/* Intel framebuffer modifiers */ + +/* + * Intel X-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out row-major, with + * a platform-dependent stride. On top of that the memory can apply + * platform-depending swizzling of some higher address bits into bit6. + * + * This format is highly platforms specific and not useful for cross-driver + * sharing. It exists since on a given platform it does uniquely identify the + * layout in a simple way for i915-specific userspace. + */ +#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) + +/* + * Intel Y-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out in OWORD (16 bytes) + * chunks column-major, with a platform-dependent height. On top of that the + * memory can apply platform-depending swizzling of some higher address bits + * into bit6. + * + * This format is highly platforms specific and not useful for cross-driver + * sharing. It exists since on a given platform it does uniquely identify the + * layout in a simple way for i915-specific userspace. + */ +#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) + #endif /* DRM_FOURCC_H */ -- cgit v1.2.3 From e31a0ba7df6ce21ac4ed58c4182ec12ca8fd78fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 2 Jan 2015 12:18:23 -0300 Subject: [media] media: Fix DVB devnode representation at media controller The previous provision for DVB media controller support were to define an ID (likely meaning the adapter number) for the DVB devnodes. This is just plain wrong. Just like V4L, DVB devices (and any other device node)) are uniquely identified via a (major, minor) tuple. This is enough to uniquely identify a devnode, no matter what API it implements. So, before we go too far, let's mark the old v4l, fb, dvb and alsa "devnode" info as deprecated, and just call it as "dev". We can latter add fields specific to each API if needed. As we don't want to break compilation on already existing apps, let's just keep the old definitions as-is, adding a note that those are deprecated at media-entity.h. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dev.c | 4 ++-- drivers/media/v4l2-core/v4l2-device.c | 4 ++-- include/media/media-entity.h | 12 +----------- include/uapi/linux/media.h | 15 +++++++++++++++ 4 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 86bb93fd7db8..d89d5cb465d9 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -943,8 +943,8 @@ int __video_register_device(struct video_device *vdev, int type, int nr, vdev->vfl_type != VFL_TYPE_SUBDEV) { vdev->entity.type = MEDIA_ENT_T_DEVNODE_V4L; vdev->entity.name = vdev->name; - vdev->entity.info.v4l.major = VIDEO_MAJOR; - vdev->entity.info.v4l.minor = vdev->minor; + vdev->entity.info.dev.major = VIDEO_MAJOR; + vdev->entity.info.dev.minor = vdev->minor; ret = media_device_register_entity(vdev->v4l2_dev->mdev, &vdev->entity); if (ret < 0) diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c index 015f92aab44a..204cc67c84e8 100644 --- a/drivers/media/v4l2-core/v4l2-device.c +++ b/drivers/media/v4l2-core/v4l2-device.c @@ -248,8 +248,8 @@ int v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev) goto clean_up; } #if defined(CONFIG_MEDIA_CONTROLLER) - sd->entity.info.v4l.major = VIDEO_MAJOR; - sd->entity.info.v4l.minor = vdev->minor; + sd->entity.info.dev.major = VIDEO_MAJOR; + sd->entity.info.dev.minor = vdev->minor; #endif sd->devnode = vdev; } diff --git a/include/media/media-entity.h b/include/media/media-entity.h index e00459185d20..d6d74bcfe183 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -87,17 +87,7 @@ struct media_entity { struct { u32 major; u32 minor; - } v4l; - struct { - u32 major; - u32 minor; - } fb; - struct { - u32 card; - u32 device; - u32 subdevice; - } alsa; - int dvb; + } dev; /* Sub-device specifications */ /* Nothing needed yet */ diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index d847c760e8f0..418f4fec391a 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -75,6 +75,20 @@ struct media_entity_desc { union { /* Node specifications */ + struct { + __u32 major; + __u32 minor; + } dev; + +#if 1 + /* + * DEPRECATED: previous node specifications. Kept just to + * avoid breaking compilation, but media_entity_desc.dev + * should be used instead. In particular, alsa and dvb + * fields below are wrong: for all devnodes, there should + * be just major/minor inside the struct, as this is enough + * to represent any devnode, no matter what type. + */ struct { __u32 major; __u32 minor; @@ -89,6 +103,7 @@ struct media_entity_desc { __u32 subdevice; } alsa; int dvb; +#endif /* Sub-device specifications */ /* Nothing needed yet */ -- cgit v1.2.3 From 1d20f9f6330c988505e51f5010656978fd70cd0c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 3 Jan 2015 11:09:39 -0300 Subject: [media] media: add new types for DVB devnodes Most of the DVB subdevs have already their own devnode. Add support for them at the media controller API. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index 418f4fec391a..4c8f26243252 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -50,7 +50,14 @@ struct media_device_info { #define MEDIA_ENT_T_DEVNODE_V4L (MEDIA_ENT_T_DEVNODE + 1) #define MEDIA_ENT_T_DEVNODE_FB (MEDIA_ENT_T_DEVNODE + 2) #define MEDIA_ENT_T_DEVNODE_ALSA (MEDIA_ENT_T_DEVNODE + 3) -#define MEDIA_ENT_T_DEVNODE_DVB (MEDIA_ENT_T_DEVNODE + 4) +#define MEDIA_ENT_T_DEVNODE_DVB_FE (MEDIA_ENT_T_DEVNODE + 4) +#define MEDIA_ENT_T_DEVNODE_DVB_DEMUX (MEDIA_ENT_T_DEVNODE + 5) +#define MEDIA_ENT_T_DEVNODE_DVB_DVR (MEDIA_ENT_T_DEVNODE + 6) +#define MEDIA_ENT_T_DEVNODE_DVB_CA (MEDIA_ENT_T_DEVNODE + 7) +#define MEDIA_ENT_T_DEVNODE_DVB_NET (MEDIA_ENT_T_DEVNODE + 8) + +/* Legacy symbol. Use it to avoid userspace compilation breakages */ +#define MEDIA_ENT_T_DEVNODE_DVB MEDIA_ENT_T_DEVNODE_DVB_FE #define MEDIA_ENT_T_V4L2_SUBDEV (2 << MEDIA_ENT_TYPE_SHIFT) #define MEDIA_ENT_T_V4L2_SUBDEV_SENSOR (MEDIA_ENT_T_V4L2_SUBDEV + 1) -- cgit v1.2.3 From 91b0f3a06e432d01c66cd05973628a2ec3bde9ed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 26 Jan 2015 08:53:18 -0300 Subject: [media] media: add a subdev type for tuner Add MEDIA_ENT_T_V4L2_SUBDEV_TUNER to represent the V4L2 (and dvb) tuner subdevices. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index 4c8f26243252..52cc2a6b19b7 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -66,6 +66,8 @@ struct media_device_info { /* A converter of analogue video to its digital representation. */ #define MEDIA_ENT_T_V4L2_SUBDEV_DECODER (MEDIA_ENT_T_V4L2_SUBDEV + 4) +#define MEDIA_ENT_T_V4L2_SUBDEV_TUNER (MEDIA_ENT_T_V4L2_SUBDEV + 5) + #define MEDIA_ENT_FL_DEFAULT (1 << 0) struct media_entity_desc { -- cgit v1.2.3 From 76a3aeac2f6c02ecf065fa9baa279dd54bf2d819 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 17 Feb 2015 00:05:27 +0100 Subject: hdspm.h: include stdint.h in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes compilation error: sound/hdspm.h:43:2: error: unknown type name ‘uint32_t’ Signed-off-by: Mikko Rapeli Signed-off-by: Takashi Iwai --- include/uapi/sound/hdspm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/hdspm.h b/include/uapi/sound/hdspm.h index b357f1a5e29c..5737332d38f2 100644 --- a/include/uapi/sound/hdspm.h +++ b/include/uapi/sound/hdspm.h @@ -20,6 +20,12 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#ifdef __KERNEL__ +#include +#else +#include +#endif + /* Maximum channels is 64 even on 56Mode you have 64playbacks to matrix */ #define HDSPM_MAX_CHANNELS 64 -- cgit v1.2.3 From 4bebf7091aa15ec60edf0dcbc654410a87ca21fe Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 17 Feb 2015 00:05:38 +0100 Subject: include/uapi/sound/asound.h: include stdlib.h in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes compiler errors like: error: field ‘trigger_tstamp’ has incomplete type error: invalid application of ‘sizeof’ to incomplete t ype ‘struct timespec’ Signed-off-by: Mikko Rapeli Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 1f23cd635957..1fe3f4f3d696 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -25,6 +25,9 @@ #include +#ifndef __KERNEL__ +#include +#endif /* * protocol version -- cgit v1.2.3 From bbf91c1c5bfc00c2961f15657359ee7e87de4269 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 17 Feb 2015 00:05:42 +0100 Subject: include/uapi/sound/asequencer.h: include sound/asound.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: unknown type name ‘snd_seq_client_type_t’ snd_seq_client_type_t type; /* client type */ Signed-off-by: Mikko Rapeli Signed-off-by: Takashi Iwai --- include/uapi/sound/asequencer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h index 09c8a00ea503..5a5fa4956ebd 100644 --- a/include/uapi/sound/asequencer.h +++ b/include/uapi/sound/asequencer.h @@ -22,6 +22,7 @@ #ifndef _UAPI__SOUND_ASEQUENCER_H #define _UAPI__SOUND_ASEQUENCER_H +#include /** version of the sequencer */ #define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION (1, 0, 1) -- cgit v1.2.3 From b9956409c281931c74ba8d0a2b61a98076a58602 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Tue, 17 Feb 2015 00:05:43 +0100 Subject: include/uapi/sound/emu10k1.h: include sound/asound.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation errors like: error: field ‘id’ has incomplete type struct snd_ctl_elem_id id; /* full control ID definition */ Signed-off-by: Mikko Rapeli Signed-off-by: Takashi Iwai --- include/uapi/sound/emu10k1.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/emu10k1.h b/include/uapi/sound/emu10k1.h index d1bbaf78457a..ec1535bb6aed 100644 --- a/include/uapi/sound/emu10k1.h +++ b/include/uapi/sound/emu10k1.h @@ -23,8 +23,7 @@ #define _UAPI__SOUND_EMU10K1_H #include - - +#include /* * ---- FX8010 ---- -- cgit v1.2.3 From 27ac905b8f88d28779b0661809286b5ba2817d37 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:57 -0500 Subject: perf/x86/intel: Reduce lbr_sel_map[] size The index of lbr_sel_map is bit value of perf branch_sample_type. PERF_SAMPLE_BRANCH_MAX is 1024 at present, so each lbr_sel_map uses 4096 bytes. By using bit shift as index, we can reduce lbr_sel_map size to 40 bytes. This patch defines 'bit shift' for branch types, and use 'bit shift' to define lbr_sel_maps. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: jolsa@redhat.com Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/1415156173-10035-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 4 +++ arch/x86/kernel/cpu/perf_event_intel_lbr.c | 54 ++++++++++++++---------------- include/uapi/linux/perf_event.h | 49 +++++++++++++++++++-------- 3 files changed, 64 insertions(+), 43 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index df525d2be1e8..0c45b22495dc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -515,6 +515,10 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +enum { + PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT, +}; + #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 58f1a94beaf0..8bc078f43a82 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -69,10 +69,6 @@ static enum { #define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_ABORT (1ULL << 61) -#define for_each_branch_sample_type(x) \ - for ((x) = PERF_SAMPLE_BRANCH_USER; \ - (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) - /* * x86control flow change classification * x86control flow changes include branches, interrupts, traps, faults @@ -403,14 +399,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) { struct hw_perf_event_extra *reg; u64 br_type = event->attr.branch_sample_type; - u64 mask = 0, m; - u64 v; + u64 mask = 0, v; + int i; - for_each_branch_sample_type(m) { - if (!(br_type & m)) + for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) { + if (!(br_type & (1ULL << i))) continue; - v = x86_pmu.lbr_sel_map[m]; + v = x86_pmu.lbr_sel_map[i]; if (v == LBR_NOT_SUPP) return -EOPNOTSUPP; @@ -678,35 +674,35 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) /* * Map interface branch filters onto LBR filters */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP - | LBR_IND_JMP | LBR_FAR, +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches */ - [PERF_SAMPLE_BRANCH_ANY_CALL] = + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, - [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL - | LBR_FAR, - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; /* core */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9b79abbd1ab8..e46b93279e3d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -152,21 +152,42 @@ enum perf_event_sample_format { * The branch types can be combined, however BRANCH_ANY covers all types * of branches and therefore it supersedes all the other types. */ +enum perf_branch_sample_type_shift { + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ +}; + enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ - PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - - PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = + 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = + 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = + 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = + 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ -- cgit v1.2.3 From 2c44b1936bb3b135a3fac8b3493394d42e51cf70 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Nov 2014 10:36:45 +0100 Subject: perf/x86/intel: Expose LBR callstack to user space tooling With LBR call stack feature enable, there are three callchain options. Enable the 3rd callchain option (LBR callstack) to user space tooling. Signed-off-by: Peter Zijlstra (Intel) Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Andy Lutomirski Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/20141105093759.GQ10501@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 8 -------- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 8 ++++---- include/uapi/linux/perf_event.h | 16 ++++++++-------- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 69c26b396cf4..a371d27d6795 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -523,14 +523,6 @@ struct x86_perf_task_context { int lbr_stack_state; }; -enum { - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT, - PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE, - - PERF_SAMPLE_BRANCH_CALL_STACK = - 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, -}; - #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 084f2eb20c8b..0473874109cb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -537,7 +537,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) u64 mask = 0, v; int i; - for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) { + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { if (!(br_type & (1ULL << i))) continue; @@ -821,7 +821,7 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) /* * Map interface branch filters onto LBR filters */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, @@ -840,7 +840,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, @@ -852,7 +852,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e46b93279e3d..1e3cd07cf76e 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -166,6 +166,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -175,18 +177,16 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = - 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = - 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = - 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = - 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; -- cgit v1.2.3 From 229d043096ea8e58829d37d35767afeac15997f5 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 13 Feb 2015 15:14:03 -0600 Subject: ALSA: core: selection of audio_tstamp type and accuracy reports Audio timestamps can be extracted from sample counters, wall clocks, PHC clocks (Ethernet AVB), on-demand synchronized snapshots. This patch provides the ability to report timestamping capabilities, select timestamp types and retrieve timestamp accuracy, if supported. Details can be found in Documentations/sound/alsa/timestamping.txt This functionality is introduced by reclaiming the reserved_aligned field introduced by commit9c7066aef4a5eb8e4063de28f06c508bf6f2963a in snd_pcm_status to provide userspace with selection/query capabilities. Additional driver_tstamp and audio_tstamp_accuracy fields are also added. snd_pcm_mmap_status remains a read-only structure with only the audio timestamp value accessible from user space. The selection of audio timestamp type is done through snd_pcm_status only This commit does not impact ABI and does not impact the default behavior. By default audio timestamp is aligned with hw_pointer and reports the DMA position. Backwards compatibility is handled by using the HDAudio wall clock for playback and the hw_ptr for all other cases. For timestamp selection a new STATUS_EXT ioctl is introduced with read/write parameters. Alsa-lib will be modified to make use of STATUS_EXT. Signed-off-by: Pierre-Louis Bossart Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/timestamping.txt | 200 ++++++++++++++++++++++++++++++ include/sound/pcm.h | 60 +++++++++ include/uapi/sound/asound.h | 34 ++++- 3 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 Documentation/sound/alsa/timestamping.txt (limited to 'include/uapi') diff --git a/Documentation/sound/alsa/timestamping.txt b/Documentation/sound/alsa/timestamping.txt new file mode 100644 index 000000000000..0b191a23f534 --- /dev/null +++ b/Documentation/sound/alsa/timestamping.txt @@ -0,0 +1,200 @@ +The ALSA API can provide two different system timestamps: + +- Trigger_tstamp is the system time snapshot taken when the .trigger +callback is invoked. This snapshot is taken by the ALSA core in the +general case, but specific hardware may have synchronization +capabilities or conversely may only be able to provide a correct +estimate with a delay. In the latter two cases, the low-level driver +is responsible for updating the trigger_tstamp at the most appropriate +and precise moment. Applications should not rely solely on the first +trigger_tstamp but update their internal calculations if the driver +provides a refined estimate with a delay. + +- tstamp is the current system timestamp updated during the last +event or application query. +The difference (tstamp - trigger_tstamp) defines the elapsed time. + +The ALSA API provides reports two basic pieces of information, avail +and delay, which combined with the trigger and current system +timestamps allow for applications to keep track of the 'fullness' of +the ring buffer and the amount of queued samples. + +The use of these different pointers and time information depends on +the application needs: + +- 'avail' reports how much can be written in the ring buffer +- 'delay' reports the time it will take to hear a new sample after all +queued samples have been played out. + +When timestamps are enabled, the avail/delay information is reported +along with a snapshot of system time. Applications can select from +CLOCK_REALTIME (NTP corrections including going backwards), +CLOCK_MONOTONIC (NTP corrections but never going backwards), +CLOCK_MONOTIC_RAW (without NTP corrections) and change the mode +dynamically with sw_params + + +The ALSA API also provide an audio_tstamp which reflects the passage +of time as measured by different components of audio hardware. In +ascii-art, this could be represented as follows (for the playback +case): + + +--------------------------------------------------------------> time + ^ ^ ^ ^ ^ + | | | | | + analog link dma app FullBuffer + time time time time time + | | | | | + |< codec delay >|<--hw delay-->||<---avail->| + |<----------------- delay---------------------->| | + |<----ring buffer length---->| + +The analog time is taken at the last stage of the playback, as close +as possible to the actual transducer + +The link time is taken at the output of the SOC/chipset as the samples +are pushed on a link. The link time can be directly measured if +supported in hardware by sample counters or wallclocks (e.g. with +HDAudio 24MHz or PTP clock for networked solutions) or indirectly +estimated (e.g. with the frame counter in USB). + +The DMA time is measured using counters - typically the least reliable +of all measurements due to the bursty natured of DMA transfers. + +The app time corresponds to the time tracked by an application after +writing in the ring buffer. + +The application can query what the hardware supports, define which +audio time it wants reported by selecting the relevant settings in +audio_tstamp_config fields, get an estimate of the timestamp +accuracy. It can also request the delay-to-analog be included in the +measurement. Direct access to the link time is very interesting on +platforms that provide an embedded DSP; measuring directly the link +time with dedicated hardware, possibly synchronized with system time, +removes the need to keep track of internal DSP processing times and +latency. + +In case the application requests an audio tstamp that is not supported +in hardware/low-level driver, the type is overridden as DEFAULT and the +timestamp will report the DMA time based on the hw_pointer value. + +For backwards compatibility with previous implementations that did not +provide timestamp selection, with a zero-valued COMPAT timestamp type +the results will default to the HDAudio wall clock for playback +streams and to the DMA time (hw_ptr) in all other cases. + +The audio timestamp accuracy can be returned to user-space, so that +appropriate decisions are made: + +- for dma time (default), the granularity of the transfers can be + inferred from the steps between updates and in turn provide + information on how much the application pointer can be rewound + safely. + +- the link time can be used to track long-term drifts between audio + and system time using the (tstamp-trigger_tstamp)/audio_tstamp + ratio, the precision helps define how much smoothing/low-pass + filtering is required. The link time can be either reset on startup + or reported as is (the latter being useful to compare progress of + different streams - but may require the wallclock to be always + running and not wrap-around during idle periods). If supported in + hardware, the absolute link time could also be used to define a + precise start time (patches WIP) + +- including the delay in the audio timestamp may + counter-intuitively not increase the precision of timestamps, e.g. if a + codec includes variable-latency DSP processing or a chain of + hardware components the delay is typically not known with precision. + +The accuracy is reported in nanosecond units (using an unsigned 32-bit +word), which gives a max precision of 4.29s, more than enough for +audio applications... + +Due to the varied nature of timestamping needs, even for a single +application, the audio_tstamp_config can be changed dynamically. In +the STATUS ioctl, the parameters are read-only and do not allow for +any application selection. To work around this limitation without +impacting legacy applications, a new STATUS_EXT ioctl is introduced +with read/write parameters. ALSA-lib will be modified to make use of +STATUS_EXT and effectively deprecate STATUS. + +The ALSA API only allows for a single audio timestamp to be reported +at a time. This is a conscious design decision, reading the audio +timestamps from hardware registers or from IPC takes time, the more +timestamps are read the more imprecise the combined measurements +are. To avoid any interpretation issues, a single (system, audio) +timestamp is reported. Applications that need different timestamps +will be required to issue multiple queries and perform an +interpolation of the results + +In some hardware-specific configuration, the system timestamp is +latched by a low-level audio subsytem, and the information provided +back to the driver. Due to potential delays in the communication with +the hardware, there is a risk of misalignment with the avail and delay +information. To make sure applications are not confused, a +driver_timestamp field is added in the snd_pcm_status structure; this +timestamp shows when the information is put together by the driver +before returning from the STATUS and STATUS_EXT ioctl. in most cases +this driver_timestamp will be identical to the regular system tstamp. + +Examples of typestamping with HDaudio: + +1. DMA timestamp, no compensation for DMA+analog delay +$ ./audio_time -p --ts_type=1 +playback: systime: 341121338 nsec, audio time 342000000 nsec, systime delta -878662 +playback: systime: 426236663 nsec, audio time 427187500 nsec, systime delta -950837 +playback: systime: 597080580 nsec, audio time 598000000 nsec, systime delta -919420 +playback: systime: 682059782 nsec, audio time 683020833 nsec, systime delta -961051 +playback: systime: 852896415 nsec, audio time 853854166 nsec, systime delta -957751 +playback: systime: 937903344 nsec, audio time 938854166 nsec, systime delta -950822 + +2. DMA timestamp, compensation for DMA+analog delay +$ ./audio_time -p --ts_type=1 -d +playback: systime: 341053347 nsec, audio time 341062500 nsec, systime delta -9153 +playback: systime: 426072447 nsec, audio time 426062500 nsec, systime delta 9947 +playback: systime: 596899518 nsec, audio time 596895833 nsec, systime delta 3685 +playback: systime: 681915317 nsec, audio time 681916666 nsec, systime delta -1349 +playback: systime: 852741306 nsec, audio time 852750000 nsec, systime delta -8694 + +3. link timestamp, compensation for DMA+analog delay +$ ./audio_time -p --ts_type=2 -d +playback: systime: 341060004 nsec, audio time 341062791 nsec, systime delta -2787 +playback: systime: 426242074 nsec, audio time 426244875 nsec, systime delta -2801 +playback: systime: 597080992 nsec, audio time 597084583 nsec, systime delta -3591 +playback: systime: 682084512 nsec, audio time 682088291 nsec, systime delta -3779 +playback: systime: 852936229 nsec, audio time 852940916 nsec, systime delta -4687 +playback: systime: 938107562 nsec, audio time 938112708 nsec, systime delta -5146 + +Example 1 shows that the timestamp at the DMA level is close to 1ms +ahead of the actual playback time (as a side time this sort of +measurement can help define rewind safeguards). Compensating for the +DMA-link delay in example 2 helps remove the hardware buffering abut +the information is still very jittery, with up to one sample of +error. In example 3 where the timestamps are measured with the link +wallclock, the timestamps show a monotonic behavior and a lower +dispersion. + +Example 3 and 4 are with USB audio class. Example 3 shows a high +offset between audio time and system time due to buffering. Example 4 +shows how compensating for the delay exposes a 1ms accuracy (due to +the use of the frame counter by the driver) + +Example 3: DMA timestamp, no compensation for delay, delta of ~5ms +$ ./audio_time -p -Dhw:1 -t1 +playback: systime: 120174019 nsec, audio time 125000000 nsec, systime delta -4825981 +playback: systime: 245041136 nsec, audio time 250000000 nsec, systime delta -4958864 +playback: systime: 370106088 nsec, audio time 375000000 nsec, systime delta -4893912 +playback: systime: 495040065 nsec, audio time 500000000 nsec, systime delta -4959935 +playback: systime: 620038179 nsec, audio time 625000000 nsec, systime delta -4961821 +playback: systime: 745087741 nsec, audio time 750000000 nsec, systime delta -4912259 +playback: systime: 870037336 nsec, audio time 875000000 nsec, systime delta -4962664 + +Example 4: DMA timestamp, compensation for delay, delay of ~1ms +$ ./audio_time -p -Dhw:1 -t1 -d +playback: systime: 120190520 nsec, audio time 120000000 nsec, systime delta 190520 +playback: systime: 245036740 nsec, audio time 244000000 nsec, systime delta 1036740 +playback: systime: 370034081 nsec, audio time 369000000 nsec, systime delta 1034081 +playback: systime: 495159907 nsec, audio time 494000000 nsec, systime delta 1159907 +playback: systime: 620098824 nsec, audio time 619000000 nsec, systime delta 1098824 +playback: systime: 745031847 nsec, audio time 744000000 nsec, systime delta 1031847 diff --git a/include/sound/pcm.h b/include/sound/pcm.h index c0ddb7e69c28..60f0e48f7905 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -60,6 +60,9 @@ struct snd_pcm_hardware { struct snd_pcm_substream; +struct snd_pcm_audio_tstamp_config; /* definitions further down */ +struct snd_pcm_audio_tstamp_report; + struct snd_pcm_ops { int (*open)(struct snd_pcm_substream *substream); int (*close)(struct snd_pcm_substream *substream); @@ -281,6 +284,58 @@ struct snd_pcm_hw_constraint_ranges { struct snd_pcm_hwptr_log; +/* + * userspace-provided audio timestamp config to kernel, + * structure is for internal use only and filled with dedicated unpack routine + */ +struct snd_pcm_audio_tstamp_config { + /* 5 of max 16 bits used */ + u32 type_requested:4; + u32 report_delay:1; /* add total delay to A/D or D/A */ +}; + +static inline void snd_pcm_unpack_audio_tstamp_config(__u32 data, + struct snd_pcm_audio_tstamp_config *config) +{ + config->type_requested = data & 0xF; + config->report_delay = (data >> 4) & 1; +} + +/* + * kernel-provided audio timestamp report to user-space + * structure is for internal use only and read by dedicated pack routine + */ +struct snd_pcm_audio_tstamp_report { + /* 6 of max 16 bits used for bit-fields */ + + /* for backwards compatibility */ + u32 valid:1; + + /* actual type if hardware could not support requested timestamp */ + u32 actual_type:4; + + /* accuracy represented in ns units */ + u32 accuracy_report:1; /* 0 if accuracy unknown, 1 if accuracy field is valid */ + u32 accuracy; /* up to 4.29s, will be packed in separate field */ +}; + +static inline void snd_pcm_pack_audio_tstamp_report(__u32 *data, __u32 *accuracy, + const struct snd_pcm_audio_tstamp_report *report) +{ + u32 tmp; + + tmp = report->accuracy_report; + tmp <<= 4; + tmp |= report->actual_type; + tmp <<= 1; + tmp |= report->valid; + + *data &= 0xffff; /* zero-clear MSBs */ + *data |= (tmp << 16); + *accuracy = report->accuracy; +} + + struct snd_pcm_runtime { /* -- Status -- */ struct snd_pcm_substream *trigger_master; @@ -361,6 +416,11 @@ struct snd_pcm_runtime { struct snd_dma_buffer *dma_buffer_p; /* allocated buffer */ + /* -- audio timestamp config -- */ + struct snd_pcm_audio_tstamp_config audio_tstamp_config; + struct snd_pcm_audio_tstamp_report audio_tstamp_report; + struct timespec driver_tstamp; + #if defined(CONFIG_SND_PCM_OSS) || defined(CONFIG_SND_PCM_OSS_MODULE) /* -- OSS things -- */ struct snd_pcm_oss_runtime oss; diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 0e88e7a0f0eb..acef4e4d2735 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -267,10 +267,17 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */ #define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm support some kind of sync go */ #define SNDRV_PCM_INFO_NO_PERIOD_WAKEUP 0x00800000 /* period wakeup can be disabled */ -#define SNDRV_PCM_INFO_HAS_WALL_CLOCK 0x01000000 /* has audio wall clock for audio/system time sync */ +#define SNDRV_PCM_INFO_HAS_WALL_CLOCK 0x01000000 /* (Deprecated)has audio wall clock for audio/system time sync */ +#define SNDRV_PCM_INFO_HAS_LINK_ATIME 0x01000000 /* report hardware link audio time, reset on startup */ +#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ +#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ +#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ + #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ + + typedef int __bitwise snd_pcm_state_t; #define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */ #define SNDRV_PCM_STATE_SETUP ((__force snd_pcm_state_t) 1) /* stream has a setup */ @@ -408,6 +415,22 @@ struct snd_pcm_channel_info { unsigned int step; /* samples distance in bits */ }; +enum { + /* + * first definition for backwards compatibility only, + * maps to wallclock/link time for HDAudio playback and DEFAULT/DMA time for everything else + */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT = 0, + + /* timestamp definitions */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT = 1, /* DMA time, reported as per hw_ptr */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK = 2, /* link time reported by sample or wallclock counter, reset on startup */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ABSOLUTE = 3, /* link time reported by sample or wallclock counter, not reset on startup */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ESTIMATED = 4, /* link time estimated indirectly */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED = 5, /* link time synchronized with system time */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED +}; + struct snd_pcm_status { snd_pcm_state_t state; /* stream state */ struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */ @@ -419,9 +442,11 @@ struct snd_pcm_status { snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */ snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */ snd_pcm_state_t suspended_state; /* suspended stream state */ - __u32 reserved_alignment; /* must be filled with zero */ - struct timespec audio_tstamp; /* from sample counter or wall clock */ - unsigned char reserved[56-sizeof(struct timespec)]; /* must be filled with zero */ + __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ + struct timespec audio_tstamp; /* sample counter, wall clock, PHC or on-demand sync'ed */ + struct timespec driver_tstamp; /* useful in case reference system tstamp is reported with delay */ + __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ + unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */ }; struct snd_pcm_mmap_status { @@ -534,6 +559,7 @@ enum { #define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t) #define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22) #define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr) +#define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status) #define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info) #define SNDRV_PCM_IOCTL_PREPARE _IO('A', 0x40) #define SNDRV_PCM_IOCTL_RESET _IO('A', 0x41) -- cgit v1.2.3 From c72638bdaabe9ea4b09003b9db7e1754f472fbed Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 13 Feb 2015 15:14:09 -0600 Subject: ALSA: bump PCM protocol to 2.0.13 Bump PCM protocol to enable use of STATUS_EXT ioctls, older apps will still use STATUS and audio timestamp configuration is not supported (backwards compatible behavior). Signed-off-by: Pierre-Louis Bossart Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index acef4e4d2735..3d46e9a0cd2e 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -140,7 +140,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 12) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 13) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; -- cgit v1.2.3 From ca8895d9bb41e743271c42a4438a296de891b73b Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Wed, 26 Nov 2014 12:22:03 -0600 Subject: Return MD_SB_CLUSTERED if mddev is clustered Signed-off-by: Goldwyn Rodrigues --- drivers/md/md.c | 3 +++ include/uapi/linux/raid/md_p.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/md/md.c b/drivers/md/md.c index 3387f940140b..5ed57688e5c5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5634,6 +5634,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg) info.state = (1<bitmap && mddev->bitmap_info.offset) info.state |= (1< Date: Wed, 29 Oct 2014 18:51:31 -0500 Subject: Add new disk to clustered array Algorithm: 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues ioctl(ADD_NEW_DISC with disc.state set to MD_DISK_CLUSTER_ADD) 2. Node 1 sends NEWDISK with uuid and slot number 3. Other nodes issue kobject_uevent_env with uuid and slot number (Steps 4,5 could be a udev rule) 4. In userspace, the node searches for the disk, perhaps using blkid -t SUB_UUID="" 5. Other nodes issue either of the following depending on whether the disk was found: ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and disc.number set to slot number) ioctl(CLUSTERED_DISK_NACK) 6. Other nodes drop lock on no-new-devs (CR) if device is found 7. Node 1 attempts EX lock on no-new-devs 8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk as SpareLocal 9. If not (get no-new-dev lock), it fails the operation and sends METADATA_UPDATED 10. Other nodes understand if the device is added or not by reading the superblock again after receiving the METADATA_UPDATED message. Signed-off-by: Lidong Zhong Signed-off-by: Goldwyn Rodrigues --- drivers/md/md-cluster.c | 104 ++++++++++++++++++++++++++++++++++++++++- drivers/md/md-cluster.h | 4 ++ drivers/md/md.c | 52 +++++++++++++++++++-- drivers/md/md.h | 5 ++ drivers/md/raid1.c | 1 + include/uapi/linux/raid/md_p.h | 6 +++ include/uapi/linux/raid/md_u.h | 1 + 7 files changed, 169 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index d85a6ca4443e..03e521a9ca7d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -12,11 +12,13 @@ #include #include #include +#include #include "md.h" #include "bitmap.h" #include "md-cluster.h" #define LVB_SIZE 64 +#define NEW_DEV_TIMEOUT 5000 struct dlm_lock_resource { dlm_lockspace_t *ls; @@ -56,19 +58,25 @@ struct md_cluster_info { struct dlm_lock_resource *ack_lockres; struct dlm_lock_resource *message_lockres; struct dlm_lock_resource *token_lockres; + struct dlm_lock_resource *no_new_dev_lockres; struct md_thread *recv_thread; + struct completion newdisk_completion; }; enum msg_type { METADATA_UPDATED = 0, RESYNCING, + NEWDISK, }; struct cluster_msg { int type; int slot; + /* TODO: Unionize this for smaller footprint */ sector_t low; sector_t high; + char uuid[16]; + int raid_slot; }; static void sync_ast(void *arg) @@ -358,13 +366,41 @@ static void process_suspend_info(struct md_cluster_info *cinfo, spin_unlock_irq(&cinfo->suspend_lock); } +static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) +{ + char disk_uuid[64]; + struct md_cluster_info *cinfo = mddev->cluster_info; + char event_name[] = "EVENT=ADD_DEVICE"; + char raid_slot[16]; + char *envp[] = {event_name, disk_uuid, raid_slot, NULL}; + int len; + + len = snprintf(disk_uuid, 64, "DEVICE_UUID="); + pretty_uuid(disk_uuid + len, cmsg->uuid); + snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot); + pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot); + init_completion(&cinfo->newdisk_completion); + kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp); + wait_for_completion_timeout(&cinfo->newdisk_completion, + NEW_DEV_TIMEOUT); +} + + +static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + + md_reload_sb(mddev); + dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); +} + static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) { switch (msg->type) { case METADATA_UPDATED: pr_info("%s: %d Received message: METADATA_UPDATE from %d\n", __func__, __LINE__, msg->slot); - md_reload_sb(mddev); + process_metadata_update(mddev, msg); break; case RESYNCING: pr_info("%s: %d Received message: RESYNCING from %d\n", @@ -372,6 +408,10 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) process_suspend_info(mddev->cluster_info, msg->slot, msg->low, msg->high); break; + case NEWDISK: + pr_info("%s: %d Received message: NEWDISK from %d\n", + __func__, __LINE__, msg->slot); + process_add_new_disk(mddev, msg); }; } @@ -593,10 +633,18 @@ static int join(struct mddev *mddev, int nodes) cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); if (!cinfo->ack_lockres) goto err; + cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); + if (!cinfo->no_new_dev_lockres) + goto err; + /* get sync CR lock on ACK. */ if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", ret); + /* get sync CR lock on no-new-dev. */ + if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) + pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); + pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); @@ -621,6 +669,7 @@ err: lockres_free(cinfo->message_lockres); lockres_free(cinfo->token_lockres); lockres_free(cinfo->ack_lockres); + lockres_free(cinfo->no_new_dev_lockres); lockres_free(cinfo->bitmap_lockres); lockres_free(cinfo->sb_lock); if (cinfo->lockspace) @@ -642,6 +691,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->message_lockres); lockres_free(cinfo->token_lockres); lockres_free(cinfo->ack_lockres); + lockres_free(cinfo->no_new_dev_lockres); lockres_free(cinfo->sb_lock); lockres_free(cinfo->bitmap_lockres); dlm_release_lockspace(cinfo->lockspace, 2); @@ -742,6 +792,55 @@ out: return ret; } +static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + struct cluster_msg cmsg; + int ret = 0; + struct mdp_superblock_1 *sb = page_address(rdev->sb_page); + char *uuid = sb->device_uuid; + + memset(&cmsg, 0, sizeof(cmsg)); + cmsg.type = cpu_to_le32(NEWDISK); + memcpy(cmsg.uuid, uuid, 16); + cmsg.raid_slot = rdev->desc_nr; + lock_comm(cinfo); + ret = __sendmsg(cinfo, &cmsg); + if (ret) + return ret; + cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; + ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); + cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; + /* Some node does not "see" the device */ + if (ret == -EAGAIN) + ret = -ENOENT; + else + dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); + return ret; +} + +static int add_new_disk_finish(struct mddev *mddev) +{ + struct cluster_msg cmsg; + struct md_cluster_info *cinfo = mddev->cluster_info; + int ret; + /* Write sb and inform others */ + md_update_sb(mddev, 1); + cmsg.type = METADATA_UPDATED; + ret = __sendmsg(cinfo, &cmsg); + unlock_comm(cinfo); + return ret; +} + +static void new_disk_ack(struct mddev *mddev, bool ack) +{ + struct md_cluster_info *cinfo = mddev->cluster_info; + + if (ack) + dlm_unlock_sync(cinfo->no_new_dev_lockres); + complete(&cinfo->newdisk_completion); +} + static struct md_cluster_operations cluster_ops = { .join = join, .leave = leave, @@ -753,6 +852,9 @@ static struct md_cluster_operations cluster_ops = { .metadata_update_finish = metadata_update_finish, .metadata_update_cancel = metadata_update_cancel, .area_resyncing = area_resyncing, + .add_new_disk_start = add_new_disk_start, + .add_new_disk_finish = add_new_disk_finish, + .new_disk_ack = new_disk_ack, }; static int __init cluster_init(void) diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 03785402afaa..60d7e58964f5 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h @@ -6,6 +6,7 @@ #include "md.h" struct mddev; +struct md_rdev; struct md_cluster_operations { int (*join)(struct mddev *mddev, int nodes); @@ -18,6 +19,9 @@ struct md_cluster_operations { int (*metadata_update_finish)(struct mddev *mddev); int (*metadata_update_cancel)(struct mddev *mddev); int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); + int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); + int (*add_new_disk_finish)(struct mddev *mddev); + void (*new_disk_ack)(struct mddev *mddev, bool ack); }; #endif /* _MD_CLUSTER_H */ diff --git a/drivers/md/md.c b/drivers/md/md.c index fe0484648de4..5703c2e89f3a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2210,7 +2210,7 @@ static void sync_sbs(struct mddev *mddev, int nospares) } } -static void md_update_sb(struct mddev *mddev, int force_change) +void md_update_sb(struct mddev *mddev, int force_change) { struct md_rdev *rdev; int sync_req; @@ -2371,6 +2371,7 @@ repeat: wake_up(&rdev->blocked_wait); } } +EXPORT_SYMBOL(md_update_sb); /* words written to sysfs files may, or may not, be \n terminated. * We want to accept with case. For this we use cmd_match. @@ -3151,7 +3152,7 @@ static void analyze_sbs(struct mddev *mddev) kick_rdev_from_array(rdev); continue; } - if (rdev != freshest) + if (rdev != freshest) { if (super_types[mddev->major_version]. validate_super(mddev, rdev)) { printk(KERN_WARNING "md: kicking non-fresh %s" @@ -3160,6 +3161,15 @@ static void analyze_sbs(struct mddev *mddev) kick_rdev_from_array(rdev); continue; } + /* No device should have a Candidate flag + * when reading devices + */ + if (test_bit(Candidate, &rdev->flags)) { + pr_info("md: kicking Cluster Candidate %s from array!\n", + bdevname(rdev->bdev, b)); + kick_rdev_from_array(rdev); + } + } if (mddev->level == LEVEL_MULTIPATH) { rdev->desc_nr = i++; rdev->raid_disk = rdev->desc_nr; @@ -5655,7 +5665,6 @@ static int get_array_info(struct mddev *mddev, void __user *arg) info.state |= (1<major,info->minor); + if (mddev_is_clustered(mddev) && + !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) { + pr_err("%s: Cannot add to clustered mddev. Try --cluster-add\n", + mdname(mddev)); + return -EINVAL; + } + if (info->major != MAJOR(dev) || info->minor != MINOR(dev)) return -EOVERFLOW; @@ -5830,6 +5846,25 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) else clear_bit(WriteMostly, &rdev->flags); + /* + * check whether the device shows up in other nodes + */ + if (mddev_is_clustered(mddev)) { + if (info->state & (1 << MD_DISK_CANDIDATE)) { + /* Through --cluster-confirm */ + set_bit(Candidate, &rdev->flags); + md_cluster_ops->new_disk_ack(mddev, true); + } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { + /* --add initiated by this node */ + err = md_cluster_ops->add_new_disk_start(mddev, rdev); + if (err) { + md_cluster_ops->add_new_disk_finish(mddev); + export_rdev(rdev); + return err; + } + } + } + rdev->raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); if (!err && !mddev->pers->hot_remove_disk) { @@ -5855,6 +5890,9 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) if (!err) md_new_event(mddev); md_wakeup_thread(mddev->thread); + if (mddev_is_clustered(mddev) && + (info->state & (1 << MD_DISK_CLUSTER_ADD))) + md_cluster_ops->add_new_disk_finish(mddev); return err; } @@ -6456,6 +6494,7 @@ static inline bool md_ioctl_valid(unsigned int cmd) case SET_DISK_FAULTY: case STOP_ARRAY: case STOP_ARRAY_RO: + case CLUSTERED_DISK_NACK: return true; default: return false; @@ -6728,6 +6767,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto unlock; } + case CLUSTERED_DISK_NACK: + if (mddev_is_clustered(mddev)) + md_cluster_ops->new_disk_ack(mddev, false); + else + err = -EINVAL; + goto unlock; + case HOT_ADD_DISK: err = hot_add_disk(mddev, new_decode_dev(arg)); goto unlock; diff --git a/drivers/md/md.h b/drivers/md/md.h index bfebcfdf54e6..6dc0ce09f50c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -171,6 +171,10 @@ enum flag_bits { * a want_replacement device with same * raid_disk number. */ + Candidate, /* For clustered environments only: + * This device is seen locally but not + * by the whole cluster + */ }; #define BB_LEN_MASK (0x00000000000001FFULL) @@ -666,6 +670,7 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); extern void md_reload_sb(struct mddev *mddev); +extern void md_update_sb(struct mddev *mddev, int force); static inline int mddev_check_plugged(struct mddev *mddev) { return !!blk_check_plugged(md_unplug, mddev, diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f70d74189d16..53ed5d48308f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1571,6 +1571,7 @@ static int raid1_spare_active(struct mddev *mddev) struct md_rdev *rdev = conf->mirrors[i].rdev; struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; if (repl + && !test_bit(Candidate, &repl->flags) && repl->recovery_offset == MaxSector && !test_bit(Faulty, &repl->flags) && !test_and_set_bit(In_sync, &repl->flags)) { diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 643489d33e68..2ae6131e69a5 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -78,6 +78,12 @@ #define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ #define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ #define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ +#define MD_DISK_CLUSTER_ADD 4 /* Initiate a disk add across the cluster + * For clustered enviroments only. + */ +#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed + * For clustered enviroments only. + */ #define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. * read requests will only be sent here in diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 74e7c60c4716..1cb8aa6850b5 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -62,6 +62,7 @@ #define STOP_ARRAY _IO (MD_MAJOR, 0x32) #define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) #define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) +#define CLUSTERED_DISK_NACK _IO (MD_MAJOR, 0x35) /* 63 partitions with the alternate major number (mdp) */ #define MdpMinorShift 6 -- cgit v1.2.3 From 7e182f78988404717e27aed5a9d4150631b323f4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 25 Feb 2015 12:05:13 -0300 Subject: [media] media.h: mark alsa struct in media_entity_desc as TODO The alsa struct in struct media_entity_desc is now marked as deprecated. However, the alsa struct should remain as it is since it cannot be replaced by a simple major/minor device node description. The alsa struct was designed to be used as an alsa card description so V4L2 drivers could use this to expose the alsa card that they create to carry the captured audio. Such a card is not just a PCM device, but also needs to contain the alsa subdevice information, and it may map to multiple devices, e.g. a PCM and a mixer device, such as the au0828 usb stick creates. This is exactly as intended and this cannot and should not be replaced by a simple major/minor. However, whether this information is in the right form for an ALSA device such that it can handle udev renaming rules as well is another matter. So mark this alsa struct as TODO and document the problems involved. Updated the documentation as well to reflect this and to add the 'major' and 'minor' field documentation. Updated the documentation to clearly state that struct dev is to be used for (sub-)devices that create a single device node. Other devices need their own structure here. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../DocBook/media/v4l/media-ioc-enum-entities.xml | 16 ++++++++++++- include/uapi/linux/media.h | 26 +++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml index cbf307f21a63..5872f8bbf774 100644 --- a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml +++ b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml @@ -145,7 +145,21 @@ struct dev - Valid for (sub-)devices that create devnodes. + Valid for (sub-)devices that create a single device node. + + + + + __u32 + major + Device node major number. + + + + + __u32 + minor + Device node minor number. diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index 52cc2a6b19b7..4e816be3de39 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -89,6 +89,27 @@ struct media_entity_desc { __u32 minor; } dev; +#if 1 + /* + * TODO: this shouldn't have been added without + * actual drivers that use this. When the first real driver + * appears that sets this information, special attention + * should be given whether this information is 1) enough, and + * 2) can deal with udev rules that rename devices. The struct + * dev would not be sufficient for this since that does not + * contain the subdevice information. In addition, struct dev + * can only refer to a single device, and not to multiple (e.g. + * pcm and mixer devices). + * + * So for now mark this as a to do. + */ + struct { + __u32 card; + __u32 device; + __u32 subdevice; + } alsa; +#endif + #if 1 /* * DEPRECATED: previous node specifications. Kept just to @@ -106,11 +127,6 @@ struct media_entity_desc { __u32 major; __u32 minor; } fb; - struct { - __u32 card; - __u32 device; - __u32 subdevice; - } alsa; int dvb; #endif -- cgit v1.2.3 From b5ff6e1637b683d5996ae11ac29afe406c0bee90 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 27 Feb 2015 11:15:17 +0000 Subject: drm/i915/skl: Add new displayable tiling formats Starting with SKL display engine can scan out Y, and newly introduced Yf tiling formats so add the latter to the frame buffer modifier space. v2: Definitions moved to drm_fourcc.h. v3: Try to document the format better. Signed-off-by: Tvrtko Ursulin Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- include/uapi/drm/drm_fourcc.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4837c3d2319a..b35418b20dcf 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -195,4 +195,19 @@ */ #define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) +/* + * Intel Yf-tiling layout + * + * This is a tiled layout using 4Kb tiles in row-major layout. + * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which + * are arranged in four groups (two wide, two high) with column-major layout. + * Each group therefore consits out of four 256 byte units, which are also laid + * out as 2x2 column-major. + * 256 byte units are made out of four 64 byte blocks of pixels, producing + * either a square block or a 2:1 unit. + * 64 byte blocks of pixels contain four pixel rows of 16 bytes, where the width + * in pixel depends on the pixel depth. + */ +#define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) + #endif /* DRM_FOURCC_H */ -- cgit v1.2.3 From 93a714d6b53d87872e552dbb273544bdeaaf6e12 Mon Sep 17 00:00:00 2001 From: Madhu Challa Date: Wed, 25 Feb 2015 09:58:35 -0800 Subject: multicast: Extend ip address command to enable multicast group join/leave on Joining multicast group on ethernet level via "ip maddr" command would not work if we have an Ethernet switch that does igmp snooping since the switch would not replicate multicast packets on ports that did not have IGMP reports for the multicast addresses. Linux vxlan interfaces created via "ip link add vxlan" have the group option that enables then to do the required join. By extending ip address command with option "autojoin" we can get similar functionality for openvswitch vxlan interfaces as well as other tunneling mechanisms that need to receive multicast traffic. The kernel code is structured similar to how the vxlan driver does a group join / leave. example: ip address add 224.1.1.10/24 dev eth5 autojoin ip address del 224.1.1.10/24 dev eth5 Signed-off-by: Madhu Challa Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + include/uapi/linux/if_addr.h | 1 + net/ipv4/devinet.c | 31 +++++++++++++++++++++++++++++++ net/ipv4/igmp.c | 13 +++++++++++++ net/ipv6/addrconf.c | 38 +++++++++++++++++++++++++++++++++++--- net/ipv6/mcast.c | 20 ++++++++++++++++---- 7 files changed, 98 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index dbe225478adb..1b26c6c3fd7c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,7 @@ struct netns_ipv4 { struct sock *fibnl; struct sock * __percpu *icmp_sk; + struct sock *mc_autojoin_sk; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 69ae41f2098c..ca0db12cd089 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -67,6 +67,7 @@ struct netns_ipv6 { struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; + struct sock *mc_autojoin_sk; #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES struct mr6_table *mrt6; diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dea10a87dfd1..40fdfea39714 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -50,6 +50,7 @@ enum { #define IFA_F_PERMANENT 0x80 #define IFA_F_MANAGETEMPADDR 0x100 #define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3a8985c94581..5105759e4e00 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } +static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +{ + struct ip_mreqn mreq = { + .imr_multiaddr.s_addr = ifa->ifa_address, + .imr_ifindex = ifa->ifa_dev->dev->ifindex, + }; + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = __ip_mc_join_group(sk, &mreq); + else + ret = __ip_mc_leave_group(sk, &mreq); + release_sock(sk); + + return ret; +} + static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -584,6 +604,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; + if (ipv4_is_multicast(ifa->ifa_address)) + ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * userspace already relies on not having to provide this. */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); + if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, + true, ifa); + + if (ret < 0) { + inet_free_ifa(ifa); + return ret; + } + } return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } else { inet_free_ifa(ifa); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4b1172d73e03..5cb1ef4ce292 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE #include @@ -2740,6 +2741,7 @@ static const struct file_operations igmp_mcf_seq_fops = { static int __net_init igmp_net_init(struct net *net) { struct proc_dir_entry *pde; + int err; pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); if (!pde) @@ -2748,8 +2750,18 @@ static int __net_init igmp_net_init(struct net *net) &igmp_mcf_seq_fops); if (!pde) goto out_mcfilter; + err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, + SOCK_DGRAM, 0, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n", + err); + goto out_sock; + } + return 0; +out_sock: + remove_proc_entry("mcfilter", net->proc_net); out_mcfilter: remove_proc_entry("igmp", net->proc_net); out_igmp: @@ -2760,6 +2772,7 @@ static void __net_exit igmp_net_exit(struct net *net) { remove_proc_entry("mcfilter", net->proc_net); remove_proc_entry("igmp", net->proc_net); + inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk); } static struct pernet_operations igmp_net_ops = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98e4a63d72bb..783bccfcc060 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2464,6 +2464,23 @@ err_exit: return err; } +static int ipv6_mc_config(struct sock *sk, bool join, + const struct in6_addr *addr, int ifindex) +{ + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = __ipv6_sock_mc_join(sk, ifindex, addr); + else + ret = __ipv6_sock_mc_drop(sk, ifindex, addr); + release_sock(sk); + + return ret; +} + /* * Manual configuration of address on an interface */ @@ -2476,10 +2493,10 @@ static int inet6_addr_add(struct net *net, int ifindex, struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + unsigned long timeout; + clock_t expires; int scope; u32 flags; - clock_t expires; - unsigned long timeout; ASSERT_RTNL(); @@ -2501,6 +2518,14 @@ static int inet6_addr_add(struct net *net, int ifindex, if (IS_ERR(idev)) return PTR_ERR(idev); + if (ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, + true, pfx, ifindex); + + if (ret < 0) + return ret; + } + scope = ipv6_addr_scope(pfx); timeout = addrconf_timeout_fixup(valid_lft, HZ); @@ -2542,6 +2567,9 @@ static int inet6_addr_add(struct net *net, int ifindex, in6_ifa_put(ifp); addrconf_verify_rtnl(); return 0; + } else if (ifa_flags & IFA_F_MCAUTOJOIN) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, ifindex); } return PTR_ERR(ifp); @@ -2578,6 +2606,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, jiffies); ipv6_del_addr(ifp); addrconf_verify_rtnl(); + if (ipv6_addr_is_multicast(pfx)) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, dev->ifindex); + } return 0; } } @@ -3945,7 +3977,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e4955d019734..1dd1fedff9f4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2929,20 +2929,32 @@ static int __net_init igmp6_net_init(struct net *net) inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, + SOCK_RAW, IPPROTO_ICMPV6, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", + err); + goto out_sock_create; + } + err = igmp6_proc_init(net); if (err) - goto out_sock_create; -out: - return err; + goto out_sock_create_autojoin; + + return 0; +out_sock_create_autojoin: + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); out_sock_create: inet_ctl_sock_destroy(net->ipv6.igmp_sk); - goto out; +out: + return err; } static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); igmp6_proc_exit(net); } -- cgit v1.2.3 From 31f909a2c0abfc1a1a76b2981d28ac85d33210e7 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 24 Feb 2015 22:42:16 +0900 Subject: nl/mac80211: allow zero plink timeout to disable STA expiration Both wpa_supplicant and mac80211 have and inactivity timer. By default wpa_supplicant will be timed out in 5 minutes and mac80211's it is 30 minutes. If wpa_supplicant uses a longer timer than mac80211, it will get unexpected disconnection by mac80211. Using 0xffffffff instead as the configured value could solve this w/o changing the code, but due to integer overflow in the expression used this doesn't work. The expression is: (current jiffies) > (frame Rx jiffies + NL80211_MESHCONF_PLINK_TIMEOUT * 250) On 32bit system, the right side would overflow and be a very small value if NL80211_MESHCONF_PLINK_TIMEOUT is sufficiently large, causing unexpectedly early disconnections. Instead allow disabling the inactivity timer to avoid this situation, by passing the (previously invalid and useless) value 0. Signed-off-by: Masashi Honma [reword/rewrap commit log] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- net/mac80211/mesh.c | 3 ++- net/wireless/nl80211.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 68b294e83944..2dcf9bba317c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3092,7 +3092,8 @@ enum nl80211_mesh_power_mode { * * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've * established peering with for longer than this time (in seconds), then - * remove it from the STA's list of peers. Default is 30 minutes. + * remove it from the STA's list of peers. You may set this to 0 to disable + * the removal of the STA. Default is 30 minutes. * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 0c8b2a77d312..acf441ff9f4a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,8 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; - ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); + if (ifmsh->mshcfg.plink_timeout > 0) + ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d78fd8b54515..9c6e23ede5b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5265,7 +5265,7 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff, mask, NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32); if (mask_out) -- cgit v1.2.3 From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:43 +0100 Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the ELF BPF loader where instructions are being loaded that need map fixups. An initial stage loads all maps into the kernel, and later on replaces related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source register and the actual fd as immediate value. The kernel verifier recognizes this keyword and replaces the map fd with a real pointer internally. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 -- include/uapi/linux/bpf.h | 2 ++ samples/bpf/libbpf.h | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/filter.h b/include/linux/filter.h index caac2087a4d5..5e3863d5f666 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -145,8 +145,6 @@ struct bpf_prog_aux; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 - /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..0248180bf2e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -120,6 +120,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_SOCKET_FILTER, }; +#define BPF_PSEUDO_MAP_FD 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 58c5fe1bdba1..a6bb7e9c22c3 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -92,7 +92,9 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ -- cgit v1.2.3 From 96be4325f443dbbfeb37d2a157675ac0736531a1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:46 +0100 Subject: ebpf: add sched_cls_type and map it to sk_filter's verifier ops As discussed recently and at netconf/netdev01, we want to prevent making bpf_verifier_ops registration available for modules, but have them at a controlled place inside the kernel instead. The reason for this is, that out-of-tree modules can go crazy and define and register any verfifier ops they want, doing all sorts of crap, even bypassing available GPLed eBPF helper functions. We don't want to offer such a shiny playground, of course, but keep strict control to ourselves inside the core kernel. This also encourages us to design eBPF user helpers carefully and generically, so they can be shared among various subsystems using eBPF. For the eBPF traffic classifier (cls_bpf), it's a good start to share the same helper facilities as we currently do in eBPF for socket filters. That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus one day if there's a good reason to diverge the set of helper functions from the set available to socket filters, we keep ABI compatibility. In future, we could place all bpf_prog_type_list at a central place, perhaps. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 15 +++++++++++++-- net/core/filter.c | 7 +++++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0248180bf2e2..3fa1af8a58d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_SCHED_CLS, }; #define BPF_PSEUDO_MAP_FD 1 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a28e09c7825d..594d341f04db 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1172,6 +1172,17 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) return 0; } +static bool may_access_skb(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + return true; + default: + return false; + } +} + /* verify safety of LD_ABS|LD_IND instructions: * - they can only appear in the programs where ctx == skb * - since they are wrappers of function calls, they scratch R1-R5 registers, @@ -1194,8 +1205,8 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) struct reg_state *reg; int i, err; - if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { - verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); + if (!may_access_skb(env->prog->aux->prog_type)) { + verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n"); return -EINVAL; } diff --git a/net/core/filter.c b/net/core/filter.c index 741721233166..514d4082f326 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1166,9 +1166,16 @@ static struct bpf_prog_type_list sk_filter_type __read_mostly = { .type = BPF_PROG_TYPE_SOCKET_FILTER, }; +static struct bpf_prog_type_list sched_cls_type __read_mostly = { + .ops = &sk_filter_ops, + .type = BPF_PROG_TYPE_SCHED_CLS, +}; + static int __init register_sk_filter_ops(void) { bpf_register_prog_type(&sk_filter_type); + bpf_register_prog_type(&sched_cls_type); + return 0; } late_initcall(register_sk_filter_ops); -- cgit v1.2.3 From e2e9b6541dd4b31848079da80fe2253daaafb549 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:48 +0100 Subject: cls_bpf: add initial eBPF support for programmable classifiers This work extends the "classic" BPF programmable tc classifier by extending its scope also to native eBPF code! This allows for user space to implement own custom, 'safe' C like classifiers (or whatever other frontend language LLVM et al may provide in future), that can then be compiled with the LLVM eBPF backend to an eBPF elf file. The result of this can be loaded into the kernel via iproute2's tc. In the kernel, they can be JITed on major archs and thus run in native performance. Simple, minimal toy example to demonstrate the workflow: #include #include #include #include "tc_bpf_api.h" __section("classify") int cls_main(struct sk_buff *skb) { return (0x800 << 16) | load_byte(skb, ETH_HLEN + __builtin_offsetof(struct iphdr, tos)); } char __license[] __section("license") = "GPL"; The classifier can then be compiled into eBPF opcodes and loaded via tc, for example: clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o tc filter add dev em1 parent 1: bpf cls.o [...] As it has been demonstrated, the scope can even reach up to a fully fledged flow dissector (similarly as in samples/bpf/sockex2_kern.c). For tc, maps are allowed to be used, but from kernel context only, in other words, eBPF code can keep state across filter invocations. In future, we perhaps may reattach from a different application to those maps e.g., to read out collected statistics/state. Similarly as in socket filters, we may extend functionality for eBPF classifiers over time depending on the use cases. For that purpose, cls_bpf programs are using BPF_PROG_TYPE_SCHED_CLS program type, so we can allow additional functions/accessors (e.g. an ABI compatible offset translation to skb fields/metadata). For an initial cls_bpf support, we allow the same set of helper functions as eBPF socket filters, but we could diverge at some point in time w/o problem. I was wondering whether cls_bpf and act_bpf could share C programs, I can imagine that at some point, we introduce i) further common handlers for both (or even beyond their scope), and/or if truly needed ii) some restricted function space for each of them. Both can be abstracted easily through struct bpf_verifier_ops in future. The context of cls_bpf versus act_bpf is slightly different though: a cls_bpf program will return a specific classid whereas act_bpf a drop/non-drop return code, latter may also in future mangle skbs. That said, we can surely have a "classify" and "action" section in a single object file, or considered mentioned constraint add a possibility of a shared section. The workflow for getting native eBPF running from tc [1] is as follows: for f_bpf, I've added a slightly modified ELF parser code from Alexei's kernel sample, which reads out the LLVM compiled object, sets up maps (and dynamically fixes up map fds) if any, and loads the eBPF instructions all centrally through the bpf syscall. The resulting fd from the loaded program itself is being passed down to cls_bpf, which looks up struct bpf_prog from the fd store, and holds reference, so that it stays available also after tc program lifetime. On tc filter destruction, it will then drop its reference. Moreover, I've also added the optional possibility to annotate an eBPF filter with a name (e.g. path to object file, or something else if preferred) so that when tc dumps currently installed filters, some more context can be given to an admin for a given instance (as opposed to just the file descriptor number). Last but not least, bpf_prog_get() and bpf_prog_put() needed to be exported, so that eBPF can be used from cls_bpf built as a module. Thanks to 60a3b2253c41 ("net: bpf: make eBPF interpreter images read-only") I think this is of no concern since anything wanting to alter eBPF opcode after verification stage would crash the kernel. [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf Signed-off-by: Daniel Borkmann Cc: Jamal Hadi Salim Cc: Jiri Pirko Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 + kernel/bpf/syscall.c | 2 + net/sched/cls_bpf.c | 206 ++++++++++++++++++++++++++++++++----------- 3 files changed, 158 insertions(+), 52 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 25731dfb3fcc..bf08e76bf505 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,6 +397,8 @@ enum { TCA_BPF_CLASSID, TCA_BPF_OPS_LEN, TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, __TCA_BPF_MAX, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d69449acbd0..669719ccc9ee 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -419,6 +419,7 @@ void bpf_prog_put(struct bpf_prog *prog) bpf_prog_free(prog); } } +EXPORT_SYMBOL_GPL(bpf_prog_put); static int bpf_prog_release(struct inode *inode, struct file *filp) { @@ -466,6 +467,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) fdput(f); return prog; } +EXPORT_SYMBOL_GPL(bpf_prog_get); /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD log_buf diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5f3ee9e4b5bf..6f7ed8f8e6ee 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include @@ -24,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); +#define CLS_BPF_NAME_LEN 256 + struct cls_bpf_head { struct list_head plist; u32 hgen; @@ -32,18 +36,24 @@ struct cls_bpf_head { struct cls_bpf_prog { struct bpf_prog *filter; - struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct tcf_result res; struct list_head link; + struct tcf_result res; + struct tcf_exts exts; u32 handle; - u16 bpf_num_ops; + union { + u32 bpf_fd; + u16 bpf_num_ops; + }; + struct sock_filter *bpf_ops; + const char *bpf_name; struct tcf_proto *tp; struct rcu_head rcu; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FD] = { .type = NLA_U32 }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -76,6 +86,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } +static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) +{ + return !prog->bpf_ops; +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -94,8 +109,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); - bpf_prog_destroy(prog->filter); + if (cls_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); + kfree(prog->bpf_name); kfree(prog->bpf_ops); kfree(prog); } @@ -114,6 +133,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); + return 0; } @@ -151,69 +171,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_prog_from_ops(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) { struct sock_filter *bpf_ops; - struct tcf_exts exts; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; u16 bpf_size, bpf_num_ops; - u32 classid; int ret; - if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID]) - return -EINVAL; - - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - return ret; - - classid = nla_get_u32(tb[TCA_BPF_CLASSID]); bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { - ret = -EINVAL; - goto errout; - } + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); - if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { - ret = -EINVAL; - goto errout; - } + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) + return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (bpf_ops == NULL) { - ret = -ENOMEM; - goto errout; - } + if (bpf_ops == NULL) + return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto errout_free; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; + prog->bpf_num_ops = bpf_num_ops; + prog->bpf_name = NULL; + prog->filter = fp; prog->res.classid = classid; + return 0; +} + +static int cls_bpf_prog_from_efd(struct nlattr **tb, + struct cls_bpf_prog *prog, u32 classid) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_CLS) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_BPF_NAME]), + nla_len(tb[TCA_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + prog->bpf_ops = NULL; + prog->bpf_fd = bpf_fd; + prog->bpf_name = name; + + prog->filter = fp; + prog->res.classid = classid; + + return 0; +} + +static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts exts; + bool is_bpf, is_ebpf; + u32 classid; + int ret; + + is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; + is_ebpf = tb[TCA_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_BPF_CLASSID]) + return -EINVAL; + + tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + return ret; + + classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : + cls_bpf_prog_from_efd(tb, prog, classid); + if (ret < 0) { + tcf_exts_destroy(&exts); + return ret; + } + tcf_bind_filter(tp, &prog->res, base); tcf_exts_change(tp, &prog->exts, &exts); return 0; -errout_free: - kfree(bpf_ops); -errout: - tcf_exts_destroy(&exts); - return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -297,11 +369,43 @@ errout: return ret; } +static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *tm) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; - struct nlattr *nest, *nla; + struct nlattr *nest; + int ret; if (prog == NULL) return skb->len; @@ -314,16 +418,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) - goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * - sizeof(struct sock_filter)); - if (nla == NULL) + if (cls_bpf_is_ebpf(prog)) + ret = cls_bpf_dump_ebpf_info(prog, skb); + else + ret = cls_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); - if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; -- cgit v1.2.3 From ffc1199122d83d60ad99f9c55df32feb650b7bff Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Sat, 21 Feb 2015 16:52:39 +0100 Subject: cfg80211: add VHT support for IBSS Add NL80211_EXT_FEATURE_VHT_IBSS flag and VHT support for IBSS. Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/wireless/nl80211.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2dcf9bba317c..8ee31f108407 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4328,11 +4328,13 @@ enum nl80211_feature_flags { /** * enum nl80211_ext_feature_index - bit index of extended features. + * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ enum nl80211_ext_feature_index { + NL80211_EXT_FEATURE_VHT_IBSS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9c6e23ede5b2..66666fdf1c8d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7265,8 +7265,18 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: - if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) - break; + if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + return -EINVAL; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + return -EINVAL; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_VHT_IBSS)) + return -EINVAL; + break; default: return -EINVAL; } -- cgit v1.2.3 From 5fc7432991a86678b38a2d700edbe8bcd29cc579 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Feb 2015 15:32:43 +0100 Subject: nl80211: add notes about userspace API/ABI modifications Add notes about userspace ABI/API modifications, including the fact that we decided that API submissions should come with a driver implementation. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8ee31f108407..90c5aeb3cca7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -25,6 +25,19 @@ * */ +/* + * This header file defines the userspace API to the wireless stack. Please + * be careful not to break things - i.e. don't move anything around or so + * unless you can demonstrate that it breaks neither API nor ABI. + * + * Additions to the API should be accompanied by actual implementations in + * an upstream driver, so that example implementations exist in case there + * are ever concerns about the precise semantics of the API or changes are + * needed, and to ensure that code for dead (no longer implemented) API + * can actually be identified and removed. + * Nonetheless, semantics should also be documented carefully in this file. + */ + #include #define NL80211_GENL_NAME "nl80211" -- cgit v1.2.3 From 03c0566542f4c7a45ce3193f27cbf5700b506c18 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:13:56 -0600 Subject: mpls: Netlink commands to add, remove, and dump routes This change adds two new netlink routing attributes: RTA_VIA and RTA_NEWDST. RTA_VIA specifies the specifies the next machine to send a packet to like RTA_GATEWAY. RTA_VIA differs from RTA_GATEWAY in that it includes the address family of the address of the next machine to send a packet to. Currently the MPLS code supports addresses in AF_INET, AF_INET6 and AF_PACKET. For AF_INET and AF_INET6 the destination mac address is acquired from the neighbour table. For AF_PACKET the destination mac_address is specified in the netlink configuration. I think raw destination mac address support with the family AF_PACKET will prove useful. There is MPLS-TP which is defined to operate on machines that do not support internet packets of any flavor. Further seem to be corner cases where it can be useful. At this point I don't care much either way. RTA_NEWDST specifies the destination address to forward the packet with. MPLS typically changes it's destination address at every hop. For a swap operation RTA_NEWDST is specified with a length of one label. For a push operation RTA_NEWDST is specified with two or more labels. For a pop operation RTA_NEWDST is not specified or equivalently an emtpy RTAN_NEWDST is specified. Those new netlink attributes are used to implement handling of rt-netlink RTM_NEWROUTE, RTM_DELROUTE, and RTM_GETROUTE messages, to maintain the MPLS label table. rtm_to_route_config parses a netlink RTM_NEWROUTE or RTM_DELROUTE message, verify no unhandled attributes or unhandled values are present and sets up the data structures for mpls_route_add and mpls_route_del. I did my best to match up with the existing conventions with the caveats that MPLS addresses are all destination-specific-addresses, and so don't properly have a scope. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 8 ++ net/mpls/af_mpls.c | 229 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 237 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5cc5d66bf519..bad65550ae3e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -303,6 +303,8 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, __RTA_MAX }; @@ -344,6 +346,12 @@ struct rtnexthop { #define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) #define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[0]; +}; + /* RTM_CACHEINFO */ struct rta_cacheinfo { diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 2d6612a10e30..b4d7cec398d2 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -212,6 +212,11 @@ static struct packet_type mpls_packet_type __read_mostly = { .func = mpls_forward, }; +const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + struct mpls_route_config { u32 rc_protocol; u32 rc_ifindex; @@ -410,6 +415,22 @@ static struct notifier_block mpls_dev_notifier = { .notifier_call = mpls_dev_notify, }; +static int nla_put_via(struct sk_buff *skb, + u16 family, const void *addr, int alen) +{ + struct nlattr *nla; + struct rtvia *via; + + nla = nla_reserve(skb, RTA_VIA, alen + 2); + if (!nla) + return -EMSGSIZE; + + via = nla_data(nla); + via->rtvia_family = family; + memcpy(via->rtvia_addr, addr, alen); + return 0; +} + int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]) { @@ -467,6 +488,210 @@ int nla_get_labels(const struct nlattr *nla, return 0; } +static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct mpls_route_config *cfg) +{ + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int index; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); + + if (rtm->rtm_family != AF_MPLS) + goto errout; + if (rtm->rtm_dst_len != 20) + goto errout; + if (rtm->rtm_src_len != 0) + goto errout; + if (rtm->rtm_tos != 0) + goto errout; + if (rtm->rtm_table != RT_TABLE_MAIN) + goto errout; + /* Any value is acceptable for rtm_protocol */ + + /* As mpls uses destination specific addresses + * (or source specific address in the case of multicast) + * all addresses have universal scope. + */ + if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) + goto errout; + if (rtm->rtm_type != RTN_UNICAST) + goto errout; + if (rtm->rtm_flags != 0) + goto errout; + + cfg->rc_label = LABEL_NOT_SPECIFIED; + cfg->rc_protocol = rtm->rtm_protocol; + cfg->rc_nlflags = nlh->nlmsg_flags; + cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; + cfg->rc_nlinfo.nlh = nlh; + cfg->rc_nlinfo.nl_net = sock_net(skb->sk); + + for (index = 0; index <= RTA_MAX; index++) { + struct nlattr *nla = tb[index]; + if (!nla) + continue; + + switch(index) { + case RTA_OIF: + cfg->rc_ifindex = nla_get_u32(nla); + break; + case RTA_NEWDST: + if (nla_get_labels(nla, MAX_NEW_LABELS, + &cfg->rc_output_labels, + cfg->rc_output_label)) + goto errout; + break; + case RTA_DST: + { + u32 label_count; + if (nla_get_labels(nla, 1, &label_count, + &cfg->rc_label)) + goto errout; + + /* The first 16 labels are reserved, and may not be set */ + if (cfg->rc_label < 16) + goto errout; + + break; + } + case RTA_VIA: + { + struct rtvia *via = nla_data(nla); + cfg->rc_via_family = via->rtvia_family; + cfg->rc_via_alen = nla_len(nla) - 2; + if (cfg->rc_via_alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch(cfg->rc_via_family) { + case AF_PACKET: + break; + case AF_INET: + if (cfg->rc_via_alen != 4) + goto errout; + break; + case AF_INET6: + if (cfg->rc_via_alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + default: + /* Unsupported attribute */ + goto errout; + } + } + + err = 0; +errout: + return err; +} + +static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_del(&cfg); +} + + +static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct mpls_route_config cfg; + int err; + + err = rtm_to_route_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return mpls_route_add(&cfg); +} + +static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, + u32 label, struct mpls_route *rt, int flags) +{ + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = AF_MPLS; + rtm->rtm_dst_len = 20; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = rt->rt_protocol; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + + if (rt->rt_labels && + nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) + goto nla_put_failure; + if (nla_put_via(skb, rt->rt_via_family, rt->rt_via, rt->rt_via_alen)) + goto nla_put_failure; + if (rt->rt_dev && nla_put_u32(skb, RTA_OIF, rt->rt_dev->ifindex)) + goto nla_put_failure; + if (nla_put_labels(skb, RTA_DST, 1, &label)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + unsigned int index; + + ASSERT_RTNL(); + + index = cb->args[0]; + if (index < 16) + index = 16; + + for (; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt; + rt = net->mpls.platform_label[index]; + if (!rt) + continue; + + if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + index, rt, NLM_F_MULTI) < 0) + break; + } + cb->args[0] = index; + + return skb->len; +} + static int resize_platform_label_table(struct net *net, size_t limit) { size_t size = sizeof(struct mpls_route *) * limit; @@ -662,6 +887,9 @@ static int __init mpls_init(void) dev_add_pack(&mpls_packet_type); + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); err = 0; out: return err; @@ -674,6 +902,7 @@ module_init(mpls_init); static void __exit mpls_exit(void) { + rtnl_unregister_all(PF_MPLS); dev_remove_pack(&mpls_packet_type); unregister_netdevice_notifier(&mpls_dev_notifier); unregister_pernet_subsys(&mpls_net_ops); -- cgit v1.2.3 From 8de147dc8e2adea82b8a1a2a08fcc983330f6770 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:14:31 -0600 Subject: mpls: Multicast route table change notifications Unlike IPv4 this code notifies on all cases where mpls routes are added or removed and it never automatically removes routes. Avoiding both the userspace confusion that is caused by omitting route updates and the possibility of a flood of netlink traffic when an interface goes doew. For now reserved labels are handled automatically and userspace is not notified. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ net/mpls/af_mpls.c | 60 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index bad65550ae3e..06f75a407f74 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -631,6 +631,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF RTNLGRP_MDB, #define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b4d7cec398d2..75a994a50381 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -36,6 +36,10 @@ struct mpls_route { /* next hop label forwarding entry */ static int zero = 0; static int label_limit = (1 << 20) - 1; +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags); + static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) { struct mpls_route *rt = NULL; @@ -246,6 +250,20 @@ static void mpls_rt_free(struct mpls_route *rt) kfree_rcu(rt, rt_rcu); } +static void mpls_notify_route(struct net *net, unsigned index, + struct mpls_route *old, struct mpls_route *new, + const struct nl_info *info) +{ + struct nlmsghdr *nlh = info ? info->nlh : NULL; + unsigned portid = info ? info->portid : 0; + int event = new ? RTM_NEWROUTE : RTM_DELROUTE; + struct mpls_route *rt = new ? new : old; + unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0; + /* Ignore reserved labels for now */ + if (rt && (index >= 16)) + rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags); +} + static void mpls_route_update(struct net *net, unsigned index, struct net_device *dev, struct mpls_route *new, const struct nl_info *info) @@ -260,6 +278,8 @@ static void mpls_route_update(struct net *net, unsigned index, old = rt; } + mpls_notify_route(net, index, old, new, info); + /* If we removed a route free it now */ mpls_rt_free(old); } @@ -692,6 +712,46 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static inline size_t lfib_nlmsg_size(struct mpls_route *rt) +{ + size_t payload = + NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + + nla_total_size(4); /* RTA_DST */ + if (rt->rt_labels) /* RTA_NEWDST */ + payload += nla_total_size(rt->rt_labels * 4); + if (rt->rt_dev) /* RTA_OIF */ + payload += nla_total_size(4); + return payload; +} + +static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, + struct nlmsghdr *nlh, struct net *net, u32 portid, + unsigned int nlm_flags) +{ + struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); + if (err < 0) { + /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); + + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); +} + static int resize_platform_label_table(struct net *net, size_t limit) { size_t size = sizeof(struct mpls_route *) * limit; -- cgit v1.2.3 From 98fc43864af9e74116eec81c290db048cded15d8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 1 Mar 2015 09:10:13 +0200 Subject: nl80211: prohibit mixing 'any' and regular wowlan triggers If the device supports waking up on 'any' signal - i.e. it continues operating as usual and wakes up the host on pretty much anything that happens, then it makes no sense to also configure the more restricted WoWLAN mode where the device operates more autonomously but also in a more restricted fashion. Currently only cw2100 supports both 'any' and other triggers, but it seems to be broken as it doesn't configure anything to the device, so we can't currently get into a situation where both even can correctly be configured. This is about to change (Intel devices are going to support both and have different behaviour depending on configuration) so make sure the conflicting modes cannot be configured. (It seems that cw2100 advertises 'any' and 'disconnect' as a means of saying that's what it will always do, but that isn't really the way this API was meant to be used nor does it actually mean anything as 'any' always implies 'disconnect' already, and the driver doesn't change device configuration in any way depending on the settings.) Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/wireless/nl80211.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 90c5aeb3cca7..37e7f39441e5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3708,6 +3708,8 @@ struct nl80211_pattern_support { * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put * the chip into a special state -- works best with chips that have * support for low-power operation already (flag) + * Note that this mode is incompatible with all of the others, if + * any others are even supported by the device. * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect * is detected is implementation-specific (flag) * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 01874628ae00..07cef3d7653e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9105,6 +9105,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; bool prev_enabled = rdev->wiphy.wowlan_config; + bool regular = false; if (!wowlan) return -EOPNOTSUPP; @@ -9132,12 +9133,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT)) return -EINVAL; new_triggers.disconnect = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) { if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT)) return -EINVAL; new_triggers.magic_pkt = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED]) @@ -9147,24 +9150,28 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE)) return -EINVAL; new_triggers.gtk_rekey_failure = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) { if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ)) return -EINVAL; new_triggers.eap_identity_req = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) { if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE)) return -EINVAL; new_triggers.four_way_handshake = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) { if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE)) return -EINVAL; new_triggers.rfkill_release = true; + regular = true; } if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { @@ -9173,6 +9180,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; + regular = true; + nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], rem) n_patterns++; @@ -9234,6 +9243,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + regular = true; err = nl80211_parse_wowlan_tcp( rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], &new_triggers); @@ -9242,6 +9252,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) { + regular = true; err = nl80211_parse_wowlan_nd( rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT], &new_triggers); @@ -9249,6 +9260,17 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } + /* The 'any' trigger means the device continues operating more or less + * as in its normal operation mode and wakes up the host on most of the + * normal interrupts (like packet RX, ...) + * It therefore makes little sense to combine with the more constrained + * wakeup trigger modes. + */ + if (new_triggers.any && regular) { + err = -EINVAL; + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; -- cgit v1.2.3 From 1a6ab46fa9c2bc9399694b4856ab7ea19c036485 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 4 Mar 2015 10:56:13 +0900 Subject: ALSA: Fix spelling typo in Documentation/DocBook/alsa-driver-api.xml This patch fix spelling typo found in alsa-driver-api.xml. It is because this file is generated from comments in source files, I have to fix source files. Signed-off-by: Masanari Iida Signed-off-by: Takashi Iwai --- include/sound/compress_driver.h | 4 ++-- include/sound/control.h | 2 +- include/sound/soc.h | 2 +- include/uapi/sound/compress_offload.h | 2 +- sound/core/pcm_dmaengine.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index f48089d364c5..fa1d05512c09 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -70,7 +70,7 @@ struct snd_compr_runtime { * @device: device pointer * @direction: stream direction, playback/recording * @metadata_set: metadata set flag, true when set - * @next_track: has userspace signall next track transistion, true when set + * @next_track: has userspace signal next track transition, true when set * @private_data: pointer to DSP private data */ struct snd_compr_stream { @@ -95,7 +95,7 @@ struct snd_compr_stream { * and the stream properties * @get_params: retrieve the codec parameters, mandatory * @set_metadata: Set the metadata values for a stream - * @get_metadata: retreives the requested metadata values from stream + * @get_metadata: retrieves the requested metadata values from stream * @trigger: Trigger operations like start, pause, resume, drain, stop. * This callback is mandatory * @pointer: Retrieve current h/w pointer information. Mandatory diff --git a/include/sound/control.h b/include/sound/control.h index 75f3054023f7..95aad6d3fd1a 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -227,7 +227,7 @@ snd_ctl_add_slave(struct snd_kcontrol *master, struct snd_kcontrol *slave) * Add a virtual slave control to the given master. * Unlike snd_ctl_add_slave(), the element added via this function * is supposed to have volatile values, and get callback is called - * at each time quried from the master. + * at each time queried from the master. * * When the control peeks the hardware values directly and the value * can be changed by other means than the put callback of the element, diff --git a/include/sound/soc.h b/include/sound/soc.h index 0d1ade195628..cf0bb156d6da 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1469,7 +1469,7 @@ static inline struct snd_soc_codec *snd_soc_kcontrol_codec( } /** - * snd_soc_kcontrol_platform() - Returns the platform that registerd the control + * snd_soc_kcontrol_platform() - Returns the platform that registered the control * @kcontrol: The control for which to get the platform * * Note: This function will only work correctly if the control has been diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 22ed8cb7800b..e00d8cbfc628 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -75,7 +75,7 @@ struct snd_compr_tstamp { /** * struct snd_compr_avail - avail descriptor * @avail: Number of bytes available in ring buffer for writing/reading - * @tstamp: timestamp infomation + * @tstamp: timestamp information */ struct snd_compr_avail { __u64 avail; diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 6542c4083594..fba365a78390 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_request_channel); * * The function should usually be called from the pcm open callback. Note that * this function will use private_data field of the substream's runtime. So it - * is not availabe to your pcm driver implementation. + * is not available to your pcm driver implementation. */ int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, struct dma_chan *chan) @@ -328,7 +328,7 @@ EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open); * This function will request a DMA channel using the passed filter function and * data. The function should usually be called from the pcm open callback. Note * that this function will use private_data field of the substream's runtime. So - * it is not availabe to your pcm driver implementation. + * it is not available to your pcm driver implementation. */ int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, dma_filter_fn filter_fn, void *filter_data) -- cgit v1.2.3 From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 4 Mar 2015 12:54:21 +0200 Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi This extends the design in commit 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") with optional set of rules that are needed to meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The previously added BR_PROXYARP behavior is left as-is and a new BR_PROXYARP_WIFI alternative is added so that this behavior can be configured from user space when required. In addition, this enables proxyarp functionality for unicast ARP requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible to use unicast as well as broadcast for these frames. The key differences in functionality: BR_PROXYARP: - uses the flag on the bridge port on which the request frame was received to determine whether to reply - block bridge port flooding completely on ports that enable proxy ARP BR_PROXYARP_WIFI: - uses the flag on the bridge port to which the target device of the request belongs - block bridge port flooding selectively based on whether the proxyarp functionality replied Signed-off-by: Jouni Malinen Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_link.h | 1 + net/bridge/br_forward.c | 3 +++ net/bridge/br_input.c | 17 ++++++++++------- net/bridge/br_netlink.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 7 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a57bca2ea97e..dad8b00beed2 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -44,6 +44,7 @@ struct br_ip_list { #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) +#define BR_PROXYARP_WIFI BIT(10) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index dfd0bb22e554..756436e1ce89 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -247,6 +247,7 @@ enum { IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */ IFLA_BRPORT_PROXYARP, /* proxy ARP */ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */ + IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f96933a823e3..1238fabff874 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -188,6 +188,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; + if ((p->flags & BR_PROXYARP_WIFI) && + BR_INPUT_SKB_CB(skb)->proxyarp_replied) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2aa7be3a847..052c5ebbc947 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb) } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid) + u16 vid, struct net_bridge_port *p) { struct net_device *dev = br->dev; struct neighbour *n; @@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, u8 *arpptr, *sha; __be32 sip, tip; + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + if (dev->flags & IFF_NOARP) return; @@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } f = __br_fdb_get(br, n->ha, vid); - if (f) + if (f && ((p->flags & BR_PROXYARP) || + (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, sha, n->ha, sha); + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } neigh_release(n); } @@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) { - if (IS_ENABLED(CONFIG_INET) && - p->flags & BR_PROXYARP && - skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid); + if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) + br_do_proxy_arp(skb, br, vid, p); + if (is_broadcast_ether_addr(dest)) { skb2 = skb; unicast = false; } else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c72083968768..8bc6b67457dc 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -143,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || - nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP))) + nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || + nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, + !!(p->flags & BR_PROXYARP_WIFI))) return -EMSGSIZE; return 0; @@ -553,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); + br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index de0919975a25..c32e279c62f8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -305,6 +305,7 @@ struct br_input_skb_cb { #endif u16 frag_max_size; + bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 2de5d91199e8..4905845a94e9 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); +BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_fast_leave, #endif &brport_attr_proxyarp, + &brport_attr_proxyarp_wifi, NULL }; -- cgit v1.2.3 From 1cae565e8b746f484f1ff1b71d2a1c89d7cf0668 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Mar 2015 15:05:36 +0100 Subject: netfilter: nf_tables: limit maximum table name length to 32 bytes Set the same as we use for chain names, it should be enough. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 04188b47629d..a143acafa5d9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -535,7 +535,7 @@ struct nft_table { u64 hgenerator; u32 use; u16 flags; - char name[]; + char name[NFT_TABLE_MAXNAMELEN]; }; /** diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 832bc46db78b..b9783931503b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1,6 +1,7 @@ #ifndef _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H +#define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 199fd0f27b0e..284b20ce566b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { - [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; @@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, if (!try_module_get(afi->owner)) return -EAFNOSUPPORT; - table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) { module_put(afi->owner); return -ENOMEM; } - nla_strlcpy(table->name, name, nla_len(name)); + nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; -- cgit v1.2.3 From d0f91938bede204a343473792529e0db7d599836 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 5 Mar 2015 10:23:49 +0100 Subject: tipc: add ip/udp media type The ip/udp bearer can be configured in a point-to-point mode by specifying both local and remote ip/hostname, or it can be enabled in multicast mode, where links are established to all tipc nodes that have joined the same multicast group. The multicast IP address is generated based on the TIPC network ID, but can be overridden by using another multicast address as remote ip. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 9 + net/tipc/Kconfig | 8 + net/tipc/Makefile | 1 + net/tipc/bearer.c | 13 +- net/tipc/bearer.h | 12 +- net/tipc/msg.h | 2 +- net/tipc/udp_media.c | 442 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 479 insertions(+), 8 deletions(-) create mode 100644 net/tipc/udp_media.c (limited to 'include/uapi') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 8d723824ad69..d4c8f142ba63 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -83,11 +83,20 @@ enum { TIPC_NLA_BEARER_NAME, /* string */ TIPC_NLA_BEARER_PROP, /* nest */ TIPC_NLA_BEARER_DOMAIN, /* u32 */ + TIPC_NLA_BEARER_UDP_OPTS, /* nest */ __TIPC_NLA_BEARER_MAX, TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1 }; +enum { + TIPC_NLA_UDP_UNSPEC, + TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */ + TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */ + + __TIPC_NLA_UDP_MAX, + TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1 +}; /* Socket info */ enum { TIPC_NLA_SOCK_UNSPEC, diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 91c8a8e031db..c25a3a149dc4 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -26,3 +26,11 @@ config TIPC_MEDIA_IB help Saying Y here will enable support for running TIPC on IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + help + Saying Y here will enable support for running TIPC over IP/UDP + bool + default y diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 599b1a540d2b..57e460be4692 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -10,5 +10,6 @@ tipc-y += addr.o bcast.o bearer.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index af6deeb397a8..840db89e4283 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -47,6 +47,9 @@ static struct tipc_media * const media_info_array[] = { ð_media_info, #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, #endif NULL }; @@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * tipc_enable_bearer - enable bearer with the given name */ static int tipc_enable_bearer(struct net *net, const char *name, - u32 disc_domain, u32 priority) + u32 disc_domain, u32 priority, + struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -304,7 +308,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr); + res = m_ptr->enable_media(net, b_ptr, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; @@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio, attrs); if (err) { rtnl_unlock(); return err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 097aff08ad5b..5cad243ee8fc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -41,7 +41,7 @@ #include #define MAX_BEARERS 2 -#define MAX_MEDIA 2 +#define MAX_MEDIA 3 #define MAX_NODES 4096 #define WSIZE 32 @@ -59,6 +59,7 @@ */ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 /** * struct tipc_node_map - set of node identifiers @@ -104,7 +105,8 @@ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + struct nlattr *attr[]); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -183,6 +185,9 @@ extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB extern struct tipc_media ib_media_info; #endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); @@ -197,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c1cc8d7a5d52..fa167846d1ab 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -87,7 +87,7 @@ struct plist; * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields * are word aligned for quicker access */ -#define BUF_HEADROOM LL_MAX_HEADER +#define BUF_HEADROOM (LL_MAX_HEADER + 48) struct tipc_skb_cb { void *handle; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000000..0d10001db40d --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,442 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "core.h" +#include "bearer.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 udp_port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; +}; + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + if (ntohs(ua->proto) == ETH_P_IP) { + if (ipv4_is_multicast(ua->ipv4.s_addr)) + addr->broadcast = 1; + } else if (ntohs(ua->proto) == ETH_P_IPV6) { + if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) + addr->broadcast = 1; + } else { + pr_err("Invalid UDP media address\n"); + } +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + int ttl, err = 0; + struct udp_bearer *ub; + struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct sk_buff *clone; + struct rtable *rt; + + clone = skb_clone(skb, GFP_ATOMIC); + skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + if (htons(dst->proto) == ETH_P_IP) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = clone->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + ttl = ip4_dst_hoplimit(&rt->dst); + err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, + src->udp_port, dst->udp_port, + false, true); + if (err < 0) { + ip_rt_put(rt); + goto tx_error; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + if (err) + goto tx_error; + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6, + &dst->ipv6, 0, ttl, src->udp_port, + dst->udp_port, false); +#endif + } + return err; + +tx_error: + kfree_skb(clone); + return err; +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + kfree_skb(skb); + return 0; + } + + skb_pull(skb, sizeof(struct udphdr)); + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + + if (b) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + if (!ipv4_is_multicast(remote->ipv4.s_addr)) + return 0; + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = __ip_mc_join_group(sk, &mreqn); + } else { + if (!ipv6_addr_is_multicast(&remote->ipv6)) + return 0; + err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + } + return err; +} + +/** + * parse_options - build local/remote addresses from configuration + * @attrs: netlink config data + * @ub: UDP bearer instance + * @local: local bearer IP address/port + * @remote: peer or multicast IP/port + */ +static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, + struct udp_media_addr *local, + struct udp_media_addr *remote) +{ + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct sockaddr_storage *sa_local, *sa_remote; + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { + sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); + sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); + } else { +err: + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { + struct sockaddr_in *ip4; + + ip4 = (struct sockaddr_in *)sa_local; + local->proto = htons(ETH_P_IP); + local->udp_port = ip4->sin_port; + local->ipv4.s_addr = ip4->sin_addr.s_addr; + + ip4 = (struct sockaddr_in *)sa_remote; + remote->proto = htons(ETH_P_IP); + remote->udp_port = ip4->sin_port; + remote->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { + struct sockaddr_in6 *ip6; + + ip6 = (struct sockaddr_in6 *)sa_local; + local->proto = htons(ETH_P_IPV6); + local->udp_port = ip6->sin6_port; + local->ipv6 = ip6->sin6_addr; + ub->ifindex = ip6->sin6_scope_id; + + ip6 = (struct sockaddr_in6 *)sa_remote; + remote->proto = htons(ETH_P_IPV6); + remote->udp_port = ip6->sin6_port; + remote->ipv6 = ip6->sin6_addr; + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr *remote; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {0}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + remote = (struct udp_media_addr *)&b->bcast_addr.value; + memset(remote, 0, sizeof(struct udp_media_addr)); + err = parse_options(attrs, ub, &local, remote); + if (err) + goto err; + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = 1; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (htons(local.proto) == ETH_P_IP) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); +#if IS_ENABLED(CONFIG_IPV6) + } else if (htons(local.proto) == ETH_P_IPV6) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.udp_port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + if (enable_mcast(ub, remote)) + goto err; + return 0; +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; -- cgit v1.2.3 From 37ed9493699cc5dafe1b8725858ef73176fdc9d2 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:13 -0800 Subject: rtnetlink: add RTNH_F_EXTERNAL flag for fib offload Add new RTNH_F_EXTERNAL flag to mark fib entries offloaded externally, for example to a switchdev switch device. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 06f75a407f74..c3722b024e73 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -334,6 +334,7 @@ struct rtnexthop { #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_EXTERNAL 8 /* Route installed externally */ /* Macros to handle hexthops */ -- cgit v1.2.3 From 05050753602626ed4c46271c689929b625f409e7 Mon Sep 17 00:00:00 2001 From: Ilan peer Date: Wed, 4 Mar 2015 00:32:06 -0500 Subject: cfg80211: Add API to change the indoor regulatory setting Previously, the indoor setting configuration assumed that as long as a station interface is connected, the indoor environment setting does not change. However, this assumption is problematic as: - It is possible that a station interface is connected to a mobile AP, e.g., softAP or a P2P GO, where it is possible that both the station and the mobile AP move out of the indoor environment making the indoor setting invalid. In such a case, user space has no way to invalidate the setting. - A station interface disconnection does not necessarily imply that the device is no longer operating in an indoor environment, e.g., it is possible that the station interface is roaming but is still stays indoor. To handle the above, extend the indoor configuration API to allow user space to indicate a change of indoor settings, and allow it to indicate weather it controls the indoor setting, such that: 1. If the user space process explicitly indicates that it is going to control the indoor setting, do not clear the indoor setting internally, unless the socket is released. The user space process should use the NL80211_ATTR_SOCKET_OWNER attribute in the command to state that it is going to control the indoor setting. 2. Reset the indoor setting when restoring the regulatory settings in case it is not owned by a user space process. Based on the above, a user space tool that continuously monitors the indoor settings, i.e., tracking power setting, location etc., can indicate environment changes to the regulatory core. It should be noted that currently user space is the only provided mechanism used to hint to the regulatory core over the indoor/outdoor environment -- while the country IEs do have an environment setting this has been completely ignored by the regulatory core by design for a while now since country IEs typically can contain bogus data. Acked-by: Luis R. Rodriguez Signed-off-by: ArikX Nemtsov Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++ net/wireless/nl80211.c | 19 ++++++++++++++- net/wireless/reg.c | 57 ++++++++++++++++++++++++++++++++++++++++---- net/wireless/reg.h | 15 +++++++++++- 4 files changed, 94 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 37e7f39441e5..ae16ba9cb1e3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1697,6 +1697,10 @@ enum nl80211_commands { * If set during scheduled scan start then the new scan req will be * owned by the netlink socket that created it and the scheduled scan will * be stopped when the socket is closed. + * If set during configuration of regulatory indoor operation then the + * regulatory indoor configuration would be owned by the netlink socket + * that configured the indoor setting, and the indoor operation would be + * cleared when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. @@ -1752,6 +1756,9 @@ enum nl80211_commands { * * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a * WoWLAN net-detect scan) is started, u32 in seconds. + + * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device + * is operating in an indoor environment. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2120,6 +2127,8 @@ enum nl80211_attrs { NL80211_ATTR_SCHED_SCAN_DELAY, + NL80211_ATTR_REG_INDOOR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 07cef3d7653e..b02085301785 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -399,6 +399,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, + [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -4958,7 +4959,10 @@ static int parse_reg_rule(struct nlattr *tb[], static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { char *data = NULL; + bool is_indoor; enum nl80211_user_reg_hint_type user_reg_hint_type; + u32 owner_nlportid; + /* * You should only get this when cfg80211 hasn't yet initialized @@ -4984,7 +4988,15 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); return regulatory_hint_user(data, user_reg_hint_type); case NL80211_USER_REG_HINT_INDOOR: - return regulatory_hint_indoor_user(); + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + owner_nlportid = info->snd_portid; + is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR]; + } else { + owner_nlportid = 0; + is_indoor = true; + } + + return regulatory_hint_indoor(is_indoor, owner_nlportid); default: return -EINVAL; } @@ -12810,6 +12822,11 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_unlock(); + /* + * It is possible that the user space process that is controlling the + * indoor setting disappeared, so notify the regulatory core. + */ + regulatory_netlink_notify(notify->portid); return NOTIFY_OK; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c24c8bf3c988..4239dd408137 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -128,9 +128,12 @@ static int reg_num_devs_support_basehint; * State variable indicating if the platform on which the devices * are attached is operating in an indoor environment. The state variable * is relevant for all registered devices. - * (protected by RTNL) */ static bool reg_is_indoor; +static spinlock_t reg_indoor_lock; + +/* Used to track the userspace process controlling the indoor setting */ +static u32 reg_is_indoor_portid; static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -2288,15 +2291,50 @@ int regulatory_hint_user(const char *alpha2, return 0; } -int regulatory_hint_indoor_user(void) +int regulatory_hint_indoor(bool is_indoor, u32 portid) { + spin_lock(®_indoor_lock); + + /* It is possible that more than one user space process is trying to + * configure the indoor setting. To handle such cases, clear the indoor + * setting in case that some process does not think that the device + * is operating in an indoor environment. In addition, if a user space + * process indicates that it is controlling the indoor setting, save its + * portid, i.e., make it the owner. + */ + reg_is_indoor = is_indoor; + if (reg_is_indoor) { + if (!reg_is_indoor_portid) + reg_is_indoor_portid = portid; + } else { + reg_is_indoor_portid = 0; + } + spin_unlock(®_indoor_lock); - reg_is_indoor = true; + if (!is_indoor) + reg_check_channels(); return 0; } +void regulatory_netlink_notify(u32 portid) +{ + spin_lock(®_indoor_lock); + + if (reg_is_indoor_portid != portid) { + spin_unlock(®_indoor_lock); + return; + } + + reg_is_indoor = false; + reg_is_indoor_portid = 0; + + spin_unlock(®_indoor_lock); + + reg_check_channels(); +} + /* Driver hints */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) { @@ -2464,7 +2502,17 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); - reg_is_indoor = false; + /* + * Clear the indoor setting in case that it is not controlled by user + * space, as otherwise there is no guarantee that the device is still + * operating in an indoor environment. + */ + spin_lock(®_indoor_lock); + if (reg_is_indoor && !reg_is_indoor_portid) { + reg_is_indoor = false; + reg_check_channels(); + } + spin_unlock(®_indoor_lock); reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -3061,6 +3109,7 @@ int __init regulatory_init(void) spin_lock_init(®_requests_lock); spin_lock_init(®_pending_beacons_lock); + spin_lock_init(®_indoor_lock); reg_regdb_size_check(); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4b45d6e61d24..a2c4e16459da 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -25,7 +25,20 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy); int regulatory_hint_user(const char *alpha2, enum nl80211_user_reg_hint_type user_reg_hint_type); -int regulatory_hint_indoor_user(void); + +/** + * regulatory_hint_indoor - hint operation in indoor env. or not + * @is_indoor: if true indicates that user space thinks that the + * device is operating in an indoor environment. + * @portid: the netlink port ID on which the hint was given. + */ +int regulatory_hint_indoor(bool is_indoor, u32 portid); + +/** + * regulatory_netlink_notify - notify on released netlink socket + * @portid: the netlink socket port ID + */ +void regulatory_netlink_notify(u32 portid); void wiphy_regulatory_register(struct wiphy *wiphy); void wiphy_regulatory_deregister(struct wiphy *wiphy); -- cgit v1.2.3 From bfd30caebaba7f00c13eec3c538799202d36d12a Mon Sep 17 00:00:00 2001 From: Stefan Brüns Date: Thu, 5 Mar 2015 23:15:07 -0800 Subject: Input: rename KEY_DIRECTION to KEY_ROTATE_DISPLAY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new name better reflects intended usage (but we are keeping the old name as an alias for compatibility). Signed-off-by: Stefan Brüns Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index a1d7e931ab72..eff1cf18031e 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -368,7 +368,8 @@ struct input_keymap_entry { #define KEY_MSDOS 151 #define KEY_COFFEE 152 /* AL Terminal Lock/Screensaver */ #define KEY_SCREENLOCK KEY_COFFEE -#define KEY_DIRECTION 153 +#define KEY_ROTATE_DISPLAY 153 /* Display orientation for e.g. tablets */ +#define KEY_DIRECTION KEY_ROTATE_DISPLAY #define KEY_CYCLEWINDOWS 154 #define KEY_MAIL 155 #define KEY_BOOKMARKS 156 /* AC Bookmarks */ -- cgit v1.2.3 From 1bd187de536494a27925902b9653e9ef0ace4774 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2015 16:24:44 +0200 Subject: x86, intel-mid: remove Intel MID specific serial support Since we have a native 8250 driver carrying the Intel MID serial devices the specific support is not needed anymore. This patch removes it for Intel MID. Note that the console device name is changed from ttyMFDx to ttySx. Signed-off-by: Andy Shevchenko Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig.debug | 4 - arch/x86/include/asm/intel-mid.h | 3 - arch/x86/kernel/early_printk.c | 6 - arch/x86/platform/intel-mid/Makefile | 1 - .../platform/intel-mid/early_printk_intel_mid.c | 112 -- drivers/tty/serial/Kconfig | 10 - drivers/tty/serial/Makefile | 1 - drivers/tty/serial/mfd.c | 1505 -------------------- include/linux/serial_mfd.h | 47 - include/uapi/linux/serial_reg.h | 19 - 10 files changed, 1708 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/early_printk_intel_mid.c delete mode 100644 drivers/tty/serial/mfd.c delete mode 100644 include/linux/serial_mfd.h (limited to 'include/uapi') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 20028da8ae18..72484a645f05 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,10 +43,6 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. -config EARLY_PRINTK_INTEL_MID - bool "Early printk for Intel MID platform support" - depends on EARLY_PRINTK && X86_INTEL_MID - config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" depends on EARLY_PRINTK && PCI diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 705d35708a50..7c5af123bdbd 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern struct console early_hsu_console; -extern void hsu_early_console_init(const char *); - extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a62536a1be88..f85e3fb50f28 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf) if (!strncmp(buf, "xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_EARLY_PRINTK_INTEL_MID - if (!strncmp(buf, "hsu", 3)) { - hsu_early_console_init(buf + 3); - early_console_register(&early_hsu_console, keep); - } -#endif #ifdef CONFIG_EARLY_PRINTK_EFI if (!strncmp(buf, "efi", 3)) early_console_register(&early_efi_console, keep); diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0a8ee703b9fa..0ce1b1913673 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o -obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c deleted file mode 100644 index 4e720829ab90..000000000000 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * early_printk_intel_mid.c - early consoles for Intel MID platforms - * - * Copyright (c) 2008-2010, Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* - * This file implements early console named hsu. - * hsu is based on a High Speed UART device which only exists in the Medfield - * platform - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Following is the early console based on Medfield HSU (High - * Speed UART) device. - */ -#define HSU_PORT_BASE 0xffa28080 - -static void __iomem *phsu; - -void hsu_early_console_init(const char *s) -{ - unsigned long paddr, port = 0; - u8 lcr; - - /* - * Select the early HSU console port if specified by user in the - * kernel command line. - */ - if (*s && !kstrtoul(s, 10, &port)) - port = clamp_val(port, 0, 2); - - paddr = HSU_PORT_BASE + port * 0x80; - phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr); - - /* Disable FIFO */ - writeb(0x0, phsu + UART_FCR); - - /* Set to default 115200 bps, 8n1 */ - lcr = readb(phsu + UART_LCR); - writeb((0x80 | lcr), phsu + UART_LCR); - writeb(0x18, phsu + UART_DLL); - writeb(lcr, phsu + UART_LCR); - writel(0x3600, phsu + UART_MUL*4); - - writeb(0x8, phsu + UART_MCR); - writeb(0x7, phsu + UART_FCR); - writeb(0x3, phsu + UART_LCR); - - /* Clear IRQ status */ - readb(phsu + UART_LSR); - readb(phsu + UART_RX); - readb(phsu + UART_IIR); - readb(phsu + UART_MSR); - - /* Enable FIFO */ - writeb(0x7, phsu + UART_FCR); -} - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -static void early_hsu_putc(char ch) -{ - unsigned int timeout = 10000; /* 10ms */ - u8 status; - - while (--timeout) { - status = readb(phsu + UART_LSR); - if (status & BOTH_EMPTY) - break; - udelay(1); - } - - /* Only write the char when there was no timeout */ - if (timeout) - writeb(ch, phsu + UART_TX); -} - -static void early_hsu_write(struct console *con, const char *str, unsigned n) -{ - int i; - - for (i = 0; i < n && *str; i++) { - if (*str == '\n') - early_hsu_putc('\r'); - early_hsu_putc(*str); - str++; - } -} - -struct console early_hsu_console = { - .name = "earlyhsu", - .write = early_hsu_write, - .flags = CON_PRINTBUFFER, - .index = -1, -}; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 60c368a5dbf1..c9dbc626038d 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -483,16 +483,6 @@ config SERIAL_SA1100_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) -config SERIAL_MFD_HSU - tristate "Medfield High Speed UART support" - depends on PCI - select SERIAL_CORE - -config SERIAL_MFD_HSU_CONSOLE - bool "Medfield HSU serial console support" - depends on SERIAL_MFD_HSU=y - select SERIAL_CORE_CONSOLE - config SERIAL_BFIN tristate "Blackfin serial port support" depends on BLACKFIN diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 599be4b05a26..f42b4f9845df 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o -obj-$(CONFIG_SERIAL_MFD_HSU) += mfd.o obj-$(CONFIG_SERIAL_IFX6X60) += ifx6x60.o obj-$(CONFIG_SERIAL_PCH_UART) += pch_uart.o obj-$(CONFIG_SERIAL_MSM_SMD) += msm_smd_tty.o diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c deleted file mode 100644 index 8fe4501d7565..000000000000 --- a/drivers/tty/serial/mfd.c +++ /dev/null @@ -1,1505 +0,0 @@ -/* - * mfd.c: driver for High Speed UART device of Intel Medfield platform - * - * Refer pxa.c, 8250.c and some other drivers in drivers/serial/ - * - * (C) Copyright 2010 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -/* Notes: - * 1. DMA channel allocation: 0/1 channel are assigned to port 0, - * 2/3 chan to port 1, 4/5 chan to port 3. Even number chans - * are used for RX, odd chans for TX - * - * 2. The RI/DSR/DCD/DTR are not pinned out, DCD & DSR are always - * asserted, only when the HW is reset the DDCD and DDSR will - * be triggered - */ - -#if defined(CONFIG_SERIAL_MFD_HSU_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define HSU_DMA_BUF_SIZE 2048 - -#define chan_readl(chan, offset) readl(chan->reg + offset) -#define chan_writel(chan, offset, val) writel(val, chan->reg + offset) - -#define mfd_readl(obj, offset) readl(obj->reg + offset) -#define mfd_writel(obj, offset, val) writel(val, obj->reg + offset) - -static int hsu_dma_enable; -module_param(hsu_dma_enable, int, 0); -MODULE_PARM_DESC(hsu_dma_enable, - "It is a bitmap to set working mode, if bit[x] is 1, then port[x] will work in DMA mode, otherwise in PIO mode."); - -struct hsu_dma_buffer { - u8 *buf; - dma_addr_t dma_addr; - u32 dma_size; - u32 ofs; -}; - -struct hsu_dma_chan { - u32 id; - enum dma_data_direction dirt; - struct uart_hsu_port *uport; - void __iomem *reg; -}; - -struct uart_hsu_port { - struct uart_port port; - unsigned char ier; - unsigned char lcr; - unsigned char mcr; - unsigned int lsr_break_flag; - char name[12]; - int index; - struct device *dev; - - struct hsu_dma_chan *txc; - struct hsu_dma_chan *rxc; - struct hsu_dma_buffer txbuf; - struct hsu_dma_buffer rxbuf; - int use_dma; /* flag for DMA/PIO */ - int running; - int dma_tx_on; -}; - -/* Top level data structure of HSU */ -struct hsu_port { - void __iomem *reg; - unsigned long paddr; - unsigned long iolen; - u32 irq; - - struct uart_hsu_port port[3]; - struct hsu_dma_chan chans[10]; - - struct dentry *debugfs; -}; - -static inline unsigned int serial_in(struct uart_hsu_port *up, int offset) -{ - unsigned int val; - - if (offset > UART_MSR) { - offset <<= 2; - val = readl(up->port.membase + offset); - } else - val = (unsigned int)readb(up->port.membase + offset); - - return val; -} - -static inline void serial_out(struct uart_hsu_port *up, int offset, int value) -{ - if (offset > UART_MSR) { - offset <<= 2; - writel(value, up->port.membase + offset); - } else { - unsigned char val = value & 0xff; - writeb(val, up->port.membase + offset); - } -} - -#ifdef CONFIG_DEBUG_FS - -#define HSU_REGS_BUFSIZE 1024 - - -static ssize_t port_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct uart_hsu_port *up = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU port[%d] regs:\n", up->index); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IER: \t\t0x%08x\n", serial_in(up, UART_IER)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "IIR: \t\t0x%08x\n", serial_in(up, UART_IIR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LCR: \t\t0x%08x\n", serial_in(up, UART_LCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MCR: \t\t0x%08x\n", serial_in(up, UART_MCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "LSR: \t\t0x%08x\n", serial_in(up, UART_LSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MSR: \t\t0x%08x\n", serial_in(up, UART_MSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "FOR: \t\t0x%08x\n", serial_in(up, UART_FOR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "PS: \t\t0x%08x\n", serial_in(up, UART_PS)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MUL: \t\t0x%08x\n", serial_in(up, UART_MUL)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DIV: \t\t0x%08x\n", serial_in(up, UART_DIV)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static ssize_t dma_show_regs(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hsu_dma_chan *chan = file->private_data; - char *buf; - u32 len = 0; - ssize_t ret; - - buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL); - if (!buf) - return 0; - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MFD HSU DMA channel [%d] regs:\n", chan->id); - - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "=================================\n"); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "CR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_CR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "DCR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_DCR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "BSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_BSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "MOTSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_MOTSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2TSR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3SAR)); - len += snprintf(buf + len, HSU_REGS_BUFSIZE - len, - "D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3TSR)); - - if (len > HSU_REGS_BUFSIZE) - len = HSU_REGS_BUFSIZE; - - ret = simple_read_from_buffer(user_buf, count, ppos, buf, len); - kfree(buf); - return ret; -} - -static const struct file_operations port_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = port_show_regs, - .llseek = default_llseek, -}; - -static const struct file_operations dma_regs_ops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = dma_show_regs, - .llseek = default_llseek, -}; - -static int hsu_debugfs_init(struct hsu_port *hsu) -{ - int i; - char name[32]; - - hsu->debugfs = debugfs_create_dir("hsu", NULL); - if (!hsu->debugfs) - return -ENOMEM; - - for (i = 0; i < 3; i++) { - snprintf(name, sizeof(name), "port_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)(&hsu->port[i]), &port_regs_ops); - } - - for (i = 0; i < 6; i++) { - snprintf(name, sizeof(name), "dma_chan_%d_regs", i); - debugfs_create_file(name, S_IFREG | S_IRUGO, - hsu->debugfs, (void *)&hsu->chans[i], &dma_regs_ops); - } - - return 0; -} - -static void hsu_debugfs_remove(struct hsu_port *hsu) -{ - if (hsu->debugfs) - debugfs_remove_recursive(hsu->debugfs); -} - -#else -static inline int hsu_debugfs_init(struct hsu_port *hsu) -{ - return 0; -} - -static inline void hsu_debugfs_remove(struct hsu_port *hsu) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -static void serial_hsu_enable_ms(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - up->ier |= UART_IER_MSI; - serial_out(up, UART_IER, up->ier); -} - -static void hsu_dma_tx(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - struct hsu_dma_buffer *dbuf = &up->txbuf; - int count; - - /* test_and_set_bit may be better, but anyway it's in lock protected mode */ - if (up->dma_tx_on) - return; - - /* Update the circ buf info */ - xmit->tail += dbuf->ofs; - xmit->tail &= UART_XMIT_SIZE - 1; - - up->port.icount.tx += dbuf->ofs; - dbuf->ofs = 0; - - /* Disable the channel */ - chan_writel(up->txc, HSU_CH_CR, 0x0); - - if (!uart_circ_empty(xmit) && !uart_tx_stopped(&up->port)) { - dma_sync_single_for_device(up->port.dev, - dbuf->dma_addr, - dbuf->dma_size, - DMA_TO_DEVICE); - - count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); - dbuf->ofs = count; - - /* Reprogram the channel */ - chan_writel(up->txc, HSU_CH_D0SAR, dbuf->dma_addr + xmit->tail); - chan_writel(up->txc, HSU_CH_D0TSR, count); - - /* Reenable the channel */ - chan_writel(up->txc, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24)); - up->dma_tx_on = 1; - chan_writel(up->txc, HSU_CH_CR, 0x1); - } - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); -} - -/* The buffer is already cache coherent */ -static void hsu_dma_start_rx_chan(struct hsu_dma_chan *rxc, - struct hsu_dma_buffer *dbuf) -{ - dbuf->ofs = 0; - - chan_writel(rxc, HSU_CH_BSR, 32); - chan_writel(rxc, HSU_CH_MOTSR, 4); - - chan_writel(rxc, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(rxc, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(rxc, HSU_CH_DCR, 0x1 | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - chan_writel(rxc, HSU_CH_CR, 0x3); -} - -/* Protected by spin_lock_irqsave(port->lock) */ -static void serial_hsu_start_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - if (up->use_dma) { - hsu_dma_tx(up); - } else if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -static void serial_hsu_stop_tx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *txc = up->txc; - - if (up->use_dma) - chan_writel(txc, HSU_CH_CR, 0x0); - else if (up->ier & UART_IER_THRI) { - up->ier &= ~UART_IER_THRI; - serial_out(up, UART_IER, up->ier); - } -} - -/* This is always called in spinlock protected mode, so - * modify timeout timer is safe here */ -static void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts, - unsigned long *flags) -{ - struct hsu_dma_buffer *dbuf = &up->rxbuf; - struct hsu_dma_chan *chan = up->rxc; - struct uart_port *port = &up->port; - struct tty_port *tport = &port->state->port; - int count; - - /* - * First need to know how many is already transferred, - * then check if its a timeout DMA irq, and return - * the trail bytes out, push them up and reenable the - * channel - */ - - /* Timeout IRQ, need wait some time, see Errata 2 */ - if (int_sts & 0xf00) - udelay(2); - - /* Stop the channel */ - chan_writel(chan, HSU_CH_CR, 0x0); - - count = chan_readl(chan, HSU_CH_D0SAR) - dbuf->dma_addr; - if (!count) { - /* Restart the channel before we leave */ - chan_writel(chan, HSU_CH_CR, 0x3); - return; - } - - dma_sync_single_for_cpu(port->dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* - * Head will only wrap around when we recycle - * the DMA buffer, and when that happens, we - * explicitly set tail to 0. So head will - * always be greater than tail. - */ - tty_insert_flip_string(tport, dbuf->buf, count); - port->icount.rx += count; - - dma_sync_single_for_device(up->port.dev, dbuf->dma_addr, - dbuf->dma_size, DMA_FROM_DEVICE); - - /* Reprogram the channel */ - chan_writel(chan, HSU_CH_D0SAR, dbuf->dma_addr); - chan_writel(chan, HSU_CH_D0TSR, dbuf->dma_size); - chan_writel(chan, HSU_CH_DCR, 0x1 - | (0x1 << 8) - | (0x1 << 16) - | (0x1 << 24) /* timeout bit, see HSU Errata 1 */ - ); - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(tport); - spin_lock_irqsave(&up->port.lock, *flags); - - chan_writel(chan, HSU_CH_CR, 0x3); - -} - -static void serial_hsu_stop_rx(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - struct hsu_dma_chan *chan = up->rxc; - - if (up->use_dma) - chan_writel(chan, HSU_CH_CR, 0x2); - else { - up->ier &= ~UART_IER_RLSI; - up->port.read_status_mask &= ~UART_LSR_DR; - serial_out(up, UART_IER, up->ier); - } -} - -static inline void receive_chars(struct uart_hsu_port *up, int *status, - unsigned long *flags) -{ - unsigned int ch, flag; - unsigned int max_count = 256; - - do { - ch = serial_in(up, UART_RX); - flag = TTY_NORMAL; - up->port.icount.rx++; - - if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { - - dev_warn(up->dev, "We really rush into ERR/BI case" - "status = 0x%02x", *status); - /* For statistics only */ - if (*status & UART_LSR_BI) { - *status &= ~(UART_LSR_FE | UART_LSR_PE); - up->port.icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(&up->port)) - goto ignore_char; - } else if (*status & UART_LSR_PE) - up->port.icount.parity++; - else if (*status & UART_LSR_FE) - up->port.icount.frame++; - if (*status & UART_LSR_OE) - up->port.icount.overrun++; - - /* Mask off conditions which should be ignored. */ - *status &= up->port.read_status_mask; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - if (up->port.cons && - up->port.cons->index == up->port.line) { - /* Recover the break flag from console xmit */ - *status |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif - if (*status & UART_LSR_BI) { - flag = TTY_BREAK; - } else if (*status & UART_LSR_PE) - flag = TTY_PARITY; - else if (*status & UART_LSR_FE) - flag = TTY_FRAME; - } - - if (uart_handle_sysrq_char(&up->port, ch)) - goto ignore_char; - - uart_insert_char(&up->port, *status, UART_LSR_OE, ch, flag); - ignore_char: - *status = serial_in(up, UART_LSR); - } while ((*status & UART_LSR_DR) && max_count--); - - spin_unlock_irqrestore(&up->port.lock, *flags); - tty_flip_buffer_push(&up->port.state->port); - spin_lock_irqsave(&up->port.lock, *flags); -} - -static void transmit_chars(struct uart_hsu_port *up) -{ - struct circ_buf *xmit = &up->port.state->xmit; - int count; - - if (up->port.x_char) { - serial_out(up, UART_TX, up->port.x_char); - up->port.icount.tx++; - up->port.x_char = 0; - return; - } - if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) { - serial_hsu_stop_tx(&up->port); - return; - } - - /* The IRQ is for TX FIFO half-empty */ - count = up->port.fifosize / 2; - - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - - up->port.icount.tx++; - if (uart_circ_empty(xmit)) - break; - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(&up->port); - - if (uart_circ_empty(xmit)) - serial_hsu_stop_tx(&up->port); -} - -static inline void check_modem_status(struct uart_hsu_port *up) -{ - int status; - - status = serial_in(up, UART_MSR); - - if ((status & UART_MSR_ANY_DELTA) == 0) - return; - - if (status & UART_MSR_TERI) - up->port.icount.rng++; - if (status & UART_MSR_DDSR) - up->port.icount.dsr++; - /* We may only get DDCD when HW init and reset */ - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(&up->port, status & UART_MSR_DCD); - /* Will start/stop_tx accordingly */ - if (status & UART_MSR_DCTS) - uart_handle_cts_change(&up->port, status & UART_MSR_CTS); - - wake_up_interruptible(&up->port.state->port.delta_msr_wait); -} - -/* - * This handles the interrupt from one port. - */ -static irqreturn_t port_irq(int irq, void *dev_id) -{ - struct uart_hsu_port *up = dev_id; - unsigned int iir, lsr; - unsigned long flags; - - if (unlikely(!up->running)) - return IRQ_NONE; - - spin_lock_irqsave(&up->port.lock, flags); - if (up->use_dma) { - lsr = serial_in(up, UART_LSR); - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) - dev_warn(up->dev, - "Got lsr irq while using DMA, lsr = 0x%2x\n", - lsr); - check_modem_status(up); - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; - } - - iir = serial_in(up, UART_IIR); - if (iir & UART_IIR_NO_INT) { - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_NONE; - } - - lsr = serial_in(up, UART_LSR); - if (lsr & UART_LSR_DR) - receive_chars(up, &lsr, &flags); - check_modem_status(up); - - /* lsr will be renewed during the receive_chars */ - if (lsr & UART_LSR_THRE) - transmit_chars(up); - - spin_unlock_irqrestore(&up->port.lock, flags); - return IRQ_HANDLED; -} - -static inline void dma_chan_irq(struct hsu_dma_chan *chan) -{ - struct uart_hsu_port *up = chan->uport; - unsigned long flags; - u32 int_sts; - - spin_lock_irqsave(&up->port.lock, flags); - - if (!up->use_dma || !up->running) - goto exit; - - /* - * No matter what situation, need read clear the IRQ status - * There is a bug, see Errata 5, HSD 2900918 - */ - int_sts = chan_readl(chan, HSU_CH_SR); - - /* Rx channel */ - if (chan->dirt == DMA_FROM_DEVICE) - hsu_dma_rx(up, int_sts, &flags); - - /* Tx channel */ - if (chan->dirt == DMA_TO_DEVICE) { - chan_writel(chan, HSU_CH_CR, 0x0); - up->dma_tx_on = 0; - hsu_dma_tx(up); - } - -exit: - spin_unlock_irqrestore(&up->port.lock, flags); - return; -} - -static irqreturn_t dma_irq(int irq, void *dev_id) -{ - struct hsu_port *hsu = dev_id; - u32 int_sts, i; - - int_sts = mfd_readl(hsu, HSU_GBL_DMAISR); - - /* Currently we only have 6 channels may be used */ - for (i = 0; i < 6; i++) { - if (int_sts & 0x1) - dma_chan_irq(&hsu->chans[i]); - int_sts >>= 1; - } - - return IRQ_HANDLED; -} - -static unsigned int serial_hsu_tx_empty(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - unsigned int ret; - - spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0; - spin_unlock_irqrestore(&up->port.lock, flags); - - return ret; -} - -static unsigned int serial_hsu_get_mctrl(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char status; - unsigned int ret; - - status = serial_in(up, UART_MSR); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -static void serial_hsu_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr |= up->mcr; - - serial_out(up, UART_MCR, mcr); -} - -static void serial_hsu_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_out(up, UART_LCR, up->lcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -/* - * What special to do: - * 1. chose the 64B fifo mode - * 2. start dma or pio depends on configuration - * 3. we only allocate dma memory when needed - */ -static int serial_hsu_startup(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - pm_runtime_get_sync(up->dev); - - /* - * Clear the FIFO buffers and disable them. - * (they will be reenabled in set_termios()) - */ - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - /* Clear the interrupt registers. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - /* Now, initialize the UART, default is 8n1 */ - serial_out(up, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&up->port.lock, flags); - - up->port.mctrl |= TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. - */ - if (!up->use_dma) - up->ier = UART_IER_RLSI | UART_IER_RDI | UART_IER_RTOIE; - else - up->ier = 0; - serial_out(up, UART_IER, up->ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - - /* DMA init */ - if (up->use_dma) { - struct hsu_dma_buffer *dbuf; - struct circ_buf *xmit = &port->state->xmit; - - up->dma_tx_on = 0; - - /* First allocate the RX buffer */ - dbuf = &up->rxbuf; - dbuf->buf = kzalloc(HSU_DMA_BUF_SIZE, GFP_KERNEL); - if (!dbuf->buf) { - up->use_dma = 0; - goto exit; - } - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - HSU_DMA_BUF_SIZE, - DMA_FROM_DEVICE); - dbuf->dma_size = HSU_DMA_BUF_SIZE; - - /* Start the RX channel right now */ - hsu_dma_start_rx_chan(up->rxc, dbuf); - - /* Next init the TX DMA */ - dbuf = &up->txbuf; - dbuf->buf = xmit->buf; - dbuf->dma_addr = dma_map_single(port->dev, - dbuf->buf, - UART_XMIT_SIZE, - DMA_TO_DEVICE); - dbuf->dma_size = UART_XMIT_SIZE; - - /* This should not be changed all around */ - chan_writel(up->txc, HSU_CH_BSR, 32); - chan_writel(up->txc, HSU_CH_MOTSR, 4); - dbuf->ofs = 0; - } - -exit: - /* And clear the interrupt registers again for luck. */ - (void) serial_in(up, UART_LSR); - (void) serial_in(up, UART_RX); - (void) serial_in(up, UART_IIR); - (void) serial_in(up, UART_MSR); - - up->running = 1; - return 0; -} - -static void serial_hsu_shutdown(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned long flags; - - /* Disable interrupts from this port */ - up->ier = 0; - serial_out(up, UART_IER, 0); - up->running = 0; - - spin_lock_irqsave(&up->port.lock, flags); - up->port.mctrl &= ~TIOCM_OUT2; - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - spin_unlock_irqrestore(&up->port.lock, flags); - - /* Disable break condition and FIFOs */ - serial_out(up, UART_LCR, serial_in(up, UART_LCR) & ~UART_LCR_SBC); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT); - serial_out(up, UART_FCR, 0); - - pm_runtime_put(up->dev); -} - -static void -serial_hsu_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - unsigned char cval, fcr = 0; - unsigned long flags; - unsigned int baud, quot; - u32 ps, mul; - - switch (termios->c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - /* CMSPAR isn't supported by this driver */ - termios->c_cflag &= ~CMSPAR; - - if (termios->c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (termios->c_cflag & PARENB) - cval |= UART_LCR_PARITY; - if (!(termios->c_cflag & PARODD)) - cval |= UART_LCR_EPAR; - - /* - * The base clk is 50Mhz, and the baud rate come from: - * baud = 50M * MUL / (DIV * PS * DLAB) - * - * For those basic low baud rate we can get the direct - * scalar from 2746800, like 115200 = 2746800/24. For those - * higher baud rate, we handle them case by case, mainly by - * adjusting the MUL/PS registers, and DIV register is kept - * as default value 0x3d09 to make things simple - */ - baud = uart_get_baud_rate(port, termios, old, 0, 4000000); - - quot = 1; - ps = 0x10; - mul = 0x3600; - switch (baud) { - case 3500000: - mul = 0x3345; - ps = 0xC; - break; - case 1843200: - mul = 0x2400; - break; - case 3000000: - case 2500000: - case 2000000: - case 1500000: - case 1000000: - case 500000: - /* mul/ps/quot = 0x9C4/0x10/0x1 will make a 500000 bps */ - mul = baud / 500000 * 0x9C4; - break; - default: - /* Use uart_get_divisor to get quot for other baud rates */ - quot = 0; - } - - if (!quot) - quot = uart_get_divisor(port, baud); - - if ((up->port.uartclk / quot) < (2400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_1B; - else if ((up->port.uartclk / quot) < (230400 * 16)) - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_16B; - else - fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_32B; - - fcr |= UART_FCR_HSU_64B_FIFO; - - /* - * Ok, we're now changing the port state. Do it with - * interrupts disabled. - */ - spin_lock_irqsave(&up->port.lock, flags); - - /* Update the per-port timeout */ - uart_update_timeout(port, termios->c_cflag, baud); - - up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; - if (termios->c_iflag & INPCK) - up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - up->port.read_status_mask |= UART_LSR_BI; - - /* Characters to ignore */ - up->port.ignore_status_mask = 0; - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - if (termios->c_iflag & IGNBRK) { - up->port.ignore_status_mask |= UART_LSR_BI; - /* - * If we're ignoring parity and break indicators, - * ignore overruns too (for real raw support). - */ - if (termios->c_iflag & IGNPAR) - up->port.ignore_status_mask |= UART_LSR_OE; - } - - /* Ignore all characters if CREAD is not set */ - if ((termios->c_cflag & CREAD) == 0) - up->port.ignore_status_mask |= UART_LSR_DR; - - /* - * CTS flow control flag and modem status interrupts, disable - * MSI by default - */ - up->ier &= ~UART_IER_MSI; - if (UART_ENABLE_MS(&up->port, termios->c_cflag)) - up->ier |= UART_IER_MSI; - - serial_out(up, UART_IER, up->ier); - - if (termios->c_cflag & CRTSCTS) - up->mcr |= UART_MCR_AFE | UART_MCR_RTS; - else - up->mcr &= ~UART_MCR_AFE; - - serial_out(up, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ - serial_out(up, UART_DLL, quot & 0xff); /* LS of divisor */ - serial_out(up, UART_DLM, quot >> 8); /* MS of divisor */ - serial_out(up, UART_LCR, cval); /* reset DLAB */ - serial_out(up, UART_MUL, mul); /* set MUL */ - serial_out(up, UART_PS, ps); /* set PS */ - up->lcr = cval; /* Save LCR */ - serial_hsu_set_mctrl(&up->port, up->port.mctrl); - serial_out(up, UART_FCR, fcr); - spin_unlock_irqrestore(&up->port.lock, flags); -} - -static void -serial_hsu_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ -} - -static void serial_hsu_release_port(struct uart_port *port) -{ -} - -static int serial_hsu_request_port(struct uart_port *port) -{ - return 0; -} - -static void serial_hsu_config_port(struct uart_port *port, int flags) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - up->port.type = PORT_MFD; -} - -static int -serial_hsu_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - /* We don't want the core code to modify any port params */ - return -EINVAL; -} - -static const char * -serial_hsu_type(struct uart_port *port) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - return up->name; -} - -/* Mainly for uart console use */ -static struct uart_hsu_port *serial_hsu_ports[3]; -static struct uart_driver serial_hsu_reg; - -#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - -/* Wait for transmitter & holding register to empty */ -static inline void wait_for_xmitr(struct uart_hsu_port *up) -{ - unsigned int status, tmout = 1000; - - /* Wait up to 1ms for the character to be sent. */ - do { - status = serial_in(up, UART_LSR); - - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; - - if (--tmout == 0) - break; - udelay(1); - } while (!(status & BOTH_EMPTY)); - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (--tmout && - ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0)) - udelay(1); - } -} - -static void serial_hsu_console_putchar(struct uart_port *port, int ch) -{ - struct uart_hsu_port *up = - container_of(port, struct uart_hsu_port, port); - - wait_for_xmitr(up); - serial_out(up, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. - */ -static void -serial_hsu_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_hsu_port *up = serial_hsu_ports[co->index]; - unsigned long flags; - unsigned int ier; - int locked = 1; - - touch_nmi_watchdog(); - - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); - - /* First save the IER then disable the interrupts */ - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - - uart_console_write(&up->port, s, count, serial_hsu_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up); - serial_out(up, UART_IER, ier); - - if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); -} - -static struct console serial_hsu_console; - -static int __init -serial_hsu_console_setup(struct console *co, char *options) -{ - struct uart_hsu_port *up; - int baud = 115200; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - if (co->index == -1 || co->index >= serial_hsu_reg.nr) - co->index = 0; - up = serial_hsu_ports[co->index]; - if (!up) - return -ENODEV; - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(&up->port, co, baud, parity, bits, flow); -} - -static struct console serial_hsu_console = { - .name = "ttyMFD", - .write = serial_hsu_console_write, - .device = uart_console_device, - .setup = serial_hsu_console_setup, - .flags = CON_PRINTBUFFER, - .index = -1, - .data = &serial_hsu_reg, -}; - -#define SERIAL_HSU_CONSOLE (&serial_hsu_console) -#else -#define SERIAL_HSU_CONSOLE NULL -#endif - -static struct uart_ops serial_hsu_pops = { - .tx_empty = serial_hsu_tx_empty, - .set_mctrl = serial_hsu_set_mctrl, - .get_mctrl = serial_hsu_get_mctrl, - .stop_tx = serial_hsu_stop_tx, - .start_tx = serial_hsu_start_tx, - .stop_rx = serial_hsu_stop_rx, - .enable_ms = serial_hsu_enable_ms, - .break_ctl = serial_hsu_break_ctl, - .startup = serial_hsu_startup, - .shutdown = serial_hsu_shutdown, - .set_termios = serial_hsu_set_termios, - .pm = serial_hsu_pm, - .type = serial_hsu_type, - .release_port = serial_hsu_release_port, - .request_port = serial_hsu_request_port, - .config_port = serial_hsu_config_port, - .verify_port = serial_hsu_verify_port, -}; - -static struct uart_driver serial_hsu_reg = { - .owner = THIS_MODULE, - .driver_name = "MFD serial", - .dev_name = "ttyMFD", - .major = TTY_MAJOR, - .minor = 128, - .nr = 3, - .cons = SERIAL_HSU_CONSOLE, -}; - -#ifdef CONFIG_PM -static int serial_hsu_suspend(struct pci_dev *pdev, pm_message_t state) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - /* Make sure this is not the internal dma controller */ - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_suspend_port(&serial_hsu_reg, &up->port); - } - - pci_save_state(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; -} - -static int serial_hsu_resume(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - int ret; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - ret = pci_enable_device(pdev); - if (ret) - dev_warn(&pdev->dev, - "HSU: can't re-enable device, try to continue\n"); - - if (priv && (pdev->device != 0x081E)) { - up = priv; - uart_resume_port(&serial_hsu_reg, &up->port); - } - return 0; -} - -static int serial_hsu_runtime_idle(struct device *dev) -{ - pm_schedule_suspend(dev, 500); - return -EBUSY; -} - -static int serial_hsu_runtime_suspend(struct device *dev) -{ - return 0; -} - -static int serial_hsu_runtime_resume(struct device *dev) -{ - return 0; -} -#else -#define serial_hsu_suspend NULL -#define serial_hsu_resume NULL -#define serial_hsu_runtime_idle NULL -#define serial_hsu_runtime_suspend NULL -#define serial_hsu_runtime_resume NULL -#endif - -static const struct dev_pm_ops serial_hsu_pm_ops = { - .runtime_suspend = serial_hsu_runtime_suspend, - .runtime_resume = serial_hsu_runtime_resume, - .runtime_idle = serial_hsu_runtime_idle, -}; - -/* temp global pointer before we settle down on using one or four PCI dev */ -static struct hsu_port *phsu; - -static int serial_hsu_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct uart_hsu_port *uport; - int index, ret; - - printk(KERN_INFO "HSU: found PCI Serial controller(ID: %04x:%04x)\n", - pdev->vendor, pdev->device); - - switch (pdev->device) { - case 0x081B: - index = 0; - break; - case 0x081C: - index = 1; - break; - case 0x081D: - index = 2; - break; - case 0x081E: - /* internal DMA controller */ - index = 3; - break; - default: - dev_err(&pdev->dev, "HSU: out of index!"); - return -ENODEV; - } - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - if (index == 3) { - /* DMA controller */ - ret = request_irq(pdev->irq, dma_irq, 0, "hsu_dma", phsu); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - pci_set_drvdata(pdev, phsu); - } else { - /* UART port 0~2 */ - uport = &phsu->port[index]; - uport->port.irq = pdev->irq; - uport->port.dev = &pdev->dev; - uport->dev = &pdev->dev; - - ret = request_irq(pdev->irq, port_irq, 0, uport->name, uport); - if (ret) { - dev_err(&pdev->dev, "can not get IRQ\n"); - goto err_disable; - } - uart_add_one_port(&serial_hsu_reg, &uport->port); - - pci_set_drvdata(pdev, uport); - } - - pm_runtime_put_noidle(&pdev->dev); - pm_runtime_allow(&pdev->dev); - - return 0; - -err_disable: - pci_disable_device(pdev); - return ret; -} - -static void hsu_global_init(void) -{ - struct hsu_port *hsu; - struct uart_hsu_port *uport; - struct hsu_dma_chan *dchan; - int i, ret; - - hsu = kzalloc(sizeof(struct hsu_port), GFP_KERNEL); - if (!hsu) - return; - - /* Get basic io resource and map it */ - hsu->paddr = 0xffa28000; - hsu->iolen = 0x1000; - - if (!(request_mem_region(hsu->paddr, hsu->iolen, "HSU global"))) - pr_warn("HSU: error in request mem region\n"); - - hsu->reg = ioremap_nocache((unsigned long)hsu->paddr, hsu->iolen); - if (!hsu->reg) { - pr_err("HSU: error in ioremap\n"); - ret = -ENOMEM; - goto err_free_region; - } - - /* Initialise the 3 UART ports */ - uport = hsu->port; - for (i = 0; i < 3; i++) { - uport->port.type = PORT_MFD; - uport->port.iotype = UPIO_MEM; - uport->port.mapbase = (resource_size_t)hsu->paddr - + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - uport->port.membase = hsu->reg + HSU_PORT_REG_OFFSET - + i * HSU_PORT_REG_LENGTH; - - sprintf(uport->name, "hsu_port%d", i); - uport->port.fifosize = 64; - uport->port.ops = &serial_hsu_pops; - uport->port.line = i; - uport->port.flags = UPF_IOREMAP; - /* set the scalable maxim support rate to 2746800 bps */ - uport->port.uartclk = 115200 * 24 * 16; - - uport->running = 0; - uport->txc = &hsu->chans[i * 2]; - uport->rxc = &hsu->chans[i * 2 + 1]; - - serial_hsu_ports[i] = uport; - uport->index = i; - - if (hsu_dma_enable & (1<use_dma = 1; - else - uport->use_dma = 0; - - uport++; - } - - /* Initialise 6 dma channels */ - dchan = hsu->chans; - for (i = 0; i < 6; i++) { - dchan->id = i; - dchan->dirt = (i & 0x1) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - dchan->uport = &hsu->port[i/2]; - dchan->reg = hsu->reg + HSU_DMA_CHANS_REG_OFFSET + - i * HSU_DMA_CHANS_REG_LENGTH; - - dchan++; - } - - phsu = hsu; - hsu_debugfs_init(hsu); - return; - -err_free_region: - release_mem_region(hsu->paddr, hsu->iolen); - kfree(hsu); - return; -} - -static void serial_hsu_remove(struct pci_dev *pdev) -{ - void *priv = pci_get_drvdata(pdev); - struct uart_hsu_port *up; - - if (!priv) - return; - - pm_runtime_forbid(&pdev->dev); - pm_runtime_get_noresume(&pdev->dev); - - /* For port 0/1/2, priv is the address of uart_hsu_port */ - if (pdev->device != 0x081E) { - up = priv; - uart_remove_one_port(&serial_hsu_reg, &up->port); - } - - free_irq(pdev->irq, priv); - pci_disable_device(pdev); -} - -/* First 3 are UART ports, and the 4th is the DMA */ -static const struct pci_device_id pci_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081B) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081C) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081D) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081E) }, - {}, -}; - -static struct pci_driver hsu_pci_driver = { - .name = "HSU serial", - .id_table = pci_ids, - .probe = serial_hsu_probe, - .remove = serial_hsu_remove, - .suspend = serial_hsu_suspend, - .resume = serial_hsu_resume, - .driver = { - .pm = &serial_hsu_pm_ops, - }, -}; - -static int __init hsu_pci_init(void) -{ - int ret; - - hsu_global_init(); - - ret = uart_register_driver(&serial_hsu_reg); - if (ret) - return ret; - - return pci_register_driver(&hsu_pci_driver); -} - -static void __exit hsu_pci_exit(void) -{ - pci_unregister_driver(&hsu_pci_driver); - uart_unregister_driver(&serial_hsu_reg); - - hsu_debugfs_remove(phsu); - - kfree(phsu); -} - -module_init(hsu_pci_init); -module_exit(hsu_pci_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, pci_ids); diff --git a/include/linux/serial_mfd.h b/include/linux/serial_mfd.h deleted file mode 100644 index 2b071e0b034d..000000000000 --- a/include/linux/serial_mfd.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _SERIAL_MFD_H_ -#define _SERIAL_MFD_H_ - -/* HW register offset definition */ -#define UART_FOR 0x08 -#define UART_PS 0x0C -#define UART_MUL 0x0D -#define UART_DIV 0x0E - -#define HSU_GBL_IEN 0x0 -#define HSU_GBL_IST 0x4 - -#define HSU_GBL_INT_BIT_PORT0 0x0 -#define HSU_GBL_INT_BIT_PORT1 0x1 -#define HSU_GBL_INT_BIT_PORT2 0x2 -#define HSU_GBL_INT_BIT_IRI 0x3 -#define HSU_GBL_INT_BIT_HDLC 0x4 -#define HSU_GBL_INT_BIT_DMA 0x5 - -#define HSU_GBL_ISR 0x8 -#define HSU_GBL_DMASR 0x400 -#define HSU_GBL_DMAISR 0x404 - -#define HSU_PORT_REG_OFFSET 0x80 -#define HSU_PORT0_REG_OFFSET 0x80 -#define HSU_PORT1_REG_OFFSET 0x100 -#define HSU_PORT2_REG_OFFSET 0x180 -#define HSU_PORT_REG_LENGTH 0x80 - -#define HSU_DMA_CHANS_REG_OFFSET 0x500 -#define HSU_DMA_CHANS_REG_LENGTH 0x40 - -#define HSU_CH_SR 0x0 /* channel status reg */ -#define HSU_CH_CR 0x4 /* control reg */ -#define HSU_CH_DCR 0x8 /* descriptor control reg */ -#define HSU_CH_BSR 0x10 /* max fifo buffer size reg */ -#define HSU_CH_MOTSR 0x14 /* minimum ocp transfer size */ -#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */ -#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */ -#define HSU_CH_D1SAR 0x28 -#define HSU_CH_D1TSR 0x2C -#define HSU_CH_D2SAR 0x30 -#define HSU_CH_D2TSR 0x34 -#define HSU_CH_D3SAR 0x38 -#define HSU_CH_D3TSR 0x3C - -#endif diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 00adb01fa5f3..e9b4cb0cd7ed 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -241,25 +241,6 @@ #define UART_FCR_PXAR16 0x80 /* receive FIFO threshold = 16 */ #define UART_FCR_PXAR32 0xc0 /* receive FIFO threshold = 32 */ -/* - * Intel MID on-chip HSU (High Speed UART) defined bits - */ -#define UART_FCR_HSU_64_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_64_16B 0x40 /* receive FIFO treshold = 16 */ -#define UART_FCR_HSU_64_32B 0x80 /* receive FIFO treshold = 32 */ -#define UART_FCR_HSU_64_56B 0xc0 /* receive FIFO treshold = 56 */ - -#define UART_FCR_HSU_16_1B 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_HSU_16_4B 0x40 /* receive FIFO treshold = 4 */ -#define UART_FCR_HSU_16_8B 0x80 /* receive FIFO treshold = 8 */ -#define UART_FCR_HSU_16_14B 0xc0 /* receive FIFO treshold = 14 */ - -#define UART_FCR_HSU_64B_FIFO 0x20 /* chose 64 bytes FIFO */ -#define UART_FCR_HSU_16B_FIFO 0x00 /* chose 16 bytes FIFO */ - -#define UART_FCR_HALF_EMPT_TXI 0x00 /* trigger TX_EMPT IRQ for half empty */ -#define UART_FCR_FULL_EMPT_TXI 0x08 /* trigger TX_EMPT IRQ for full empty */ - /* * These register definitions are for the 16C950 */ -- cgit v1.2.3 From c93682477bd861744589215515a63b81fdbd8948 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Thu, 5 Mar 2015 20:16:11 +0200 Subject: net/dcb: Add IEEE QCN attribute As specified in 802.1Qau spec. Add this optional attribute to the DCB netlink layer. To allow for application to use the new attribute, NIC drivers should implement and register the callbacks ieee_getqcn, ieee_setqcn and ieee_getqcnstats. The QCN attribute holds a set of parameters for management, and a set of statistics to provide informative data on Congestion-Control defined by this spec. Signed-off-by: Shani Michaeli Signed-off-by: Shachar Raindel Signed-off-by: Or Gerlitz Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/dcbnl.h | 3 +++ include/uapi/linux/dcbnl.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ net/dcb/dcbnl.c | 44 ++++++++++++++++++++++++++++--- 3 files changed, 110 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 597b88a94332..207d9ba1f92c 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -49,6 +49,9 @@ struct dcbnl_rtnl_ops { int (*ieee_setets) (struct net_device *, struct ieee_ets *); int (*ieee_getmaxrate) (struct net_device *, struct ieee_maxrate *); int (*ieee_setmaxrate) (struct net_device *, struct ieee_maxrate *); + int (*ieee_getqcn) (struct net_device *, struct ieee_qcn *); + int (*ieee_setqcn) (struct net_device *, struct ieee_qcn *); + int (*ieee_getqcnstats) (struct net_device *, struct ieee_qcn_stats *); int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *); int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *); int (*ieee_getapp) (struct net_device *, struct dcb_app *); diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index e711f20dc522..6497d7933d5b 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -78,6 +78,70 @@ struct ieee_maxrate { __u64 tc_maxrate[IEEE_8021QAZ_MAX_TCS]; }; +enum dcbnl_cndd_states { + DCB_CNDD_RESET = 0, + DCB_CNDD_EDGE, + DCB_CNDD_INTERIOR, + DCB_CNDD_INTERIOR_READY, +}; + +/* This structure contains the IEEE 802.1Qau QCN managed object. + * + *@rpg_enable: enable QCN RP + *@rppp_max_rps: maximum number of RPs allowed for this CNPV on this port + *@rpg_time_reset: time between rate increases if no CNMs received. + * given in u-seconds + *@rpg_byte_reset: transmitted data between rate increases if no CNMs received. + * given in Bytes + *@rpg_threshold: The number of times rpByteStage or rpTimeStage can count + * before RP rate control state machine advances states + *@rpg_max_rate: the maxinun rate, in Mbits per second, + * at which an RP can transmit + *@rpg_ai_rate: The rate, in Mbits per second, + * used to increase rpTargetRate in the RPR_ACTIVE_INCREASE + *@rpg_hai_rate: The rate, in Mbits per second, + * used to increase rpTargetRate in the RPR_HYPER_INCREASE state + *@rpg_gd: Upon CNM receive, flow rate is limited to (Fb/Gd)*CurrentRate. + * rpgGd is given as log2(Gd), where Gd may only be powers of 2 + *@rpg_min_dec_fac: The minimum factor by which the current transmit rate + * can be changed by reception of a CNM. + * value is given as percentage (1-100) + *@rpg_min_rate: The minimum value, in bits per second, for rate to limit + *@cndd_state_machine: The state of the congestion notification domain + * defense state machine, as defined by IEEE 802.3Qau + * section 32.1.1. In the interior ready state, + * the QCN capable hardware may add CN-TAG TLV to the + * outgoing traffic, to specifically identify outgoing + * flows. + */ + +struct ieee_qcn { + __u8 rpg_enable[IEEE_8021QAZ_MAX_TCS]; + __u32 rppp_max_rps[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_time_reset[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_byte_reset[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_threshold[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_max_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_ai_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_hai_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_gd[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_min_dec_fac[IEEE_8021QAZ_MAX_TCS]; + __u32 rpg_min_rate[IEEE_8021QAZ_MAX_TCS]; + __u32 cndd_state_machine[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains the IEEE 802.1Qau QCN statistics. + * + *@rppp_rp_centiseconds: the number of RP-centiseconds accumulated + * by RPs at this priority level on this Port + *@rppp_created_rps: number of active RPs(flows) that react to CNMs + */ + +struct ieee_qcn_stats { + __u64 rppp_rp_centiseconds[IEEE_8021QAZ_MAX_TCS]; + __u32 rppp_created_rps[IEEE_8021QAZ_MAX_TCS]; +}; + /* This structure contains the IEEE 802.1Qaz PFC managed object * * @pfc_cap: Indicates the number of traffic classes on the local device @@ -334,6 +398,8 @@ enum ieee_attrs { DCB_ATTR_IEEE_PEER_PFC, DCB_ATTR_IEEE_PEER_APP, DCB_ATTR_IEEE_MAXRATE, + DCB_ATTR_IEEE_QCN, + DCB_ATTR_IEEE_QCN_STATS, __DCB_ATTR_IEEE_MAX }; #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 93ea80196f0e..5b21f6f88e97 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { [DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)}, [DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED}, [DCB_ATTR_IEEE_MAXRATE] = {.len = sizeof(struct ieee_maxrate)}, + [DCB_ATTR_IEEE_QCN] = {.len = sizeof(struct ieee_qcn)}, + [DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)}, }; static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { @@ -1030,7 +1032,7 @@ nla_put_failure: return err; } -/* Handle IEEE 802.1Qaz GET commands. */ +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { struct nlattr *ieee, *app; @@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) } } + if (ops->ieee_getqcn) { + struct ieee_qcn qcn; + + memset(&qcn, 0, sizeof(qcn)); + err = ops->ieee_getqcn(netdev, &qcn); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN, + sizeof(qcn), &qcn); + if (err) + return -EMSGSIZE; + } + } + + if (ops->ieee_getqcnstats) { + struct ieee_qcn_stats qcn_stats; + + memset(&qcn_stats, 0, sizeof(qcn_stats)); + err = ops->ieee_getqcnstats(netdev, &qcn_stats); + if (!err) { + err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS, + sizeof(qcn_stats), &qcn_stats); + if (err) + return -EMSGSIZE; + } + } + if (ops->ieee_getpfc) { struct ieee_pfc pfc; memset(&pfc, 0, sizeof(pfc)); @@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, } EXPORT_SYMBOL(dcbnl_cee_notify); -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. +/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands. + * If any requested operation can not be completed + * the entire msg is aborted and error value is returned. * No attempt is made to reconcile the case where only part of the * cmd can be completed. */ @@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, goto err; } + if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) { + struct ieee_qcn *qcn = + nla_data(ieee[DCB_ATTR_IEEE_QCN]); + + err = ops->ieee_setqcn(netdev, qcn); + if (err) + goto err; + } + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); -- cgit v1.2.3 From c78ba6d64c78634a875d1e316676667cabfea256 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 11 Mar 2015 15:39:21 +0100 Subject: ipv6: expose RFC4191 route preference via rtnetlink This makes it possible to retain the route preference when RAs are handled in userspace. Signed-off-by: Lubomir Rintel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + net/ipv6/route.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index c3722b024e73..bea910f924dd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -305,6 +305,7 @@ enum rtattr_type_t { RTA_MFC_STATS, RTA_VIA, RTA_NEWDST, + RTA_PREF, __RTA_MAX }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 06fa819c43c9..58c0e6a4d15d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2398,6 +2398,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PREF] = { .type = NLA_U8 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2405,6 +2406,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + unsigned int pref; int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2480,6 +2482,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); } + if (tb[RTA_PREF]) { + pref = nla_get_u8(tb[RTA_PREF]); + if (pref != ICMPV6_ROUTER_PREF_LOW && + pref != ICMPV6_ROUTER_PREF_HIGH) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + cfg->fc_flags |= RTF_PREF(pref); + } + err = 0; errout: return err; @@ -2583,7 +2593,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + + nla_total_size(1); /* RTA_PREF */ } static int rt6_fill_node(struct net *net, @@ -2724,6 +2735,9 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3 From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:16 +0100 Subject: ebpf: add prandom helper for packet sampling This work is similar to commit 4cd3675ebf74 ("filter: added BPF random opcode") and adds a possibility for packet sampling in eBPF. Currently, this is only possible in classic BPF and useful to combine sampling with f.e. packet sockets, possible also with tc. Example function proto-type looks like: u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/core.c | 2 ++ kernel/bpf/helpers.c | 12 ++++++++++++ net/core/filter.c | 2 ++ 5 files changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80f2e0fc3d02..50bf95e29a96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_get_prandom_u32_proto; + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3fa1af8a58d7..1c2ca2b477c8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -165,6 +165,7 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 50603aec766a..c1dbbb5d289b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -661,6 +661,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; const struct bpf_func_proto bpf_map_delete_elem_proto __weak; +const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a3c7701a8b5e..95eb59a045ea 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -11,6 +11,7 @@ */ #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -87,3 +88,14 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, }; + +static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return prandom_u32(); +} + +const struct bpf_func_proto bpf_get_prandom_u32_proto = { + .func = bpf_get_prandom_u32, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; diff --git a/net/core/filter.c b/net/core/filter.c index 7a4eb7030dba..4344db39af2e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1139,6 +1139,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; default: return NULL; } -- cgit v1.2.3 From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:17 +0100 Subject: ebpf: add helper for obtaining current processor id This patch adds the possibility to obtain raw_smp_processor_id() in eBPF. Currently, this is only possible in classic BPF where commit da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added facilities for this. Perhaps most importantly, this would also allow us to track per CPU statistics with eBPF maps, or to implement a poor-man's per CPU data structure through eBPF maps. Example function proto-type looks like: u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 12 ++++++++++++ net/core/filter.c | 2 ++ 5 files changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50bf95e29a96..30bfd331882a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; +extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1c2ca2b477c8..de1f63668daf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -166,6 +166,7 @@ enum bpf_func_id { BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ + BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1dbbb5d289b..4139a0f8b558 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -662,6 +662,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto __weak; const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; +const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 95eb59a045ea..bd7f5988ed9c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -12,6 +12,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -99,3 +100,14 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + return raw_smp_processor_id(); +} + +const struct bpf_func_proto bpf_get_smp_processor_id_proto = { + .func = bpf_get_smp_processor_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; diff --git a/net/core/filter.c b/net/core/filter.c index 4344db39af2e..33310eee6134 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1141,6 +1141,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_map_delete_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; default: return NULL; } -- cgit v1.2.3 From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 13 Mar 2015 11:57:42 -0700 Subject: bpf: allow extended BPF programs access skb fields introduce user accessible mirror of in-kernel 'struct sk_buff': struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs can do: int bpf_prog(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since skb->pkt_type is a bitfield. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 +- include/uapi/linux/bpf.h | 10 ++++ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 152 ++++++++++++++++++++++++++++++++++++++++++----- net/core/filter.c | 100 +++++++++++++++++++++++++------ 5 files changed, 234 insertions(+), 35 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30bfd331882a..280a315de8d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,6 +103,9 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + + u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn); }; struct bpf_prog_type_list { @@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index de1f63668daf..929545a27546 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -170,4 +170,14 @@ enum bpf_func_id { __BPF_FUNC_MAX_ID, }; +/* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 669719ccc9ee..ea75c654af1b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; /* run eBPF verifier */ - err = bpf_check(prog, attr); + err = bpf_check(&prog, attr); if (err < 0) goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e6b522496250..c22ebd36fa4b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env) return err; } else if (class == BPF_LDX) { - if (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0) { - verbose("BPF_LDX uses reserved fields\n"); - return -EINVAL; - } + enum bpf_reg_type src_reg_type; + + /* check for reserved fields is already done */ + /* check src operand */ err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) @@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env) if (err) return err; + src_reg_type = regs[insn->src_reg].type; + + if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) { + /* saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) + * use reserved 'imm' field to mark this insn + */ + insn->imm = src_reg_type; + + } else if (src_reg_type != insn->imm && + (src_reg_type == PTR_TO_CTX || + insn->imm == PTR_TO_CTX)) { + /* ABuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: + * src_reg == ctx in one branch and + * src_reg == stack|map in some other branch. + * Reject it. + */ + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) int i, j; for (i = 0; i < insn_cnt; i++, insn++) { + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) != BPF_MEM || + insn->imm != 0)) { + verbose("BPF_LDX uses reserved fields\n"); + return -EINVAL; + } + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_map *map; struct fd f; @@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) insn->src_reg = 0; } +static void adjust_branches(struct bpf_prog *prog, int pos, int delta) +{ + struct bpf_insn *insn = prog->insnsi; + int insn_cnt = prog->len; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (BPF_CLASS(insn->code) != BPF_JMP || + BPF_OP(insn->code) == BPF_CALL || + BPF_OP(insn->code) == BPF_EXIT) + continue; + + /* adjust offset of jmps if necessary */ + if (i < pos && i + insn->off + 1 > pos) + insn->off += delta; + else if (i > pos && i + insn->off + 1 < pos) + insn->off -= delta; + } +} + +/* convert load instructions that access fields of 'struct __sk_buff' + * into sequence of instructions that access fields of 'struct sk_buff' + */ +static int convert_ctx_accesses(struct verifier_env *env) +{ + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + u32 cnt; + int i; + + if (!env->prog->aux->ops->convert_ctx_access) + return 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LDX | BPF_MEM | BPF_W)) + continue; + + if (insn->imm != PTR_TO_CTX) { + /* clear internal mark */ + insn->imm = 0; + continue; + } + + cnt = env->prog->aux->ops-> + convert_ctx_access(insn->dst_reg, insn->src_reg, + insn->off, insn_buf); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } + + if (cnt == 1) { + memcpy(insn, insn_buf, sizeof(*insn)); + continue; + } + + /* several new insns need to be inserted. Make room for them */ + insn_cnt += cnt - 1; + new_prog = bpf_prog_realloc(env->prog, + bpf_prog_size(insn_cnt), + GFP_USER); + if (!new_prog) + return -ENOMEM; + + new_prog->len = insn_cnt; + + memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, + sizeof(*insn) * (insn_cnt - i - cnt)); + + /* copy substitute insns in place of load instruction */ + memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); + + /* adjust branches in the whole program */ + adjust_branches(new_prog, i, cnt - 1); + + /* keep walking new program and skip insns we just inserted */ + env->prog = new_prog; + insn = new_prog->insnsi + i + cnt - 1; + i += cnt - 1; + } + + return 0; +} + static void free_states(struct verifier_env *env) { struct verifier_state_list *sl, *sln; @@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env) kfree(env->explored_states); } -int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; struct verifier_env *env; int ret = -EINVAL; - if (prog->len <= 0 || prog->len > BPF_MAXINSNS) + if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; /* 'struct verifier_env' can be global, but since it's not small, @@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) if (!env) return -ENOMEM; - env->prog = prog; + env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ mutex_lock(&bpf_verifier_lock); @@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; - env->explored_states = kcalloc(prog->len, + env->explored_states = kcalloc(env->prog->len, sizeof(struct verifier_state_list *), GFP_USER); ret = -ENOMEM; @@ -1968,6 +2083,10 @@ skip_full_check: while (pop_stack(env, NULL) >= 0); free_states(env); + if (ret == 0) + /* program is valid, convert *(u32*)(ctx + off) accesses */ + ret = convert_ctx_accesses(env); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ @@ -1983,18 +2102,18 @@ skip_full_check: if (ret == 0 && env->used_map_cnt) { /* if program passed verifier, update used_maps in bpf_prog_info */ - prog->aux->used_maps = kmalloc_array(env->used_map_cnt, - sizeof(env->used_maps[0]), - GFP_KERNEL); + env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, + sizeof(env->used_maps[0]), + GFP_KERNEL); - if (!prog->aux->used_maps) { + if (!env->prog->aux->used_maps) { ret = -ENOMEM; goto free_log_buf; } - memcpy(prog->aux->used_maps, env->used_maps, + memcpy(env->prog->aux->used_maps, env->used_maps, sizeof(env->used_maps[0]) * env->used_map_cnt); - prog->aux->used_map_cnt = env->used_map_cnt; + env->prog->aux->used_map_cnt = env->used_map_cnt; /* program is valid. Convert pseudo bpf_ld_imm64 into generic * bpf_ld_imm64 instructions @@ -2006,11 +2125,12 @@ free_log_buf: if (log_level) vfree(log_buf); free_env: - if (!prog->aux->used_maps) + if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); + *prog = env->prog; kfree(env); mutex_unlock(&bpf_verifier_lock); return ret; diff --git a/net/core/filter.c b/net/core/filter.c index 33310eee6134..4e9dd0ad0d5b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return prandom_u32(); } +static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (skb_field) { + case SKF_AD_MARK: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; + + case SKF_AD_PKTTYPE: + *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +#endif + break; + + case SKF_AD_QUEUE: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, queue_mapping)); + break; + } + + return insn - insn_buf; +} + static bool convert_bpf_extensions(struct sock_filter *fp, struct bpf_insn **insnp) { struct bpf_insn *insn = *insnp; + u32 cnt; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - PKT_TYPE_OFFSET()); - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); -#ifdef __BIG_ENDIAN_BITFIELD - insn++; - *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5); -#endif + cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_IFINDEX: @@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - - *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, mark)); + cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_RXHASH: @@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - - *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, queue_mapping)); + cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: @@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { - /* skb fields cannot be accessed yet */ - return false; + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* check bounds */ + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + /* all __sk_buff fields are __u32 */ + if (size != 4) + return false; + + return true; +} + +static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, len): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, len)); + break; + + case offsetof(struct __sk_buff, mark): + return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, pkt_type): + return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, queue_mapping): + return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + } + + return insn - insn_buf; } static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { -- cgit v1.2.3 From 9df85aaa43297cb12dc85155695dd1bfdf94f91d Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:43 -0600 Subject: vfio: platform: probe to devices on the platform bus Driver to bind to Linux platform devices, and callbacks to discover their resources to be used by the main VFIO PLATFORM code. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform.c | 103 ++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 1 + 2 files changed, 104 insertions(+) create mode 100644 drivers/vfio/platform/vfio_platform.c (limited to 'include/uapi') diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c new file mode 100644 index 000000000000..cef645c83996 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis " +#define DRIVER_DESC "VFIO for platform devices - User Level meta-driver" + +/* probing devices from the linux platform bus */ + +static struct resource *get_platform_resource(struct vfio_platform_device *vdev, + int num) +{ + struct platform_device *dev = (struct platform_device *) vdev->opaque; + int i; + + for (i = 0; i < dev->num_resources; i++) { + struct resource *r = &dev->resource[i]; + + if (resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) { + if (!num) + return r; + + num--; + } + } + return NULL; +} + +static int get_platform_irq(struct vfio_platform_device *vdev, int i) +{ + struct platform_device *pdev = (struct platform_device *) vdev->opaque; + + return platform_get_irq(pdev, i); +} + +static int vfio_platform_probe(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->opaque = (void *) pdev; + vdev->name = pdev->name; + vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; + vdev->get_resource = get_platform_resource; + vdev->get_irq = get_platform_irq; + + ret = vfio_platform_probe_common(vdev, &pdev->dev); + if (ret) + kfree(vdev); + + return ret; +} + +static int vfio_platform_remove(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&pdev->dev); + if (vdev) { + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct platform_driver vfio_platform_driver = { + .probe = vfio_platform_probe, + .remove = vfio_platform_remove, + .driver = { + .name = "vfio-platform", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(vfio_platform_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c30f4f5..ea9514b6bb5c 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -160,6 +160,7 @@ struct vfio_device_info { __u32 flags; #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- cgit v1.2.3 From 36fe431f2811fa3b5fed15d272c585d5a47977aa Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:44 -0600 Subject: vfio: amba: VFIO support for AMBA devices Add support for discovering AMBA devices with VFIO and handle them similarly to Linux platform devices. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_amba.c | 115 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 1 + 2 files changed, 116 insertions(+) create mode 100644 drivers/vfio/platform/vfio_amba.c (limited to 'include/uapi') diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c new file mode 100644 index 000000000000..ff0331f72526 --- /dev/null +++ b/drivers/vfio/platform/vfio_amba.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis " +#define DRIVER_DESC "VFIO for AMBA devices - User Level meta-driver" + +/* probing devices from the AMBA bus */ + +static struct resource *get_amba_resource(struct vfio_platform_device *vdev, + int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + + if (i == 0) + return &adev->res; + + return NULL; +} + +static int get_amba_irq(struct vfio_platform_device *vdev, int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + int ret = 0; + + if (i < AMBA_NR_IRQS) + ret = adev->irq[i]; + + /* zero is an unset IRQ for AMBA devices */ + return ret ? ret : -ENXIO; +} + +static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->name = kasprintf(GFP_KERNEL, "vfio-amba-%08x", adev->periphid); + if (!vdev->name) { + kfree(vdev); + return -ENOMEM; + } + + vdev->opaque = (void *) adev; + vdev->flags = VFIO_DEVICE_FLAGS_AMBA; + vdev->get_resource = get_amba_resource; + vdev->get_irq = get_amba_irq; + + ret = vfio_platform_probe_common(vdev, &adev->dev); + if (ret) { + kfree(vdev->name); + kfree(vdev); + } + + return ret; +} + +static int vfio_amba_remove(struct amba_device *adev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&adev->dev); + if (vdev) { + kfree(vdev->name); + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct amba_id pl330_ids[] = { + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +static struct amba_driver vfio_amba_driver = { + .probe = vfio_amba_probe, + .remove = vfio_amba_remove, + .id_table = pl330_ids, + .drv = { + .name = "vfio-amba", + .owner = THIS_MODULE, + }, +}; + +module_amba_driver(vfio_amba_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ea9514b6bb5c..b57b750c222f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -161,6 +161,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- cgit v1.2.3 From c24973957975403521ca76a776c2dfd12fbe9add Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 16 Mar 2015 18:06:02 -0700 Subject: bpf: allow BPF programs access 'protocol' and 'vlan_tci' fields as a follow on to patch 70006af95515 ("bpf: allow eBPF access skb fields") this patch allows 'protocol' and 'vlan_tci' fields to be accessible from extended BPF programs. The usage of 'protocol', 'vlan_present' and 'vlan_tci' fields is the same as corresponding SKF_AD_PROTOCOL, SKF_AD_VLAN_TAG_PRESENT and SKF_AD_VLAN_TAG accesses in classic BPF. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 ++ net/core/filter.c | 72 +++++++++++++++++++++++++++++++-------------- samples/bpf/test_verifier.c | 9 ++++++ 3 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929545a27546..1623047af463 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -178,6 +178,9 @@ struct __sk_buff { __u32 pkt_type; __u32 mark; __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 4e9dd0ad0d5b..b95ae7fe7e4f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -177,6 +177,35 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, offsetof(struct sk_buff, queue_mapping)); break; + + case SKF_AD_PROTOCOL: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + + /* dst_reg = *(u16 *) (src_reg + offsetof(protocol)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, protocol)); + /* dst_reg = ntohs(dst_reg) [emitting a nop or swap16] */ + *insn++ = BPF_ENDIAN(BPF_FROM_BE, dst_reg, 16); + break; + + case SKF_AD_VLAN_TAG: + case SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_tci)); + if (skb_field == SKF_AD_VLAN_TAG) { + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, + ~VLAN_TAG_PRESENT); + } else { + /* dst_reg >>= 12 */ + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); + /* dst_reg &= 1 */ + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); + } + break; } return insn - insn_buf; @@ -190,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - - /* A = *(u16 *) (CTX + offsetof(protocol)) */ - *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, protocol)); - /* A = ntohs(A) [emitting a nop or swap16] */ - *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); + cnt = convert_skb_access(SKF_AD_PROTOCOL, BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_PKTTYPE: @@ -242,22 +266,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_VLAN_TAG: - case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + cnt = convert_skb_access(SKF_AD_VLAN_TAG, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; + break; - /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ - *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, - offsetof(struct sk_buff, vlan_tci)); - if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, - ~VLAN_TAG_PRESENT); - } else { - /* A >>= 12 */ - *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12); - /* A &= 1 */ - *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1); - } + case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: + cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + BPF_REG_A, BPF_REG_CTX, insn); + insn += cnt - 1; break; case SKF_AD_OFF + SKF_AD_PAY_OFFSET: @@ -1215,6 +1232,17 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, case offsetof(struct __sk_buff, queue_mapping): return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, protocol): + return convert_skb_access(SKF_AD_PROTOCOL, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_present): + return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, + dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, vlan_tci): + return convert_skb_access(SKF_AD_VLAN_TAG, + dst_reg, src_reg, insn); } return insn - insn_buf; diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index df6dbb6576f6..75d561f9fd6a 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -658,6 +658,15 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, queue_mapping)), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, -- cgit v1.2.3 From 27cd44618b92fc8c6889e4628407791e45422bac Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Wed, 4 Mar 2015 14:41:16 +0000 Subject: drm/i915: Add I915_PARAM_REVISION Adds a parameter which can be used with DRM_I915_GETPARAM to query the GPU revision. The intention is to use this in Mesa to implement the WaDisableSIMD16On3SrcInstr workaround on Skylake but only for revision 2. Signed-off-by: Neil Roberts Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 053e1788f578..3b64c526b6b9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -68,6 +68,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_CHIPSET_ID: value = dev->pdev->device; break; + case I915_PARAM_REVISION: + value = dev->pdev->revision; + break; case I915_PARAM_HAS_GEM: value = 1; break; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6eed16b92a24..b768f3b21eaa 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -347,6 +347,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 #define I915_PARAM_MMAP_VERSION 30 #define I915_PARAM_HAS_BSD2 31 +#define I915_PARAM_REVISION 32 typedef struct drm_i915_getparam { int param; -- cgit v1.2.3 From 8c4f83fb1e8bf317e894f62d17a63c32b7a6b75e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 11 Mar 2015 08:26:23 +0100 Subject: drm/fourcc: 64 #defines need ULL postfix I have no idea about the exact rules, but this angered Dave's 32bit rhel gcc. Reported-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/uapi/drm/drm_fourcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index e6efac23c7ea..07735822a28f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -151,7 +151,7 @@ /* add more to the end as needed */ #define fourcc_mod_code(vendor, val) \ - ((((u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffL)) + ((((u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL)) /* * Format Modifier tokens: -- cgit v1.2.3 From a1559ffefb2a80eabbee65d7cc04e828d4fd557d Mon Sep 17 00:00:00 2001 From: Jeff McGee Date: Mon, 9 Mar 2015 16:06:54 -0700 Subject: drm/i915: Export total subslice and EU counts Setup new I915_GETPARAM ioctl entries for subslice total and EU total. Userspace drivers need these values when constructing GPGPU commands. This kernel query method is intended to replace the PCI ID-based tables that userspace drivers currently maintain. The kernel driver can employ fuse register reads as needed to ensure the most accurate determination of GT config attributes. This first became important with Cherryview in which the config could differ between devices with the same PCI ID. The kernel detection of these values is device-specific and not included in this patch. Because zero is not a valid value for any of these parameters, a value of zero is interpreted as unknown for the device. Userspace drivers should continue to maintain ID-based tables for older devices not supported by the new query method. v2: Increment our I915_GETPARAM indices to fit after REVISION which was merged ahead of us. For: VIZ-4636 Signed-off-by: Jeff McGee Tested-by: Zhigang Gong Acked-by: Zhigang Gong Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 10 ++++++++++ include/uapi/drm/i915_drm.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8e914303b831..d49ed68f041e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -153,6 +153,16 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_MMAP_VERSION: value = 1; break; + case I915_PARAM_SUBSLICE_TOTAL: + value = INTEL_INFO(dev)->subslice_total; + if (!value) + return -ENODEV; + break; + case I915_PARAM_EU_TOTAL: + value = INTEL_INFO(dev)->eu_total; + if (!value) + return -ENODEV; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b768f3b21eaa..8d1be9073380 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -348,6 +348,8 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_MMAP_VERSION 30 #define I915_PARAM_HAS_BSD2 31 #define I915_PARAM_REVISION 32 +#define I915_PARAM_SUBSLICE_TOTAL 33 +#define I915_PARAM_EU_TOTAL 34 typedef struct drm_i915_getparam { int param; -- cgit v1.2.3 From 847aac644e92e5624f2c153bab409bf713d5ff9a Mon Sep 17 00:00:00 2001 From: Li Xi Date: Thu, 19 Mar 2015 04:04:53 +0900 Subject: vfs: Add general support to enforce project quota limits This patch adds support for a new quota type PRJQUOTA for project quota enforcement. Also a new method get_projid() is added into dquot_operations structure. Signed-off-by: Li Xi Signed-off-by: Dmitry Monakhov Reviewed-by: Jan Kara Signed-off-by: Jan Kara --- fs/quota/dquot.c | 20 +++++++++++++++++--- fs/quota/quotaio_v2.h | 6 ++++-- include/linux/quota.h | 2 ++ include/uapi/linux/quota.h | 6 ++++-- 4 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 327a58448592..ecc25cf0ee6e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1163,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn) return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); - case PRJQUOTA: /* Never taken... Just make gcc happy */ - return 0; + case PRJQUOTA: + return 1; } return 0; } @@ -1405,6 +1405,9 @@ static void __dquot_initialize(struct inode *inode, int type) /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct kqid qid; + kprojid_t projid; + int rc; + got[cnt] = NULL; if (type != -1 && cnt != type) continue; @@ -1415,6 +1418,10 @@ static void __dquot_initialize(struct inode *inode, int type) */ if (dquots[cnt]) continue; + + if (!sb_has_quota_active(sb, cnt)) + continue; + init_needed = 1; switch (cnt) { @@ -1424,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type) case GRPQUOTA: qid = make_kqid_gid(inode->i_gid); break; + case PRJQUOTA: + rc = inode->i_sb->dq_op->get_projid(inode, &projid); + if (rc) + continue; + qid = make_kqid_projid(projid); + break; } got[cnt] = dqget(sb, qid); } @@ -2176,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EROFS; goto out_fmt; } - if (!sb->s_op->quota_write || !sb->s_op->quota_read) { + if (!sb->s_op->quota_write || !sb->s_op->quota_read || + (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h index f1966b42c2fd..4e95430093d9 100644 --- a/fs/quota/quotaio_v2.h +++ b/fs/quota/quotaio_v2.h @@ -13,12 +13,14 @@ */ #define V2_INITQMAGICS {\ 0xd9c01f11, /* USRQUOTA */\ - 0xd9c01927 /* GRPQUOTA */\ + 0xd9c01927, /* GRPQUOTA */\ + 0xd9c03f14, /* PRJQUOTA */\ } #define V2_INITQVERSIONS {\ 1, /* USRQUOTA */\ - 1 /* GRPQUOTA */\ + 1, /* GRPQUOTA */\ + 1, /* PRJQUOTA */\ } /* First generic header */ diff --git a/include/linux/quota.h b/include/linux/quota.h index cf910d1f8efa..b2505acfd3c0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -50,6 +50,7 @@ #undef USRQUOTA #undef GRPQUOTA +#undef PRJQUOTA enum quota_type { USRQUOTA = 0, /* element used for user quotas */ GRPQUOTA = 1, /* element used for group quotas */ @@ -319,6 +320,7 @@ struct dquot_operations { /* get reserved quota for delayed alloc, value returned is managed by * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); + int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ }; struct path; diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h index 1f49b8341c99..9c95b2c1c88a 100644 --- a/include/uapi/linux/quota.h +++ b/include/uapi/linux/quota.h @@ -36,11 +36,12 @@ #include #include -#define __DQUOT_VERSION__ "dquot_6.5.2" +#define __DQUOT_VERSION__ "dquot_6.6.0" -#define MAXQUOTAS 2 +#define MAXQUOTAS 3 #define USRQUOTA 0 /* element used for user quotas */ #define GRPQUOTA 1 /* element used for group quotas */ +#define PRJQUOTA 2 /* element used for project quotas */ /* * Definitions for the default names of the quotas files. @@ -48,6 +49,7 @@ #define INITQFNAMES { \ "user", /* USRQUOTA */ \ "group", /* GRPQUOTA */ \ + "project", /* PRJQUOTA */ \ "undefined", \ }; -- cgit v1.2.3 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net | 8 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/dev.c | 18 ++++++++++++++++++ net/core/net-sysfs.c | 23 +++++++++++++++++++++++ net/core/rtnetlink.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index beb8ec4dabbc..5ecfd72ba684 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -188,6 +188,14 @@ Description: Indicates the interface unique physical port identifier within the NIC, as a string. +What: /sys/class/net//phys_port_name +Date: March 2015 +KernelVersion: 4.0 +Contact: netdev@vger.kernel.org +Description: + Indicates the interface physical port name within the NIC, + as a string. + What: /sys/class/net//speed Date: October 2009 KernelVersion: 2.6.33 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76c5de4978a8..ec8f9b5f6500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,6 +1164,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_phys_port_name)(struct net_device *dev, + char *name, size_t len); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 756436e1ce89..7158fd00a109 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -147,6 +147,7 @@ enum { IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 39fe369b46ad..a1f24151db5b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_id); +/** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7e58bd7ec232..cc5cf689809c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25b4b5d23485..6abe634c666c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; -- cgit v1.2.3 From af615762e972be0c66cf1d156ca4fac13b93c0b0 Mon Sep 17 00:00:00 2001 From: Jörg Thalheim Date: Wed, 18 Mar 2015 10:06:58 +0100 Subject: bridge: add ageing_time, stp_state, priority over netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jörg Thalheim Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 3 +++ net/bridge/br_netlink.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7158fd00a109..f5f5edd5ae5f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -225,6 +225,9 @@ enum { IFLA_BR_FORWARD_DELAY, IFLA_BR_HELLO_TIME, IFLA_BR_MAX_AGE, + IFLA_BR_AGEING_TIME, + IFLA_BR_STP_STATE, + IFLA_BR_PRIORITY, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8bc6b67457dc..e1115a224a95 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -733,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, + [IFLA_BR_AGEING_TIME] = { .type = NLA_U32 }, + [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, + [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -762,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_AGEING_TIME]) { + u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); + + br->ageing_time = clock_t_to_jiffies(ageing_time); + } + + if (data[IFLA_BR_STP_STATE]) { + u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]); + + br_stp_set_enabled(br, stp_enabled); + } + + if (data[IFLA_BR_PRIORITY]) { + u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]); + + br_stp_set_bridge_priority(br, priority); + } + return 0; } @@ -770,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev) return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */ 0; } @@ -779,10 +803,16 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 forward_delay = jiffies_to_clock_t(br->forward_delay); u32 hello_time = jiffies_to_clock_t(br->hello_time); u32 age_time = jiffies_to_clock_t(br->max_age); + u32 ageing_time = jiffies_to_clock_t(br->ageing_time); + u32 stp_enabled = br->stp_enabled; + u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || - nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time)) + nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) || + nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || + nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || + nla_put_u16(skb, IFLA_BR_PRIORITY, priority)) return -EMSGSIZE; return 0; -- cgit v1.2.3 From d6d2a1882a79c1a5425d6f82b2fc7b934916f893 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Sep 2014 10:04:40 -0400 Subject: drm/radeon: add INFO query for GPU temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful for profiling. Tested-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kms.c | 7 +++++++ include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 686411e4e4f6..7904887c6cc7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -547,6 +547,13 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = 1; break; + case RADEON_INFO_CURRENT_GPU_TEMP: + /* get temperature in millidegrees C */ + if (rdev->asic->pm.get_temperature) + *value = radeon_get_temperature(rdev); + else + *value = 0; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 50d0fb41a3bf..66b1131bb296 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1034,6 +1034,7 @@ struct drm_radeon_cs { #define RADEON_INFO_VRAM_USAGE 0x1e #define RADEON_INFO_GTT_USAGE 0x1f #define RADEON_INFO_ACTIVE_CU_COUNT 0x20 +#define RADEON_INFO_CURRENT_GPU_TEMP 0x21 struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From 5c363a860398e2a09e5cff7f4654cf82ed8485e1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Sep 2014 11:33:30 -0400 Subject: drm/radeon: add INFO query for current sclk/mclk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the UMDs to query the current sclk/mclk for profiling, etc. Tested-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kms.c | 14 ++++++++++++++ include/uapi/drm/radeon_drm.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 7904887c6cc7..cf7e54e9b0d1 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -554,6 +554,20 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = 0; break; + case RADEON_INFO_CURRENT_GPU_SCLK: + /* get sclk in Mhz */ + if (rdev->pm.dpm_enabled) + *value = radeon_dpm_get_current_sclk(rdev) / 100; + else + *value = rdev->pm.current_sclk / 100; + break; + case RADEON_INFO_CURRENT_GPU_MCLK: + /* get mclk in Mhz */ + if (rdev->pm.dpm_enabled) + *value = radeon_dpm_get_current_mclk(rdev) / 100; + else + *value = rdev->pm.current_mclk / 100; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 66b1131bb296..de7ee21efea2 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1035,6 +1035,8 @@ struct drm_radeon_cs { #define RADEON_INFO_GTT_USAGE 0x1f #define RADEON_INFO_ACTIVE_CU_COUNT 0x20 #define RADEON_INFO_CURRENT_GPU_TEMP 0x21 +#define RADEON_INFO_CURRENT_GPU_SCLK 0x22 +#define RADEON_INFO_CURRENT_GPU_MCLK 0x23 struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From 4535cb9cefbc736b47bc1e7f8628065aba5ca0d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Oct 2014 11:26:50 -0400 Subject: drm/radeon: add support for read reg query from radeon info ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to query certain registers from userspace for profiling and harvest configuration. E.g., it can be used by the GALLIUM_HUD for profiling the status of various gfx blocks. Tested-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kms.c | 8 ++++++++ include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index cf7e54e9b0d1..7b2a7335cc5d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -568,6 +568,14 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = rdev->pm.current_mclk / 100; break; + case RADEON_INFO_READ_REG: + if (copy_from_user(value, value_ptr, sizeof(uint32_t))) { + DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__); + return -EFAULT; + } + if (radeon_get_allowed_info_register(rdev, *value, value)) + return -EINVAL; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index de7ee21efea2..871e73f99a4d 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1037,6 +1037,7 @@ struct drm_radeon_cs { #define RADEON_INFO_CURRENT_GPU_TEMP 0x21 #define RADEON_INFO_CURRENT_GPU_SCLK 0x22 #define RADEON_INFO_CURRENT_GPU_MCLK 0x23 +#define RADEON_INFO_READ_REG 0x24 struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From 94caee8c312d96522bcdae88791aaa9ebcd5f22c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Mar 2015 15:11:11 +0100 Subject: ebpf: add sched_act_type and map it to sk_filter's verifier ops In order to prepare eBPF support for tc action, we need to add sched_act_type, so that the eBPF verifier is aware of what helper function act_bpf may use, that it can load skb data and read out currently available skb fields. This is bascially analogous to 96be4325f443 ("ebpf: add sched_cls_type and map it to sk_filter's verifier ops"). BPF_PROG_TYPE_SCHED_CLS and BPF_PROG_TYPE_SCHED_ACT need to be separate since both will have a different set of functionality in future (classifier vs action), thus we won't run into ABI troubles when the point in time comes to diverge functionality from the classifier. The future plan for act_bpf would be that it will be able to write into skb->data and alter selected fields mirrored in struct __sk_buff. For an initial support, it's sufficient to map it to sk_filter_ops. Signed-off-by: Daniel Borkmann Cc: Jiri Pirko Reviewed-by: Jiri Pirko Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 1 + net/core/filter.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1623047af463..3dd314a45d0d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -119,6 +119,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, }; #define BPF_PSEUDO_MAP_FD 1 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c22ebd36fa4b..0e714f799ec0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1180,6 +1180,7 @@ static bool may_access_skb(enum bpf_prog_type type) switch (type) { case BPF_PROG_TYPE_SOCKET_FILTER: case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: return true; default: return false; diff --git a/net/core/filter.c b/net/core/filter.c index bdaac5895def..084eacc4d1d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1263,10 +1263,16 @@ static struct bpf_prog_type_list sched_cls_type __read_mostly = { .type = BPF_PROG_TYPE_SCHED_CLS, }; +static struct bpf_prog_type_list sched_act_type __read_mostly = { + .ops = &sk_filter_ops, + .type = BPF_PROG_TYPE_SCHED_ACT, +}; + static int __init register_sk_filter_ops(void) { bpf_register_prog_type(&sk_filter_type); bpf_register_prog_type(&sched_cls_type); + bpf_register_prog_type(&sched_act_type); return 0; } -- cgit v1.2.3 From a8cb5f556b567974d75ea29c15181c445c541b1f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Mar 2015 15:11:12 +0100 Subject: act_bpf: add initial eBPF support for actions This work extends the "classic" BPF programmable tc action by extending its scope also to native eBPF code! Together with commit e2e9b6541dd4 ("cls_bpf: add initial eBPF support for programmable classifiers") this adds the facility to implement fully flexible classifier and actions for tc that can be implemented in a C subset in user space, "safely" loaded into the kernel, and being run in native speed when JITed. Also, since eBPF maps can be shared between eBPF programs, it offers the possibility that cls_bpf and act_bpf can share data 1) between themselves and 2) between user space applications. That means that, f.e. customized runtime statistics can be collected in user space, but also more importantly classifier and action behaviour could be altered based on map input from the user space application. For the remaining details on the workflow and integration, see the cls_bpf commit e2e9b6541dd4. Preliminary iproute2 part can be found under [1]. [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf-act Signed-off-by: Daniel Borkmann Cc: Jamal Hadi Salim Cc: Jiri Pirko Acked-by: Jiri Pirko Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tc_act/tc_bpf.h | 6 +- include/uapi/linux/tc_act/tc_bpf.h | 2 + net/sched/act_bpf.c | 295 ++++++++++++++++++++++++++----------- 3 files changed, 220 insertions(+), 83 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index 86a070ffc930..a152e9858b2c 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -16,8 +16,12 @@ struct tcf_bpf { struct tcf_common common; struct bpf_prog *filter; + union { + u32 bpf_fd; + u16 bpf_num_ops; + }; struct sock_filter *bpf_ops; - u16 bpf_num_ops; + const char *bpf_name; }; #define to_bpf(a) \ container_of(a->priv, struct tcf_bpf, common) diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 5288bd77e63b..07f17cc70bb3 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -24,6 +24,8 @@ enum { TCA_ACT_BPF_PARMS, TCA_ACT_BPF_OPS_LEN, TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 5f6288fa3f12..4d2cede17468 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -13,26 +13,40 @@ #include #include #include +#include + #include #include #include #include -#define BPF_TAB_MASK 15 +#define BPF_TAB_MASK 15 +#define ACT_BPF_NAME_LEN 256 + +struct tcf_bpf_cfg { + struct bpf_prog *filter; + struct sock_filter *bpf_ops; + char *bpf_name; + u32 bpf_fd; + u16 bpf_num_ops; +}; -static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { - struct tcf_bpf *b = a->priv; + struct tcf_bpf *prog = act->priv; int action, filter_res; - spin_lock(&b->tcf_lock); + spin_lock(&prog->tcf_lock); - b->tcf_tm.lastuse = jiffies; - bstats_update(&b->tcf_bstats, skb); + prog->tcf_tm.lastuse = jiffies; + bstats_update(&prog->tcf_bstats, skb); - filter_res = BPF_PROG_RUN(b->filter, skb); + /* Needed here for accessing maps. */ + rcu_read_lock(); + filter_res = BPF_PROG_RUN(prog->filter, skb); + rcu_read_unlock(); /* A BPF program may overwrite the default action opcode. * Similarly as in cls_bpf, if filter_res == -1 we use the @@ -52,52 +66,87 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, break; case TC_ACT_SHOT: action = filter_res; - b->tcf_qstats.drops++; + prog->tcf_qstats.drops++; break; case TC_ACT_UNSPEC: - action = b->tcf_action; + action = prog->tcf_action; break; default: action = TC_ACT_UNSPEC; break; } - spin_unlock(&b->tcf_lock); + spin_unlock(&prog->tcf_lock); return action; } -static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, +static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) +{ + return !prog->bpf_ops; +} + +static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + struct nlattr *nla; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) + return -EMSGSIZE; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * + sizeof(struct sock_filter)); + if (nla == NULL) + return -EMSGSIZE; + + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); + + return 0; +} + +static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) + return -EMSGSIZE; + + if (prog->bpf_name && + nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) + return -EMSGSIZE; + + return 0; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { unsigned char *tp = skb_tail_pointer(skb); - struct tcf_bpf *b = a->priv; + struct tcf_bpf *prog = act->priv; struct tc_act_bpf opt = { - .index = b->tcf_index, - .refcnt = b->tcf_refcnt - ref, - .bindcnt = b->tcf_bindcnt - bind, - .action = b->tcf_action, + .index = prog->tcf_index, + .refcnt = prog->tcf_refcnt - ref, + .bindcnt = prog->tcf_bindcnt - bind, + .action = prog->tcf_action, }; - struct tcf_t t; - struct nlattr *nla; + struct tcf_t tm; + int ret; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) - goto nla_put_failure; - - nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * - sizeof(struct sock_filter)); - if (!nla) + if (tcf_bpf_is_ebpf(prog)) + ret = tcf_bpf_dump_ebpf_info(prog, skb); + else + ret = tcf_bpf_dump_bpf_info(prog, skb); + if (ret) goto nla_put_failure; - memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(b->tcf_tm.expires); - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) goto nla_put_failure; + return skb->len; nla_put_failure: @@ -107,36 +156,21 @@ nla_put_failure: static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, + [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; -static int tcf_bpf_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) { - struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; - struct tc_act_bpf *parm; - struct tcf_bpf *b; - u16 bpf_size, bpf_num_ops; struct sock_filter *bpf_ops; - struct sock_fprog_kern tmp; + struct sock_fprog_kern fprog_tmp; struct bpf_prog *fp; + u16 bpf_size, bpf_num_ops; int ret; - if (!nla) - return -EINVAL; - - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); - if (ret < 0) - return ret; - - if (!tb[TCA_ACT_BPF_PARMS] || - !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) - return -EINVAL; - parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) return -EINVAL; @@ -146,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return -EINVAL; bpf_ops = kzalloc(bpf_size, GFP_KERNEL); - if (!bpf_ops) + if (bpf_ops == NULL) return -ENOMEM; memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); - tmp.len = bpf_num_ops; - tmp.filter = bpf_ops; + fprog_tmp.len = bpf_num_ops; + fprog_tmp.filter = bpf_ops; - ret = bpf_prog_create(&fp, &tmp); - if (ret) - goto free_bpf_ops; + ret = bpf_prog_create(&fp, &fprog_tmp); + if (ret < 0) { + kfree(bpf_ops); + return ret; + } - if (!tcf_hash_check(parm->index, a, bind)) { - ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); - if (ret) + cfg->bpf_ops = bpf_ops; + cfg->bpf_num_ops = bpf_num_ops; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) +{ + struct bpf_prog *fp; + char *name = NULL; + u32 bpf_fd; + + bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); + + fp = bpf_prog_get(bpf_fd); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { + bpf_prog_put(fp); + return -EINVAL; + } + + if (tb[TCA_ACT_BPF_NAME]) { + name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), + nla_len(tb[TCA_ACT_BPF_NAME]), + GFP_KERNEL); + if (!name) { + bpf_prog_put(fp); + return -ENOMEM; + } + } + + cfg->bpf_fd = bpf_fd; + cfg->bpf_name = name; + cfg->filter = fp; + + return 0; +} + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *act, + int replace, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *prog; + struct tcf_bpf_cfg cfg; + bool is_bpf, is_ebpf; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; + is_ebpf = tb[TCA_ACT_BPF_FD]; + + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || + !tb[TCA_ACT_BPF_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + memset(&cfg, 0, sizeof(cfg)); + + ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : + tcf_bpf_init_from_efd(tb, &cfg); + if (ret < 0) + return ret; + + if (!tcf_hash_check(parm->index, act, bind)) { + ret = tcf_hash_create(parm->index, est, act, + sizeof(*prog), bind); + if (ret < 0) goto destroy_fp; ret = ACT_P_CREATED; } else { + /* Don't override defaults. */ if (bind) goto destroy_fp; - tcf_hash_release(a, bind); - if (!ovr) { + + tcf_hash_release(act, bind); + if (!replace) { ret = -EEXIST; goto destroy_fp; } } - b = to_bpf(a); - spin_lock_bh(&b->tcf_lock); - b->tcf_action = parm->action; - b->bpf_num_ops = bpf_num_ops; - b->bpf_ops = bpf_ops; - b->filter = fp; - spin_unlock_bh(&b->tcf_lock); + prog = to_bpf(act); + spin_lock_bh(&prog->tcf_lock); + + prog->bpf_ops = cfg.bpf_ops; + prog->bpf_name = cfg.bpf_name; + + if (cfg.bpf_num_ops) + prog->bpf_num_ops = cfg.bpf_num_ops; + if (cfg.bpf_fd) + prog->bpf_fd = cfg.bpf_fd; + + prog->tcf_action = parm->action; + prog->filter = cfg.filter; + + spin_unlock_bh(&prog->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(a); + tcf_hash_insert(act); + return ret; destroy_fp: - bpf_prog_destroy(fp); -free_bpf_ops: - kfree(bpf_ops); + if (is_ebpf) + bpf_prog_put(cfg.filter); + else + bpf_prog_destroy(cfg.filter); + + kfree(cfg.bpf_ops); + kfree(cfg.bpf_name); + return ret; } -static void tcf_bpf_cleanup(struct tc_action *a, int bind) +static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - struct tcf_bpf *b = a->priv; + const struct tcf_bpf *prog = act->priv; - bpf_prog_destroy(b->filter); + if (tcf_bpf_is_ebpf(prog)) + bpf_prog_put(prog->filter); + else + bpf_prog_destroy(prog->filter); } -static struct tc_action_ops act_bpf_ops = { - .kind = "bpf", - .type = TCA_ACT_BPF, - .owner = THIS_MODULE, - .act = tcf_bpf, - .dump = tcf_bpf_dump, - .cleanup = tcf_bpf_cleanup, - .init = tcf_bpf_init, +static struct tc_action_ops act_bpf_ops __read_mostly = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, }; static int __init bpf_init_module(void) -- cgit v1.2.3 From 8da86466b83787df0d4b89ec81c310de072d101c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki/吉藤英明 Date: Thu, 19 Mar 2015 22:41:46 +0900 Subject: net: neighbour: Add mcast_resolicit to configure the number of multicast resolicitations in PROBE state. We send unicast neighbor (ARP or NDP) solicitations ucast_probes times in PROBE state. Zhu Yanjun reported that some implementation does not reply against them and the entry will become FAILED, which is undesirable. We had been dealt with such nodes by sending multicast probes mcast_ solicit times after unicast probes in PROBE state. In 2003, I made a change not to send them to improve compatibility with IPv6 NDP. Let's introduce per-protocol per-interface sysctl knob "mcast_ reprobe" to configure the number of multicast (re)solicitation for reconfirmation in PROBE state. The default is 0, since we have been doing so for 10+ years. Reported-by: Zhu Yanjun CC: Ulf Samuelsson Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + include/uapi/linux/neighbour.h | 1 + net/core/neighbour.c | 15 +++++++++++---- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e7bdf5170802..bd33e66f49aa 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -42,6 +42,7 @@ enum { NEIGH_VAR_MCAST_PROBES, NEIGH_VAR_UCAST_PROBES, NEIGH_VAR_APP_PROBES, + NEIGH_VAR_MCAST_REPROBES, NEIGH_VAR_RETRANS_TIME, NEIGH_VAR_BASE_REACHABLE_TIME, NEIGH_VAR_DELAY_PROBE_TIME, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 3873a35509aa..2e35c61bbdd1 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -126,6 +126,7 @@ enum { NDTPA_PROXY_QLEN, /* u32 */ NDTPA_LOCKTIME, /* u64, msecs */ NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0e8b32efc031..3de654256028 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -817,10 +817,9 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); - if (!(n->nud_state & NUD_PROBE)) - max_probes += NEIGH_VAR(p, MCAST_PROBES); - return max_probes; + return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + + (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) : + NEIGH_VAR(p, MCAST_PROBES)); } static void neigh_invalidate(struct neighbour *neigh) @@ -1742,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NEIGH_VAR(parms, UCAST_PROBES)) || nla_put_u32(skb, NDTPA_MCAST_PROBES, NEIGH_VAR(parms, MCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_REPROBES, + NEIGH_VAR(parms, MCAST_REPROBES)) || nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || @@ -1901,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 }, [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, @@ -2001,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) NEIGH_VAR_SET(p, MCAST_PROBES, nla_get_u32(tbp[i])); break; + case NDTPA_MCAST_REPROBES: + NEIGH_VAR_SET(p, MCAST_REPROBES, + nla_get_u32(tbp[i])); + break; case NDTPA_BASE_REACHABLE_TIME: NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, nla_get_msecs(tbp[i])); @@ -2987,6 +2993,7 @@ static struct neigh_sysctl_table { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"), NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), -- cgit v1.2.3 From e1c47e732cbb4211d15ae0c4465dc4ceefcaa398 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 4 Mar 2015 01:47:55 -0800 Subject: [media] v4l2-subdev.h: add 'which' field for the enum structs While all other pad ops allow you to select whether to use the 'try' or the 'active' formats, the enum ops didn't have that option and always used 'try'. However, this will fail if a simple (e.g. PCI) bridge driver wants to use the enum pad op of a subdev that's also used in a complex platform driver like the omap3. Such a bridge driver generally wants to enum formats based on the active format. So add a new 'which' field to these structs. Note that V4L2_SUBDEV_FORMAT_TRY is 0, so the default remains TRY (applications need to set reserved to 0). Signed-off-by: Hans Verkuil Acked-by: Lad, Prabhakar Tested-by: Lad, Prabhakar Acked-by: Laurent Pinchart Acked-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-subdev.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index e0a7e3da498a..dbce2b554e02 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -69,12 +69,14 @@ struct v4l2_subdev_crop { * @pad: pad number, as reported by the media API * @index: format index during enumeration * @code: format code (MEDIA_BUS_FMT_ definitions) + * @which: format type (from enum v4l2_subdev_format_whence) */ struct v4l2_subdev_mbus_code_enum { __u32 pad; __u32 index; __u32 code; - __u32 reserved[9]; + __u32 which; + __u32 reserved[8]; }; /** @@ -82,6 +84,7 @@ struct v4l2_subdev_mbus_code_enum { * @pad: pad number, as reported by the media API * @index: format index during enumeration * @code: format code (MEDIA_BUS_FMT_ definitions) + * @which: format type (from enum v4l2_subdev_format_whence) */ struct v4l2_subdev_frame_size_enum { __u32 index; @@ -91,7 +94,8 @@ struct v4l2_subdev_frame_size_enum { __u32 max_width; __u32 min_height; __u32 max_height; - __u32 reserved[9]; + __u32 which; + __u32 reserved[8]; }; /** @@ -113,6 +117,7 @@ struct v4l2_subdev_frame_interval { * @width: frame width in pixels * @height: frame height in pixels * @interval: frame interval in seconds + * @which: format type (from enum v4l2_subdev_format_whence) */ struct v4l2_subdev_frame_interval_enum { __u32 index; @@ -121,7 +126,8 @@ struct v4l2_subdev_frame_interval_enum { __u32 width; __u32 height; struct v4l2_fract interval; - __u32 reserved[9]; + __u32 which; + __u32 reserved[8]; }; /** -- cgit v1.2.3 From 682f048bd49449f4ab978664a7f69a44a74e3caa Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Mon, 23 Mar 2015 09:11:13 +0300 Subject: af_packet: pass checksum validation status to the user Introduce TP_STATUS_CSUM_VALID tp_status flag to tell the af_packet user that at least the transport header checksum has been already validated. For now, the flag may be set for incoming packets only. Signed-off-by: Alexander Drozdov Cc: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 13 ++++++++++--- include/uapi/linux/if_packet.h | 1 + net/packet/af_packet.c | 9 +++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index a6d7cb91069e..daa015af16a0 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -440,9 +440,10 @@ and the following flags apply: +++ Capture process: from include/linux/if_packet.h - #define TP_STATUS_COPY 2 - #define TP_STATUS_LOSING 4 - #define TP_STATUS_CSUMNOTREADY 8 + #define TP_STATUS_COPY (1 << 1) + #define TP_STATUS_LOSING (1 << 2) + #define TP_STATUS_CSUMNOTREADY (1 << 3) + #define TP_STATUS_CSUM_VALID (1 << 7) TP_STATUS_COPY : This flag indicates that the frame (and associated meta information) has been truncated because it's @@ -466,6 +467,12 @@ TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which reading the packet we should not try to check the checksum. +TP_STATUS_CSUM_VALID : This flag indicates that at least the transport + header checksum of the packet has been already + validated on the kernel side. If the flag is not set + then we are free to check the checksum by ourselves + provided that TP_STATUS_CSUMNOTREADY is also not set. + for convenience there are also the following defines: #define TP_STATUS_KERNEL 0 diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index da2d668b8cf1..053bd102fbe0 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -99,6 +99,7 @@ struct tpacket_auxdata { #define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ #define TP_STATUS_BLK_TMO (1 << 5) #define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */ +#define TP_STATUS_CSUM_VALID (1 << 7) /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9d854c5ce0b5..5102c3cc4eec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1924,6 +1924,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + status |= TP_STATUS_CSUM_VALID; if (snaplen > res) snaplen = res; @@ -3031,6 +3035,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, aux.tp_status = TP_STATUS_USER; if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && + (skb->ip_summed == CHECKSUM_COMPLETE || + skb_csum_unnecessary(skb))) + aux.tp_status |= TP_STATUS_CSUM_VALID; + aux.tp_len = origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; -- cgit v1.2.3 From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:00 +0100 Subject: ipv6: introduce secret_stable to ipv6_devconf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the procfs logic for the stable_address knob: The secret is formatted as an ipv6 address and will be stored per interface and per namespace. We track initialized flag and return EIO errors until the secret is set. We don't inherit the secret to newly created namespaces. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 +++ include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4d5169f5d7d1..82806c60aa42 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -53,6 +53,10 @@ struct ipv6_devconf { __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + struct ipv6_stable_secret { + bool initialized; + struct in6_addr secret; + } stable_secret; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 437a6a4b125a..5efa54ae567c 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -170,6 +170,7 @@ enum { DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, + DEVCONF_STABLE_SECRET, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 158378e73f0a..5b967c8a617a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,9 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF +#define IPV6_MAX_STRLEN \ + sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -202,6 +206,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + } }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -240,6 +247,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + }, }; /* Check if a valid qdisc is available */ @@ -4430,6 +4440,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + /* we omit DEVCONF_STABLE_SECRET for now */ } static inline size_t inet6_ifla6_size(void) @@ -5074,6 +5085,53 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int err; + struct in6_addr addr; + char str[IPV6_MAX_STRLEN]; + struct ctl_table lctl = *ctl; + struct ipv6_stable_secret *secret = ctl->data; + + lctl.maxlen = IPV6_MAX_STRLEN; + lctl.data = str; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (!write && !secret->initialized) { + err = -EIO; + goto out; + } + + if (!write) { + err = snprintf(str, sizeof(str), "%pI6", + &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; + } + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); + if (err || !write) + goto out; + + if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { + err = -EIO; + goto out; + } + + secret->initialized = true; + secret->secret = addr; + +out: + rtnl_unlock(); + + return err; +} static struct addrconf_sysctl_table { @@ -5346,6 +5404,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, { /* sentinel */ } @@ -5442,6 +5507,9 @@ static int __net_init addrconf_init_net(struct net *net) dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; + dflt->stable_secret.initialized = false; + all->stable_secret.initialized = false; + net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.2.3 From 622c81d57b392cc9be836670eb464a4dfaa9adfe Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:01 +0100 Subject: ipv6: generation of stable privacy addresses for link-local and autoconf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the stable privacy address generation for link-local and autoconf addresses as specified in RFC7217. RID = F(Prefix, Net_Iface, Network_ID, DAD_Counter, secret_key) is the RID (random identifier). As the hash function F we chose one round of sha1. Prefix will be either the link-local prefix or the router advertised one. As Net_Iface we use the MAC address of the device. DAD_Counter and secret_key are implemented as specified. We don't use Network_ID, as it couples the code too closely to other subsystems. It is specified as optional in the RFC. As Net_Iface we only use the MAC address: we simply have no stable identifier in the kernel we could possibly use: because this code might run very early, we cannot depend on names, as they might be changed by user space early on during the boot process. A new address generation mode is introduced, IN6_ADDR_GEN_MODE_STABLE_PRIVACY. With iproute2 one can switch back to none or eui64 address configuration mode although the stable_secret is already set. We refuse writes to ipv6/conf/all/stable_secret but only allow ipv6/conf/default/stable_secret and the interface specific file to be written to. The default stable_secret is used as the parameter for the namespace, the interface specific can overwrite the secret, e.g. when switching a network configuration from one system to another while inheriting the secret. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/ipv6/addrconf.c | 130 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 127 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f5f5edd5ae5f..7ffb18df01ca 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -216,6 +216,7 @@ enum { enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY, }; /* Bridge section */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5b967c8a617a..6813268ce8b8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -131,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_generate_stable_address(struct in6_addr *addr, + u8 dad_count, + const struct inet6_dev *idev); /* * Configured unicast address hash table @@ -2302,6 +2305,11 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; + } else if (in6_dev->addr_gen_mode == + IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !ipv6_generate_stable_address(&addr, 0, + in6_dev)) { + goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2820,12 +2828,98 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static bool ipv6_reserved_interfaceid(struct in6_addr address) +{ + if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0) + return true; + + if (address.s6_addr32[2] == htonl(0x02005eff) && + ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) + return true; + + if (address.s6_addr32[2] == htonl(0xfdffffff) && + ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) + return true; + + return false; +} + +static int ipv6_generate_stable_address(struct in6_addr *address, + u8 dad_count, + const struct inet6_dev *idev) +{ + static const int idgen_retries = 3; + + static DEFINE_SPINLOCK(lock); + static __u32 digest[SHA_DIGEST_WORDS]; + static __u32 workspace[SHA_WORKSPACE_WORDS]; + + static union { + char __data[SHA_MESSAGE_BYTES]; + struct { + struct in6_addr secret; + __be64 prefix; + unsigned char hwaddr[MAX_ADDR_LEN]; + u8 dad_count; + } __packed; + } data; + + struct in6_addr secret; + struct in6_addr temp; + struct net *net = dev_net(idev->dev); + + BUILD_BUG_ON(sizeof(data.__data) != sizeof(data)); + + if (idev->cnf.stable_secret.initialized) + secret = idev->cnf.stable_secret.secret; + else if (net->ipv6.devconf_dflt->stable_secret.initialized) + secret = net->ipv6.devconf_dflt->stable_secret.secret; + else + return -1; + +retry: + spin_lock_bh(&lock); + + sha_init(digest); + memset(&data, 0, sizeof(data)); + memset(workspace, 0, sizeof(workspace)); + memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); + data.prefix = ((__be64)address->s6_addr32[0] << 32) | + (__be64)address->s6_addr32[1]; + data.secret = secret; + data.dad_count = dad_count; + + sha_transform(digest, data.__data, workspace); + + temp = *address; + temp.s6_addr32[2] = digest[0]; + temp.s6_addr32[3] = digest[1]; + + spin_unlock_bh(&lock); + + if (ipv6_reserved_interfaceid(temp)) { + dad_count++; + if (dad_count > idgen_retries) + return -1; + goto retry; + } + + *address = temp; + return 0; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { - struct in6_addr addr; + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + if (!ipv6_generate_stable_address(&addr, 0, idev)) + addrconf_add_linklocal(idev, &addr); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. @@ -4675,8 +4769,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE) + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) + return -EINVAL; + + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) return -EINVAL; + idev->addr_gen_mode = mode; err = 0; } @@ -5093,8 +5194,12 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, struct in6_addr addr; char str[IPV6_MAX_STRLEN]; struct ctl_table lctl = *ctl; + struct net *net = ctl->extra2; struct ipv6_stable_secret *secret = ctl->data; + if (&net->ipv6.devconf_all->stable_secret == ctl->data) + return -EIO; + lctl.maxlen = IPV6_MAX_STRLEN; lctl.data = str; @@ -5127,6 +5232,23 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, secret->initialized = true; secret->secret = addr; + if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) { + struct net_device *dev; + + for_each_netdev(net, dev) { + struct inet6_dev *idev = __in6_dev_get(dev); + + if (idev) { + idev->addr_gen_mode = + IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + } + } else { + struct inet6_dev *idev = ctl->extra1; + + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + out: rtnl_unlock(); -- cgit v1.2.3 From 64236f3f3d742469e4027b83a9515e84e9ab21b4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:02 +0100 Subject: ipv6: introduce IFA_F_STABLE_PRIVACY flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to mark appropriate addresses so we can do retries in case their DAD failed. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/if_addr.h | 1 + net/ipv6/addrconf.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index 40fdfea39714..4318ab1635ce 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -51,6 +51,7 @@ enum { #define IFA_F_MANAGETEMPADDR 0x100 #define IFA_F_NOPREFIXROUTE 0x200 #define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 struct ifa_cacheinfo { __u32 ifa_prefered; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6813268ce8b8..c2357b6f62dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2199,6 +2199,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) __u32 valid_lft; __u32 prefered_lft; int addr_type; + u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); @@ -2309,6 +2310,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) IN6_ADDR_GEN_MODE_STABLE_PRIVACY && !ipv6_generate_stable_address(&addr, 0, in6_dev)) { + addr_flags |= IFA_F_STABLE_PRIVACY; goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { @@ -2328,7 +2330,6 @@ ok: if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; - u32 addr_flags = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && @@ -2807,10 +2808,11 @@ static void init_loopback(struct net_device *dev) } } -static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) +static void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags) { struct inet6_ifaddr *ifp; - u32 addr_flags = IFA_F_PERMANENT; + u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && @@ -2818,7 +2820,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr addr_flags |= IFA_F_OPTIMISTIC; #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { @@ -2916,7 +2917,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { if (!ipv6_generate_stable_address(&addr, 0, idev)) - addrconf_add_linklocal(idev, &addr); + addrconf_add_linklocal(idev, &addr, + IFA_F_STABLE_PRIVACY); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { @@ -2925,7 +2927,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) * couldn't generate one. */ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); } -- cgit v1.2.3 From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Tue, 24 Mar 2015 14:48:41 +0100 Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension If vlan offloading takes place then vlan header is removed from frame and its contents, both vlan_tci and vlan_proto, is available to user space via TPACKET interface. However, only vlan_tci can be used in BPF filters. This commit introduces a new BPF extension. It makes possible to load the value of vlan_proto (vlan TPID) to register A. Support for classic BPF and eBPF is being added, analogous to skb->protocol. Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Pirko Signed-off-by: Michal Sekletar Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 3 ++- include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 17 +++++++++++++++++ tools/net/bpf_exp.l | 2 ++ tools/net/bpf_exp.y | 11 ++++++++++- 7 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 9930ecfbb465..135581f015e1 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -280,7 +280,8 @@ Possible BPF extensions are shown in the following table: rxhash skb->hash cpu raw_smp_processor_id() vlan_tci skb_vlan_tag_get(skb) - vlan_pr skb_vlan_tag_present(skb) + vlan_avail skb_vlan_tag_present(skb) + vlan_tpid skb->vlan_proto rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/include/linux/filter.h b/include/linux/filter.h index 9ee8c67ea249..fa11b3a367be 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); + BPF_ANCILLARY(VLAN_TPID); } /* Fallthrough. */ default: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3dd314a45d0d..27dc4ec58840 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -182,6 +182,7 @@ struct __sk_buff { __u32 protocol; __u32 vlan_present; __u32 vlan_tci; + __u32 vlan_proto; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 47785d5ecf17..34c7936ca114 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -77,7 +77,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 #define SKF_AD_RANDOM 56 -#define SKF_AD_MAX 60 +#define SKF_AD_VLAN_TPID 60 +#define SKF_AD_MAX 64 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 084eacc4d1d4..32f43c59908c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -272,6 +272,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp, insn += cnt - 1; break; + case SKF_AD_OFF + SKF_AD_VLAN_TPID: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ + *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, + offsetof(struct sk_buff, vlan_proto)); + /* A = ntohs(A) [emitting a nop or swap16] */ + *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); + break; + case SKF_AD_OFF + SKF_AD_PAY_OFFSET: case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: @@ -1226,6 +1236,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, offsetof(struct sk_buff, protocol)); break; + case offsetof(struct __sk_buff, vlan_proto): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_proto)); + break; + case offsetof(struct __sk_buff, mark): return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 833a96611da6..c83af3fb77de 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,8 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("vlan_avail") { return K_VLANP; } +"#"?("vlan_tpid") { return K_VLANTPID; } "#"?("rand") { return K_RAND; } ":" { return ':'; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e6306c51c26f..f8332749b44c 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_VLANTPID K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -167,6 +167,9 @@ ldb | OP_LDB K_RAND { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDB K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldh @@ -218,6 +221,9 @@ ldh | OP_LDH K_RAND { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDH K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldi @@ -274,6 +280,9 @@ ld | OP_LD K_RAND { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LD K_VLANTPID { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { -- cgit v1.2.3 From dd46c787788d5bf5b974729d43e4c405814a4c7d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 25 Mar 2015 15:07:05 +1100 Subject: fs: Add support FALLOC_FL_INSERT_RANGE for fallocate FALLOC_FL_INSERT_RANGE command is the opposite command of FALLOC_FL_COLLAPSE_RANGE that is needed for someone who wants to add some data in the middle of file. FALLOC_FL_INSERT_RANGE will create space for writing new data within a file after shifting extents to right as given length. This command also has same limitations as FALLOC_FL_COLLAPSE_RANGE in that operations need to be filesystem block boundary aligned and cannot cross the current EOF. Signed-off-by: Namjae Jeon Signed-off-by: Ashish Sangwan Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/open.c | 8 ++++++-- include/linux/falloc.h | 6 ++++++ include/uapi/linux/falloc.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/fs/open.c b/fs/open.c index 33f9cbf2610b..b724cc0e0228 100644 --- a/fs/open.c +++ b/fs/open.c @@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) return -EINVAL; /* Return error if mode is not supported */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) + if (mode & ~FALLOC_FL_SUPPORTED_MASK) return -EOPNOTSUPP; /* Punch hole and zero range are mutually exclusive */ @@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) (mode & ~FALLOC_FL_COLLAPSE_RANGE)) return -EINVAL; + /* Insert range should only be used exclusively. */ + if ((mode & FALLOC_FL_INSERT_RANGE) && + (mode & ~FALLOC_FL_INSERT_RANGE)) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) return -EBADF; diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 31591686ac2d..996111000a8c 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -21,4 +21,10 @@ struct space_resv { #define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) #define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) +#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | \ + FALLOC_FL_INSERT_RANGE) + #endif /* _FALLOC_H_ */ diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h index d1197ae3723c..3e445a760f14 100644 --- a/include/uapi/linux/falloc.h +++ b/include/uapi/linux/falloc.h @@ -41,4 +41,21 @@ */ #define FALLOC_FL_ZERO_RANGE 0x10 +/* + * FALLOC_FL_INSERT_RANGE is use to insert space within the file size without + * overwriting any existing data. The contents of the file beyond offset are + * shifted towards right by len bytes to create a hole. As such, this + * operation will increase the size of the file by len bytes. + * + * Different filesystems may implement different limitations on the granularity + * of the operation. Most will limit operations to filesystem block size + * boundaries, but this boundary may be larger or smaller depending on + * the filesystem and/or the configuration of the filesystem or file. + * + * Attempting to insert space using this flag at OR beyond the end of + * the file is considered an illegal operation - just use ftruncate(2) or + * fallocate(2) with mode 0 for such type of operations. + */ +#define FALLOC_FL_INSERT_RANGE 0x20 + #endif /* _UAPI_FALLOC_H_ */ -- cgit v1.2.3 From 2c60fae1489c70206e66c28d72b69a3e496c313d Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 26 Mar 2015 21:47:16 +0200 Subject: drm/i915: fix definition of the DRM_IOCTL_I915_GET_SPRITE_COLORKEY ioctl Fix definition of the DRM_IOCTL_I915_GET_SPRITE_COLORKEY ioctl, so that it is different from the DRM_IOCTL_I915_SET_SPRITE_COLORKEY ioctl. Note that this is just for accuracy, the ioctl implementation itself is totally unused and already ripped out. Signed-off-by: Tommi Rantala [danvet: Add note that this is a dead ioctl.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8d1be9073380..551b6737f5df 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -270,7 +270,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image) #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) -#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) -- cgit v1.2.3 From 34f439278cef7b1177f8ce24f9fc81dfc6221d3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Feb 2015 14:05:38 +0100 Subject: perf: Add per event clockid support While thinking on the whole clock discussion it occurred to me we have two distinct uses of time: 1) the tracking of event/ctx/cgroup enabled/running/stopped times which includes the self-monitoring support in struct perf_event_mmap_page. 2) the actual timestamps visible in the data records. And we've been conflating them. The first is all about tracking time deltas, nobody should really care in what time base that happens, its all relative information, as long as its internally consistent it works. The second however is what people are worried about when having to merge their data with external sources. And here we have the discussion on MONOTONIC vs MONOTONIC_RAW etc.. Where MONOTONIC is good for correlating between machines (static offset), MONOTNIC_RAW is required for correlating against a fixed rate hardware clock. This means configurability; now 1) makes that hard because it needs to be internally consistent across groups of unrelated events; which is why we had to have a global perf_clock(). However, for 2) it doesn't really matter, perf itself doesn't care what it writes into the buffer. The below patch makes the distinction between these two cases by adding perf_event_clock() which is used for the second case. It further makes this configurable on a per-event basis, but adds a few sanity checks such that we cannot combine events with different clocks in confusing ways. And since we then have per-event configurability we might as well retain the 'legacy' behaviour as a default. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 14 ++++++-- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 6 ++-- kernel/events/core.c | 77 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 91 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ac41b3ad1fc9..0420ebcac116 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1978,13 +1978,23 @@ void arch_perf_update_userpage(struct perf_event *event, data = cyc2ns_read_begin(); + /* + * Internal timekeeping for enabled/running/stopped times + * is always in the local_clock domain. + */ userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; userpg->time_offset = data->cyc2ns_offset - now; - userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + /* + * cap_user_time_zero doesn't make sense when we're using a different + * time base for the records. + */ + if (event->clock == &local_clock) { + userpg->cap_user_time_zero = 1; + userpg->time_zero = data->cyc2ns_offset; + } cyc2ns_read_end(data); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b16eac5f54ce..401554074de9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -173,6 +173,7 @@ struct perf_event; * pmu::capabilities flags */ #define PERF_PMU_CAP_NO_INTERRUPT 0x01 +#define PERF_PMU_CAP_NO_NMI 0x02 /** * struct pmu - generic performance monitoring unit @@ -457,6 +458,7 @@ struct perf_event { struct pid_namespace *ns; u64 id; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1e3cd07cf76e..3bb40ddadbe5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -326,7 +326,8 @@ struct perf_event_attr { exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ comm_exec : 1, /* flag comm events that are due to an exec */ - __reserved_1 : 39; + use_clockid : 1, /* use @clockid for time fields */ + __reserved_1 : 38; union { __u32 wakeup_events; /* wakeup every n events */ @@ -355,8 +356,7 @@ struct perf_event_attr { */ __u32 sample_stack_user; - /* Align to u64. */ - __u32 __reserved_2; + __s32 clockid; /* * Defines set of regs to dump for each sample * state captured on: diff --git a/kernel/events/core.c b/kernel/events/core.c index bb1a7c36e794..c40c2cac2d8e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -327,6 +327,11 @@ static inline u64 perf_clock(void) return local_clock(); } +static inline u64 perf_event_clock(struct perf_event *event) +{ + return event->clock(); +} + static inline struct perf_cpu_context * __get_cpu_context(struct perf_event_context *ctx) { @@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header, } if (sample_type & PERF_SAMPLE_TIME) - data->time = perf_clock(); + data->time = perf_event_clock(event); if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) data->id = primary_event_id(event); @@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event, task_event->event_id.tid = perf_event_tid(event, task); task_event->event_id.ptid = perf_event_tid(event, current); + task_event->event_id.time = perf_event_clock(event); + perf_output_put(&handle, task_event->event_id); perf_event__output_id_sample(event, &handle, &sample); @@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task, /* .ppid */ /* .tid */ /* .ptid */ - .time = perf_clock(), + /* .time */ }, }; @@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) .misc = 0, .size = sizeof(throttle_event), }, - .time = perf_clock(), + .time = perf_event_clock(event), .id = primary_event_id(event), .stream_id = event->id, }; @@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event) static struct pmu perf_swevent = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event) static struct pmu perf_cpu_clock = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event) static struct pmu perf_task_clock = { .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_NMI, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->hw.target = task; } + event->clock = &local_clock; + if (parent_event) + event->clock = parent_event->clock; + if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; @@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) if (output_event->cpu == -1 && output_event->ctx != event->ctx) goto out; + /* + * Mixing clocks in the same buffer is trouble you don't need. + */ + if (output_event->clock != event->clock) + goto out; + set: mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ @@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b) mutex_lock_nested(b, SINGLE_DEPTH_NESTING); } +static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) +{ + bool nmi_safe = false; + + switch (clk_id) { + case CLOCK_MONOTONIC: + event->clock = &ktime_get_mono_fast_ns; + nmi_safe = true; + break; + + case CLOCK_MONOTONIC_RAW: + event->clock = &ktime_get_raw_fast_ns; + nmi_safe = true; + break; + + case CLOCK_REALTIME: + event->clock = &ktime_get_real_ns; + break; + + case CLOCK_BOOTTIME: + event->clock = &ktime_get_boot_ns; + break; + + case CLOCK_TAI: + event->clock = &ktime_get_tai_ns; + break; + + default: + return -EINVAL; + } + + if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) + return -EINVAL; + + return 0; +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open, */ pmu = event->pmu; + if (attr.use_clockid) { + err = perf_event_set_clock(event, attr.clockid); + if (err) + goto err_alloc; + } + if (group_leader && (is_software_event(event) != is_software_event(group_leader))) { if (is_software_event(event)) { @@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open, */ if (group_leader->group_leader != group_leader) goto err_context; + + /* All events in a group should have the same clock */ + if (group_leader->clock != event->clock) + goto err_context; + /* * Do not allow to attach to a group in a different * task or CPU context: -- cgit v1.2.3 From 271c865161c57cfabca45b93eaa712b19da365bc Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 27 Mar 2015 12:46:12 +1030 Subject: Add virtio-input driver. virtio-input is basically evdev-events-over-virtio, so this driver isn't much more than reading configuration from config space and forwarding incoming events to the linux input layer. Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- MAINTAINERS | 6 + drivers/virtio/Kconfig | 10 + drivers/virtio/Makefile | 1 + drivers/virtio/virtio_input.c | 384 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/virtio_ids.h | 1 + include/uapi/linux/virtio_input.h | 76 ++++++++ 7 files changed, 479 insertions(+) create mode 100644 drivers/virtio/virtio_input.c create mode 100644 include/uapi/linux/virtio_input.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index 0e1abe8cc684..585e6cdd9186 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10435,6 +10435,12 @@ S: Maintained F: drivers/vhost/ F: include/uapi/linux/vhost.h +VIRTIO INPUT DRIVER +M: Gerd Hoffmann +S: Maintained +F: drivers/virtio/virtio_input.c +F: include/uapi/linux/virtio_input.h + VIA RHINE NETWORK DRIVER M: Roger Luethi S: Maintained diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index b546da5d8ea3..cab9f3f63a38 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -48,6 +48,16 @@ config VIRTIO_BALLOON If unsure, say M. +config VIRTIO_INPUT + tristate "Virtio input driver" + depends on VIRTIO + depends on INPUT + ---help--- + This driver supports virtio input devices such as + keyboards, mice and tablets. + + If unsure, say M. + config VIRTIO_MMIO tristate "Platform bus driver for memory mapped virtio devices" depends on HAS_IOMEM diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index d85565b8ea46..41e30e3dc842 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o +obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c new file mode 100644 index 000000000000..60e2a1677563 --- /dev/null +++ b/drivers/virtio/virtio_input.c @@ -0,0 +1,384 @@ +#include +#include +#include +#include + +#include +#include + +struct virtio_input { + struct virtio_device *vdev; + struct input_dev *idev; + char name[64]; + char serial[64]; + char phys[64]; + struct virtqueue *evt, *sts; + struct virtio_input_event evts[64]; + spinlock_t lock; + bool ready; +}; + +static void virtinput_queue_evtbuf(struct virtio_input *vi, + struct virtio_input_event *evtbuf) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, evtbuf, sizeof(*evtbuf)); + virtqueue_add_inbuf(vi->evt, sg, 1, evtbuf, GFP_ATOMIC); +} + +static void virtinput_recv_events(struct virtqueue *vq) +{ + struct virtio_input *vi = vq->vdev->priv; + struct virtio_input_event *event; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vi->lock, flags); + if (vi->ready) { + while ((event = virtqueue_get_buf(vi->evt, &len)) != NULL) { + spin_unlock_irqrestore(&vi->lock, flags); + input_event(vi->idev, + le16_to_cpu(event->type), + le16_to_cpu(event->code), + le32_to_cpu(event->value)); + spin_lock_irqsave(&vi->lock, flags); + virtinput_queue_evtbuf(vi, event); + } + virtqueue_kick(vq); + } + spin_unlock_irqrestore(&vi->lock, flags); +} + +/* + * On error we are losing the status update, which isn't critical as + * this is typically used for stuff like keyboard leds. + */ +static int virtinput_send_status(struct virtio_input *vi, + u16 type, u16 code, s32 value) +{ + struct virtio_input_event *stsbuf; + struct scatterlist sg[1]; + unsigned long flags; + int rc; + + stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC); + if (!stsbuf) + return -ENOMEM; + + stsbuf->type = cpu_to_le16(type); + stsbuf->code = cpu_to_le16(code); + stsbuf->value = cpu_to_le32(value); + sg_init_one(sg, stsbuf, sizeof(*stsbuf)); + + spin_lock_irqsave(&vi->lock, flags); + if (vi->ready) { + rc = virtqueue_add_outbuf(vi->sts, sg, 1, stsbuf, GFP_ATOMIC); + virtqueue_kick(vi->sts); + } else { + rc = -ENODEV; + } + spin_unlock_irqrestore(&vi->lock, flags); + + if (rc != 0) + kfree(stsbuf); + return rc; +} + +static void virtinput_recv_status(struct virtqueue *vq) +{ + struct virtio_input *vi = vq->vdev->priv; + struct virtio_input_event *stsbuf; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vi->lock, flags); + while ((stsbuf = virtqueue_get_buf(vi->sts, &len)) != NULL) + kfree(stsbuf); + spin_unlock_irqrestore(&vi->lock, flags); +} + +static int virtinput_status(struct input_dev *idev, unsigned int type, + unsigned int code, int value) +{ + struct virtio_input *vi = input_get_drvdata(idev); + + return virtinput_send_status(vi, type, code, value); +} + +static u8 virtinput_cfg_select(struct virtio_input *vi, + u8 select, u8 subsel) +{ + u8 size; + + virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select); + virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel); + virtio_cread(vi->vdev, struct virtio_input_config, size, &size); + return size; +} + +static void virtinput_cfg_bits(struct virtio_input *vi, int select, int subsel, + unsigned long *bits, unsigned int bitcount) +{ + unsigned int bit; + u8 *virtio_bits; + u8 bytes; + + bytes = virtinput_cfg_select(vi, select, subsel); + if (!bytes) + return; + if (bitcount > bytes * 8) + bitcount = bytes * 8; + + /* + * Bitmap in virtio config space is a simple stream of bytes, + * with the first byte carrying bits 0-7, second bits 8-15 and + * so on. + */ + virtio_bits = kzalloc(bytes, GFP_KERNEL); + if (!virtio_bits) + return; + virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config, + u.bitmap), + virtio_bits, bytes); + for (bit = 0; bit < bitcount; bit++) { + if (virtio_bits[bit / 8] & (1 << (bit % 8))) + __set_bit(bit, bits); + } + kfree(virtio_bits); + + if (select == VIRTIO_INPUT_CFG_EV_BITS) + __set_bit(subsel, vi->idev->evbit); +} + +static void virtinput_cfg_abs(struct virtio_input *vi, int abs) +{ + u32 mi, ma, re, fu, fl; + + virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs); + virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi); + virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma); + virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re); + virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu); + virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); + input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); + input_abs_set_res(vi->idev, abs, re); +} + +static int virtinput_init_vqs(struct virtio_input *vi) +{ + struct virtqueue *vqs[2]; + vq_callback_t *cbs[] = { virtinput_recv_events, + virtinput_recv_status }; + static const char *names[] = { "events", "status" }; + int err; + + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); + if (err) + return err; + vi->evt = vqs[0]; + vi->sts = vqs[1]; + + return 0; +} + +static void virtinput_fill_evt(struct virtio_input *vi) +{ + unsigned long flags; + int i, size; + + spin_lock_irqsave(&vi->lock, flags); + size = virtqueue_get_vring_size(vi->evt); + if (size > ARRAY_SIZE(vi->evts)) + size = ARRAY_SIZE(vi->evts); + for (i = 0; i < size; i++) + virtinput_queue_evtbuf(vi, &vi->evts[i]); + virtqueue_kick(vi->evt); + spin_unlock_irqrestore(&vi->lock, flags); +} + +static int virtinput_probe(struct virtio_device *vdev) +{ + struct virtio_input *vi; + unsigned long flags; + size_t size; + int abs, err; + + if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) + return -ENODEV; + + vi = kzalloc(sizeof(*vi), GFP_KERNEL); + if (!vi) + return -ENOMEM; + + vdev->priv = vi; + vi->vdev = vdev; + spin_lock_init(&vi->lock); + + err = virtinput_init_vqs(vi); + if (err) + goto err_init_vq; + + vi->idev = input_allocate_device(); + if (!vi->idev) { + err = -ENOMEM; + goto err_input_alloc; + } + input_set_drvdata(vi->idev, vi); + + size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_NAME, 0); + virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config, + u.string), + vi->name, min(size, sizeof(vi->name))); + size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_SERIAL, 0); + virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config, + u.string), + vi->serial, min(size, sizeof(vi->serial))); + snprintf(vi->phys, sizeof(vi->phys), + "virtio%d/input0", vdev->index); + vi->idev->name = vi->name; + vi->idev->phys = vi->phys; + vi->idev->uniq = vi->serial; + + size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0); + if (size >= sizeof(struct virtio_input_devids)) { + virtio_cread(vi->vdev, struct virtio_input_config, + u.ids.bustype, &vi->idev->id.bustype); + virtio_cread(vi->vdev, struct virtio_input_config, + u.ids.vendor, &vi->idev->id.vendor); + virtio_cread(vi->vdev, struct virtio_input_config, + u.ids.product, &vi->idev->id.product); + virtio_cread(vi->vdev, struct virtio_input_config, + u.ids.version, &vi->idev->id.version); + } else { + vi->idev->id.bustype = BUS_VIRTUAL; + } + + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_PROP_BITS, 0, + vi->idev->propbit, INPUT_PROP_CNT); + size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REP); + if (size) + __set_bit(EV_REP, vi->idev->evbit); + + vi->idev->dev.parent = &vdev->dev; + vi->idev->event = virtinput_status; + + /* device -> kernel */ + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_KEY, + vi->idev->keybit, KEY_CNT); + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REL, + vi->idev->relbit, REL_CNT); + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_ABS, + vi->idev->absbit, ABS_CNT); + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_MSC, + vi->idev->mscbit, MSC_CNT); + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SW, + vi->idev->swbit, SW_CNT); + + /* kernel -> device */ + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_LED, + vi->idev->ledbit, LED_CNT); + virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND, + vi->idev->sndbit, SND_CNT); + + if (test_bit(EV_ABS, vi->idev->evbit)) { + for (abs = 0; abs < ABS_CNT; abs++) { + if (!test_bit(abs, vi->idev->absbit)) + continue; + virtinput_cfg_abs(vi, abs); + } + } + + virtio_device_ready(vdev); + vi->ready = true; + err = input_register_device(vi->idev); + if (err) + goto err_input_register; + + virtinput_fill_evt(vi); + return 0; + +err_input_register: + spin_lock_irqsave(&vi->lock, flags); + vi->ready = false; + spin_unlock_irqrestore(&vi->lock, flags); + input_free_device(vi->idev); +err_input_alloc: + vdev->config->del_vqs(vdev); +err_init_vq: + kfree(vi); + return err; +} + +static void virtinput_remove(struct virtio_device *vdev) +{ + struct virtio_input *vi = vdev->priv; + unsigned long flags; + + spin_lock_irqsave(&vi->lock, flags); + vi->ready = false; + spin_unlock_irqrestore(&vi->lock, flags); + + input_unregister_device(vi->idev); + vdev->config->del_vqs(vdev); + kfree(vi); +} + +#ifdef CONFIG_PM_SLEEP +static int virtinput_freeze(struct virtio_device *vdev) +{ + struct virtio_input *vi = vdev->priv; + unsigned long flags; + + spin_lock_irqsave(&vi->lock, flags); + vi->ready = false; + spin_unlock_irqrestore(&vi->lock, flags); + + vdev->config->del_vqs(vdev); + return 0; +} + +static int virtinput_restore(struct virtio_device *vdev) +{ + struct virtio_input *vi = vdev->priv; + int err; + + err = virtinput_init_vqs(vi); + if (err) + return err; + + virtio_device_ready(vdev); + vi->ready = true; + virtinput_fill_evt(vi); + return 0; +} +#endif + +static unsigned int features[] = { + /* none */ +}; +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_INPUT, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct virtio_driver virtio_input_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .id_table = id_table, + .probe = virtinput_probe, + .remove = virtinput_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtinput_freeze, + .restore = virtinput_restore, +#endif +}; + +module_virtio_driver(virtio_input_driver); +MODULE_DEVICE_TABLE(virtio, id_table); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Virtio input device driver"); +MODULE_AUTHOR("Gerd Hoffmann "); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 68ceb97c458c..04b829e9ac46 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -430,6 +430,7 @@ header-y += virtio_blk.h header-y += virtio_config.h header-y += virtio_console.h header-y += virtio_ids.h +header-y += virtio_input.h header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 284fc3a05f7b..5f60aa4be50a 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,5 +39,6 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h new file mode 100644 index 000000000000..a7fe5c8fb135 --- /dev/null +++ b/include/uapi/linux/virtio_input.h @@ -0,0 +1,76 @@ +#ifndef _LINUX_VIRTIO_INPUT_H +#define _LINUX_VIRTIO_INPUT_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#include + +enum virtio_input_config_select { + VIRTIO_INPUT_CFG_UNSET = 0x00, + VIRTIO_INPUT_CFG_ID_NAME = 0x01, + VIRTIO_INPUT_CFG_ID_SERIAL = 0x02, + VIRTIO_INPUT_CFG_ID_DEVIDS = 0x03, + VIRTIO_INPUT_CFG_PROP_BITS = 0x10, + VIRTIO_INPUT_CFG_EV_BITS = 0x11, + VIRTIO_INPUT_CFG_ABS_INFO = 0x12, +}; + +struct virtio_input_absinfo { + __u32 min; + __u32 max; + __u32 fuzz; + __u32 flat; + __u32 res; +}; + +struct virtio_input_devids { + __u16 bustype; + __u16 vendor; + __u16 product; + __u16 version; +}; + +struct virtio_input_config { + __u8 select; + __u8 subsel; + __u8 size; + __u8 reserved[5]; + union { + char string[128]; + __u8 bitmap[128]; + struct virtio_input_absinfo abs; + struct virtio_input_devids ids; + } u; +}; + +struct virtio_input_event { + __le16 type; + __le16 code; + __le32 value; +}; + +#endif /* _LINUX_VIRTIO_INPUT_H */ -- cgit v1.2.3 From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 26 Mar 2015 19:53:57 -0700 Subject: tc: bpf: generalize pedit action existing TC action 'pedit' can munge any bits of the packet. Generalize it for use in bpf programs attached as cls_bpf and act_bpf via bpf_skb_store_bytes() helper function. Signed-off-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 2 ++ net/core/filter.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 73 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 280a315de8d6..d5cda067115a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -59,6 +59,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 27dc4ec58840..74aab6e0d964 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_func_id { BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ + BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0e714f799ec0..630a7bac1e51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; + } else if (arg_type == ARG_PTR_TO_CTX) { + expected_type = PTR_TO_CTX; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index 32f43c59908c..444a07e4f68d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + unsigned int offset = (unsigned int) r2; + void *from = (void *) (long) r3; + unsigned int len = (unsigned int) r4; + char buf[16]; + void *ptr; + + /* bpf verifier guarantees that: + * 'from' pointer points to bpf program stack + * 'len' bytes of it were initialized + * 'len' > 0 + * 'skb' is a valid pointer to 'struct sk_buff' + * + * so check for invalid 'offset' and too large 'len' + */ + if (offset > 0xffff || len > sizeof(buf)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, buf); + if (unlikely(!ptr)) + return -EFAULT; + + skb_postpull_rcsum(skb, ptr, len); + + memcpy(ptr, from, len); + + if (ptr == buf) + /* skb_store_bits cannot return -EFAULT here */ + skb_store_bits(skb, offset, ptr, len); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + return 0; +} + +const struct bpf_func_proto bpf_skb_store_bytes_proto = { + .func = bpf_skb_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id) } } +static const struct bpf_func_proto * +tc_cls_act_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + default: + return sk_filter_func_proto(func_id); + } +} + static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { @@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = { .convert_ctx_access = sk_filter_convert_ctx_access, }; +static const struct bpf_verifier_ops tc_cls_act_ops = { + .get_func_proto = tc_cls_act_func_proto, + .is_valid_access = sk_filter_is_valid_access, + .convert_ctx_access = sk_filter_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, }; static struct bpf_prog_type_list sched_cls_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_CLS, }; static struct bpf_prog_type_list sched_act_type __read_mostly = { - .ops = &sk_filter_ops, + .ops = &tc_cls_act_ops, .type = BPF_PROG_TYPE_SCHED_ACT, }; -- cgit v1.2.3 From 3a323d4e17dd5a84f6ad036e6f985d263ca973ed Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 18 Mar 2015 10:47:02 +0200 Subject: nl80211: small clarification of the sched_scan delay attribute Just clarify that the delay is only before the first cycle. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ae16ba9cb1e3..241220c43e86 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1754,8 +1754,9 @@ enum nl80211_commands { * should be contained in the result as the sum of the respective counters * over all channels. * - * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a - * WoWLAN net-detect scan) is started, u32 in seconds. + * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a + * scheduled scan (or a WoWLAN net-detect scan) is started, u32 + * in seconds. * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device * is operating in an indoor environment. -- cgit v1.2.3 From 4b3a81a917a5ef21a4483d699cefd4d9fa35b841 Mon Sep 17 00:00:00 2001 From: Boris Brezillion Date: Wed, 28 Jan 2015 17:49:51 +0100 Subject: Add RGB444_1X12 and RGB565_1X16 media bus formats Add RGB444_1X12 and RGB565_1X16 format definitions and update the documentation. Signed-off-by: Boris Brezillon Acked-by: Mauro Carvalho Chehab Acked-by: Sakari Ailus Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Philipp Zabel --- Documentation/DocBook/media/v4l/subdev-formats.xml | 40 ++++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 4 ++- 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index c5ea868e3909..29fe60112f4c 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -192,6 +192,24 @@ see . + + MEDIA_BUS_FMT_RGB444_1X12 + 0x100e + + &dash-ent-20; + r3 + r2 + r1 + r0 + g3 + g2 + g1 + g0 + b3 + b2 + b1 + b0 + MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 @@ -304,6 +322,28 @@ see . g4 g3 + + MEDIA_BUS_FMT_RGB565_1X16 + 0x100f + + &dash-ent-16; + r4 + r3 + r2 + r1 + r0 + g5 + g4 + g3 + g2 + g1 + g0 + b4 + b3 + b2 + b1 + b0 + MEDIA_BUS_FMT_BGR565_2X8_BE 0x1005 diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 23b40908be30..37091c668f65 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,11 +33,13 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x100e */ +/* RGB - next is 0x1010 */ +#define MEDIA_BUS_FMT_RGB444_1X12 0x100e #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE 0x1003 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE 0x1004 +#define MEDIA_BUS_FMT_RGB565_1X16 0x100f #define MEDIA_BUS_FMT_BGR565_2X8_BE 0x1005 #define MEDIA_BUS_FMT_BGR565_2X8_LE 0x1006 #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 -- cgit v1.2.3 From b295c22978b86fc62019d12f4108b68b7e795610 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 2 Dec 2014 17:49:04 +0100 Subject: Add LVDS RGB media bus formats This patch adds three new RGB media bus formats that describe 18-bit or 24-bit samples transferred over an LVDS bus with three or four differential data pairs, serialized into 7 time slots, using standard SPWG/PSWG/VESA or JEIDA data ordering. Signed-off-by: Philipp Zabel Acked-by: Sakari Ailus Acked-by: Hans Verkuil --- Documentation/DocBook/media/v4l/subdev-formats.xml | 255 +++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 5 +- 2 files changed, 259 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 29fe60112f4c..18449b32f240 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -622,6 +622,261 @@ see . + + On LVDS buses, usually each sample is transferred serialized in + seven time slots per pixel clock, on three (18-bit) or four (24-bit) + differential data pairs at the same time. The remaining bits are used for + control signals as defined by SPWG/PSWG/VESA or JEIDA standards. + The 24-bit RGB format serialized in seven time slots on four lanes using + JEIDA defined bit mapping will be named + MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA, for example. + + + + LVDS RGB formats + + + + + + + + + + + + + Identifier + Code + + + Data organization + + + + + Timeslot + Lane + 3 + 2 + 1 + 0 + + + + + MEDIA_BUS_FMT_RGB666_1X7X3_SPWG + 0x1010 + 0 + + - + d + b1 + g0 + + + + + 1 + + - + d + b0 + r5 + + + + + 2 + + - + d + g5 + r4 + + + + + 3 + + - + b5 + g4 + r3 + + + + + 4 + + - + b4 + g3 + r2 + + + + + 5 + + - + b3 + g2 + r1 + + + + + 6 + + - + b2 + g1 + r0 + + + MEDIA_BUS_FMT_RGB888_1X7X4_SPWG + 0x1011 + 0 + + d + d + b1 + g0 + + + + + 1 + + b7 + d + b0 + r5 + + + + + 2 + + b6 + d + g5 + r4 + + + + + 3 + + g7 + b5 + g4 + r3 + + + + + 4 + + g6 + b4 + g3 + r2 + + + + + 5 + + r7 + b3 + g2 + r1 + + + + + 6 + + r6 + b2 + g1 + r0 + + + MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA + 0x1012 + 0 + + d + d + b3 + g2 + + + + + 1 + + b1 + d + b2 + r7 + + + + + 2 + + b0 + d + g7 + r6 + + + + + 3 + + g1 + b7 + g6 + r5 + + + + + 4 + + g0 + b6 + g5 + r4 + + + + + 5 + + r1 + b5 + g4 + r3 + + + + + 6 + + r0 + b4 + g3 + r2 + + + +
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 37091c668f65..3fb9cbbb603f 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,7 +33,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1010 */ +/* RGB - next is 0x1013 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x100e #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -45,9 +45,12 @@ #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 +#define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG 0x1010 #define MEDIA_BUS_FMT_RGB888_1X24 0x100a #define MEDIA_BUS_FMT_RGB888_2X12_BE 0x100b #define MEDIA_BUS_FMT_RGB888_2X12_LE 0x100c +#define MEDIA_BUS_FMT_RGB888_1X7X4_SPWG 0x1011 +#define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA 0x1012 #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d /* YUV (including grey) - next is 0x2024 */ -- cgit v1.2.3 From 08c38458be7efa36a1d2dffd40500448e46d29c5 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 2 Dec 2014 15:45:25 +0100 Subject: Add BGR888_1X24 and GBR888_1X24 media bus formats This patch adds two more 24-bit RGB formats. BGR888 is more or less common, GBR888 is used on the internal connection between the IPU display interface and the TVE (VGA DAC) on i.MX53 SoCs. Signed-off-by: Philipp Zabel Acked-by: Laurent Pinchart Acked-by: Hans Verkuil --- Documentation/DocBook/media/v4l/subdev-formats.xml | 60 ++++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 4 +- 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 18449b32f240..805cbe1acab7 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -480,6 +480,66 @@ see . b1 b0 + + MEDIA_BUS_FMT_BGR888_1X24 + 0x1013 + + &dash-ent-8; + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + + + MEDIA_BUS_FMT_GBR888_1X24 + 0x1014 + + &dash-ent-8; + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + MEDIA_BUS_FMT_RGB888_1X24 0x100a diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 3fb9cbbb603f..6f6942e8dae6 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,7 +33,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1013 */ +/* RGB - next is 0x1015 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x100e #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -46,6 +46,8 @@ #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 #define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG 0x1010 +#define MEDIA_BUS_FMT_BGR888_1X24 0x1013 +#define MEDIA_BUS_FMT_GBR888_1X24 0x1014 #define MEDIA_BUS_FMT_RGB888_1X24 0x100a #define MEDIA_BUS_FMT_RGB888_2X12_BE 0x100b #define MEDIA_BUS_FMT_RGB888_2X12_LE 0x100c -- cgit v1.2.3 From 0fc63eb104d76e20654cd97eeb1bfd0f35bc3d3a Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 3 Dec 2014 11:01:54 +0100 Subject: Add YUV8_1X24 media bus format This patch adds the media bus format for a 24-bit bus format with three 8-bit YUV components. Signed-off-by: Philipp Zabel Acked-by: Laurent Pinchart Acked-by: Hans Verkuil --- Documentation/DocBook/media/v4l/subdev-formats.xml | 37 ++++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 3 +- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 805cbe1acab7..8d1f62402f88 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -3015,6 +3015,43 @@ see . u1 u0 + + MEDIA_BUS_FMT_YUV8_1X24 + 0x2024 + + - + - + - + - + - + - + - + - + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + MEDIA_BUS_FMT_YUV10_1X30 0x2016 diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 6f6942e8dae6..8dbf16cc5d1c 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -55,7 +55,7 @@ #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA 0x1012 #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d -/* YUV (including grey) - next is 0x2024 */ +/* YUV (including grey) - next is 0x2025 */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 #define MEDIA_BUS_FMT_UV8_1X8 0x2015 #define MEDIA_BUS_FMT_UYVY8_1_5X8 0x2002 @@ -81,6 +81,7 @@ #define MEDIA_BUS_FMT_VYUY10_1X20 0x201b #define MEDIA_BUS_FMT_YUYV10_1X20 0x200d #define MEDIA_BUS_FMT_YVYU10_1X20 0x200e +#define MEDIA_BUS_FMT_YUV8_1X24 0x2024 #define MEDIA_BUS_FMT_YUV10_1X30 0x2016 #define MEDIA_BUS_FMT_AYUV8_1X32 0x2017 #define MEDIA_BUS_FMT_UYVY12_2X12 0x201c -- cgit v1.2.3 From 203508ef52e3fee93b71262928541ecea82c735d Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 2 Dec 2014 17:56:04 +0100 Subject: Add RGB666_1X24_CPADHI media bus format Commit 9e74d2926a28 ("staging: imx-drm: add LVDS666 support for parallel display") describes a 24-bit bus format where three 6-bit components each take the lower part of 8 bits with the two high bits zero padded. Add a component-wise padded media bus format RGB666_1X24_CPADHI to support this connection. Signed-off-by: Philipp Zabel Acked-by: Hans Verkuil Tested-by: Emil Renner Berthing --- Documentation/DocBook/media/v4l/subdev-formats.xml | 34 +++++++++++++++++++++- include/uapi/linux/media-bus-format.h | 3 +- 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 8d1f62402f88..18b71aff48c9 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -91,7 +91,9 @@ see . For formats where the total number of bits per pixel is smaller than the number of bus samples per pixel times the bus width, a padding value stating if the bytes are padded in their most high order bits - (PADHI) or low order bits (PADLO). + (PADHI) or low order bits (PADLO). A "C" prefix is used for component-wise + padding in the most high order bits (CPADHI) or low order bits (CPADLO) + of each separate component. For formats where the number of bus samples per pixel is larger than 1, an endianness value stating if the pixel is transferred MSB first (BE) or LSB first (LE). @@ -480,6 +482,36 @@ see . b1 b0 + + MEDIA_BUS_FMT_RGB666_1X24_CPADHI + 0x1015 + + &dash-ent-8; + 0 + 0 + r5 + r4 + r3 + r2 + r1 + r0 + 0 + 0 + g5 + g4 + g3 + g2 + g1 + g0 + 0 + 0 + b5 + b4 + b3 + b2 + b1 + b0 + MEDIA_BUS_FMT_BGR888_1X24 0x1013 diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 8dbf16cc5d1c..83ea46f4be51 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,7 +33,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1015 */ +/* RGB - next is 0x1016 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x100e #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -45,6 +45,7 @@ #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 +#define MEDIA_BUS_FMT_RGB666_1X24_CPADHI 0x1015 #define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG 0x1010 #define MEDIA_BUS_FMT_BGR888_1X24 0x1013 #define MEDIA_BUS_FMT_GBR888_1X24 0x1014 -- cgit v1.2.3 From 15e318bdc6dfb82914c82fb7ad00badaa8387d8e Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:24 +0200 Subject: xfrm: simplify xfrm_address_t use In many places, the a6 field is typecasted to struct in6_addr. As the fields are in union anyway, just add in6_addr type to the union and get rid of the typecasting. Modifying the uapi header is okay, the union has still the same size. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +++--- include/uapi/linux/xfrm.h | 2 ++ net/ipv6/xfrm6_mode_beet.c | 4 ++-- net/ipv6/xfrm6_policy.c | 4 +--- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d0ac7d7be8a7..461f83539493 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1025,7 +1025,7 @@ xfrm_addr_any(const xfrm_address_t *addr, unsigned short family) case AF_INET: return addr->a4 == 0; case AF_INET6: - return ipv6_addr_any((struct in6_addr *)&addr->a6); + return ipv6_addr_any(&addr->in6); } return 0; } @@ -1238,8 +1238,8 @@ void xfrm_flowi_addr_get(const struct flowi *fl, memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4)); break; case AF_INET6: - *(struct in6_addr *)saddr->a6 = fl->u.ip6.saddr; - *(struct in6_addr *)daddr->a6 = fl->u.ip6.daddr; + saddr->in6 = fl->u.ip6.saddr; + daddr->in6 = fl->u.ip6.daddr; break; } } diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 02d5125a5ee8..2cd9e608d0d1 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -1,6 +1,7 @@ #ifndef _LINUX_XFRM_H #define _LINUX_XFRM_H +#include #include /* All of the structures in this file may not change size as they are @@ -13,6 +14,7 @@ typedef union { __be32 a4; __be32 a6[4]; + struct in6_addr in6; } xfrm_address_t; /* Ident of a specific xfrm_state. It is used on input to lookup diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 9949a356d62c..1e205c3253ac 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; - ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; err = 0; out: return err; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 91d934c22a2a..f337a908a76a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net, return -EHOSTUNREACH; dev = ip6_dst_idev(dst)->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, - (struct in6_addr *)&daddr->a6, 0, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 9255fd9d94bc..f0d52d721b3a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -709,7 +709,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; - sin6->sin6_addr = *(struct in6_addr *)xaddr->a6; + sin6->sin6_addr = xaddr->in6; sin6->sin6_scope_id = 0; return 128; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index de971b6d38c5..f5e39e35d73a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1043,12 +1043,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, break; case AF_INET6: - *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; - *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; + x->sel.daddr.in6 = daddr->in6; + x->sel.saddr.in6 = saddr->in6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; - *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; - *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; + x->props.saddr.in6 = saddr->in6; + x->id.daddr.in6 = daddr->in6; break; } -- cgit v1.2.3 From 761da2935d6e18d178582dbdf315a3a458555505 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:36 +0000 Subject: netfilter: nf_tables: add set timeout API support Add set timeout support to the netlink API. Sets with timeout support enabled can have a default timeout value and garbage collection interval specified. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 +++++++++ include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 30 ++++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b8cd60dcb4e1..8936803a2ad5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -258,6 +258,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @dtype: data type (verdict or numeric type defined by userspace) * @size: maximum set size * @nelems: number of elements + * @timeout: default timeout value in msecs + * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops * @pnet: network namespace @@ -274,6 +276,8 @@ struct nft_set { u32 dtype; u32 size; u32 nelems; + u64 timeout; + u32 gc_int; u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; @@ -295,6 +299,11 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla); +static inline unsigned long nft_set_gc_interval(const struct nft_set *set) +{ + return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; +} + /** * struct nft_set_binding - nf_tables set binding * diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b9783931503b..971d245e7378 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -208,12 +208,14 @@ enum nft_rule_compat_attributes { * @NFT_SET_CONSTANT: set contents may not change while bound * @NFT_SET_INTERVAL: set contains intervals * @NFT_SET_MAP: set is used as a dictionary + * @NFT_SET_TIMEOUT: set uses timeouts */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, NFT_SET_CONSTANT = 0x2, NFT_SET_INTERVAL = 0x4, NFT_SET_MAP = 0x8, + NFT_SET_TIMEOUT = 0x10, }; /** @@ -252,6 +254,8 @@ enum nft_set_desc_attributes { * @NFTA_SET_POLICY: selection policy (NLA_U32) * @NFTA_SET_DESC: set description (NLA_NESTED) * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -265,6 +269,8 @@ enum nft_set_attributes { NFTA_SET_POLICY, NFTA_SET_DESC, NFTA_SET_ID, + NFTA_SET_TIMEOUT, + NFTA_SET_GC_INTERVAL, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5604c2df05d1..6320b64e773e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2216,6 +2216,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_POLICY] = { .type = NLA_U32 }, [NFTA_SET_DESC] = { .type = NLA_NESTED }, [NFTA_SET_ID] = { .type = NLA_U32 }, + [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2366,6 +2368,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (set->timeout && + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) + goto nla_put_failure; + if (set->gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + goto nla_put_failure; + if (set->policy != NFT_SET_POL_PERFORMANCE) { if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) goto nla_put_failure; @@ -2578,7 +2587,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, dtype, flags, policy; + u64 timeout; + u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; int err; @@ -2605,7 +2615,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_FLAGS] != NULL) { flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | - NFT_SET_INTERVAL | NFT_SET_MAP)) + NFT_SET_INTERVAL | NFT_SET_MAP | + NFT_SET_TIMEOUT)) return -EINVAL; } @@ -2631,6 +2642,19 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } else if (flags & NFT_SET_MAP) return -EINVAL; + timeout = 0; + if (nla[NFTA_SET_TIMEOUT] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); + } + gc_int = 0; + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); @@ -2699,6 +2723,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, set->flags = flags; set->size = desc.size; set->policy = policy; + set->timeout = timeout; + set->gc_int = gc_int; err = ops->init(set, &desc, nla); if (err < 0) -- cgit v1.2.3 From c3e1b005ed1cc068fc9d454a6e745830d55d251d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 26 Mar 2015 12:39:37 +0000 Subject: netfilter: nf_tables: add set element timeout support Add API support for set element timeouts. Elements can have a individual timeout value specified, overriding the sets' default. Two new extension types are used for timeouts - the timeout value and the expiration time. The timeout value only exists if it differs from the default value. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 20 ++++++++++++ include/uapi/linux/netfilter/nf_tables.h | 4 +++ net/netfilter/nf_tables_api.c | 53 ++++++++++++++++++++++++++++++-- 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8936803a2ad5..f2726c537248 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -329,12 +329,16 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_KEY: element key * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_TIMEOUT: element timeout + * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { NFT_SET_EXT_KEY, NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, + NFT_SET_EXT_TIMEOUT, + NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_NUM }; @@ -431,6 +435,22 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } +static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); +} + +static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && + time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); +} + static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, void *elem) { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 971d245e7378..83441cc4594b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -290,12 +290,16 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, NFTA_SET_ELEM_KEY, NFTA_SET_ELEM_DATA, NFTA_SET_ELEM_FLAGS, + NFTA_SET_ELEM_TIMEOUT, + NFTA_SET_ELEM_EXPIRATION, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6320b64e773e..9e032dbc149c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2863,6 +2863,14 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .len = sizeof(u8), .align = __alignof__(u8), }, + [NFT_SET_EXT_TIMEOUT] = { + .len = sizeof(u64), + .align = __alignof__(u64), + }, + [NFT_SET_EXT_EXPIRATION] = { + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, }; EXPORT_SYMBOL_GPL(nft_set_ext_types); @@ -2874,6 +2882,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -2935,6 +2944,25 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + cpu_to_be64(*nft_set_ext_timeout(ext)))) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + unsigned long expires, now = jiffies; + + expires = *nft_set_ext_expiration(ext); + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + + if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, + cpu_to_be64(jiffies_to_msecs(expires)))) + goto nla_put_failure; + } + nla_nest_end(skb, nest); return 0; @@ -3158,7 +3186,7 @@ static void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const struct nft_data *key, const struct nft_data *data, - gfp_t gfp) + u64 timeout, gfp_t gfp) { struct nft_set_ext *ext; void *elem; @@ -3173,6 +3201,11 @@ static void *nft_set_elem_init(const struct nft_set *set, memcpy(nft_set_ext_key(ext), key, set->klen); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) memcpy(nft_set_ext_data(ext), data, set->dlen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) + *nft_set_ext_expiration(ext) = + jiffies + msecs_to_jiffies(timeout); + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) + *nft_set_ext_timeout(ext) = timeout; return elem; } @@ -3201,6 +3234,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u64 timeout; u32 flags; int err; @@ -3241,6 +3275,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } + timeout = 0; + if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); + } else if (set->flags & NFT_SET_TIMEOUT) { + timeout = set->timeout; + } + err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; @@ -3249,6 +3292,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err2; nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); + if (timeout > 0) { + nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (timeout != set->timeout) + nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + } if (nla[NFTA_SET_ELEM_DATA] != NULL) { err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); @@ -3277,7 +3325,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, + timeout, GFP_KERNEL); if (elem.priv == NULL) goto err3; -- cgit v1.2.3 From a5581ef4c2eac6449188862e903eb46c7233582a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 1 Apr 2015 07:50:29 +0200 Subject: can: introduce new raw socket option to join the given CAN filters The CAN_RAW socket can set multiple CAN identifier specific filters that lead to multiple filters in the af_can.c filter processing. These filters are indenpendent from each other which leads to logical OR'ed filters when applied. This socket option joines the given CAN filters in the way that only CAN frames are passed to user space that matched *all* given CAN filters. The semantic for the applied filters is therefore changed to a logical AND. This is useful especially when the filterset is a combination of filters where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or CAN ID ranges from the incoming traffic. As the raw_rcv() function is executed from NET_RX softirq the introduced variables are implemented as per-CPU variables to avoid extensive locking at CAN frame reception time. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.txt | 20 ++++++++++++++++++-- include/uapi/linux/can/raw.h | 1 + net/can/raw.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 0a2859a8ee7e..5abad1e921ca 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -22,7 +22,8 @@ This file contains 4.1.3 RAW socket option CAN_RAW_LOOPBACK 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS 4.1.5 RAW socket option CAN_RAW_FD_FRAMES - 4.1.6 RAW socket returned message flags + 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS + 4.1.7 RAW socket returned message flags 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM) 4.2.1 Broadcast Manager operations 4.2.2 Broadcast Manager message flags @@ -601,7 +602,22 @@ solution for a couple of reasons: CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU. The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall. - 4.1.6 RAW socket returned message flags + 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS + + The CAN_RAW socket can set multiple CAN identifier specific filters that + lead to multiple filters in the af_can.c filter processing. These filters + are indenpendent from each other which leads to logical OR'ed filters when + applied (see 4.1.1). + + This socket option joines the given CAN filters in the way that only CAN + frames are passed to user space that matched *all* given CAN filters. The + semantic for the applied filters is therefore changed to a logical AND. + + This is useful especially when the filterset is a combination of filters + where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or + CAN ID ranges from the incoming traffic. + + 4.1.7 RAW socket returned message flags When using recvmsg() call, the msg->msg_flags may contain following flags: diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 78ec76fd89a6..8735f1080385 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -57,6 +57,7 @@ enum { CAN_RAW_LOOPBACK, /* local loopback (default:on) */ CAN_RAW_RECV_OWN_MSGS, /* receive my own msgs (default:off) */ CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ + CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */ }; #endif /* !_UAPI_CAN_RAW_H */ diff --git a/net/can/raw.c b/net/can/raw.c index 0c8d537b59b8..31b9748cbb4e 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -77,6 +77,7 @@ MODULE_ALIAS("can-proto-1"); struct uniqframe { ktime_t tstamp; const struct sk_buff *skb; + unsigned int join_rx_count; }; struct raw_sock { @@ -87,6 +88,7 @@ struct raw_sock { int loopback; int recv_own_msgs; int fd_frames; + int join_filters; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ struct can_filter *filter; /* pointer to filter(s) */ @@ -132,10 +134,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { - return; + if (ro->join_filters) { + this_cpu_inc(ro->uniq->join_rx_count); + /* drop frame until all enabled filters matched */ + if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) + return; + } else { + return; + } } else { this_cpu_ptr(ro->uniq)->skb = oskb; this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->join_rx_count = 1; + /* drop first frame to check all enabled filters? */ + if (ro->join_filters && ro->count > 1) + return; } /* clone the given skb to be able to enqueue it into the rcv queue */ @@ -311,6 +324,7 @@ static int raw_init(struct sock *sk) ro->loopback = 1; ro->recv_own_msgs = 0; ro->fd_frames = 0; + ro->join_filters = 0; /* alloc_percpu provides zero'ed memory */ ro->uniq = alloc_percpu(struct uniqframe); @@ -604,6 +618,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, break; + case CAN_RAW_JOIN_FILTERS: + if (optlen != sizeof(ro->join_filters)) + return -EINVAL; + + if (copy_from_user(&ro->join_filters, optval, optlen)) + return -EFAULT; + + break; + default: return -ENOPROTOOPT; } @@ -668,6 +691,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, val = &ro->fd_frames; break; + case CAN_RAW_JOIN_FILTERS: + if (len > sizeof(int)) + len = sizeof(int); + val = &ro->join_filters; + break; + default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 2541517c32be2531e0da59dfd7efc1ce844644f5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 25 Mar 2015 12:49:20 -0700 Subject: tracing, perf: Implement BPF programs attached to kprobes BPF programs, attached to kprobes, provide a safe way to execute user-defined BPF byte-code programs without being able to crash or hang the kernel in any way. The BPF engine makes sure that such programs have a finite execution time and that they cannot break out of their sandbox. The user interface is to attach to a kprobe via the perf syscall: struct perf_event_attr attr = { .type = PERF_TYPE_TRACEPOINT, .config = event_id, ... }; event_fd = perf_event_open(&attr,...); ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); 'prog_fd' is a file descriptor associated with BPF program previously loaded. 'event_id' is an ID of the kprobe created. Closing 'event_fd': close(event_fd); ... automatically detaches BPF program from it. BPF programs can call in-kernel helper functions to: - lookup/update/delete elements in maps - probe_read - wraper of probe_kernel_read() used to access any kernel data structures BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is architecture dependent) and return 0 to ignore the event and 1 to store kprobe event into the ring buffer. Note, kprobes are a fundamentally _not_ a stable kernel ABI, so BPF programs attached to kprobes must be recompiled for every kernel version and user must supply correct LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call. Signed-off-by: Alexei Starovoitov Reviewed-by: Steven Rostedt Reviewed-by: Masami Hiramatsu Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 11 ++++ include/uapi/linux/bpf.h | 3 + include/uapi/linux/perf_event.h | 1 + kernel/bpf/syscall.c | 7 ++- kernel/events/core.c | 59 ++++++++++++++++++ kernel/trace/Makefile | 1 + kernel/trace/bpf_trace.c | 130 ++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_kprobe.c | 8 +++ 8 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 kernel/trace/bpf_trace.c (limited to 'include/uapi') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 77325e1a1816..0aa535bc9f05 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -13,6 +13,7 @@ struct trace_array; struct trace_buffer; struct tracer; struct dentry; +struct bpf_prog; struct trace_print_flags { unsigned long mask; @@ -306,6 +307,7 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; int (*perf_perm)(struct ftrace_event_call *, struct perf_event *); @@ -551,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file, event_triggers_post_call(file, tt); } +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..b2948feeb70b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, }; /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -151,6 +152,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ }; } __attribute__((aligned(8))); @@ -162,6 +164,7 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3bb40ddadbe5..91803e54ee73 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -381,6 +381,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 536edc2be307..504c10b990ef 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -16,6 +16,7 @@ #include #include #include +#include static LIST_HEAD(bpf_map_types); @@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD log_buf +#define BPF_PROG_LOAD_LAST_FIELD kern_version static int bpf_prog_load(union bpf_attr *attr) { @@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr) if (attr->insn_cnt >= BPF_MAXINSNS) return -EINVAL; + if (type == BPF_PROG_TYPE_KPROBE && + attr->kern_version != LINUX_VERSION_CODE) + return -EINVAL; + /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); if (!prog) diff --git a/kernel/events/core.c b/kernel/events/core.c index c40c2cac2d8e..5c13862d3e85 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -3407,6 +3409,7 @@ errout: } static void perf_event_free_filter(struct perf_event *event); +static void perf_event_free_bpf_prog(struct perf_event *event); static void free_event_rcu(struct rcu_head *head) { @@ -3416,6 +3419,7 @@ static void free_event_rcu(struct rcu_head *head) if (event->ns) put_pid_ns(event->ns); perf_event_free_filter(event); + perf_event_free_bpf_prog(event); kfree(event); } @@ -3928,6 +3932,7 @@ static inline int perf_fget_light(int fd, struct fd *p) static int perf_event_set_output(struct perf_event *event, struct perf_event *output_event); static int perf_event_set_filter(struct perf_event *event, void __user *arg); +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -3981,6 +3986,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon case PERF_EVENT_IOC_SET_FILTER: return perf_event_set_filter(event, (void __user *)arg); + case PERF_EVENT_IOC_SET_BPF: + return perf_event_set_bpf_prog(event, arg); + default: return -ENOTTY; } @@ -6455,6 +6463,49 @@ static void perf_event_free_filter(struct perf_event *event) ftrace_profile_free_filter(event); } +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) +{ + struct bpf_prog *prog; + + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + + if (event->tp_event->prog) + return -EEXIST; + + if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) + /* bpf programs can only be attached to kprobes */ + return -EINVAL; + + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) { + /* valid fd, but invalid bpf program type */ + bpf_prog_put(prog); + return -EINVAL; + } + + event->tp_event->prog = prog; + + return 0; +} + +static void perf_event_free_bpf_prog(struct perf_event *event) +{ + struct bpf_prog *prog; + + if (!event->tp_event) + return; + + prog = event->tp_event->prog; + if (prog) { + event->tp_event->prog = NULL; + bpf_prog_put(prog); + } +} + #else static inline void perf_tp_register(void) @@ -6470,6 +6521,14 @@ static void perf_event_free_filter(struct perf_event *event) { } +static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) +{ + return -ENOENT; +} + +static void perf_event_free_bpf_prog(struct perf_event *event) +{ +} #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 98f26588255e..c575a300103b 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c new file mode 100644 index 000000000000..f1e87da91da3 --- /dev/null +++ b/kernel/trace/bpf_trace.c @@ -0,0 +1,130 @@ +/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include "trace.h" + +static DEFINE_PER_CPU(int, bpf_prog_active); + +/** + * trace_call_bpf - invoke BPF program + * @prog: BPF program + * @ctx: opaque context pointer + * + * kprobe handlers execute BPF programs via this helper. + * Can be used from static tracepoints in the future. + * + * Return: BPF programs always return an integer which is interpreted by + * kprobe handler as: + * 0 - return from kprobe (event is filtered out) + * 1 - store kprobe event into ring buffer + * Other values are reserved and currently alias to 1 + */ +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + unsigned int ret; + + if (in_nmi()) /* not supported yet */ + return 1; + + preempt_disable(); + + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + /* + * since some bpf program is already running on this cpu, + * don't call into another bpf program (same or different) + * and don't send kprobe event into ring-buffer, + * so return zero here + */ + ret = 0; + goto out; + } + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + + out: + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_call_bpf); + +static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + void *dst = (void *) (long) r1; + int size = (int) r2; + void *unsafe_ptr = (void *) (long) r3; + + return probe_kernel_read(dst, unsafe_ptr, size); +} + +static const struct bpf_func_proto bpf_probe_read_proto = { + .func = bpf_probe_read, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_probe_read: + return &bpf_probe_read_proto; + default: + return NULL; + } +} + +/* bpf+kprobe programs can access fields of 'struct pt_regs' */ +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +{ + /* check bounds */ + if (off < 0 || off >= sizeof(struct pt_regs)) + return false; + + /* only read is allowed */ + if (type != BPF_READ) + return false; + + /* disallow misaligned access */ + if (off % size != 0) + return false; + + return true; +} + +static struct bpf_verifier_ops kprobe_prog_ops = { + .get_func_proto = kprobe_prog_func_proto, + .is_valid_access = kprobe_prog_is_valid_access, +}; + +static struct bpf_prog_type_list kprobe_tl = { + .ops = &kprobe_prog_ops, + .type = BPF_PROG_TYPE_KPROBE, +}; + +static int __init register_kprobe_prog_ops(void) +{ + bpf_register_prog_type(&kprobe_tl); + return 0; +} +late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8fa549f6f528..dc3462507d7c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1134,11 +1134,15 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; @@ -1165,11 +1169,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; + struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) return; -- cgit v1.2.3 From d9847d310ab4003725e6ed1822682e24bd406908 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 25 Mar 2015 12:49:21 -0700 Subject: tracing: Allow BPF programs to call bpf_ktime_get_ns() bpf_ktime_get_ns() is used by programs to compute time delta between events or as a timestamp Signed-off-by: Alexei Starovoitov Reviewed-by: Steven Rostedt Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1427312966-8434-5-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/uapi/linux/bpf.h | 1 + kernel/trace/bpf_trace.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b2948feeb70b..238c6883877b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -165,6 +165,7 @@ enum bpf_func_id { BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ + BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f1e87da91da3..8f5787294971 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,6 +78,18 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; +static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +static const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = true, + .ret_type = RET_INTEGER, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -89,6 +101,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_map_delete_elem_proto; case BPF_FUNC_probe_read: return &bpf_probe_read_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; default: return NULL; } -- cgit v1.2.3 From 9c959c863f8217a2ff3d7c296e8223654d240569 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 25 Mar 2015 12:49:22 -0700 Subject: tracing: Allow BPF programs to call bpf_trace_printk() Debugging of BPF programs needs some form of printk from the program, so let programs call limited trace_printk() with %d %u %x %p modifiers only. Similar to kernel modules, during program load verifier checks whether program is calling bpf_trace_printk() and if so, kernel allocates trace_printk buffers and emits big 'this is debug only' banner. Signed-off-by: Alexei Starovoitov Reviewed-by: Steven Rostedt Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1427312966-8434-6-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/uapi/linux/bpf.h | 1 + kernel/trace/bpf_trace.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 238c6883877b..cc47ef41076a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -166,6 +166,7 @@ enum bpf_func_id { BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ + BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8f5787294971..2d56ce501632 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace.h" static DEFINE_PER_CPU(int, bpf_prog_active); @@ -90,6 +91,74 @@ static const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; +/* + * limited trace_printk() + * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed + */ +static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +{ + char *fmt = (char *) (long) r1; + int mod[3] = {}; + int fmt_cnt = 0; + int i; + + /* + * bpf_check()->check_func_arg()->check_stack_boundary() + * guarantees that fmt points to bpf program stack, + * fmt_size bytes of it were initialized and fmt_size > 0 + */ + if (fmt[--fmt_size] != 0) + return -EINVAL; + + /* check format string for allowed specifiers */ + for (i = 0; i < fmt_size; i++) { + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) + return -EINVAL; + + if (fmt[i] != '%') + continue; + + if (fmt_cnt >= 3) + return -EINVAL; + + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ + i++; + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } else if (fmt[i] == 'p') { + mod[fmt_cnt]++; + i++; + if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) + return -EINVAL; + fmt_cnt++; + continue; + } + + if (fmt[i] == 'l') { + mod[fmt_cnt]++; + i++; + } + + if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + return -EINVAL; + fmt_cnt++; + } + + return __trace_printk(1/* fake ip will not be printed */, fmt, + mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, + mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, + mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); +} + +static const struct bpf_func_proto bpf_trace_printk_proto = { + .func = bpf_trace_printk, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -103,6 +172,15 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + + case BPF_FUNC_trace_printk: + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; default: return NULL; } -- cgit v1.2.3 From e8c6deac69629c0cb97c3d3272f8631ef17f8f0f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:10 +0200 Subject: perf: Add data_{offset,size} to user_page Currently, the actual perf ring buffer is one page into the mmap area, following the user page and the userspace follows this convention. This patch adds data_{offset,size} fields to user_page that can be used by userspace instead for locating perf data in the mmap area. This is also helpful when mapping existing or shared buffers if their size is not known in advance. Right now, it is made to follow the existing convention that data_offset == PAGE_SIZE and data_offset + data_size == mmap_size. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 5 +++++ kernel/events/core.c | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 91803e54ee73..86c44ae66d43 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -522,9 +522,14 @@ struct perf_event_mmap_page { * In this case the kernel will not over-write unread data. * * See perf_output_put_handle() for the data ordering. + * + * data_{offset,size} indicate the location and size of the perf record + * buffer within the mmapped area. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ + __u64 data_offset; /* where the buffer starts */ + __u64 data_size; /* data buffer size */ }; #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5c13862d3e85..6efa516f1ab8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4105,6 +4105,8 @@ static void perf_event_init_userpage(struct perf_event *event) /* Allow new userspace to detect that bit 0 is deprecated */ userpg->cap_bit0_is_deprecated = 1; userpg->size = offsetof(struct perf_event_mmap_page, __reserved); + userpg->data_offset = PAGE_SIZE; + userpg->data_size = perf_data_size(rb); unlock: rcu_read_unlock(); -- cgit v1.2.3 From 45bfb2e50471abbbfd83d40d28c986078b0d24ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2015 14:18:11 +0200 Subject: perf: Add AUX area to ring buffer for raw data streams This patch introduces "AUX space" in the perf mmap buffer, intended for exporting high bandwidth data streams to userspace, such as instruction flow traces. AUX space is a ring buffer, defined by aux_{offset,size} fields in the user_page structure, and read/write pointers aux_{head,tail}, which abide by the same rules as data_* counterparts of the main perf buffer. In order to allocate/mmap AUX, userspace needs to set up aux_offset to such an offset that will be greater than data_offset+data_size and aux_size to be the desired buffer size. Both need to be page aligned. Then, same aux_offset and aux_size should be passed to mmap() call and if everything adds up, you should have an AUX buffer as a result. Pages that are mapped into this buffer also come out of user's mlock rlimit plus perf_event_mlock_kb allowance. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 17 +++++ include/uapi/linux/perf_event.h | 16 +++++ kernel/events/core.c | 141 +++++++++++++++++++++++++++++++++------- kernel/events/internal.h | 23 +++++++ kernel/events/ring_buffer.c | 97 +++++++++++++++++++++++++-- 5 files changed, 265 insertions(+), 29 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 401554074de9..5a94f6d6fa91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -284,6 +284,18 @@ struct pmu { * Return the count value for a counter. */ u64 (*count) (struct perf_event *event); /*optional*/ + + /* + * Set up pmu-private data structures for an AUX area + */ + void *(*setup_aux) (int cpu, void **pages, + int nr_pages, bool overwrite); + /* optional */ + + /* + * Free pmu-private AUX data structures + */ + void (*free_aux) (void *aux); /* optional */ }; /** @@ -862,6 +874,11 @@ static inline bool needs_branch_stack(struct perf_event *event) return event->attr.branch_sample_type != 0; } +static inline bool has_aux(struct perf_event *event) +{ + return event->pmu->setup_aux; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 86c44ae66d43..6c5013a71714 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -530,6 +530,22 @@ struct perf_event_mmap_page { __u64 data_tail; /* user-space written tail */ __u64 data_offset; /* where the buffer starts */ __u64 data_size; /* data buffer size */ + + /* + * AUX area is defined by aux_{offset,size} fields that should be set + * by the userspace, so that + * + * aux_offset >= data_offset + data_size + * + * prior to mmap()ing it. Size of the mmap()ed area should be aux_size. + * + * Ring buffer pointers aux_{head,tail} have the same semantics as + * data_{head,tail} and same ordering rules apply. + */ + __u64 aux_head; + __u64 aux_tail; + __u64 aux_offset; + __u64 aux_size; }; #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6efa516f1ab8..da51128c337a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4306,6 +4306,9 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->mmap_count); atomic_inc(&event->rb->mmap_count); + if (vma->vm_pgoff) + atomic_inc(&event->rb->aux_mmap_count); + if (event->pmu->event_mapped) event->pmu->event_mapped(event); } @@ -4330,6 +4333,20 @@ static void perf_mmap_close(struct vm_area_struct *vma) if (event->pmu->event_unmapped) event->pmu->event_unmapped(event); + /* + * rb->aux_mmap_count will always drop before rb->mmap_count and + * event->mmap_count, so it is ok to use event->mmap_mutex to + * serialize with perf_mmap here. + */ + if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); + vma->vm_mm->pinned_vm -= rb->aux_mmap_locked; + + rb_free_aux(rb); + mutex_unlock(&event->mmap_mutex); + } + atomic_dec(&rb->mmap_count); if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) @@ -4403,7 +4420,7 @@ out_put: static const struct vm_operations_struct perf_mmap_vmops = { .open = perf_mmap_open, - .close = perf_mmap_close, + .close = perf_mmap_close, /* non mergable */ .fault = perf_mmap_fault, .page_mkwrite = perf_mmap_fault, }; @@ -4414,10 +4431,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); unsigned long locked, lock_limit; - struct ring_buffer *rb; + struct ring_buffer *rb = NULL; unsigned long vma_size; unsigned long nr_pages; - long user_extra, extra; + long user_extra = 0, extra = 0; int ret = 0, flags = 0; /* @@ -4432,7 +4449,66 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; vma_size = vma->vm_end - vma->vm_start; - nr_pages = (vma_size / PAGE_SIZE) - 1; + + if (vma->vm_pgoff == 0) { + nr_pages = (vma_size / PAGE_SIZE) - 1; + } else { + /* + * AUX area mapping: if rb->aux_nr_pages != 0, it's already + * mapped, all subsequent mappings should have the same size + * and offset. Must be above the normal perf buffer. + */ + u64 aux_offset, aux_size; + + if (!event->rb) + return -EINVAL; + + nr_pages = vma_size / PAGE_SIZE; + + mutex_lock(&event->mmap_mutex); + ret = -EINVAL; + + rb = event->rb; + if (!rb) + goto aux_unlock; + + aux_offset = ACCESS_ONCE(rb->user_page->aux_offset); + aux_size = ACCESS_ONCE(rb->user_page->aux_size); + + if (aux_offset < perf_data_size(rb) + PAGE_SIZE) + goto aux_unlock; + + if (aux_offset != vma->vm_pgoff << PAGE_SHIFT) + goto aux_unlock; + + /* already mapped with a different offset */ + if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff) + goto aux_unlock; + + if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE) + goto aux_unlock; + + /* already mapped with a different size */ + if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages) + goto aux_unlock; + + if (!is_power_of_2(nr_pages)) + goto aux_unlock; + + if (!atomic_inc_not_zero(&rb->mmap_count)) + goto aux_unlock; + + if (rb_has_aux(rb)) { + atomic_inc(&rb->aux_mmap_count); + ret = 0; + goto unlock; + } + + atomic_set(&rb->aux_mmap_count, 1); + user_extra = nr_pages; + + goto accounting; + } /* * If we have rb pages ensure they're a power-of-two number, so we @@ -4444,9 +4520,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (vma_size != PAGE_SIZE * (1 + nr_pages)) return -EINVAL; - if (vma->vm_pgoff != 0) - return -EINVAL; - WARN_ON_ONCE(event->ctx->parent_ctx); again: mutex_lock(&event->mmap_mutex); @@ -4470,6 +4543,8 @@ again: } user_extra = nr_pages + 1; + +accounting: user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); /* @@ -4479,7 +4554,6 @@ again: user_locked = atomic_long_read(&user->locked_vm) + user_extra; - extra = 0; if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; @@ -4493,35 +4567,45 @@ again: goto unlock; } - WARN_ON(event->rb); + WARN_ON(!rb && event->rb); if (vma->vm_flags & VM_WRITE) flags |= RING_BUFFER_WRITABLE; - rb = rb_alloc(nr_pages, - event->attr.watermark ? event->attr.wakeup_watermark : 0, - event->cpu, flags); - if (!rb) { - ret = -ENOMEM; - goto unlock; - } + rb = rb_alloc(nr_pages, + event->attr.watermark ? event->attr.wakeup_watermark : 0, + event->cpu, flags); - atomic_set(&rb->mmap_count, 1); - rb->mmap_locked = extra; - rb->mmap_user = get_current_user(); + if (!rb) { + ret = -ENOMEM; + goto unlock; + } - atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->pinned_vm += extra; + atomic_set(&rb->mmap_count, 1); + rb->mmap_user = get_current_user(); + rb->mmap_locked = extra; - ring_buffer_attach(event, rb); + ring_buffer_attach(event, rb); - perf_event_init_userpage(event); - perf_event_update_userpage(event); + perf_event_init_userpage(event); + perf_event_update_userpage(event); + } else { + ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, flags); + if (!ret) + rb->aux_mmap_locked = extra; + } unlock: - if (!ret) + if (!ret) { + atomic_long_add(user_extra, &user->locked_vm); + vma->vm_mm->pinned_vm += extra; + atomic_inc(&event->mmap_count); + } else if (rb) { + atomic_dec(&rb->mmap_count); + } +aux_unlock: mutex_unlock(&event->mmap_mutex); /* @@ -7506,6 +7590,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) if (output_event->clock != event->clock) goto out; + /* + * If both events generate aux data, they must be on the same PMU + */ + if (has_aux(event) && has_aux(output_event) && + event->pmu != output_event->pmu) + goto out; + set: mutex_lock(&event->mmap_mutex); /* Can't redirect output if we've got an active mmap() */ diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 569b218782ad..0f6d08015927 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -35,6 +35,16 @@ struct ring_buffer { unsigned long mmap_locked; struct user_struct *mmap_user; + /* AUX area */ + unsigned long aux_pgoff; + int aux_nr_pages; + atomic_t aux_mmap_count; + unsigned long aux_mmap_locked; + void (*free_aux)(void *); + atomic_t aux_refcount; + void **aux_pages; + void *aux_priv; + struct perf_event_mmap_page *user_page; void *data_pages[0]; }; @@ -43,6 +53,14 @@ extern void rb_free(struct ring_buffer *rb); extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); +extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, + pgoff_t pgoff, int nr_pages, int flags); +extern void rb_free_aux(struct ring_buffer *rb); + +static inline bool rb_has_aux(struct ring_buffer *rb) +{ + return !!rb->aux_nr_pages; +} extern void perf_event_header__init_id(struct perf_event_header *header, @@ -81,6 +99,11 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } +static inline unsigned long perf_aux_size(struct ring_buffer *rb) +{ + return rb->aux_nr_pages << PAGE_SHIFT; +} + #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ static inline unsigned long \ func_name(struct perf_output_handle *handle, \ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index eadb95ce7aac..3de9c4e9ea9f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -243,14 +243,87 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) spin_lock_init(&rb->event_lock); } +int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, + pgoff_t pgoff, int nr_pages, int flags) +{ + bool overwrite = !(flags & RING_BUFFER_WRITABLE); + int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); + int ret = -ENOMEM; + + if (!has_aux(event)) + return -ENOTSUPP; + + rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node); + if (!rb->aux_pages) + return -ENOMEM; + + rb->free_aux = event->pmu->free_aux; + for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages; + rb->aux_nr_pages++) { + struct page *page; + + page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) + goto out; + + rb->aux_pages[rb->aux_nr_pages] = page_address(page); + } + + rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + overwrite); + if (!rb->aux_priv) + goto out; + + ret = 0; + + /* + * aux_pages (and pmu driver's private data, aux_priv) will be + * referenced in both producer's and consumer's contexts, thus + * we keep a refcount here to make sure either of the two can + * reference them safely. + */ + atomic_set(&rb->aux_refcount, 1); + +out: + if (!ret) + rb->aux_pgoff = pgoff; + else + rb_free_aux(rb); + + return ret; +} + +static void __rb_free_aux(struct ring_buffer *rb) +{ + int pg; + + if (rb->aux_priv) { + rb->free_aux(rb->aux_priv); + rb->free_aux = NULL; + rb->aux_priv = NULL; + } + + for (pg = 0; pg < rb->aux_nr_pages; pg++) + free_page((unsigned long)rb->aux_pages[pg]); + + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; +} + +void rb_free_aux(struct ring_buffer *rb) +{ + if (atomic_dec_and_test(&rb->aux_refcount)) + __rb_free_aux(rb); +} + #ifndef CONFIG_PERF_USE_VMALLOC /* * Back perf_mmap() with regular GFP_KERNEL-0 pages. */ -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +static struct page * +__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { if (pgoff > rb->nr_pages) return NULL; @@ -340,8 +413,8 @@ static int data_page_nr(struct ring_buffer *rb) return rb->nr_pages << page_order(rb); } -struct page * -perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +static struct page * +__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { /* The '>' counts in the user page. */ if (pgoff > data_page_nr(rb)) @@ -416,3 +489,19 @@ fail: } #endif + +struct page * +perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +{ + if (rb->aux_nr_pages) { + /* above AUX space */ + if (pgoff > rb->aux_pgoff + rb->aux_nr_pages) + return NULL; + + /* AUX space */ + if (pgoff >= rb->aux_pgoff) + return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); + } + + return __perf_mmap_to_page(rb, pgoff); +} -- cgit v1.2.3 From 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:15 +0200 Subject: perf: Add AUX record When there's new data in the AUX space, output a record indicating its offset and size and a set of flags, such as PERF_AUX_FLAG_TRUNCATED, to mean the described data was truncated to fit in the ring buffer. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-7-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 19 +++++++++++++++++++ kernel/events/core.c | 34 ++++++++++++++++++++++++++++++++++ kernel/events/internal.h | 3 +++ 3 files changed, 56 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 6c5013a71714..8904ad3a850b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -768,6 +768,20 @@ enum perf_event_type { */ PERF_RECORD_MMAP2 = 10, + /* + * Records that new data landed in the AUX buffer part. + * + * struct { + * struct perf_event_header header; + * + * u64 aux_offset; + * u64 aux_size; + * u64 flags; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX = 11, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -785,6 +799,11 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +/** + * PERF_RECORD_AUX::flags bits + */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ + #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 6d9fdaef7b57..dbc2eff32230 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5916,6 +5916,40 @@ void perf_event_mmap(struct vm_area_struct *vma) perf_event_mmap_event(&mmap_event); } +void perf_event_aux_event(struct perf_event *event, unsigned long head, + unsigned long size, u64 flags) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + struct perf_aux_event { + struct perf_event_header header; + u64 offset; + u64 size; + u64 flags; + } rec = { + .header = { + .type = PERF_RECORD_AUX, + .misc = 0, + .size = sizeof(rec), + }, + .offset = head, + .size = size, + .flags = flags, + }; + int ret; + + perf_event_header__init_id(&rec.header, &sample, event); + ret = perf_output_begin(&handle, event, rec.header.size); + + if (ret) + return; + + perf_output_put(&handle, rec); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + /* * IRQ throttle logging */ diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 0f6d08015927..4d117a981431 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -62,6 +62,9 @@ static inline bool rb_has_aux(struct ring_buffer *rb) return !!rb->aux_nr_pages; } +void perf_event_aux_event(struct perf_event *event, unsigned long head, + unsigned long size, u64 flags); + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, -- cgit v1.2.3 From 2023a0d2829e521fe6ad6b9907f3f90bfbf57142 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:17 +0200 Subject: perf: Support overwrite mode for the AUX area This adds support for overwrite mode in the AUX area, which means "keep collecting data till you're stopped", turning AUX area into a circular buffer, where new data overwrites old data. It does not depend on data buffer's overwrite mode, so that it doesn't lose sideband data that is instrumental for processing AUX data. Overwrite mode is enabled at mapping AUX area read only. Even though aux_tail in the buffer's user page might be user writable, it will be ignored in this mode. A PERF_RECORD_AUX with PERF_AUX_FLAG_OVERWRITE set is written to the perf data stream every time an event writes new data to the AUX area. The pmu driver might not be able to infer the exact beginning of the new data in each snapshot, some drivers will only provide the tail, which is aux_offset + aux_size in the AUX record. Consumer has to be able to tell the new data from the old one, for example, by means of time stamps if such are provided in the trace. Consumer is also responsible for disabling any events that might write to the AUX area (thus potentially racing with the consumer) before collecting the data. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-9-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 1 + kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 48 ++++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8904ad3a850b..29ef2f73bb4a 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -803,6 +803,7 @@ enum perf_callchain_context { * PERF_RECORD_AUX::flags bits */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index b701ebc32570..ffd51d9f5945 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct ring_buffer { local_t aux_nest; unsigned long aux_pgoff; int aux_nr_pages; + int aux_overwrite; atomic_t aux_mmap_count; unsigned long aux_mmap_locked; void (*free_aux)(void *); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 0cc7b0f39058..67b328337a41 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -283,26 +283,33 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, goto err_put; aux_head = local_read(&rb->aux_head); - aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); handle->rb = rb; handle->event = event; handle->head = aux_head; - if (aux_head - aux_tail < perf_aux_size(rb)) - handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); - else - handle->size = 0; + handle->size = 0; /* - * handle->size computation depends on aux_tail load; this forms a - * control dependency barrier separating aux_tail load from aux data - * store that will be enabled on successful return + * In overwrite mode, AUX data stores do not depend on aux_tail, + * therefore (A) control dependency barrier does not exist. The + * (B) <-> (C) ordering is still observed by the pmu driver. */ - if (!handle->size) { /* A, matches D */ - event->pending_disable = 1; - perf_output_wakeup(handle); - local_set(&rb->aux_nest, 0); - goto err_put; + if (!rb->aux_overwrite) { + aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + if (aux_head - aux_tail < perf_aux_size(rb)) + handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); + + /* + * handle->size computation depends on aux_tail load; this forms a + * control dependency barrier separating aux_tail load from aux data + * store that will be enabled on successful return + */ + if (!handle->size) { /* A, matches D */ + event->pending_disable = 1; + perf_output_wakeup(handle); + local_set(&rb->aux_nest, 0); + goto err_put; + } } return handle->rb->aux_priv; @@ -327,13 +334,22 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, bool truncated) { struct ring_buffer *rb = handle->rb; - unsigned long aux_head = local_read(&rb->aux_head); + unsigned long aux_head; u64 flags = 0; if (truncated) flags |= PERF_AUX_FLAG_TRUNCATED; - local_add(size, &rb->aux_head); + /* in overwrite mode, driver provides aux_head via handle */ + if (rb->aux_overwrite) { + flags |= PERF_AUX_FLAG_OVERWRITE; + + aux_head = handle->head; + local_set(&rb->aux_head, aux_head); + } else { + aux_head = local_read(&rb->aux_head); + local_add(size, &rb->aux_head); + } if (size || flags) { /* @@ -480,6 +496,8 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, */ atomic_set(&rb->aux_refcount, 1); + rb->aux_overwrite = overwrite; + out: if (!ret) rb->aux_pgoff = pgoff; -- cgit v1.2.3 From 1a5941312414c71dece6717da9a0fa1303127afa Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:18 +0200 Subject: perf: Add wakeup watermark control to the AUX area When AUX area gets a certain amount of new data, we want to wake up userspace to collect it. This adds a new control to specify how much data will cause a wakeup. This is then passed down to pmu drivers via output handle's "wakeup" field, so that the driver can find the nearest point where it can generate an interrupt. We repurpose __reserved_2 in the event attribute for this, even though it was never checked to be zero before, aux_watermark will only matter for new AUX-aware code, so the old code should still be fine. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-10-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 7 +++++++ kernel/events/core.c | 3 ++- kernel/events/internal.h | 4 +++- kernel/events/ring_buffer.c | 22 +++++++++++++++++++--- 4 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 29ef2f73bb4a..84819546c8ce 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -261,6 +261,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ /* add: sample_stack_user */ #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -366,6 +367,12 @@ struct perf_event_attr { * See asm/perf_regs.h for details. */ __u64 sample_regs_intr; + + /* + * Wakeup watermark for AUX area + */ + __u32 aux_watermark; + __u32 __reserved_2; /* align to __u64 */ }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/kernel/events/core.c b/kernel/events/core.c index 81e8d14ac59a..31f6b504ad62 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4677,7 +4677,8 @@ accounting: perf_event_init_userpage(event); perf_event_update_userpage(event); } else { - ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, flags); + ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, + event->attr.aux_watermark, flags); if (!ret) rb->aux_mmap_locked = extra; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ffd51d9f5945..9f6ce9ba4a04 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -27,6 +27,7 @@ struct ring_buffer { local_t lost; /* nr records lost */ long watermark; /* wakeup watermark */ + long aux_watermark; /* poll crap */ spinlock_t event_lock; struct list_head event_list; @@ -38,6 +39,7 @@ struct ring_buffer { /* AUX area */ local_t aux_head; local_t aux_nest; + local_t aux_wakeup; unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; @@ -57,7 +59,7 @@ extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, - pgoff_t pgoff, int nr_pages, int flags); + pgoff_t pgoff, int nr_pages, long watermark, int flags); extern void rb_free_aux(struct ring_buffer *rb); extern struct ring_buffer *ring_buffer_get(struct perf_event *event); extern void ring_buffer_put(struct ring_buffer *rb); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 67b328337a41..232f00f273cb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -296,6 +296,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, */ if (!rb->aux_overwrite) { aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); + handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); @@ -359,9 +360,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, perf_event_aux_event(handle->event, aux_head, size, flags); } - rb->user_page->aux_head = local_read(&rb->aux_head); + aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - perf_output_wakeup(handle); + if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + perf_output_wakeup(handle); + local_add(rb->aux_watermark, &rb->aux_wakeup); + } handle->event = NULL; local_set(&rb->aux_nest, 0); @@ -383,6 +387,14 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) local_add(size, &rb->aux_head); + aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); + if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + perf_output_wakeup(handle); + local_add(rb->aux_watermark, &rb->aux_wakeup); + handle->wakeup = local_read(&rb->aux_wakeup) + + rb->aux_watermark; + } + handle->head = aux_head; handle->size -= size; @@ -433,7 +445,7 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx) } int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, - pgoff_t pgoff, int nr_pages, int flags) + pgoff_t pgoff, int nr_pages, long watermark, int flags) { bool overwrite = !(flags & RING_BUFFER_WRITABLE); int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); @@ -497,6 +509,10 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, atomic_set(&rb->aux_refcount, 1); rb->aux_overwrite = overwrite; + rb->aux_watermark = watermark; + + if (!rb->aux_watermark && !rb->aux_overwrite) + rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1); out: if (!ret) -- cgit v1.2.3 From ec0d7729bbaed4b9d2d3fada693278e13a3d1368 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:23 +0200 Subject: perf: Add ITRACE_START record to indicate that tracing has started For counters that generate AUX data that is bound to the context of a running task, such as instruction tracing, the decoder needs to know exactly which task is running when the event is first scheduled in, before the first sched_switch. The decoder's need to know this stems from the fact that instruction flow trace decoding will almost always require program's object code in order to reconstruct said flow and for that we need at least its pid/tid in the perf stream. To single out such instruction tracing pmus, this patch introduces ITRACE PMU capability. The reason this is not part of RECORD_AUX record is that not all pmus capable of generating AUX data need this, and the opposite is *probably* also true. While sched_switch covers for most cases, there are two problems with it: the consumer will need to process events out of order (that is, having found RECORD_AUX, it will have to skip forward to the nearest sched_switch to figure out which task it was, then go back to the actual trace to decode it) and it completely misses the case when the tracing is enabled and disabled before sched_switch, for example, via PERF_EVENT_IOC_DISABLE. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-15-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++++ include/uapi/linux/perf_event.h | 11 +++++++++++ kernel/events/core.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45c5873ad9b3..61992cf2e977 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,6 +129,9 @@ struct hw_perf_event { struct list_head cqm_groups_entry; struct list_head cqm_group_entry; }; + struct { /* itrace */ + int itrace_started; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* @@ -177,6 +180,7 @@ struct perf_event; #define PERF_PMU_CAP_AUX_NO_SG 0x04 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 +#define PERF_PMU_CAP_ITRACE 0x20 /** * struct pmu - generic performance monitoring unit diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 84819546c8ce..309211b3eb67 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -789,6 +789,17 @@ enum perf_event_type { */ PERF_RECORD_AUX = 11, + /* + * Indicates that instruction trace has started + * + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * }; + */ + PERF_RECORD_ITRACE_START = 12, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 31f6b504ad62..06917d537302 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1831,6 +1831,7 @@ static void perf_set_shadow_time(struct perf_event *event, #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); +static void perf_log_itrace_start(struct perf_event *event); static int event_sched_in(struct perf_event *event, @@ -1869,6 +1870,8 @@ event_sched_in(struct perf_event *event, perf_set_shadow_time(event, ctx, tstamp); + perf_log_itrace_start(event); + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; @@ -5991,6 +5994,44 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +static void perf_log_itrace_start(struct perf_event *event) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + struct perf_aux_event { + struct perf_event_header header; + u32 pid; + u32 tid; + } rec; + int ret; + + if (event->parent) + event = event->parent; + + if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || + event->hw.itrace_started) + return; + + event->hw.itrace_started = 1; + + rec.header.type = PERF_RECORD_ITRACE_START; + rec.header.misc = 0; + rec.header.size = sizeof(rec); + rec.pid = perf_event_pid(event, current); + rec.tid = perf_event_tid(event, current); + + perf_event_header__init_id(&rec.header, &sample, event); + ret = perf_output_begin(&handle, event, rec.header.size); + + if (ret) + return; + + perf_output_put(&handle, rec); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + /* * Generic event overflow handling, sampling. */ -- cgit v1.2.3 From bdf765071a8b573f7b1ba14c02881fa3e623825e Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Fri, 30 Jan 2015 13:57:01 -0500 Subject: drm/tegra: gem: Return 64-bit offset for mmap(2) On 64-bit targets, tegra_gem_mmap() only returns a partial offset to userspace. As such, subsequent calls to mmap(2) may fail. Change the arguments to use a 64-bit offset to fix this. Signed-off-by: Sean Paul Acked-by: Erik Faye-Lund [treding@nvidia.com: tweak commit message] Signed-off-by: Thierry Reding --- include/uapi/drm/tegra_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index c15d781ecc0f..5391780c2b05 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -36,7 +36,8 @@ struct drm_tegra_gem_create { struct drm_tegra_gem_mmap { __u32 handle; - __u32 offset; + __u32 pad; + __u64 offset; }; struct drm_tegra_syncpt_read { -- cgit v1.2.3 From 64bf8049a2ec1e61e1475eb02b5514d7061d443e Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Wed, 4 Mar 2015 17:13:24 -0300 Subject: [media] am437x: include linux/videodev2.h for expanding BASE_VIDIOC_PRIVATE In am437x-vpfe.h BASE_VIDIOC_PRIVATE is used for making the name of ioctl command(VIDIOC_AM437X_CCDC_CFG). The definition of BASE_VIDIOC_PRIVATE is in linux/videodev2.h. However, linux/videodev2.h is not included in am437x-vpfe.h. As the result an application using has to include both am437x-vpfe.h and linux/videodev2.h. With this patch, the application can include just am437x-vpfe.h. Signed-off-by: Masatake YAMATO Acked-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/am437x-vpfe.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/am437x-vpfe.h b/include/uapi/linux/am437x-vpfe.h index 9b03033f9cd6..d75774317b9b 100644 --- a/include/uapi/linux/am437x-vpfe.h +++ b/include/uapi/linux/am437x-vpfe.h @@ -21,6 +21,8 @@ #ifndef AM437X_VPFE_USER_H #define AM437X_VPFE_USER_H +#include + enum vpfe_ccdc_data_size { VPFE_CCDC_DATA_16BITS = 0, VPFE_CCDC_DATA_15BITS, -- cgit v1.2.3 From aa05b979f14998fec16fa38f1c91eaa197e73148 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 8 Mar 2015 04:53:32 -0300 Subject: [media] videodev2.h: fix comment The quantization comment in the header was incorrect w.r.t. BT.2020. Fix this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index fbdc3602ee27..15b21e481246 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -268,9 +268,10 @@ enum v4l2_ycbcr_encoding { enum v4l2_quantization { /* - * The default for R'G'B' quantization is always full range. For - * Y'CbCr the quantization is always limited range, except for - * SYCC, XV601, XV709 or JPEG: those are full range. + * The default for R'G'B' quantization is always full range, except + * for the BT2020 colorspace. For Y'CbCr the quantization is always + * limited range, except for COLORSPACE_JPEG, SYCC, XV601 or XV709: + * those are full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, -- cgit v1.2.3 From cc7d2dfb75b3ac0f248801ceed65f69465eb0389 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 15 Mar 2015 14:30:25 -0300 Subject: [media] v4l2_plane_pix_format: use __u32 bytesperline instead of __u16 While running v4l2-compliance tests on vivid I suddenly got errors due to a call to vmalloc_user with size 0 from vb2. Digging deeper into the cause I discovered that this was due to the fact that struct v4l2_plane_pix_format defines bytesperline as a __u16 instead of a __u32. The test I was running selected a format of 4 * 4096 by 4 * 2048 with a 32 bit pixelformat. So bytesperline was 4 * 4 * 4096 = 65536, which becomes 0 in a __u16. And bytesperline * height is suddenly 0 as well. While the vivid driver may be a virtual driver, it is to be expected that this limit will be hit for real hardware as well in the near future: 8k deep-color video will already reach it. The solution is to change the type to __u32. The only drivers besides vivid that use the multiplanar API are little-endian ARM and SH platforms (exynos, ti-vpe, vsp1), so this is safe. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/pixfmt.xml | 4 ++-- drivers/media/platform/s5p-tv/mixer_video.c | 2 +- include/uapi/linux/videodev2.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml index 0a1528f3e891..fcde4e20205e 100644 --- a/Documentation/DocBook/media/v4l/pixfmt.xml +++ b/Documentation/DocBook/media/v4l/pixfmt.xml @@ -182,14 +182,14 @@ see . - __u16 + __u32 bytesperline Distance in bytes between the leftmost pixels in two adjacent lines. See &v4l2-pix-format;. __u16 - reserved[7] + reserved[6] Reserved for future extensions. Should be zeroed by the application. diff --git a/drivers/media/platform/s5p-tv/mixer_video.c b/drivers/media/platform/s5p-tv/mixer_video.c index 72d4f2e1efc0..751f3b618337 100644 --- a/drivers/media/platform/s5p-tv/mixer_video.c +++ b/drivers/media/platform/s5p-tv/mixer_video.c @@ -287,7 +287,7 @@ static void mxr_mplane_fill(struct v4l2_plane_pix_format *planes, u32 bl_width = divup(width, blk->width); u32 bl_height = divup(height, blk->height); u32 sizeimage = bl_width * bl_height * blk->size; - u16 bytesperline = bl_width * blk->size / blk->height; + u32 bytesperline = bl_width * blk->size / blk->height; plane->sizeimage += sizeimage; plane->bytesperline = max(plane->bytesperline, bytesperline); diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 15b21e481246..47df18f36c4b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1842,8 +1842,8 @@ struct v4l2_mpeg_vbi_fmt_ivtv { */ struct v4l2_plane_pix_format { __u32 sizeimage; - __u16 bytesperline; - __u16 reserved[7]; + __u32 bytesperline; + __u16 reserved[6]; } __attribute__ ((packed)); /** -- cgit v1.2.3 From 7b0fd4568bee379474eb0d989ef1125064f19fa7 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 15 May 2013 11:34:26 -0300 Subject: [media] v4l: Add RBG and RGB 8:8:8 media bus formats on 24 and 32 bit busses Add support and documentation for two media bus formats: MEDIA_BUS_FMT_RBG888_1X24 and MEDIA_BUS_FMT_RGB888_1X32_PADHI Signed-off-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 67 ++++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 4 +- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index c5ea868e3909..d253e8f85f4d 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -440,6 +440,36 @@ see . b1 b0 + + MEDIA_BUS_FMT_RBG888_1X24 + 0x100e + + &dash-ent-8; + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + MEDIA_BUS_FMT_RGB888_1X24 0x100a @@ -579,6 +609,43 @@ see . b1 b0 + + MEDIA_BUS_FMT_RGB888_1X32_PADHI + 0x100f + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 23b40908be30..b585bb32d25e 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,7 +33,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x100e */ +/* RGB - next is 0x1010 */ #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE 0x1003 @@ -43,10 +43,12 @@ #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 #define MEDIA_BUS_FMT_RGB565_2X8_LE 0x1008 #define MEDIA_BUS_FMT_RGB666_1X18 0x1009 +#define MEDIA_BUS_FMT_RBG888_1X24 0x100e #define MEDIA_BUS_FMT_RGB888_1X24 0x100a #define MEDIA_BUS_FMT_RGB888_2X12_BE 0x100b #define MEDIA_BUS_FMT_RGB888_2X12_LE 0x100c #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d +#define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f /* YUV (including grey) - next is 0x2024 */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 -- cgit v1.2.3 From e8b2d7a565ae4fc8627ffe35b953062ff6119533 Mon Sep 17 00:00:00 2001 From: Hyun Kwon Date: Tue, 18 Mar 2014 13:18:14 -0300 Subject: [media] v4l: Sort YUV formats of v4l2_mbus_pixelcode Keep the formats sorted by type, bus_width, bits per component, samples per pixel and order of subsamples, in that order. Signed-off-by: Hyun Kwon Signed-off-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 600 ++++++++++----------- include/uapi/linux/media-bus-format.h | 12 +- 2 files changed, 306 insertions(+), 306 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index d253e8f85f4d..9bfd468cd524 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -2255,11 +2255,15 @@ see . y1 y0 - - MEDIA_BUS_FMT_UYVY8_1X16 - 0x200f + + MEDIA_BUS_FMT_UYVY12_2X12 + 0x201c - &dash-ent-16; + &dash-ent-20; + u11 + u10 + u9 + u8 u7 u6 u5 @@ -2268,6 +2272,16 @@ see . u2 u1 u0 + + + + + + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2281,7 +2295,11 @@ see . - &dash-ent-16; + &dash-ent-20; + v11 + v10 + v9 + v8 v7 v6 v5 @@ -2290,6 +2308,16 @@ see . v2 v1 v0 + + + + + + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2299,11 +2327,15 @@ see . y1 y0 - - MEDIA_BUS_FMT_VYUY8_1X16 - 0x2010 + + MEDIA_BUS_FMT_VYUY12_2X12 + 0x201d - &dash-ent-16; + &dash-ent-20; + v11 + v10 + v9 + v8 v7 v6 v5 @@ -2312,6 +2344,16 @@ see . v2 v1 v0 + + + + + + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2325,7 +2367,11 @@ see . - &dash-ent-16; + &dash-ent-20; + u11 + u10 + u9 + u8 u7 u6 u5 @@ -2334,6 +2380,16 @@ see . u2 u1 u0 + + + + + + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2343,11 +2399,15 @@ see . y1 y0 - - MEDIA_BUS_FMT_YUYV8_1X16 - 0x2011 + + MEDIA_BUS_FMT_YUYV12_2X12 + 0x201e - &dash-ent-16; + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2356,6 +2416,16 @@ see . y2 y1 y0 + + + + + + &dash-ent-20; + u11 + u10 + u9 + u8 u7 u6 u5 @@ -2369,7 +2439,11 @@ see . - &dash-ent-16; + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2378,6 +2452,16 @@ see . y2 y1 y0 + + + + + + &dash-ent-20; + v11 + v10 + v9 + v8 v7 v6 v5 @@ -2387,11 +2471,15 @@ see . v1 v0 - - MEDIA_BUS_FMT_YVYU8_1X16 - 0x2012 + + MEDIA_BUS_FMT_YVYU12_2X12 + 0x201f - &dash-ent-16; + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2400,6 +2488,16 @@ see . y2 y1 y0 + + + + + + &dash-ent-20; + v11 + v10 + v9 + v8 v7 v6 v5 @@ -2413,29 +2511,11 @@ see . - &dash-ent-16; - y7 - y6 - y5 - y4 - y3 - y2 - y1 - y0 - u7 - u6 - u5 - u4 - u3 - u2 - u1 - u0 - - - MEDIA_BUS_FMT_YDYUYDYV8_1X16 - 0x2014 - - &dash-ent-16; + &dash-ent-20; + y11 + y10 + y9 + y8 y7 y6 y5 @@ -2444,28 +2524,16 @@ see . y2 y1 y0 - d - d - d - d - d - d - d - d - &dash-ent-16; - y7 - y6 - y5 - y4 - y3 - y2 - y1 - y0 + &dash-ent-20; + u11 + u10 + u9 + u8 u7 u6 u5 @@ -2475,57 +2543,11 @@ see . u1 u0 - - - - - &dash-ent-16; - y7 - y6 - y5 - y4 - y3 - y2 - y1 - y0 - d - d - d - d - d - d - d - d - - - - + + MEDIA_BUS_FMT_UYVY8_1X16 + 0x200f &dash-ent-16; - y7 - y6 - y5 - y4 - y3 - y2 - y1 - y0 - v7 - v6 - v5 - v4 - v3 - v2 - v1 - v0 - - - MEDIA_BUS_FMT_UYVY10_1X20 - 0x201a - - &dash-ent-12; - u9 - u8 u7 u6 u5 @@ -2534,8 +2556,6 @@ see . u2 u1 u0 - y9 - y8 y7 y6 y5 @@ -2549,9 +2569,7 @@ see . - &dash-ent-12; - v9 - v8 + &dash-ent-16; v7 v6 v5 @@ -2560,8 +2578,6 @@ see . v2 v1 v0 - y9 - y8 y7 y6 y5 @@ -2571,13 +2587,11 @@ see . y1 y0 - - MEDIA_BUS_FMT_VYUY10_1X20 - 0x201b + + MEDIA_BUS_FMT_VYUY8_1X16 + 0x2010 - &dash-ent-12; - v9 - v8 + &dash-ent-16; v7 v6 v5 @@ -2586,8 +2600,6 @@ see . v2 v1 v0 - y9 - y8 y7 y6 y5 @@ -2601,9 +2613,7 @@ see . - &dash-ent-12; - u9 - u8 + &dash-ent-16; u7 u6 u5 @@ -2612,8 +2622,6 @@ see . u2 u1 u0 - y9 - y8 y7 y6 y5 @@ -2623,13 +2631,11 @@ see . y1 y0 - - MEDIA_BUS_FMT_YUYV10_1X20 - 0x200d + + MEDIA_BUS_FMT_YUYV8_1X16 + 0x2011 - &dash-ent-12; - y9 - y8 + &dash-ent-16; y7 y6 y5 @@ -2638,8 +2644,6 @@ see . y2 y1 y0 - u9 - u8 u7 u6 u5 @@ -2653,9 +2657,7 @@ see . - &dash-ent-12; - y9 - y8 + &dash-ent-16; y7 y6 y5 @@ -2664,8 +2666,6 @@ see . y2 y1 y0 - v9 - v8 v7 v6 v5 @@ -2675,13 +2675,11 @@ see . v1 v0 - - MEDIA_BUS_FMT_YVYU10_1X20 - 0x200e + + MEDIA_BUS_FMT_YVYU8_1X16 + 0x2012 - &dash-ent-12; - y9 - y8 + &dash-ent-16; y7 y6 y5 @@ -2690,8 +2688,6 @@ see . y2 y1 y0 - v9 - v8 v7 v6 v5 @@ -2705,9 +2701,51 @@ see . - &dash-ent-12; - y9 - y8 + &dash-ent-16; + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + + + MEDIA_BUS_FMT_YDYUYDYV8_1X16 + 0x2014 + + &dash-ent-16; + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + d + d + d + d + d + d + d + d + + + + + + &dash-ent-16; y7 y6 y5 @@ -2716,8 +2754,6 @@ see . y2 y1 y0 - u9 - u8 u7 u6 u5 @@ -2727,14 +2763,11 @@ see . u1 u0 - - MEDIA_BUS_FMT_YUV10_1X30 - 0x2016 + - - - - - y9 - y8 + + + &dash-ent-16; y7 y6 y5 @@ -2743,39 +2776,20 @@ see . y2 y1 y0 - u9 - u8 - u7 - u6 - u5 - u4 - u3 - u2 - u1 - u0 - v9 - v8 - v7 - v6 - v5 - v4 - v3 - v2 - v1 - v0 + d + d + d + d + d + d + d + d - - MEDIA_BUS_FMT_AYUV8_1X32 - 0x2017 + - a7 - a6 - a5 - a4 - a3 - a2 - a1 - a0 + + + &dash-ent-16; y7 y6 y5 @@ -2784,14 +2798,6 @@ see . y2 y1 y0 - u7 - u6 - u5 - u4 - u3 - u2 - u1 - u0 v7 v6 v5 @@ -2801,13 +2807,11 @@ see . v1 v0 - - MEDIA_BUS_FMT_UYVY12_2X12 - 0x201c + + MEDIA_BUS_FMT_UYVY10_1X20 + 0x201a - &dash-ent-20; - u11 - u10 + &dash-ent-12; u9 u8 u7 @@ -2818,14 +2822,6 @@ see . u2 u1 u0 - - - - - - &dash-ent-20; - y11 - y10 y9 y8 y7 @@ -2841,9 +2837,7 @@ see . - &dash-ent-20; - v11 - v10 + &dash-ent-12; v9 v8 v7 @@ -2854,14 +2848,6 @@ see . v2 v1 v0 - - - - - - &dash-ent-20; - y11 - y10 y9 y8 y7 @@ -2873,13 +2859,11 @@ see . y1 y0 - - MEDIA_BUS_FMT_VYUY12_2X12 - 0x201d + + MEDIA_BUS_FMT_VYUY10_1X20 + 0x201b - &dash-ent-20; - v11 - v10 + &dash-ent-12; v9 v8 v7 @@ -2890,14 +2874,6 @@ see . v2 v1 v0 - - - - - - &dash-ent-20; - y11 - y10 y9 y8 y7 @@ -2913,9 +2889,7 @@ see . - &dash-ent-20; - u11 - u10 + &dash-ent-12; u9 u8 u7 @@ -2926,14 +2900,6 @@ see . u2 u1 u0 - - - - - - &dash-ent-20; - y11 - y10 y9 y8 y7 @@ -2945,13 +2911,11 @@ see . y1 y0 - - MEDIA_BUS_FMT_YUYV12_2X12 - 0x201e + + MEDIA_BUS_FMT_YUYV10_1X20 + 0x200d - &dash-ent-20; - y11 - y10 + &dash-ent-12; y9 y8 y7 @@ -2962,14 +2926,6 @@ see . y2 y1 y0 - - - - - - &dash-ent-20; - u11 - u10 u9 u8 u7 @@ -2985,9 +2941,7 @@ see . - &dash-ent-20; - y11 - y10 + &dash-ent-12; y9 y8 y7 @@ -2998,14 +2952,6 @@ see . y2 y1 y0 - - - - - - &dash-ent-20; - v11 - v10 v9 v8 v7 @@ -3017,13 +2963,11 @@ see . v1 v0 - - MEDIA_BUS_FMT_YVYU12_2X12 - 0x201f + + MEDIA_BUS_FMT_YVYU10_1X20 + 0x200e - &dash-ent-20; - y11 - y10 + &dash-ent-12; y9 y8 y7 @@ -3034,14 +2978,6 @@ see . y2 y1 y0 - - - - - - &dash-ent-20; - v11 - v10 v9 v8 v7 @@ -3057,9 +2993,7 @@ see . - &dash-ent-20; - y11 - y10 + &dash-ent-12; y9 y8 y7 @@ -3070,14 +3004,6 @@ see . y2 y1 y0 - - - - - - &dash-ent-20; - u11 - u10 u9 u8 u7 @@ -3329,6 +3255,80 @@ see . u1 u0 + + MEDIA_BUS_FMT_YUV10_1X30 + 0x2016 + + - + - + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u9 + u8 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + v9 + v8 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + + + MEDIA_BUS_FMT_AYUV8_1X32 + 0x2017 + + a7 + a6 + a5 + a4 + a3 + a2 + a1 + a0 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index b585bb32d25e..363a30fd8a21 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -67,6 +67,10 @@ #define MEDIA_BUS_FMT_YUYV10_2X10 0x200b #define MEDIA_BUS_FMT_YVYU10_2X10 0x200c #define MEDIA_BUS_FMT_Y12_1X12 0x2013 +#define MEDIA_BUS_FMT_UYVY12_2X12 0x201c +#define MEDIA_BUS_FMT_VYUY12_2X12 0x201d +#define MEDIA_BUS_FMT_YUYV12_2X12 0x201e +#define MEDIA_BUS_FMT_YVYU12_2X12 0x201f #define MEDIA_BUS_FMT_UYVY8_1X16 0x200f #define MEDIA_BUS_FMT_VYUY8_1X16 0x2010 #define MEDIA_BUS_FMT_YUYV8_1X16 0x2011 @@ -76,16 +80,12 @@ #define MEDIA_BUS_FMT_VYUY10_1X20 0x201b #define MEDIA_BUS_FMT_YUYV10_1X20 0x200d #define MEDIA_BUS_FMT_YVYU10_1X20 0x200e -#define MEDIA_BUS_FMT_YUV10_1X30 0x2016 -#define MEDIA_BUS_FMT_AYUV8_1X32 0x2017 -#define MEDIA_BUS_FMT_UYVY12_2X12 0x201c -#define MEDIA_BUS_FMT_VYUY12_2X12 0x201d -#define MEDIA_BUS_FMT_YUYV12_2X12 0x201e -#define MEDIA_BUS_FMT_YVYU12_2X12 0x201f #define MEDIA_BUS_FMT_UYVY12_1X24 0x2020 #define MEDIA_BUS_FMT_VYUY12_1X24 0x2021 #define MEDIA_BUS_FMT_YUYV12_1X24 0x2022 #define MEDIA_BUS_FMT_YVYU12_1X24 0x2023 +#define MEDIA_BUS_FMT_YUV10_1X30 0x2016 +#define MEDIA_BUS_FMT_AYUV8_1X32 0x2017 /* Bayer - next is 0x3019 */ #define MEDIA_BUS_FMT_SBGGR8_1X8 0x3001 -- cgit v1.2.3 From 2dca0551e4e2f3fc2b9beee784c7071e1e79c14b Mon Sep 17 00:00:00 2001 From: Hyun Kwon Date: Tue, 18 Mar 2014 13:18:15 -0300 Subject: [media] v4l: Add VUY8 24 bits bus format Add VUY8 24 bits bus format, V4L2_MBUS_FMT_VUY8_1X24. Signed-off-by: Hyun Kwon Signed-off-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 30 ++++++++++++++++++++++ include/uapi/linux/media-bus-format.h | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 9bfd468cd524..bc8d3fb9e4a9 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -3015,6 +3015,36 @@ see . u1 u0 + + MEDIA_BUS_FMT_VUY8_1X24 + 0x201a + + &dash-ent-8; + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + MEDIA_BUS_FMT_UYVY12_1X24 0x2020 diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 363a30fd8a21..d391893064a0 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -50,7 +50,7 @@ #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d #define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f -/* YUV (including grey) - next is 0x2024 */ +/* YUV (including grey) - next is 0x2025 */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 #define MEDIA_BUS_FMT_UV8_1X8 0x2015 #define MEDIA_BUS_FMT_UYVY8_1_5X8 0x2002 @@ -80,6 +80,7 @@ #define MEDIA_BUS_FMT_VYUY10_1X20 0x201b #define MEDIA_BUS_FMT_YUYV10_1X20 0x200d #define MEDIA_BUS_FMT_YVYU10_1X20 0x200e +#define MEDIA_BUS_FMT_VUY8_1X24 0x2024 #define MEDIA_BUS_FMT_UYVY12_1X24 0x2020 #define MEDIA_BUS_FMT_VYUY12_1X24 0x2021 #define MEDIA_BUS_FMT_YUYV12_1X24 0x2022 -- cgit v1.2.3 From a5562f65b1371a0988b707c10c44fcc2bba56990 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 15 May 2013 11:36:56 -0300 Subject: [media] v4l: xilinx: Add Test Pattern Generator driver The TPG generates multiple static or dynamic test patterns. The driver currently hardcodes the pattern to the moving box pattern. Signed-off-by: Christian Kohn Signed-off-by: Hyun Kwon Signed-off-by: Laurent Pinchart Signed-off-by: Michal Simek Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/xilinx/xlnx,v-tpg.txt | 71 ++ MAINTAINERS | 1 + drivers/media/platform/xilinx/Kconfig | 7 + drivers/media/platform/xilinx/Makefile | 1 + drivers/media/platform/xilinx/xilinx-tpg.c | 931 +++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/xilinx-v4l2-controls.h | 73 ++ 7 files changed, 1085 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt create mode 100644 drivers/media/platform/xilinx/xilinx-tpg.c create mode 100644 include/uapi/linux/xilinx-v4l2-controls.h (limited to 'include/uapi') diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt new file mode 100644 index 000000000000..9dd86b3db937 --- /dev/null +++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt @@ -0,0 +1,71 @@ +Xilinx Video Test Pattern Generator (TPG) +----------------------------------------- + +Required properties: + +- compatible: Must contain at least one of + + "xlnx,v-tpg-5.0" (TPG version 5.0) + "xlnx,v-tpg-6.0" (TPG version 6.0) + + TPG versions backward-compatible with previous versions should list all + compatible versions in the newer to older order. + +- reg: Physical base address and length of the registers set for the device. + +- clocks: Reference to the video core clock. + +- xlnx,video-format, xlnx,video-width: Video format and width, as defined in + video.txt. + +- port: Video port, using the DT bindings defined in ../video-interfaces.txt. + The TPG has a single output port numbered 0. + +Optional properties: + +- xlnx,vtc: A phandle referencing the Video Timing Controller that generates + video timings for the TPG test patterns. + +- timing-gpios: Specifier for a GPIO that controls the timing mux at the TPG + input. The GPIO active level corresponds to the selection of VTC-generated + video timings. + +The xlnx,vtc and timing-gpios properties are mandatory when the TPG is +synthesized with two ports and forbidden when synthesized with one port. + +Example: + + tpg_0: tpg@40050000 { + compatible = "xlnx,v-tpg-6.0", "xlnx,v-tpg-5.0"; + reg = <0x40050000 0x10000>; + clocks = <&clkc 15>; + + xlnx,vtc = <&vtc_3>; + timing-gpios = <&ps7_gpio_0 55 GPIO_ACTIVE_LOW>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + xlnx,video-format = ; + xlnx,video-width = <8>; + + tpg_in: endpoint { + remote-endpoint = <&adv7611_out>; + }; + }; + port@1 { + reg = <1>; + + xlnx,video-format = ; + xlnx,video-width = <8>; + + tpg1_out: endpoint { + remote-endpoint = <&switch_in0>; + }; + }: + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index ca3b2638fcc6..30e7e38ccad0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10826,6 +10826,7 @@ T: git git://linuxtv.org/media_tree.git S: Supported F: Documentation/devicetree/bindings/media/xilinx/ F: drivers/media/platform/xilinx/ +F: include/uapi/linux/xilinx-v4l2-controls.h XILLYBUS DRIVER M: Eli Billauer diff --git a/drivers/media/platform/xilinx/Kconfig b/drivers/media/platform/xilinx/Kconfig index 19db82343bb8..d7324c726fc2 100644 --- a/drivers/media/platform/xilinx/Kconfig +++ b/drivers/media/platform/xilinx/Kconfig @@ -7,6 +7,13 @@ config VIDEO_XILINX if VIDEO_XILINX +config VIDEO_XILINX_TPG + tristate "Xilinx Video Test Pattern Generator" + depends on VIDEO_XILINX + select VIDEO_XILINX_VTC + ---help--- + Driver for the Xilinx Video Test Pattern Generator + config VIDEO_XILINX_VTC tristate "Xilinx Video Timing Controller" depends on VIDEO_XILINX diff --git a/drivers/media/platform/xilinx/Makefile b/drivers/media/platform/xilinx/Makefile index 6611e3228e3c..e8a0f2a9f733 100644 --- a/drivers/media/platform/xilinx/Makefile +++ b/drivers/media/platform/xilinx/Makefile @@ -1,4 +1,5 @@ xilinx-video-objs += xilinx-dma.o xilinx-vip.o xilinx-vipp.o obj-$(CONFIG_VIDEO_XILINX) += xilinx-video.o +obj-$(CONFIG_VIDEO_XILINX_TPG) += xilinx-tpg.o obj-$(CONFIG_VIDEO_XILINX_VTC) += xilinx-vtc.o diff --git a/drivers/media/platform/xilinx/xilinx-tpg.c b/drivers/media/platform/xilinx/xilinx-tpg.c new file mode 100644 index 000000000000..b5f7d5ecb7f6 --- /dev/null +++ b/drivers/media/platform/xilinx/xilinx-tpg.c @@ -0,0 +1,931 @@ +/* + * Xilinx Test Pattern Generator + * + * Copyright (C) 2013-2015 Ideas on Board + * Copyright (C) 2013-2015 Xilinx, Inc. + * + * Contacts: Hyun Kwon + * Laurent Pinchart + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "xilinx-vip.h" +#include "xilinx-vtc.h" + +#define XTPG_CTRL_STATUS_SLAVE_ERROR (1 << 16) +#define XTPG_CTRL_IRQ_SLAVE_ERROR (1 << 16) + +#define XTPG_PATTERN_CONTROL 0x0100 +#define XTPG_PATTERN_MASK (0xf << 0) +#define XTPG_PATTERN_CONTROL_CROSS_HAIRS (1 << 4) +#define XTPG_PATTERN_CONTROL_MOVING_BOX (1 << 5) +#define XTPG_PATTERN_CONTROL_COLOR_MASK_SHIFT 6 +#define XTPG_PATTERN_CONTROL_COLOR_MASK_MASK (0xf << 6) +#define XTPG_PATTERN_CONTROL_STUCK_PIXEL (1 << 9) +#define XTPG_PATTERN_CONTROL_NOISE (1 << 10) +#define XTPG_PATTERN_CONTROL_MOTION (1 << 12) +#define XTPG_MOTION_SPEED 0x0104 +#define XTPG_CROSS_HAIRS 0x0108 +#define XTPG_CROSS_HAIRS_ROW_SHIFT 0 +#define XTPG_CROSS_HAIRS_ROW_MASK (0xfff << 0) +#define XTPG_CROSS_HAIRS_COLUMN_SHIFT 16 +#define XTPG_CROSS_HAIRS_COLUMN_MASK (0xfff << 16) +#define XTPG_ZPLATE_HOR_CONTROL 0x010c +#define XTPG_ZPLATE_VER_CONTROL 0x0110 +#define XTPG_ZPLATE_START_SHIFT 0 +#define XTPG_ZPLATE_START_MASK (0xffff << 0) +#define XTPG_ZPLATE_SPEED_SHIFT 16 +#define XTPG_ZPLATE_SPEED_MASK (0xffff << 16) +#define XTPG_BOX_SIZE 0x0114 +#define XTPG_BOX_COLOR 0x0118 +#define XTPG_STUCK_PIXEL_THRESH 0x011c +#define XTPG_NOISE_GAIN 0x0120 +#define XTPG_BAYER_PHASE 0x0124 +#define XTPG_BAYER_PHASE_RGGB 0 +#define XTPG_BAYER_PHASE_GRBG 1 +#define XTPG_BAYER_PHASE_GBRG 2 +#define XTPG_BAYER_PHASE_BGGR 3 +#define XTPG_BAYER_PHASE_OFF 4 + +/* + * The minimum blanking value is one clock cycle for the front porch, one clock + * cycle for the sync pulse and one clock cycle for the back porch. + */ +#define XTPG_MIN_HBLANK 3 +#define XTPG_MAX_HBLANK (XVTC_MAX_HSIZE - XVIP_MIN_WIDTH) +#define XTPG_MIN_VBLANK 3 +#define XTPG_MAX_VBLANK (XVTC_MAX_VSIZE - XVIP_MIN_HEIGHT) + +/** + * struct xtpg_device - Xilinx Test Pattern Generator device structure + * @xvip: Xilinx Video IP device + * @pads: media pads + * @npads: number of pads (1 or 2) + * @has_input: whether an input is connected to the sink pad + * @formats: active V4L2 media bus format for each pad + * @default_format: default V4L2 media bus format + * @vip_format: format information corresponding to the active format + * @bayer: boolean flag if TPG is set to any bayer format + * @ctrl_handler: control handler + * @hblank: horizontal blanking control + * @vblank: vertical blanking control + * @pattern: test pattern control + * @streaming: is the video stream active + * @vtc: video timing controller + * @vtmux_gpio: video timing mux GPIO + */ +struct xtpg_device { + struct xvip_device xvip; + + struct media_pad pads[2]; + unsigned int npads; + bool has_input; + + struct v4l2_mbus_framefmt formats[2]; + struct v4l2_mbus_framefmt default_format; + const struct xvip_video_format *vip_format; + bool bayer; + + struct v4l2_ctrl_handler ctrl_handler; + struct v4l2_ctrl *hblank; + struct v4l2_ctrl *vblank; + struct v4l2_ctrl *pattern; + bool streaming; + + struct xvtc_device *vtc; + struct gpio_desc *vtmux_gpio; +}; + +static inline struct xtpg_device *to_tpg(struct v4l2_subdev *subdev) +{ + return container_of(subdev, struct xtpg_device, xvip.subdev); +} + +static u32 xtpg_get_bayer_phase(unsigned int code) +{ + switch (code) { + case MEDIA_BUS_FMT_SRGGB8_1X8: + return XTPG_BAYER_PHASE_RGGB; + case MEDIA_BUS_FMT_SGRBG8_1X8: + return XTPG_BAYER_PHASE_GRBG; + case MEDIA_BUS_FMT_SGBRG8_1X8: + return XTPG_BAYER_PHASE_GBRG; + case MEDIA_BUS_FMT_SBGGR8_1X8: + return XTPG_BAYER_PHASE_BGGR; + default: + return XTPG_BAYER_PHASE_OFF; + } +} + +static void __xtpg_update_pattern_control(struct xtpg_device *xtpg, + bool passthrough, bool pattern) +{ + u32 pattern_mask = (1 << (xtpg->pattern->maximum + 1)) - 1; + + /* + * If the TPG has no sink pad or no input connected to its sink pad + * passthrough mode can't be enabled. + */ + if (xtpg->npads == 1 || !xtpg->has_input) + passthrough = false; + + /* If passthrough mode is allowed unmask bit 0. */ + if (passthrough) + pattern_mask &= ~1; + + /* If test pattern mode is allowed unmask all other bits. */ + if (pattern) + pattern_mask &= 1; + + __v4l2_ctrl_modify_range(xtpg->pattern, 0, xtpg->pattern->maximum, + pattern_mask, pattern ? 9 : 0); +} + +static void xtpg_update_pattern_control(struct xtpg_device *xtpg, + bool passthrough, bool pattern) +{ + mutex_lock(xtpg->ctrl_handler.lock); + __xtpg_update_pattern_control(xtpg, passthrough, pattern); + mutex_unlock(xtpg->ctrl_handler.lock); +} + +/* ----------------------------------------------------------------------------- + * V4L2 Subdevice Video Operations + */ + +static int xtpg_s_stream(struct v4l2_subdev *subdev, int enable) +{ + struct xtpg_device *xtpg = to_tpg(subdev); + unsigned int width = xtpg->formats[0].width; + unsigned int height = xtpg->formats[0].height; + bool passthrough; + u32 bayer_phase; + + if (!enable) { + xvip_stop(&xtpg->xvip); + if (xtpg->vtc) + xvtc_generator_stop(xtpg->vtc); + + xtpg_update_pattern_control(xtpg, true, true); + xtpg->streaming = false; + return 0; + } + + xvip_set_frame_size(&xtpg->xvip, &xtpg->formats[0]); + + if (xtpg->vtc) { + struct xvtc_config config = { + .hblank_start = width, + .hsync_start = width + 1, + .vblank_start = height, + .vsync_start = height + 1, + }; + unsigned int htotal; + unsigned int vtotal; + + htotal = min_t(unsigned int, XVTC_MAX_HSIZE, + v4l2_ctrl_g_ctrl(xtpg->hblank) + width); + vtotal = min_t(unsigned int, XVTC_MAX_VSIZE, + v4l2_ctrl_g_ctrl(xtpg->vblank) + height); + + config.hsync_end = htotal - 1; + config.hsize = htotal; + config.vsync_end = vtotal - 1; + config.vsize = vtotal; + + xvtc_generator_start(xtpg->vtc, &config); + } + + /* + * Configure the bayer phase and video timing mux based on the + * operation mode (passthrough or test pattern generation). The test + * pattern can be modified by the control set handler, we thus need to + * take the control lock here to avoid races. + */ + mutex_lock(xtpg->ctrl_handler.lock); + + xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_MASK, xtpg->pattern->cur.val); + + /* + * Switching between passthrough and test pattern generation modes isn't + * allowed during streaming, update the control range accordingly. + */ + passthrough = xtpg->pattern->cur.val == 0; + __xtpg_update_pattern_control(xtpg, passthrough, !passthrough); + + xtpg->streaming = true; + + mutex_unlock(xtpg->ctrl_handler.lock); + + /* + * For TPG v5.0, the bayer phase needs to be off for the pass through + * mode, otherwise the external input would be subsampled. + */ + bayer_phase = passthrough ? XTPG_BAYER_PHASE_OFF + : xtpg_get_bayer_phase(xtpg->formats[0].code); + xvip_write(&xtpg->xvip, XTPG_BAYER_PHASE, bayer_phase); + + if (xtpg->vtmux_gpio) + gpiod_set_value_cansleep(xtpg->vtmux_gpio, !passthrough); + + xvip_start(&xtpg->xvip); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * V4L2 Subdevice Pad Operations + */ + +static struct v4l2_mbus_framefmt * +__xtpg_get_pad_format(struct xtpg_device *xtpg, + struct v4l2_subdev_pad_config *cfg, + unsigned int pad, u32 which) +{ + switch (which) { + case V4L2_SUBDEV_FORMAT_TRY: + return v4l2_subdev_get_try_format(&xtpg->xvip.subdev, cfg, pad); + case V4L2_SUBDEV_FORMAT_ACTIVE: + return &xtpg->formats[pad]; + default: + return NULL; + } +} + +static int xtpg_get_format(struct v4l2_subdev *subdev, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_format *fmt) +{ + struct xtpg_device *xtpg = to_tpg(subdev); + + fmt->format = *__xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which); + + return 0; +} + +static int xtpg_set_format(struct v4l2_subdev *subdev, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_format *fmt) +{ + struct xtpg_device *xtpg = to_tpg(subdev); + struct v4l2_mbus_framefmt *__format; + u32 bayer_phase; + + __format = __xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which); + + /* In two pads mode the source pad format is always identical to the + * sink pad format. + */ + if (xtpg->npads == 2 && fmt->pad == 1) { + fmt->format = *__format; + return 0; + } + + /* Bayer phase is configurable at runtime */ + if (xtpg->bayer) { + bayer_phase = xtpg_get_bayer_phase(fmt->format.code); + if (bayer_phase != XTPG_BAYER_PHASE_OFF) + __format->code = fmt->format.code; + } + + xvip_set_format_size(__format, fmt); + + fmt->format = *__format; + + /* Propagate the format to the source pad. */ + if (xtpg->npads == 2) { + __format = __xtpg_get_pad_format(xtpg, cfg, 1, fmt->which); + *__format = fmt->format; + } + + return 0; +} + +/* ----------------------------------------------------------------------------- + * V4L2 Subdevice Operations + */ + +static int xtpg_enum_frame_size(struct v4l2_subdev *subdev, + struct v4l2_subdev_pad_config *cfg, + struct v4l2_subdev_frame_size_enum *fse) +{ + struct v4l2_mbus_framefmt *format; + + format = v4l2_subdev_get_try_format(subdev, cfg, fse->pad); + + if (fse->index || fse->code != format->code) + return -EINVAL; + + /* Min / max values for pad 0 is always fixed in both one and two pads + * modes. In two pads mode, the source pad(= 1) size is identical to + * the sink pad size */ + if (fse->pad == 0) { + fse->min_width = XVIP_MIN_WIDTH; + fse->max_width = XVIP_MAX_WIDTH; + fse->min_height = XVIP_MIN_HEIGHT; + fse->max_height = XVIP_MAX_HEIGHT; + } else { + fse->min_width = format->width; + fse->max_width = format->width; + fse->min_height = format->height; + fse->max_height = format->height; + } + + return 0; +} + +static int xtpg_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh) +{ + struct xtpg_device *xtpg = to_tpg(subdev); + struct v4l2_mbus_framefmt *format; + + format = v4l2_subdev_get_try_format(subdev, fh->pad, 0); + *format = xtpg->default_format; + + if (xtpg->npads == 2) { + format = v4l2_subdev_get_try_format(subdev, fh->pad, 1); + *format = xtpg->default_format; + } + + return 0; +} + +static int xtpg_close(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh) +{ + return 0; +} + +static int xtpg_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct xtpg_device *xtpg = container_of(ctrl->handler, + struct xtpg_device, + ctrl_handler); + switch (ctrl->id) { + case V4L2_CID_TEST_PATTERN: + xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_MASK, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_CROSS_HAIRS: + xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_CROSS_HAIRS, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_MOVING_BOX: + xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_MOVING_BOX, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_COLOR_MASK: + xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_COLOR_MASK_MASK, + ctrl->val << + XTPG_PATTERN_CONTROL_COLOR_MASK_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_STUCK_PIXEL: + xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_STUCK_PIXEL, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_NOISE: + xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_NOISE, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_MOTION: + xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL, + XTPG_PATTERN_CONTROL_MOTION, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_MOTION_SPEED: + xvip_write(&xtpg->xvip, XTPG_MOTION_SPEED, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW: + xvip_clr_and_set(&xtpg->xvip, XTPG_CROSS_HAIRS, + XTPG_CROSS_HAIRS_ROW_MASK, + ctrl->val << XTPG_CROSS_HAIRS_ROW_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN: + xvip_clr_and_set(&xtpg->xvip, XTPG_CROSS_HAIRS, + XTPG_CROSS_HAIRS_COLUMN_MASK, + ctrl->val << XTPG_CROSS_HAIRS_COLUMN_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_ZPLATE_HOR_START: + xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_HOR_CONTROL, + XTPG_ZPLATE_START_MASK, + ctrl->val << XTPG_ZPLATE_START_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED: + xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_HOR_CONTROL, + XTPG_ZPLATE_SPEED_MASK, + ctrl->val << XTPG_ZPLATE_SPEED_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_ZPLATE_VER_START: + xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_VER_CONTROL, + XTPG_ZPLATE_START_MASK, + ctrl->val << XTPG_ZPLATE_START_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED: + xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_VER_CONTROL, + XTPG_ZPLATE_SPEED_MASK, + ctrl->val << XTPG_ZPLATE_SPEED_SHIFT); + return 0; + case V4L2_CID_XILINX_TPG_BOX_SIZE: + xvip_write(&xtpg->xvip, XTPG_BOX_SIZE, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_BOX_COLOR: + xvip_write(&xtpg->xvip, XTPG_BOX_COLOR, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH: + xvip_write(&xtpg->xvip, XTPG_STUCK_PIXEL_THRESH, ctrl->val); + return 0; + case V4L2_CID_XILINX_TPG_NOISE_GAIN: + xvip_write(&xtpg->xvip, XTPG_NOISE_GAIN, ctrl->val); + return 0; + } + + return 0; +} + +static const struct v4l2_ctrl_ops xtpg_ctrl_ops = { + .s_ctrl = xtpg_s_ctrl, +}; + +static struct v4l2_subdev_core_ops xtpg_core_ops = { +}; + +static struct v4l2_subdev_video_ops xtpg_video_ops = { + .s_stream = xtpg_s_stream, +}; + +static struct v4l2_subdev_pad_ops xtpg_pad_ops = { + .enum_mbus_code = xvip_enum_mbus_code, + .enum_frame_size = xtpg_enum_frame_size, + .get_fmt = xtpg_get_format, + .set_fmt = xtpg_set_format, +}; + +static struct v4l2_subdev_ops xtpg_ops = { + .core = &xtpg_core_ops, + .video = &xtpg_video_ops, + .pad = &xtpg_pad_ops, +}; + +static const struct v4l2_subdev_internal_ops xtpg_internal_ops = { + .open = xtpg_open, + .close = xtpg_close, +}; + +/* + * Control Config + */ + +static const char *const xtpg_pattern_strings[] = { + "Passthrough", + "Horizontal Ramp", + "Vertical Ramp", + "Temporal Ramp", + "Solid Red", + "Solid Green", + "Solid Blue", + "Solid Black", + "Solid White", + "Color Bars", + "Zone Plate", + "Tartan Color Bars", + "Cross Hatch", + "None", + "Vertical/Horizontal Ramps", + "Black/White Checker Board", +}; + +static struct v4l2_ctrl_config xtpg_ctrls[] = { + { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_CROSS_HAIRS, + .name = "Test Pattern: Cross Hairs", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = false, + .max = true, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_MOVING_BOX, + .name = "Test Pattern: Moving Box", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = false, + .max = true, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_COLOR_MASK, + .name = "Test Pattern: Color Mask", + .type = V4L2_CTRL_TYPE_BITMASK, + .min = 0, + .max = 0xf, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_STUCK_PIXEL, + .name = "Test Pattern: Stuck Pixel", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = false, + .max = true, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_NOISE, + .name = "Test Pattern: Noise", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = false, + .max = true, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_MOTION, + .name = "Test Pattern: Motion", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = false, + .max = true, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_MOTION_SPEED, + .name = "Test Pattern: Motion Speed", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 8) - 1, + .step = 1, + .def = 4, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW, + .name = "Test Pattern: Cross Hairs Row", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 12) - 1, + .step = 1, + .def = 0x64, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN, + .name = "Test Pattern: Cross Hairs Column", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 12) - 1, + .step = 1, + .def = 0x64, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_ZPLATE_HOR_START, + .name = "Test Pattern: Zplate Horizontal Start Pos", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 16) - 1, + .step = 1, + .def = 0x1e, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED, + .name = "Test Pattern: Zplate Horizontal Speed", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 16) - 1, + .step = 1, + .def = 0, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_ZPLATE_VER_START, + .name = "Test Pattern: Zplate Vertical Start Pos", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 16) - 1, + .step = 1, + .def = 1, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED, + .name = "Test Pattern: Zplate Vertical Speed", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 16) - 1, + .step = 1, + .def = 0, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_BOX_SIZE, + .name = "Test Pattern: Box Size", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 12) - 1, + .step = 1, + .def = 0x32, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_BOX_COLOR, + .name = "Test Pattern: Box Color(RGB)", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 24) - 1, + .step = 1, + .def = 0, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH, + .name = "Test Pattern: Stuck Pixel threshold", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 16) - 1, + .step = 1, + .def = 0, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, { + .ops = &xtpg_ctrl_ops, + .id = V4L2_CID_XILINX_TPG_NOISE_GAIN, + .name = "Test Pattern: Noise Gain", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = (1 << 8) - 1, + .step = 1, + .def = 0, + .flags = V4L2_CTRL_FLAG_SLIDER, + }, +}; + +/* ----------------------------------------------------------------------------- + * Media Operations + */ + +static const struct media_entity_operations xtpg_media_ops = { + .link_validate = v4l2_subdev_link_validate, +}; + +/* ----------------------------------------------------------------------------- + * Power Management + */ + +static int __maybe_unused xtpg_pm_suspend(struct device *dev) +{ + struct xtpg_device *xtpg = dev_get_drvdata(dev); + + xvip_suspend(&xtpg->xvip); + + return 0; +} + +static int __maybe_unused xtpg_pm_resume(struct device *dev) +{ + struct xtpg_device *xtpg = dev_get_drvdata(dev); + + xvip_resume(&xtpg->xvip); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * Platform Device Driver + */ + +static int xtpg_parse_of(struct xtpg_device *xtpg) +{ + struct device *dev = xtpg->xvip.dev; + struct device_node *node = xtpg->xvip.dev->of_node; + struct device_node *ports; + struct device_node *port; + unsigned int nports = 0; + bool has_endpoint = false; + + ports = of_get_child_by_name(node, "ports"); + if (ports == NULL) + ports = node; + + for_each_child_of_node(ports, port) { + const struct xvip_video_format *format; + struct device_node *endpoint; + + if (!port->name || of_node_cmp(port->name, "port")) + continue; + + format = xvip_of_get_format(port); + if (IS_ERR(format)) { + dev_err(dev, "invalid format in DT"); + return PTR_ERR(format); + } + + /* Get and check the format description */ + if (!xtpg->vip_format) { + xtpg->vip_format = format; + } else if (xtpg->vip_format != format) { + dev_err(dev, "in/out format mismatch in DT"); + return -EINVAL; + } + + if (nports == 0) { + endpoint = of_get_next_child(port, NULL); + if (endpoint) + has_endpoint = true; + of_node_put(endpoint); + } + + /* Count the number of ports. */ + nports++; + } + + if (nports != 1 && nports != 2) { + dev_err(dev, "invalid number of ports %u\n", nports); + return -EINVAL; + } + + xtpg->npads = nports; + if (nports == 2 && has_endpoint) + xtpg->has_input = true; + + return 0; +} + +static int xtpg_probe(struct platform_device *pdev) +{ + struct v4l2_subdev *subdev; + struct xtpg_device *xtpg; + u32 i, bayer_phase; + int ret; + + xtpg = devm_kzalloc(&pdev->dev, sizeof(*xtpg), GFP_KERNEL); + if (!xtpg) + return -ENOMEM; + + xtpg->xvip.dev = &pdev->dev; + + ret = xtpg_parse_of(xtpg); + if (ret < 0) + return ret; + + ret = xvip_init_resources(&xtpg->xvip); + if (ret < 0) + return ret; + + xtpg->vtmux_gpio = devm_gpiod_get_optional(&pdev->dev, "timing", + GPIOD_OUT_HIGH); + if (IS_ERR(xtpg->vtmux_gpio)) { + ret = PTR_ERR(xtpg->vtmux_gpio); + goto error_resource; + } + + xtpg->vtc = xvtc_of_get(pdev->dev.of_node); + if (IS_ERR(xtpg->vtc)) { + ret = PTR_ERR(xtpg->vtc); + goto error_resource; + } + + /* Reset and initialize the core */ + xvip_reset(&xtpg->xvip); + + /* Initialize V4L2 subdevice and media entity. Pad numbers depend on the + * number of pads. + */ + if (xtpg->npads == 2) { + xtpg->pads[0].flags = MEDIA_PAD_FL_SINK; + xtpg->pads[1].flags = MEDIA_PAD_FL_SOURCE; + } else { + xtpg->pads[0].flags = MEDIA_PAD_FL_SOURCE; + } + + /* Initialize the default format */ + xtpg->default_format.code = xtpg->vip_format->code; + xtpg->default_format.field = V4L2_FIELD_NONE; + xtpg->default_format.colorspace = V4L2_COLORSPACE_SRGB; + xvip_get_frame_size(&xtpg->xvip, &xtpg->default_format); + + bayer_phase = xtpg_get_bayer_phase(xtpg->vip_format->code); + if (bayer_phase != XTPG_BAYER_PHASE_OFF) + xtpg->bayer = true; + + xtpg->formats[0] = xtpg->default_format; + if (xtpg->npads == 2) + xtpg->formats[1] = xtpg->default_format; + + /* Initialize V4L2 subdevice and media entity */ + subdev = &xtpg->xvip.subdev; + v4l2_subdev_init(subdev, &xtpg_ops); + subdev->dev = &pdev->dev; + subdev->internal_ops = &xtpg_internal_ops; + strlcpy(subdev->name, dev_name(&pdev->dev), sizeof(subdev->name)); + v4l2_set_subdevdata(subdev, xtpg); + subdev->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; + subdev->entity.ops = &xtpg_media_ops; + + ret = media_entity_init(&subdev->entity, xtpg->npads, xtpg->pads, 0); + if (ret < 0) + goto error; + + v4l2_ctrl_handler_init(&xtpg->ctrl_handler, 3 + ARRAY_SIZE(xtpg_ctrls)); + + xtpg->vblank = v4l2_ctrl_new_std(&xtpg->ctrl_handler, &xtpg_ctrl_ops, + V4L2_CID_VBLANK, XTPG_MIN_VBLANK, + XTPG_MAX_VBLANK, 1, 100); + xtpg->hblank = v4l2_ctrl_new_std(&xtpg->ctrl_handler, &xtpg_ctrl_ops, + V4L2_CID_HBLANK, XTPG_MIN_HBLANK, + XTPG_MAX_HBLANK, 1, 100); + xtpg->pattern = v4l2_ctrl_new_std_menu_items(&xtpg->ctrl_handler, + &xtpg_ctrl_ops, V4L2_CID_TEST_PATTERN, + ARRAY_SIZE(xtpg_pattern_strings) - 1, + 1, 9, xtpg_pattern_strings); + + for (i = 0; i < ARRAY_SIZE(xtpg_ctrls); i++) + v4l2_ctrl_new_custom(&xtpg->ctrl_handler, &xtpg_ctrls[i], NULL); + + if (xtpg->ctrl_handler.error) { + dev_err(&pdev->dev, "failed to add controls\n"); + ret = xtpg->ctrl_handler.error; + goto error; + } + subdev->ctrl_handler = &xtpg->ctrl_handler; + + xtpg_update_pattern_control(xtpg, true, true); + + ret = v4l2_ctrl_handler_setup(&xtpg->ctrl_handler); + if (ret < 0) { + dev_err(&pdev->dev, "failed to set controls\n"); + goto error; + } + + platform_set_drvdata(pdev, xtpg); + + xvip_print_version(&xtpg->xvip); + + ret = v4l2_async_register_subdev(subdev); + if (ret < 0) { + dev_err(&pdev->dev, "failed to register subdev\n"); + goto error; + } + + return 0; + +error: + v4l2_ctrl_handler_free(&xtpg->ctrl_handler); + media_entity_cleanup(&subdev->entity); + xvtc_put(xtpg->vtc); +error_resource: + xvip_cleanup_resources(&xtpg->xvip); + return ret; +} + +static int xtpg_remove(struct platform_device *pdev) +{ + struct xtpg_device *xtpg = platform_get_drvdata(pdev); + struct v4l2_subdev *subdev = &xtpg->xvip.subdev; + + v4l2_async_unregister_subdev(subdev); + v4l2_ctrl_handler_free(&xtpg->ctrl_handler); + media_entity_cleanup(&subdev->entity); + + xvip_cleanup_resources(&xtpg->xvip); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(xtpg_pm_ops, xtpg_pm_suspend, xtpg_pm_resume); + +static const struct of_device_id xtpg_of_id_table[] = { + { .compatible = "xlnx,v-tpg-5.0" }, + { } +}; +MODULE_DEVICE_TABLE(of, xtpg_of_id_table); + +static struct platform_driver xtpg_driver = { + .driver = { + .name = "xilinx-tpg", + .pm = &xtpg_pm_ops, + .of_match_table = xtpg_of_id_table, + }, + .probe = xtpg_probe, + .remove = xtpg_remove, +}; + +module_platform_driver(xtpg_driver); + +MODULE_AUTHOR("Laurent Pinchart "); +MODULE_DESCRIPTION("Xilinx Test Pattern Generator Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 68ceb97c458c..d4cfc17cc414 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -446,5 +446,6 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h +header-y += xilinx-v4l2-controls.h header-y += zorro.h header-y += zorro_ids.h diff --git a/include/uapi/linux/xilinx-v4l2-controls.h b/include/uapi/linux/xilinx-v4l2-controls.h new file mode 100644 index 000000000000..fb495b91e800 --- /dev/null +++ b/include/uapi/linux/xilinx-v4l2-controls.h @@ -0,0 +1,73 @@ +/* + * Xilinx Controls Header + * + * Copyright (C) 2013-2015 Ideas on Board + * Copyright (C) 2013-2015 Xilinx, Inc. + * + * Contacts: Hyun Kwon + * Laurent Pinchart + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __UAPI_XILINX_V4L2_CONTROLS_H__ +#define __UAPI_XILINX_V4L2_CONTROLS_H__ + +#include + +#define V4L2_CID_XILINX_OFFSET 0xc000 +#define V4L2_CID_XILINX_BASE (V4L2_CID_USER_BASE + V4L2_CID_XILINX_OFFSET) + +/* + * Private Controls for Xilinx Video IPs + */ + +/* + * Xilinx TPG Video IP + */ + +#define V4L2_CID_XILINX_TPG (V4L2_CID_USER_BASE + 0xc000) + +/* Draw cross hairs */ +#define V4L2_CID_XILINX_TPG_CROSS_HAIRS (V4L2_CID_XILINX_TPG + 1) +/* Enable a moving box */ +#define V4L2_CID_XILINX_TPG_MOVING_BOX (V4L2_CID_XILINX_TPG + 2) +/* Mask out a color component */ +#define V4L2_CID_XILINX_TPG_COLOR_MASK (V4L2_CID_XILINX_TPG + 3) +/* Enable a stuck pixel feature */ +#define V4L2_CID_XILINX_TPG_STUCK_PIXEL (V4L2_CID_XILINX_TPG + 4) +/* Enable a noisy output */ +#define V4L2_CID_XILINX_TPG_NOISE (V4L2_CID_XILINX_TPG + 5) +/* Enable the motion feature */ +#define V4L2_CID_XILINX_TPG_MOTION (V4L2_CID_XILINX_TPG + 6) +/* Configure the motion speed of moving patterns */ +#define V4L2_CID_XILINX_TPG_MOTION_SPEED (V4L2_CID_XILINX_TPG + 7) +/* The row of horizontal cross hair location */ +#define V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW (V4L2_CID_XILINX_TPG + 8) +/* The colum of vertical cross hair location */ +#define V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN (V4L2_CID_XILINX_TPG + 9) +/* Set starting point of sine wave for horizontal component */ +#define V4L2_CID_XILINX_TPG_ZPLATE_HOR_START (V4L2_CID_XILINX_TPG + 10) +/* Set speed of the horizontal component */ +#define V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED (V4L2_CID_XILINX_TPG + 11) +/* Set starting point of sine wave for vertical component */ +#define V4L2_CID_XILINX_TPG_ZPLATE_VER_START (V4L2_CID_XILINX_TPG + 12) +/* Set speed of the vertical component */ +#define V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED (V4L2_CID_XILINX_TPG + 13) +/* Moving box size */ +#define V4L2_CID_XILINX_TPG_BOX_SIZE (V4L2_CID_XILINX_TPG + 14) +/* Moving box color */ +#define V4L2_CID_XILINX_TPG_BOX_COLOR (V4L2_CID_XILINX_TPG + 15) +/* Upper limit count of generated stuck pixels */ +#define V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH (V4L2_CID_XILINX_TPG + 16) +/* Noise level */ +#define V4L2_CID_XILINX_TPG_NOISE_GAIN (V4L2_CID_XILINX_TPG + 17) + +#endif /* __UAPI_XILINX_V4L2_CONTROLS_H__ */ -- cgit v1.2.3 From 9b3075c59f858d64478f46a15daa6ecda3cf2318 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Apr 2015 01:37:05 -0700 Subject: nfsd: add NFSEXP_PNFS to the exflags array Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 1 + include/uapi/linux/nfsd/export.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 2911f519fe64..900c3ae94adc 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1158,6 +1158,7 @@ static struct flags { { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, { NFSEXP_V4ROOT, {"v4root", ""}}, + { NFSEXP_PNFS, {"pnfs", ""}}, { 0, {"", ""}} }; diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index d3bd6ffec041..0df7bd5d2fb1 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -21,6 +21,9 @@ /* * Export flags. + * + * Please update the expflags[] array in fs/nfsd/export.c when adding + * a new flag. */ #define NFSEXP_READONLY 0x0001 #define NFSEXP_INSECURE_PORT 0x0002 -- cgit v1.2.3 From bcad57182425426dd4aa14deb27f97acb329f3cd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2015 20:52:24 +0200 Subject: ebpf: add skb->priority to offset map for usage in {cls, act}_bpf This adds the ability to read out the skb->priority from an eBPF program, so that it can be taken into account from a tc filter or action for the use-case where the priority is not being used to directly override the filter classification in a qdisc, but to tag traffic otherwise for the classifier; the priority can be assigned from various places incl. user space, in future we may also mangle it from an eBPF program. Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 74aab6e0d964..0db8580f3cca 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -184,6 +184,7 @@ struct __sk_buff { __u32 vlan_present; __u32 vlan_tci; __u32 vlan_proto; + __u32 priority; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 444a07e4f68d..955a7d77decd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1304,6 +1304,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, offsetof(struct sk_buff, vlan_proto)); break; + case offsetof(struct __sk_buff, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + break; + case offsetof(struct __sk_buff, mark): return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); -- cgit v1.2.3 From 91bc4822c3d61b9bb7ef66d3b77948a4f9177954 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 1 Apr 2015 17:12:13 -0700 Subject: tc: bpf: add checksum helpers Commit 608cd71a9c7c ("tc: bpf: generalize pedit action") has added the possibility to mangle packet data to BPF programs in the tc pipeline. This patch adds two helpers bpf_l3_csum_replace() and bpf_l4_csum_replace() for fixing up the protocol checksums after the packet mangling. It also adds 'flags' argument to bpf_skb_store_bytes() helper to avoid unnecessary checksum recomputations when BPF programs adjusting l3/l4 checksums and documents all three helpers in uapi header. Moreover, a sample program is added to show how BPF programs can make use of the mangle and csum helpers. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 38 +++++++++++++++- net/core/filter.c | 108 ++++++++++++++++++++++++++++++++++++++++++++-- samples/bpf/Makefile | 1 + samples/bpf/bpf_helpers.h | 7 +++ samples/bpf/tcbpf1_kern.c | 71 ++++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 5 deletions(-) create mode 100644 samples/bpf/tcbpf1_kern.c (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0db8580f3cca..23df3e7f8e7d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,7 +168,43 @@ enum bpf_func_id { BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */ + + /** + * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->data + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_skb_store_bytes, + + /** + * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_l3_csum_replace, + + /** + * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_l4_csum_replace, __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index 955a7d77decd..b669e75d2b36 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } -static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) + +static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; unsigned int offset = (unsigned int) r2; @@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * * so check for invalid 'offset' and too large 'len' */ - if (offset > 0xffff || len > sizeof(buf)) + if (unlikely(offset > 0xffff || len > sizeof(buf))) return -EFAULT; if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) @@ -1202,7 +1204,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) if (unlikely(!ptr)) return -EFAULT; - skb_postpull_rcsum(skb, ptr, len); + if (BPF_RECOMPUTE_CSUM(flags)) + skb_postpull_rcsum(skb, ptr, len); memcpy(ptr, from, len); @@ -1210,7 +1213,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); return 0; } @@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_STACK, .arg4_type = ARG_CONST_STACK_SIZE, + .arg5_type = ARG_ANYTHING, +}; + +#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) +#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) + +static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + __sum16 sum, *ptr; + + if (unlikely(offset > 0xffff)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); + if (unlikely(!ptr)) + return -EFAULT; + + switch (BPF_HEADER_FIELD_SIZE(flags)) { + case 2: + csum_replace2(ptr, from, to); + break; + case 4: + csum_replace4(ptr, from, to); + break; + default: + return -EINVAL; + } + + if (ptr == &sum) + /* skb_store_bits guaranteed to not return -EFAULT here */ + skb_store_bits(skb, offset, ptr, sizeof(sum)); + + return 0; +} + +const struct bpf_func_proto bpf_l3_csum_replace_proto = { + .func = bpf_l3_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + __sum16 sum, *ptr; + + if (unlikely(offset > 0xffff)) + return -EFAULT; + + if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); + if (unlikely(!ptr)) + return -EFAULT; + + switch (BPF_HEADER_FIELD_SIZE(flags)) { + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); + break; + case 4: + inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo); + break; + default: + return -EINVAL; + } + + if (ptr == &sum) + /* skb_store_bits guaranteed to not return -EFAULT here */ + skb_store_bits(skb, offset, ptr, sizeof(sum)); + + return 0; +} + +const struct bpf_func_proto bpf_l4_csum_replace_proto = { + .func = bpf_l4_csum_replace, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, }; static const struct bpf_func_proto * @@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; + case BPF_FUNC_l3_csum_replace: + return &bpf_l3_csum_replace_proto; + case BPF_FUNC_l4_csum_replace: + return &bpf_l4_csum_replace_proto; default: return sk_filter_func_proto(func_id); } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b5b3600dcdf5..d24f51bca465 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -17,6 +17,7 @@ sockex2-objs := bpf_load.o libbpf.o sockex2_user.o always := $(hostprogs-y) always += sockex1_kern.o always += sockex2_kern.o +always += tcbpf1_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index ca0333146006..72540ec1f003 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -37,4 +37,11 @@ struct bpf_map_def { unsigned int max_entries; }; +static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = + (void *) BPF_FUNC_skb_store_bytes; +static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = + (void *) BPF_FUNC_l3_csum_replace; +static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = + (void *) BPF_FUNC_l4_csum_replace; + #endif diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c new file mode 100644 index 000000000000..7cf3f42a6e39 --- /dev/null +++ b/samples/bpf/tcbpf1_kern.c @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +/* compiler workaround */ +#define _htonl __builtin_bswap32 + +static inline void set_dst_mac(struct __sk_buff *skb, char *mac) +{ + bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); +} + +/* use 1 below for ingress qdisc and 0 for egress */ +#if 0 +#undef ETH_HLEN +#define ETH_HLEN 0 +#endif + +#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) +#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) + +static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) +{ + __u8 old_tos = load_byte(skb, TOS_OFF); + + bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); + bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); +} + +#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check)) +#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr)) + +#define IS_PSEUDO 0x10 + +static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) +{ + __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF)); + + bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); + bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); + bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0); +} + +#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) +static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) +{ + __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF)); + + bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); + bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); +} + +SEC("classifier") +int bpf_prog1(struct __sk_buff *skb) +{ + __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + long *value; + + if (proto == IPPROTO_TCP) { + set_ip_tos(skb, 8); + set_tcp_ip_src(skb, 0xA010101); + set_tcp_dest_port(skb, 5001); + } + + return 0; +} +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9a9634545c7051f567096117d417e9c3be24706d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 7 Apr 2015 11:51:53 +0200 Subject: netns: notify netns id events With this patch, netns ids that are created and deleted are advertised into the group RTNLGRP_NSID. Because callers of rtnl_net_notifyid() already know the id of the peer, there is no need to call __peernet2id() in rtnl_net_fill(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 4 ++++ net/core/net_namespace.c | 52 +++++++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index bea910f924dd..974db03f7b1a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -134,6 +134,8 @@ enum { RTM_NEWNSID = 88, #define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID RTM_GETNSID = 90, #define RTM_GETNSID RTM_GETNSID @@ -635,6 +637,8 @@ enum rtnetlink_groups { #define RTNLGRP_MDB RTNLGRP_MDB RTNLGRP_MPLS_ROUTE, #define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index be28afccfbbb..b3b5f22f0e90 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id); static int alloc_netid(struct net *net, struct net *peer, int reqid) { - int min = 0, max = 0; + int min = 0, max = 0, id; ASSERT_RTNL(); @@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid) max = reqid + 1; } - return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + if (id >= 0) + rtnl_net_notifyid(net, peer, RTM_NEWNSID, id); + + return id; } /* This function is used by idr_for_each(). If net is equal to peer, the @@ -359,8 +365,10 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id = __peernet2id(tmp, net, false); - if (id >= 0) + if (id >= 0) { + rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); idr_remove(&tmp->netns_ids, id); + } } idr_destroy(&net->netns_ids); @@ -531,7 +539,8 @@ static int rtnl_net_get_size(void) } static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, - int cmd, struct net *net, struct net *peer) + int cmd, struct net *net, struct net *peer, + int nsid) { struct nlmsghdr *nlh; struct rtgenmsg *rth; @@ -546,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; - id = __peernet2id(net, peer, false); - if (id < 0) - id = NETNSA_NSID_NOT_ASSIGNED; + if (nsid >= 0) { + id = nsid; + } else { + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + } if (nla_put_s32(skb, NETNSA_NSID, id)) goto nla_put_failure; @@ -589,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) } err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_GETNSID, net, peer); + RTM_GETNSID, net, peer, -1); if (err < 0) goto err_out; @@ -603,6 +616,29 @@ out: return err; } +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id) +{ + struct sk_buff *msg; + int err = -ENOMEM; + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) + goto out; + + err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); + if (err < 0) + goto err_out; + + rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); + return; + +err_out: + nlmsg_free(msg); +out: + rtnl_set_sk_err(net, RTNLGRP_NSID, err); +} + static int __init net_ns_init(void) { struct net_generic *ng; -- cgit v1.2.3 From b6e5b8f1a90230f922de8af59cdaf1ad83e1ac1e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 20 Mar 2015 10:45:43 -0300 Subject: [media] media: New flag V4L2_CTRL_FLAG_EXECUTE_ON_WRITE Create a new flag that represent controls which its value needs to be passed to the driver even if it has not changed. They typically represent actions, like triggering a flash or clearing an error flag. So writing to such a control means some action is executed. Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 47df18f36c4b..810bade873f4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1457,6 +1457,7 @@ struct v4l2_querymenu { #define V4L2_CTRL_FLAG_WRITE_ONLY 0x0040 #define V4L2_CTRL_FLAG_VOLATILE 0x0080 #define V4L2_CTRL_FLAG_HAS_PAYLOAD 0x0100 +#define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE 0x0200 /* Query flags, to be ORed with the control ID */ #define V4L2_CTRL_FLAG_NEXT_CTRL 0x80000000 -- cgit v1.2.3 From 5ce65d1f874ddc109780fc781bb3b099bff82001 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 20 Mar 2015 14:05:03 -0300 Subject: [media] videodev2.h/v4l2-dv-timings.h: add V4L2_DV_FL_IS_CE_VIDEO flag In the past the V4L2_DV_BT_STD_CEA861 standard bit was used to determine whether the format is a CE (Consumer Electronics) format or not. However, the 640x480p59.94 format is part of the CEA-861 standard, but it is *not* a CE video format. Add a new flag to make this explicit. This information is needed in order to determine the default R'G'B' encoding for the format: for CE video this is limited range (16-235) instead of full range (0-255). The header with all the timings has been updated with this new flag. Signed-off-by: Hans Verkuil Cc: Martin Bugge Cc: Mats Randgaard Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-dv-timings.h | 64 ++++++++++++++++++++++-------------- include/uapi/linux/videodev2.h | 6 ++++ 2 files changed, 45 insertions(+), 25 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index 6c8f159e416e..c039f1d68a09 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -48,14 +48,15 @@ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(720, 480, 1, 0, \ 13500000, 19, 62, 57, 4, 3, 15, 4, 3, 16, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_720X480P59_94 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(720, 480, 0, 0, \ 27000000, 16, 62, 60, 9, 6, 30, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } /* Note: these are the nominal timings, for HDMI links this format is typically @@ -64,14 +65,15 @@ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(720, 576, 1, 0, \ 13500000, 12, 63, 69, 2, 3, 19, 2, 3, 20, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_720X576P50 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(720, 576, 0, 0, \ 27000000, 12, 64, 68, 5, 5, 39, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1280X720P24 { \ @@ -88,7 +90,7 @@ V4L2_INIT_BT_TIMINGS(1280, 720, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 2420, 40, 220, 5, 5, 20, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1280X720P30 { \ @@ -96,7 +98,8 @@ V4L2_INIT_BT_TIMINGS(1280, 720, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 1760, 40, 220, 5, 5, 20, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1280X720P50 { \ @@ -104,7 +107,7 @@ V4L2_INIT_BT_TIMINGS(1280, 720, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 440, 40, 220, 5, 5, 20, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1280X720P60 { \ @@ -112,7 +115,8 @@ V4L2_INIT_BT_TIMINGS(1280, 720, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 110, 40, 220, 5, 5, 20, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080P24 { \ @@ -120,7 +124,8 @@ V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 638, 44, 148, 4, 5, 36, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080P25 { \ @@ -128,7 +133,7 @@ V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 528, 44, 148, 4, 5, 36, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080P30 { \ @@ -136,7 +141,8 @@ V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 88, 44, 148, 4, 5, 36, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080I50 { \ @@ -144,7 +150,8 @@ V4L2_INIT_BT_TIMINGS(1920, 1080, 1, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 528, 44, 148, 2, 5, 15, 2, 5, 16, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080P50 { \ @@ -152,7 +159,7 @@ V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 148500000, 528, 44, 148, 4, 5, 36, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080I60 { \ @@ -161,7 +168,8 @@ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 74250000, 88, 44, 148, 2, 5, 15, 2, 5, 16, \ V4L2_DV_BT_STD_CEA861, \ - V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_HALF_LINE) \ + V4L2_DV_FL_CAN_REDUCE_FPS | \ + V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_1920X1080P60 { \ @@ -170,77 +178,83 @@ V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 148500000, 88, 44, 148, 4, 5, 36, 0, 0, 0, \ V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CEA861, \ - V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, 0) \ + V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ 594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ - V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \ + V4L2_DV_BT_STD_CEA861, \ + V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ } diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 810bade873f4..fa376f7666ba 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1188,6 +1188,12 @@ struct v4l2_bt_timings { exactly the same number of half-lines. Whether half-lines can be detected or used depends on the hardware. */ #define V4L2_DV_FL_HALF_LINE (1 << 3) +/* If set, then this is a Consumer Electronics (CE) video format. Such formats + * differ from other formats (commonly called IT formats) in that if RGB + * encoding is used then by default the RGB values use limited range (i.e. + * use the range 16-235) as opposed to 0-255. All formats defined in CEA-861 + * except for the 640x480 format are CE formats. */ +#define V4L2_DV_FL_IS_CE_VIDEO (1 << 4) /* A few useful defines to calculate the total blanking and frame sizes */ #define V4L2_DV_BT_BLANKING_WIDTH(bt) \ -- cgit v1.2.3 From 22fe54d5fefcfa98c58cc2f4607dd26d9648b3f5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:41:08 +0200 Subject: netfilter: nf_tables: add support for dynamic set updates Add a new "dynset" expression for dynamic set updates. A new set op ->update() is added which, for non existant elements, invokes an initialization callback and inserts the new element. For both new or existing elements the extenstion pointer is returned to the caller to optionally perform timer updates or other actions. Element removal is not supported so far, however that seems to be a rather exotic need and can be added later on. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 17 +++ include/net/netfilter/nf_tables_core.h | 3 + include/uapi/linux/netfilter/nf_tables.h | 27 ++++ net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 10 +- net/netfilter/nf_tables_core.c | 7 + net/netfilter/nft_dynset.c | 218 +++++++++++++++++++++++++++++++ net/netfilter/nft_hash.c | 37 ++++++ 8 files changed, 315 insertions(+), 6 deletions(-) create mode 100644 net/netfilter/nft_dynset.c (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e7e6365c248f..38c3496f7bf2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -196,6 +196,7 @@ struct nft_set_estimate { }; struct nft_set_ext; +struct nft_expr; /** * struct nft_set_ops - nf_tables set operations @@ -218,6 +219,15 @@ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext); + bool (*update)(struct nft_set *set, + const struct nft_data *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_data []), + const struct nft_expr *expr, + struct nft_data data[], + const struct nft_set_ext **ext); + int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); void (*activate)(const struct nft_set *set, @@ -466,6 +476,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + u64 timeout, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem); /** @@ -845,6 +860,8 @@ static inline u8 nft_genmask_cur(const struct net *net) return 1 << ACCESS_ONCE(net->nft.gencursor); } +#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) + /* * Set element transaction helpers */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a75fc8e27cd6..c6f400cfaac8 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -31,6 +31,9 @@ void nft_cmp_module_exit(void); int nft_lookup_module_init(void); void nft_lookup_module_exit(void); +int nft_dynset_module_init(void); +void nft_dynset_module_exit(void); + int nft_bitwise_module_init(void); void nft_bitwise_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 83441cc4594b..0b87b2f67fe3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -515,6 +515,33 @@ enum nft_lookup_attributes { }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) +enum nft_dynset_ops { + NFT_DYNSET_OP_ADD, + NFT_DYNSET_OP_UPDATE, +}; + +/** + * enum nft_dynset_attributes - dynset expression attributes + * + * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING) + * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32) + * @NFTA_DYNSET_OP: operation (NLA_U32) + * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) + * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) + * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) + */ +enum nft_dynset_attributes { + NFTA_DYNSET_UNSPEC, + NFTA_DYNSET_SET_NAME, + NFTA_DYNSET_SET_ID, + NFTA_DYNSET_OP, + NFTA_DYNSET_SREG_KEY, + NFTA_DYNSET_SREG_DATA, + NFTA_DYNSET_TIMEOUT, + __NFTA_DYNSET_MAX, +}; +#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) + /** * enum nft_payload_bases - nf_tables payload expression offset bases * diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 89f73a9e9874..a87d8b8ec730 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 90b898491da7..598e53eb64b3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3183,11 +3183,11 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } -static void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const struct nft_data *key, - const struct nft_data *data, - u64 timeout, gfp_t gfp) +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + u64 timeout, gfp_t gfp) { struct nft_set_ext *ext; void *elem; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index ef4dfcbaf149..7caf08a9225d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -239,8 +239,14 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err6; + err = nft_dynset_module_init(); + if (err < 0) + goto err7; + return 0; +err7: + nft_payload_module_exit(); err6: nft_byteorder_module_exit(); err5: @@ -257,6 +263,7 @@ err1: void nf_tables_core_module_exit(void) { + nft_dynset_module_exit(); nft_payload_module_exit(); nft_byteorder_module_exit(); nft_bitwise_module_exit(); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c new file mode 100644 index 000000000000..eeb72dee78ef --- /dev/null +++ b/net/netfilter/nft_dynset.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2015 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_dynset { + struct nft_set *set; + struct nft_set_ext_tmpl tmpl; + enum nft_dynset_ops op:8; + enum nft_registers sreg_key:8; + enum nft_registers sreg_data:8; + u64 timeout; + struct nft_set_binding binding; +}; + +static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1]) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + u64 timeout; + void *elem; + + if (set->size && !atomic_add_unless(&set->nelems, 1, set->size)) + return NULL; + + timeout = priv->timeout ? : set->timeout; + elem = nft_set_elem_init(set, &priv->tmpl, + &data[priv->sreg_key], &data[priv->sreg_data], + timeout, GFP_ATOMIC); + if (elem == NULL) { + if (set->size) + atomic_dec(&set->nelems); + } + return elem; +} + +static void nft_dynset_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set = priv->set; + const struct nft_set_ext *ext; + u64 timeout; + + if (set->ops->update(set, &data[priv->sreg_key], nft_dynset_new, + expr, data, &ext)) { + if (priv->op == NFT_DYNSET_OP_UPDATE && + nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + timeout = priv->timeout ? : set->timeout; + *nft_set_ext_expiration(ext) = jiffies + timeout; + return; + } + } + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { + [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING }, + [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, + [NFTA_DYNSET_OP] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, + [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, +}; + +static int nft_dynset_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set; + u64 timeout; + int err; + + if (tb[NFTA_DYNSET_SET_NAME] == NULL || + tb[NFTA_DYNSET_OP] == NULL || + tb[NFTA_DYNSET_SREG_KEY] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + if (IS_ERR(set)) { + if (tb[NFTA_DYNSET_SET_ID]) + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_DYNSET_SET_ID]); + if (IS_ERR(set)) + return PTR_ERR(set); + } + + if (set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); + switch (priv->op) { + case NFT_DYNSET_OP_ADD: + break; + case NFT_DYNSET_OP_UPDATE: + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + timeout = 0; + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + } + + priv->sreg_key = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_KEY])); + err = nft_validate_input_register(priv->sreg_key); + if (err < 0) + return err; + + if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + if (set->dtype == NFT_DATA_VERDICT) + return -EOPNOTSUPP; + + priv->sreg_data = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_DATA])); + err = nft_validate_input_register(priv->sreg_data); + if (err < 0) + return err; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + nft_set_ext_prepare(&priv->tmpl); + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); + if (set->flags & NFT_SET_MAP) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); + if (set->flags & NFT_SET_TIMEOUT) { + if (timeout || set->timeout) + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); + } + + priv->timeout = timeout; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static void nft_dynset_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(ctx, priv->set, &priv->binding); +} + +static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_DYNSET_SREG_KEY, htonl(priv->sreg_key))) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP && + nla_put_be32(skb, NFTA_DYNSET_SREG_DATA, htonl(priv->sreg_data))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dynset_type; +static const struct nft_expr_ops nft_dynset_ops = { + .type = &nft_dynset_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), + .eval = nft_dynset_eval, + .init = nft_dynset_init, + .destroy = nft_dynset_destroy, + .dump = nft_dynset_dump, +}; + +static struct nft_expr_type nft_dynset_type __read_mostly = { + .name = "dynset", + .ops = &nft_dynset_ops, + .policy = nft_dynset_policy, + .maxattr = NFTA_DYNSET_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_dynset_module_init(void) +{ + return nft_register_expr(&nft_dynset_type); +} + +void nft_dynset_module_exit(void) +{ + nft_unregister_expr(&nft_dynset_type); +} diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c74e2bf1a1e4..bc23806b7fbe 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -90,6 +90,42 @@ static bool nft_hash_lookup(const struct nft_set *set, return !!he; } +static bool nft_hash_update(struct nft_set *set, const struct nft_data *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_data []), + const struct nft_expr *expr, + struct nft_data data[], + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + goto out; + + he = new(set, expr, data); + if (he == NULL) + goto err1; + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params)) + goto err2; +out: + *ext = &he->ext; + return true; + +err2: + nft_set_elem_destroy(set, he); +err1: + return false; +} + static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { @@ -335,6 +371,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, + .update = nft_hash_update, .walk = nft_hash_walk, .features = NFT_SET_MAP | NFT_SET_TIMEOUT, .owner = THIS_MODULE, -- cgit v1.2.3 From 68e942e88add0ac8576fc8397e86495edf3dcea7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 5 Apr 2015 14:43:38 +0200 Subject: netfilter: nf_tables: support optional userdata for set elements Add an userdata set extension and allow the user to attach arbitrary data to set elements. This is intended to hold TLV encoded data like comments or DNS annotations that have no meaning to the kernel. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 38c3496f7bf2..63c44bdfdd3b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -350,6 +350,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_FLAGS: element flags * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time + * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -358,6 +359,7 @@ enum nft_set_extensions { NFT_SET_EXT_FLAGS, NFT_SET_EXT_TIMEOUT, NFT_SET_EXT_EXPIRATION, + NFT_SET_EXT_USERDATA, NFT_SET_EXT_NUM }; @@ -464,6 +466,11 @@ static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ex return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); } +static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_USERDATA); +} + static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0b87b2f67fe3..05ee1e0804a3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -292,6 +292,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) + * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -300,6 +301,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_FLAGS, NFTA_SET_ELEM_TIMEOUT, NFTA_SET_ELEM_EXPIRATION, + NFTA_SET_ELEM_USERDATA, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 598e53eb64b3..0b96fa0d64b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2872,6 +2872,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [NFT_SET_EXT_USERDATA] = { + .len = sizeof(struct nft_userdata), + .align = __alignof__(struct nft_userdata), + }, }; EXPORT_SYMBOL_GPL(nft_set_ext_types); @@ -2884,6 +2888,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -2964,6 +2970,15 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, goto nla_put_failure; } + if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { + struct nft_userdata *udata; + + udata = nft_set_ext_userdata(ext); + if (nla_put(skb, NFTA_SET_ELEM_USERDATA, + udata->len + 1, udata->data)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); return 0; @@ -3232,11 +3247,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_userdata *udata; struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; u64 timeout; u32 flags; + u8 ulen; int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, @@ -3325,6 +3342,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); } + /* The full maximum length of userdata can exceed the maximum + * offset value (U8_MAX) for following extensions, therefor it + * must be the last extension added. + */ + ulen = 0; + if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { + ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); + if (ulen > 0) + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + } + err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, timeout, GFP_KERNEL); @@ -3334,6 +3363,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; + if (ulen > 0) { + udata = nft_set_ext_userdata(ext); + udata->len = ulen - 1; + nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) -- cgit v1.2.3 From 01a3d796813d6302af9f828f34b73d21a4b96c9a Mon Sep 17 00:00:00 2001 From: Vlad Zolotarov Date: Mon, 30 Mar 2015 21:35:23 +0300 Subject: if_link: Add an additional parameter to ifla_vf_info for RSS querying Add configuration setting for drivers to allow/block an RSS Redirection Table and a Hash Key querying for discrete VFs. On some devices VF share the mentioned above information with PF and querying it may adduce a theoretical security risk. We want to let a system administrator to decide if he/she wants to take this risk or not. Signed-off-by: Vlad Zolotarov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 8 ++++++++ include/uapi/linux/if_link.h | 8 ++++++++ net/core/rtnetlink.c | 32 ++++++++++++++++++++++++++------ 4 files changed, 43 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 119130e9298b..da4929927f69 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -14,5 +14,6 @@ struct ifla_vf_info { __u32 linkstate; __u32 min_tx_rate; __u32 max_tx_rate; + __u32 rss_query_en; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf6d9df34d7b..13acb3d8ecdd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -878,6 +878,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); + * + * Enable or disable the VF ability to query its RSS Redirection Table and + * Hash Key. This is needed since on some devices VF share this information + * with PF and querying it may adduce a theoretical security risk. + * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) * Called to setup 'tc' number of traffic classes in the net device. This @@ -1099,6 +1104,9 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_set_vf_rss_query_en)( + struct net_device *dev, + int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7ffb18df01ca..d9cd19214b98 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -465,6 +465,9 @@ enum { IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */ + IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query + * on/off switch + */ __IFLA_VF_MAX, }; @@ -509,6 +512,11 @@ struct ifla_vf_link_state { __u32 link_state; }; +struct ifla_vf_rss_query_en { + __u32 vf; + __u32 setting; +}; + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7a836152359b..358d52a38533 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + - nla_total_size(sizeof(struct ifla_vf_link_state))); + nla_total_size(sizeof(struct ifla_vf_link_state)) + + nla_total_size(sizeof(struct ifla_vf_rss_query_en))); return size; } else return 0; @@ -1132,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_rss_query_en vf_rss_query_en; /* * Not all SR-IOV capable drivers support the - * spoofcheck query. Preset to -1 so the user - * space tool can detect that the driver didn't - * report anything. + * spoofcheck and "RSS query enable" query. Preset to + * -1 so the user space tool can detect that the driver + * didn't report anything. */ ivi.spoofchk = -1; + ivi.rss_query_en = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1152,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = - vf_linkstate.vf = ivi.vf; + vf_linkstate.vf = + vf_rss_query_en.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1162,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; + vf_rss_query_en.setting = ivi.rss_query_en; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1176,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), &vf_spoofchk) || nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), - &vf_linkstate)) + &vf_linkstate) || + nla_put(skb, IFLA_VF_RSS_QUERY_EN, + sizeof(vf_rss_query_en), + &vf_rss_query_en)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1290,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1500,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivl->link_state); break; } + case IFLA_VF_RSS_QUERY_EN: { + struct ifla_vf_rss_query_en *ivrssq_en; + + ivrssq_en = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, + ivrssq_en->vf, + ivrssq_en->setting); + break; + } default: err = -EINVAL; break; -- cgit v1.2.3 From 7a6c8c34e5b71ac50e39588e20b39494a9e1d8e5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 10 Apr 2015 12:00:30 -0700 Subject: fou: implement FOU_CMD_GET Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/fou.h | 1 + net/ipv4/fou.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index c303588bb767..d2947c52dc67 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -25,6 +25,7 @@ enum { FOU_CMD_UNSPEC, FOU_CMD_ADD, FOU_CMD_DEL, + FOU_CMD_GET, __FOU_CMD_MAX, }; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index c244b1a65787..263710259774 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -21,6 +21,7 @@ struct fou { u8 protocol; u8 flags; __be16 port; + u16 type; struct udp_offload udp_offloads; struct list_head list; }; @@ -487,6 +488,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, goto error; } + fou->type = cfg->type; + udp_sk(sk)->encap_type = 1; udp_encap_enable(); @@ -617,6 +620,106 @@ static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) return fou_destroy(net, &cfg); } +static int fou_fill_info(struct fou *fou, struct sk_buff *msg) +{ + if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || + nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || + nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || + nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) + return -1; + + if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) + if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) + return -1; + return 0; +} + +static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, + u32 flags, struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (fou_fill_info(fou, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct fou_net *fn = net_generic(net, fou_net_id); + struct sk_buff *msg; + struct fou_cfg cfg; + struct fou *fout; + __be16 port; + int ret; + + ret = parse_nl_config(info, &cfg); + if (ret) + return ret; + port = cfg.udp_config.local_udp_port; + if (port == 0) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = -ESRCH; + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (port == fout->port) { + ret = fou_dump_info(fout, info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + break; + } + } + mutex_unlock(&fn->fou_lock); + if (ret < 0) + goto out_free; + + return genlmsg_reply(msg, info); + +out_free: + nlmsg_free(msg); + return ret; +} + +static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct fou_net *fn = net_generic(net, fou_net_id); + struct fou *fout; + int idx = 0, ret; + + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (idx++ < cb->args[0]) + continue; + ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, FOU_CMD_GET); + if (ret) + goto done; + } + mutex_unlock(&fn->fou_lock); + +done: + cb->args[0] = idx; + return skb->len; +} + static const struct genl_ops fou_nl_ops[] = { { .cmd = FOU_CMD_ADD, @@ -630,6 +733,12 @@ static const struct genl_ops fou_nl_ops[] = { .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = FOU_CMD_GET, + .doit = fou_nl_cmd_get_port, + .dumpit = fou_nl_dump, + .policy = fou_nl_policy, + }, }; size_t fou_encap_hlen(struct ip_tunnel_encap *e) -- cgit v1.2.3 From 49499c3e6e18b7677a63316f3ff54a16533dc28f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 11 Apr 2015 02:27:37 +0100 Subject: netfilter: nf_tables: switch registers to 32 bit addressing Switch the nf_tables registers from 128 bit addressing to 32 bit addressing to support so called concatenations, where multiple values can be concatenated over multiple registers for O(1) exact matches of multiple dimensions using sets. The old register values are mapped to areas of 128 bits for compatibility. When dumping register numbers, values are expressed using the old values if they refer to the beginning of a 128 bit area for compatibility. To support concatenations, register loads of less than a full 32 bit value need to be padded. This mainly affects the payload and exthdr expressions, which both unconditionally zero the last word before copying the data. Userspace fully passes the testsuite using both old and new register addressing. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 13 +++----- include/uapi/linux/netfilter/nf_tables.h | 31 +++++++++++++++++- net/bridge/netfilter/nft_meta_bridge.c | 2 +- net/ipv4/netfilter/nft_redir_ipv4.c | 4 +-- net/ipv6/netfilter/nft_redir_ipv6.c | 4 +-- net/netfilter/nf_tables_api.c | 54 +++++++++++++++++++++++++------- net/netfilter/nf_tables_core.c | 5 +-- net/netfilter/nft_bitwise.c | 4 +-- net/netfilter/nft_byteorder.c | 4 +-- net/netfilter/nft_ct.c | 4 +-- net/netfilter/nft_exthdr.c | 3 +- net/netfilter/nft_immediate.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_meta.c | 7 +++-- net/netfilter/nft_nat.c | 16 +++++----- net/netfilter/nft_payload.c | 3 +- 16 files changed, 110 insertions(+), 48 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f8f27a48bbe9..1f9b848c778c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -64,17 +64,15 @@ struct nft_data { */ struct nft_regs { union { - struct nft_data data[NFT_REG_MAX + 1]; + u32 data[20]; struct nft_verdict verdict; }; }; -static inline void nft_data_copy(struct nft_data *dst, - const struct nft_data *src) +static inline void nft_data_copy(u32 *dst, const struct nft_data *src, + unsigned int len) { - BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64)); - *(u64 *)&dst->data[0] = *(u64 *)&src->data[0]; - *(u64 *)&dst->data[2] = *(u64 *)&src->data[2]; + memcpy(dst, src, len); } static inline void nft_data_debug(const struct nft_data *data) @@ -502,8 +500,7 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, - const struct nft_data *key, - const struct nft_data *data, + const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 05ee1e0804a3..4221a6c3a8a5 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -5,16 +5,45 @@ #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 +/** + * enum nft_registers - nf_tables registers + * + * nf_tables used to have five registers: a verdict register and four data + * registers of size 16. The data registers have been changed to 16 registers + * of size 4. For compatibility reasons, the NFT_REG_[1-4] registers still + * map to areas of size 16, the 4 byte registers are addressed using + * NFT_REG32_00 - NFT_REG32_15. + */ enum nft_registers { NFT_REG_VERDICT, NFT_REG_1, NFT_REG_2, NFT_REG_3, NFT_REG_4, - __NFT_REG_MAX + __NFT_REG_MAX, + + NFT_REG32_00 = 8, + MFT_REG32_01, + NFT_REG32_02, + NFT_REG32_03, + NFT_REG32_04, + NFT_REG32_05, + NFT_REG32_06, + NFT_REG32_07, + NFT_REG32_08, + NFT_REG32_09, + NFT_REG32_10, + NFT_REG32_11, + NFT_REG32_12, + NFT_REG32_13, + NFT_REG32_14, + NFT_REG32_15, }; #define NFT_REG_MAX (__NFT_REG_MAX - 1) +#define NFT_REG_SIZE 16 +#define NFT_REG32_SIZE 4 + /** * enum nft_verdicts - nf_tables internal verdicts * diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 99dab70ecae0..a21269b83f16 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -24,7 +24,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, { const struct nft_meta *priv = nft_expr_priv(expr); const struct net_device *in = pkt->in, *out = pkt->out; - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; switch (priv->key) { diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 312cf6f3b6dc..d8d795df9c13 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -27,9 +27,9 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { mr.range[0].min.all = - *(__be16 *)®s->data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; mr.range[0].max.all = - *(__be16 *)®s->data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 0eed774815cf..effd393bd517 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -27,9 +27,9 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min], range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max], range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a25fd19453e7..03faf76ce3b8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3201,8 +3201,7 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, - const struct nft_data *key, - const struct nft_data *data, + const u32 *key, const u32 *data, u64 timeout, gfp_t gfp) { struct nft_set_ext *ext; @@ -3357,7 +3356,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.data, data.data, timeout, GFP_KERNEL); if (elem.priv == NULL) goto err3; @@ -4122,14 +4121,47 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, return 0; } +/** + * nft_parse_register - parse a register value from a netlink attribute + * + * @attr: netlink attribute + * + * Parse and translate a register value from a netlink attribute. + * Registers used to be 128 bit wide, these register numbers will be + * mapped to the corresponding 32 bit register numbers. + */ unsigned int nft_parse_register(const struct nlattr *attr) { - return ntohl(nla_get_be32(attr)); + unsigned int reg; + + reg = ntohl(nla_get_be32(attr)); + switch (reg) { + case NFT_REG_VERDICT...NFT_REG_4: + return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + default: + return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + } } EXPORT_SYMBOL_GPL(nft_parse_register); +/** + * nft_dump_register - dump a register value to a netlink attribute + * + * @skb: socket buffer + * @attr: attribute number + * @reg: register number + * + * Construct a netlink attribute containing the register number. For + * compatibility reasons, register numbers being a multiple of 4 are + * translated to the corresponding 128 bit register numbers. + */ int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg) { + if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0) + reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE); + else + reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00; + return nla_put_be32(skb, attr, htonl(reg)); } EXPORT_SYMBOL_GPL(nft_dump_register); @@ -4145,14 +4177,13 @@ EXPORT_SYMBOL_GPL(nft_dump_register); */ int nft_validate_register_load(enum nft_registers reg, unsigned int len) { - if (reg <= NFT_REG_VERDICT) + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; - if (reg > NFT_REG_MAX) - return -ERANGE; if (len == 0) return -EINVAL; - if (len > FIELD_SIZEOF(struct nft_data, data)) + if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data)) return -ERANGE; + return 0; } EXPORT_SYMBOL_GPL(nft_validate_register_load); @@ -4200,13 +4231,12 @@ int nft_validate_register_store(const struct nft_ctx *ctx, return 0; default: - if (reg < NFT_REG_1) + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; - if (reg > NFT_REG_MAX) - return -ERANGE; if (len == 0) return -EINVAL; - if (len > FIELD_SIZEOF(struct nft_data, data)) + if (reg * NFT_REG32_SIZE + len > + FIELD_SIZEOF(struct nft_regs, data)) return -ERANGE; if (data != NULL && type != NFT_DATA_VALUE) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 5ef07d17b358..f153b07073af 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -70,7 +70,7 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); u32 mask = nft_cmp_fast_mask(priv->len); - if ((regs->data[priv->sreg].data[0] & mask) == priv->data) + if ((regs->data[priv->sreg] & mask) == priv->data) return; regs->verdict.code = NFT_BREAK; } @@ -81,7 +81,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) @@ -94,6 +94,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) return false; + *dest = 0; if (priv->len == 2) *(u16 *)dest = *(u16 *)ptr; else if (priv->len == 4) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index aa1147032ace..f1a9be2aecd1 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -30,8 +30,8 @@ static void nft_bitwise_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); - const u32 *src = ®s->data[priv->sreg].data[0]; - u32 *dst = ®s->data[priv->dreg].data[0]; + const u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; unsigned int i; for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 2ee3e57ad814..fde5145f2e36 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -30,8 +30,8 @@ static void nft_byteorder_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); - u32 *src = ®s->data[priv->sreg].data[0]; - u32 *dst = ®s->data[priv->dreg].data[0]; + u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; union { u32 u32; u16 u16; } *s, *d; unsigned int i; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index fab8e754b18a..8cbca3432f90 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -35,7 +35,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; const struct nf_conn_help *help; @@ -156,7 +156,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; #ifdef CONFIG_NF_CONNTRACK_MARK - u32 value = regs->data[priv->sreg].data[0]; + u32 value = regs->data[priv->sreg]; #endif enum ip_conntrack_info ctinfo; struct nf_conn *ct; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 098ffee793d7..ba7aed13e174 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -30,7 +30,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; unsigned int offset = 0; int err; @@ -39,6 +39,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, goto err; offset += priv->offset; + dest[priv->len / NFT_REG32_SIZE] = 0; if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) goto err; return; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0682f600c7a5..1e8e412eadae 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -29,7 +29,7 @@ static void nft_immediate_eval(const struct nft_expr *expr, { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - nft_data_copy(®s->data[priv->dreg], &priv->data); + nft_data_copy(®s->data[priv->dreg], &priv->data, priv->dlen); } static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index fc7afff81566..ba1466209f2a 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -36,7 +36,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, if (set->ops->lookup(set, ®s->data[priv->sreg], &ext)) { if (set->flags & NFT_SET_MAP) nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext)); + nft_set_ext_data(ext), set->dlen); return; } regs->verdict.code = NFT_BREAK; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5f744eb61de5..52561e1c31e2 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -31,13 +31,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = pkt->in, *out = pkt->out; - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; switch (priv->key) { case NFT_META_LEN: *dest = skb->len; break; case NFT_META_PROTOCOL: + *dest = 0; *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: @@ -75,11 +76,13 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; + *dest = 0; *(u16 *)dest = in->type; break; case NFT_META_OIFTYPE: if (out == NULL) goto err; + *dest = 0; *(u16 *)dest = out->type; break; case NFT_META_SKUID: @@ -185,7 +188,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg].data[0]; + u32 value = regs->data[meta->sreg]; switch (meta->key) { case NFT_META_MARK: diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 065cbda63b0a..ee2d71753746 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -49,26 +49,26 @@ static void nft_nat_eval(const struct nft_expr *expr, if (priv->sreg_addr_min) { if (priv->family == AF_INET) { range.min_addr.ip = (__force __be32) - regs->data[priv->sreg_addr_min].data[0]; + regs->data[priv->sreg_addr_min]; range.max_addr.ip = (__force __be32) - regs->data[priv->sreg_addr_max].data[0]; + regs->data[priv->sreg_addr_max]; } else { memcpy(range.min_addr.ip6, - ®s->data[priv->sreg_addr_min].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_min], + sizeof(range.min_addr.ip6)); memcpy(range.max_addr.ip6, - ®s->data[priv->sreg_addr_max].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_max], + sizeof(range.max_addr.ip6)); } range.flags |= NF_NAT_RANGE_MAP_IPS; } if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 5fa997346a23..94fb3b27a2c5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -23,7 +23,7 @@ static void nft_payload_eval(const struct nft_expr *expr, { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - u32 *dest = ®s->data[priv->dreg].data[0]; + u32 *dest = ®s->data[priv->dreg]; int offset; switch (priv->base) { @@ -43,6 +43,7 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; + dest[priv->len / NFT_REG32_SIZE] = 0; if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; -- cgit v1.2.3 From 7d7402642eaf385aef0772eff5a35e34fc4995d7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 11 Apr 2015 02:27:39 +0100 Subject: netfilter: nf_tables: variable sized set element keys / data This patch changes sets to support variable sized set element keys / data up to 64 bytes each by using variable sized set extensions. This allows to use concatenations with bigger data items suchs as IPv6 addresses. As a side effect, small keys/data now don't require the full 16 bytes of struct nft_data anymore but just the space they need. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 ++++- include/uapi/linux/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 27 ++++++++++++--------------- net/netfilter/nft_hash.c | 4 ++-- net/netfilter/nft_rbtree.c | 3 ++- 5 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 160577bf0f0a..cb42da1011ef 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -158,7 +158,10 @@ struct nft_userdata { * @priv: element private data and extensions */ struct nft_set_elem { - struct nft_data key; + union { + u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)]; + struct nft_data val; + } key; void *priv; }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4221a6c3a8a5..be8584c95297 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -388,6 +388,9 @@ enum nft_data_attributes { }; #define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) +/* Maximum length of a value */ +#define NFT_DATA_VALUE_MAXLEN 64 + /** * enum nft_verdict_attributes - nf_tables verdict netlink attributes * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b3f88f4c70f..ed0e70ea2bc5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2608,7 +2608,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; flags = 0; @@ -2634,11 +2634,10 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (desc.dlen == 0 || - desc.dlen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; } else - desc.dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_verdict); } else if (flags & NFT_SET_MAP) return -EINVAL; @@ -2854,12 +2853,10 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { - .len = sizeof(struct nft_data), - .align = __alignof__(struct nft_data), + .align = __alignof__(u32), }, [NFT_SET_EXT_DATA] = { - .len = sizeof(struct nft_data), - .align = __alignof__(struct nft_data), + .align = __alignof__(u32), }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), @@ -3299,7 +3296,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, timeout = set->timeout; } - err = nft_data_init(ctx, &elem.key, sizeof(elem.key), &d1, + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1, nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; @@ -3307,7 +3304,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len); if (timeout > 0) { nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); if (timeout != set->timeout) @@ -3342,7 +3339,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err3; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); } /* The full maximum length of userdata can exceed the maximum @@ -3358,7 +3355,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, elem.key.data, data.data, + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data, timeout, GFP_KERNEL); if (elem.priv == NULL) goto err3; @@ -3393,7 +3390,7 @@ err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_uninit(&data, d2.type); err2: - nft_data_uninit(&elem.key, d1.type); + nft_data_uninit(&elem.key.val, d1.type); err1: return err; } @@ -3460,7 +3457,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) goto err1; - err = nft_data_init(ctx, &elem.key, sizeof(elem.key), &desc, + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; @@ -3488,7 +3485,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, err3: kfree(trans); err2: - nft_data_uninit(&elem.key, desc.type); + nft_data_uninit(&elem.key.val, desc.type); err1: return err; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 767df41d28ea..3f9d45d3d9b7 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -133,7 +133,7 @@ static int nft_hash_insert(const struct nft_set *set, struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, - .key = elem->key.data, + .key = elem->key.val.data, }; return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, @@ -157,7 +157,7 @@ static void *nft_hash_deactivate(const struct nft_set *set, struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, - .key = elem->key.data, + .key = elem->key.val.data, }; rcu_read_lock(); diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index b888e0cdf1e2..1c30f41cff5b 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -152,7 +152,8 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) -- cgit v1.2.3 From 24477e57412a7a7dea62637ac990bc5c1cff0665 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 8 Apr 2015 19:41:40 +0200 Subject: uapi: ebtables: don't include linux/if.h linux/if.h creates conflicts in userspace with net/if.h By using it here we force userspace to use linux/if.h while net/if.h may be needed. Note that: include/linux/netfilter_ipv4/ip_tables.h and include/linux/netfilter_ipv6/ip6_tables.h don't include linux/if.h and they also refer to IFNAMSIZ, so they are expecting userspace to include use net/if.h from the client program. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 3 ++- include/uapi/linux/netfilter_bridge/ebtables.h | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 34e7a2b7f867..f1bd3962e6b6 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -12,9 +12,10 @@ #ifndef __LINUX_BRIDGE_EFF_H #define __LINUX_BRIDGE_EFF_H +#include +#include #include - /* return values for match() functions */ #define EBT_MATCH 0 #define EBT_NOMATCH 1 diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index ba993360dbe9..773dfe8924c7 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -12,9 +12,7 @@ #ifndef _UAPI__LINUX_BRIDGE_EFF_H #define _UAPI__LINUX_BRIDGE_EFF_H -#include #include -#include #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN -- cgit v1.2.3 From f25ad2e907f110378159fe5e088aa13176faaa5b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 11 Apr 2015 10:46:39 +0100 Subject: netfilter: nf_tables: prepare for expressions associated to set elements Preparation to attach expressions to set elements: add a set extension type to hold an expression and dump the expression information with the set element. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +++++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 9 +++++++++ 3 files changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e21623cb7b20..d45a871b3da6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -371,6 +371,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_TIMEOUT: element timeout * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element + * @NFT_SET_EXT_EXPR: expression assiociated with the element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -380,6 +381,7 @@ enum nft_set_extensions { NFT_SET_EXT_TIMEOUT, NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, + NFT_SET_EXT_EXPR, NFT_SET_EXT_NUM }; @@ -491,6 +493,11 @@ static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext return nft_set_ext(ext, NFT_SET_EXT_USERDATA); } +static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPR); +} + static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be8584c95297..f9c5af22a6af 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -322,6 +322,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -331,6 +332,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_TIMEOUT, NFTA_SET_ELEM_EXPIRATION, NFTA_SET_ELEM_USERDATA, + NFTA_SET_ELEM_EXPR, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e97bee59fe08..8830811550ec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2904,6 +2904,9 @@ const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_DATA] = { .align = __alignof__(u32), }, + [NFT_SET_EXT_EXPR] = { + .align = __alignof__(struct nft_expr), + }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), @@ -2990,6 +2993,10 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, set->dlen) < 0) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) && + nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) + goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) @@ -3276,6 +3283,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem) nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); } -- cgit v1.2.3 From 7c6c6e95a12e46f499749bdd496e53d03950f377 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 11 Apr 2015 10:46:41 +0100 Subject: netfilter: nf_tables: add flag to indicate set contains expressions Add a set flag to indicate that the set is used as a state table and contains expressions for evaluation. This operation is mutually exclusive with the mapping operation, so sets specifying both are rejected. The lookup expression also rejects binding to state tables since it only deals with loopup and map operations. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 8 ++++++-- net/netfilter/nft_lookup.c | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f9c5af22a6af..48942381d02f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -238,6 +238,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_INTERVAL: set contains intervals * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts + * @NFT_SET_EVAL: set contains expressions for evaluation */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -245,6 +246,7 @@ enum nft_set_flags { NFT_SET_INTERVAL = 0x4, NFT_SET_MAP = 0x8, NFT_SET_TIMEOUT = 0x10, + NFT_SET_EVAL = 0x20, }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8830811550ec..78af83bc9c8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2661,9 +2661,13 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_FLAGS] != NULL) { flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | - NFT_SET_INTERVAL | NFT_SET_MAP | - NFT_SET_TIMEOUT)) + NFT_SET_INTERVAL | NFT_SET_TIMEOUT | + NFT_SET_MAP | NFT_SET_EVAL)) return -EINVAL; + /* Only one of both operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == + (NFT_SET_MAP | NFT_SET_EVAL)) + return -EOPNOTSUPP; } dtype = 0; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index ba1466209f2a..b3c31ef8015d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -71,6 +71,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return PTR_ERR(set); } + if (set->flags & NFT_SET_EVAL) + return -EOPNOTSUPP; + priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); err = nft_validate_register_load(priv->sreg, set->klen); if (err < 0) -- cgit v1.2.3 From 3e135cd499bfbec15684fe9c756162d58df4dc77 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 11 Apr 2015 10:46:42 +0100 Subject: netfilter: nft_dynset: dynamic stateful expression instantiation Support instantiating stateful expressions based on a template that are associated with dynamically created set entries. The expressions are evaluated when adding or updating the set element. This allows to maintain per flow state using the existing set infrastructure and expression types, with arbitrary definitions of a flow. Usage is currently restricted to anonymous sets, meaning only a single binding can exist, since the desired semantics of multiple independant bindings haven't been defined so far. Examples (userspace syntax is still WIP): 1. Limit the rate of new SSH connections per host, similar to iptables hashlimit: flow ip saddr timeout 60s \ limit 10/second \ accept 2. Account network traffic between each set of /24 networks: flow ip saddr & 255.255.255.0 . ip daddr & 255.255.255.0 \ counter 3. Account traffic to each host per user: flow skuid . ip daddr \ counter 4. Account traffic for each combination of source address and TCP flags: flow ip saddr . tcp flags \ counter The resulting set content after a Xmas-scan look like this: { 192.168.122.1 . fin | psh | urg : counter packets 1001 bytes 40040, 192.168.122.1 . ack : counter packets 74 bytes 3848, 192.168.122.1 . psh | ack : counter packets 35 bytes 3144 } Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_dynset.c | 54 +++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 48942381d02f..5fa1cd04762e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -567,6 +567,7 @@ enum nft_dynset_ops { * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) + * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) */ enum nft_dynset_attributes { NFTA_DYNSET_UNSPEC, @@ -576,6 +577,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_SREG_KEY, NFTA_DYNSET_SREG_DATA, NFTA_DYNSET_TIMEOUT, + NFTA_DYNSET_EXPR, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 03699d5c0b4b..513a8ef60a59 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -23,6 +23,7 @@ struct nft_dynset { enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; u64 timeout; + struct nft_expr *expr; struct nft_set_binding binding; }; @@ -30,6 +31,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set_ext *ext; u64 timeout; void *elem; @@ -44,7 +46,13 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, if (elem == NULL) { if (set->size) atomic_dec(&set->nelems); + return NULL; } + + ext = nft_set_elem_ext(set, elem); + if (priv->expr != NULL) + nft_expr_clone(nft_set_ext_expr(ext), priv->expr); + return elem; } @@ -55,18 +63,27 @@ static void nft_dynset_eval(const struct nft_expr *expr, const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; const struct nft_set_ext *ext; + const struct nft_expr *sexpr; u64 timeout; if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { + sexpr = NULL; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + sexpr = nft_set_ext_expr(ext); + if (priv->op == NFT_DYNSET_OP_UPDATE && nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { timeout = priv->timeout ? : set->timeout; *nft_set_ext_expiration(ext) = jiffies + timeout; - return; - } - } + } else if (sexpr == NULL) + goto out; + if (sexpr != NULL) + sexpr->ops->eval(sexpr, regs, pkt); + return; + } +out: regs->verdict.code = NFT_BREAK; } @@ -77,6 +94,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, }; static int nft_dynset_init(const struct nft_ctx *ctx, @@ -142,10 +160,29 @@ static int nft_dynset_init(const struct nft_ctx *ctx, } else if (set->flags & NFT_SET_MAP) return -EINVAL; + if (tb[NFTA_DYNSET_EXPR] != NULL) { + if (!(set->flags & NFT_SET_EVAL)) + return -EINVAL; + if (!(set->flags & NFT_SET_ANONYMOUS)) + return -EOPNOTSUPP; + + priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); + if (IS_ERR(priv->expr)) + return PTR_ERR(priv->expr); + + err = -EOPNOTSUPP; + if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) + goto err1; + } else if (set->flags & NFT_SET_EVAL) + return -EINVAL; + nft_set_ext_prepare(&priv->tmpl); nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); if (set->flags & NFT_SET_MAP) nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); + if (priv->expr != NULL) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR, + priv->expr->ops->size); if (set->flags & NFT_SET_TIMEOUT) { if (timeout || set->timeout) nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); @@ -155,10 +192,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) - return err; + goto err1; priv->set = set; return 0; + +err1: + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); + return err; } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -167,6 +209,8 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, struct nft_dynset *priv = nft_expr_priv(expr); nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); } static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -184,6 +228,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) goto nla_put_failure; + if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) + goto nla_put_failure; return 0; nla_put_failure: -- cgit v1.2.3 From 996f545fbb0dc9ed4a640b5ef098f51fe28cca5c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Feb 2015 12:44:51 +0900 Subject: drm/nouveau/gem: allow user-space to specify an object should be coherent User-space use mappable BOs notably for fences, and expects that a value update by the GPU will be immediatly visible through the user-space mapping. ARM has a property that may prevent this from happening though: memory can be mapped multiple times only if the different mappings share the same caching properties. However all the lowmem memory is already identity-mapped into the kernel with cache enabled, so when user-space requests an uncached mapping, we actually get an "undefined caching policy" one and this has strange side-effects described on Freedesktop bug 86690. To prevent this from happening, allow user-space to explicitly specify which objects should be coherent, and create such objects with the TTM_PL_FLAG_UNCACHED flag. This will make TTM allocate memory using the DMA API, which will fix the identify mapping and allow us to safely map the objects to user-space uncached. Signed-off-by: Alexandre Courbot Reviewed-by: Lucas Stach Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +++ include/uapi/drm/nouveau_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 7c077fced1d1..0e690bf19fc9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -189,6 +189,9 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain, if (!flags || domain & NOUVEAU_GEM_DOMAIN_CPU) flags |= TTM_PL_FLAG_SYSTEM; + if (domain & NOUVEAU_GEM_DOMAIN_COHERENT) + flags |= TTM_PL_FLAG_UNCACHED; + ret = nouveau_bo_new(dev, size, align, flags, tile_mode, tile_flags, NULL, NULL, pnvbo); if (ret) diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 0d7608dc1a34..5507eead5863 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -39,6 +39,7 @@ #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) #define NOUVEAU_GEM_DOMAIN_MAPPABLE (1 << 3) +#define NOUVEAU_GEM_DOMAIN_COHERENT (1 << 4) #define NOUVEAU_GEM_TILE_COMP 0x00030000 /* nv50-only */ #define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00 -- cgit v1.2.3 From df81b29c7b81b9d70ee29b7a263dd5009daa0ce4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Apr 2015 10:17:43 +0930 Subject: virtio_balloon: transitional interface Virtio 1.0 doesn't include a modern balloon device. But it's not a big change to support a transitional balloon device: this has the advantage of supporting existing drivers, transparently. Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Signed-off-by: Rusty Russell --- drivers/virtio/virtio_balloon.c | 21 +++++++++++++++------ include/uapi/linux/virtio_balloon.h | 32 ++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include/uapi') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 6a356e344f82..82e80e034f25 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -214,8 +214,8 @@ static inline void update_stat(struct virtio_balloon *vb, int idx, u16 tag, u64 val) { BUG_ON(idx >= VIRTIO_BALLOON_S_NR); - vb->stats[idx].tag = tag; - vb->stats[idx].val = val; + vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag); + vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val); } #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) @@ -283,18 +283,27 @@ static void virtballoon_changed(struct virtio_device *vdev) static inline s64 towards_target(struct virtio_balloon *vb) { - __le32 v; s64 target; + u32 num_pages; - virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v); + virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, + &num_pages); - target = le32_to_cpu(v); + /* Legacy balloon config space is LE, unlike all other devices. */ + if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) + num_pages = le32_to_cpu((__force __le32)num_pages); + + target = num_pages; return target - vb->num_pages; } static void update_balloon_size(struct virtio_balloon *vb) { - __le32 actual = cpu_to_le32(vb->num_pages); + u32 actual = vb->num_pages; + + /* Legacy balloon config space is LE, unlike all other devices. */ + if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) + actual = (__force u32)cpu_to_le32(actual); virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, &actual); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 4b0488f20b2e..984169a819ee 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -25,6 +25,7 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include #include #include @@ -38,9 +39,9 @@ struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ - __le32 num_pages; + __u32 num_pages; /* Number of pages we've actually got in balloon. */ - __le32 actual; + __u32 actual; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ @@ -51,9 +52,32 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ #define VIRTIO_BALLOON_S_NR 6 +/* + * Memory statistics structure. + * Driver fills an array of these structures and passes to device. + * + * NOTE: fields are laid out in a way that would make compiler add padding + * between and after fields, so we have to use compiler-specific attributes to + * pack it, to disable this padding. This also often causes compiler to + * generate suboptimal code. + * + * We maintain this statistics structure format for backwards compatibility, + * but don't follow this example. + * + * If implementing a similar structure, do something like the below instead: + * struct virtio_balloon_stat { + * __virtio16 tag; + * __u8 reserved[6]; + * __virtio64 val; + * }; + * + * In other words, add explicit reserved fields to align field and + * structure boundaries at field size, avoiding compiler padding + * without the packed attribute. + */ struct virtio_balloon_stat { - __u16 tag; - __u64 val; + __virtio16 tag; + __virtio64 val; } __attribute__((packed)); #endif /* _LINUX_VIRTIO_BALLOON_H */ -- cgit v1.2.3 From bfebd1cdb497a57757c83f5fbf1a29931591e2a4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 8 Mar 2015 00:51:47 -0500 Subject: dm: add full blk-mq support to request-based DM Commit e5863d9ad ("dm: allocate requests in target when stacking on blk-mq devices") served as the first step toward fully utilizing blk-mq in request-based DM -- it enabled stacking an old-style (request_fn) request_queue ontop of the underlying blk-mq device(s). That first step didn't improve performance of DM multipath ontop of fast blk-mq devices (e.g. NVMe) because the top-level old-style request_queue was severely limited by the queue_lock. The second step offered here enables stacking a blk-mq request_queue ontop of the underlying blk-mq device(s). This unlocks significant performance gains on fast blk-mq devices, Keith Busch tested on his NVMe testbed and offered this really positive news: "Just providing a performance update. All my fio tests are getting roughly equal performance whether accessed through the raw block device or the multipath device mapper (~470k IOPS). I could only push ~20% of the raw iops through dm before this conversion, so this latest tree is looking really solid from a performance standpoint." Signed-off-by: Mike Snitzer Tested-by: Keith Busch --- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-table.c | 11 +- drivers/md/dm.c | 317 +++++++++++++++++++++++++++++++++--------- include/uapi/linux/dm-ioctl.h | 4 +- 4 files changed, 261 insertions(+), 73 deletions(-) (limited to 'include/uapi') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index add6391f3f8e..c8f07e5a9a17 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1703,7 +1703,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 8, 0}, + .version = {1, 9, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 057312048b68..66600cab9fa5 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -18,6 +18,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "table" @@ -1695,9 +1696,13 @@ void dm_table_run_md_queue_async(struct dm_table *t) md = dm_table_get_md(t); queue = dm_get_md_queue(md); if (queue) { - spin_lock_irqsave(queue->queue_lock, flags); - blk_run_queue_async(queue); - spin_unlock_irqrestore(queue->queue_lock, flags); + if (queue->mq_ops) + blk_mq_run_hw_queues(queue, true); + else { + spin_lock_irqsave(queue->queue_lock, flags); + blk_run_queue_async(queue); + spin_unlock_irqrestore(queue->queue_lock, flags); + } } } EXPORT_SYMBOL(dm_table_run_md_queue_async); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5294e016e92b..3a66baac76ed 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -23,6 +23,7 @@ #include #include #include /* for rq_end_sector() */ +#include #include @@ -224,6 +225,9 @@ struct mapped_device { int last_rq_rw; sector_t last_rq_pos; ktime_t last_rq_start_time; + + /* for blk-mq request-based DM support */ + struct blk_mq_tag_set tag_set; }; /* @@ -1025,6 +1029,11 @@ static void end_clone_bio(struct bio *clone, int error) blk_update_request(tio->orig, 0, nr_bytes); } +static struct dm_rq_target_io *tio_from_request(struct request *rq) +{ + return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); +} + /* * Don't touch any member of the md after calling this function because * the md may be freed in dm_put() at the end of this function. @@ -1048,8 +1057,10 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * queue lock again. */ if (run_queue) { - if (!nr_requests_pending || - (nr_requests_pending >= md->queue->nr_congestion_on)) + if (md->queue->mq_ops) + blk_mq_run_hw_queues(md->queue, true); + else if (!nr_requests_pending || + (nr_requests_pending >= md->queue->nr_congestion_on)) blk_run_queue_async(md->queue); } @@ -1062,13 +1073,17 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) static void free_rq_clone(struct request *clone) { struct dm_rq_target_io *tio = clone->end_io_data; + struct mapped_device *md = tio->md; blk_rq_unprep_clone(clone); + if (clone->q && clone->q->mq_ops) tio->ti->type->release_clone_rq(clone); else - free_clone_request(tio->md, clone); - free_rq_tio(tio); + free_clone_request(md, clone); + + if (!md->queue->mq_ops) + free_rq_tio(tio); } /* @@ -1097,17 +1112,22 @@ static void dm_end_request(struct request *clone, int error) } free_rq_clone(clone); - blk_end_request_all(rq, error); + if (!rq->q->mq_ops) + blk_end_request_all(rq, error); + else + blk_mq_end_request(rq, error); rq_completed(md, rw, true); } static void dm_unprep_request(struct request *rq) { - struct dm_rq_target_io *tio = rq->special; + struct dm_rq_target_io *tio = tio_from_request(rq); struct request *clone = tio->clone; - rq->special = NULL; - rq->cmd_flags &= ~REQ_DONTPREP; + if (!rq->q->mq_ops) { + rq->special = NULL; + rq->cmd_flags &= ~REQ_DONTPREP; + } if (clone) free_rq_clone(clone); @@ -1116,18 +1136,29 @@ static void dm_unprep_request(struct request *rq) /* * Requeue the original request of a clone. */ -static void dm_requeue_unmapped_original_request(struct mapped_device *md, - struct request *rq) +static void old_requeue_request(struct request *rq) { - int rw = rq_data_dir(rq); struct request_queue *q = rq->q; unsigned long flags; - dm_unprep_request(rq); - spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); spin_unlock_irqrestore(q->queue_lock, flags); +} + +static void dm_requeue_unmapped_original_request(struct mapped_device *md, + struct request *rq) +{ + int rw = rq_data_dir(rq); + + dm_unprep_request(rq); + + if (!rq->q->mq_ops) + old_requeue_request(rq); + else { + blk_mq_requeue_request(rq); + blk_mq_kick_requeue_list(rq->q); + } rq_completed(md, rw, false); } @@ -1139,35 +1170,44 @@ static void dm_requeue_unmapped_request(struct request *clone) dm_requeue_unmapped_original_request(tio->md, tio->orig); } -static void __stop_queue(struct request_queue *q) -{ - blk_stop_queue(q); -} - -static void stop_queue(struct request_queue *q) +static void old_stop_queue(struct request_queue *q) { unsigned long flags; + if (blk_queue_stopped(q)) + return; + spin_lock_irqsave(q->queue_lock, flags); - __stop_queue(q); + blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } -static void __start_queue(struct request_queue *q) +static void stop_queue(struct request_queue *q) { - if (blk_queue_stopped(q)) - blk_start_queue(q); + if (!q->mq_ops) + old_stop_queue(q); + else + blk_mq_stop_hw_queues(q); } -static void start_queue(struct request_queue *q) +static void old_start_queue(struct request_queue *q) { unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __start_queue(q); + if (blk_queue_stopped(q)) + blk_start_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } +static void start_queue(struct request_queue *q) +{ + if (!q->mq_ops) + old_start_queue(q); + else + blk_mq_start_stopped_hw_queues(q, true); +} + static void dm_done(struct request *clone, int error, bool mapped) { int r = error; @@ -1206,13 +1246,20 @@ static void dm_done(struct request *clone, int error, bool mapped) static void dm_softirq_done(struct request *rq) { bool mapped = true; - struct dm_rq_target_io *tio = rq->special; + struct dm_rq_target_io *tio = tio_from_request(rq); struct request *clone = tio->clone; + int rw; if (!clone) { - blk_end_request_all(rq, tio->error); - rq_completed(tio->md, rq_data_dir(rq), false); - free_rq_tio(tio); + rw = rq_data_dir(rq); + if (!rq->q->mq_ops) { + blk_end_request_all(rq, tio->error); + rq_completed(tio->md, rw, false); + free_rq_tio(tio); + } else { + blk_mq_end_request(rq, tio->error); + rq_completed(tio->md, rw, false); + } return; } @@ -1228,7 +1275,7 @@ static void dm_softirq_done(struct request *rq) */ static void dm_complete_request(struct request *rq, int error) { - struct dm_rq_target_io *tio = rq->special; + struct dm_rq_target_io *tio = tio_from_request(rq); tio->error = error; blk_complete_request(rq); @@ -1247,7 +1294,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error) } /* - * Called with the clone's queue lock held + * Called with the clone's queue lock held (for non-blk-mq) */ static void end_clone_request(struct request *clone, int error) { @@ -1808,6 +1855,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, static void map_tio_request(struct kthread_work *work); +static void init_tio(struct dm_rq_target_io *tio, struct request *rq, + struct mapped_device *md) +{ + tio->md = md; + tio->ti = NULL; + tio->clone = NULL; + tio->orig = rq; + tio->error = 0; + memset(&tio->info, 0, sizeof(tio->info)); + init_kthread_work(&tio->work, map_tio_request); +} + static struct dm_rq_target_io *prep_tio(struct request *rq, struct mapped_device *md, gfp_t gfp_mask) { @@ -1819,13 +1878,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq, if (!tio) return NULL; - tio->md = md; - tio->ti = NULL; - tio->clone = NULL; - tio->orig = rq; - tio->error = 0; - memset(&tio->info, 0, sizeof(tio->info)); - init_kthread_work(&tio->work, map_tio_request); + init_tio(tio, rq, md); table = dm_get_live_table(md, &srcu_idx); if (!dm_table_mq_request_based(table)) { @@ -1869,11 +1922,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) * DM_MAPIO_REQUEUE : the original request needs to be requeued * < 0 : the request was completed due to failure */ -static int map_request(struct dm_target *ti, struct request *rq, +static int map_request(struct dm_rq_target_io *tio, struct request *rq, struct mapped_device *md) { int r; - struct dm_rq_target_io *tio = rq->special; + struct dm_target *ti = tio->ti; struct request *clone = NULL; if (tio->clone) { @@ -1888,7 +1941,7 @@ static int map_request(struct dm_target *ti, struct request *rq, } if (IS_ERR(clone)) return DM_MAPIO_REQUEUE; - if (setup_clone(clone, rq, tio, GFP_KERNEL)) { + if (setup_clone(clone, rq, tio, GFP_NOIO)) { /* -ENOMEM */ ti->type->release_clone_rq(clone); return DM_MAPIO_REQUEUE; @@ -1929,13 +1982,16 @@ static void map_tio_request(struct kthread_work *work) struct request *rq = tio->orig; struct mapped_device *md = tio->md; - if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE) + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) dm_requeue_unmapped_original_request(md, rq); } static void dm_start_request(struct mapped_device *md, struct request *orig) { - blk_start_request(orig); + if (!orig->q->mq_ops) + blk_start_request(orig); + else + blk_mq_start_request(orig); atomic_inc(&md->pending[rq_data_dir(orig)]); if (md->seq_rq_merge_deadline_usecs) { @@ -2045,7 +2101,7 @@ static void dm_request_fn(struct request_queue *q) dm_start_request(md, rq); - tio = rq->special; + tio = tio_from_request(rq); /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; queue_kthread_work(&md->kworker, &tio->work); @@ -2142,7 +2198,7 @@ static void dm_init_md_queue(struct mapped_device *md) { /* * Request-based dm devices cannot be stacked on top of bio-based dm - * devices. The type of this dm device has not been decided yet. + * devices. The type of this dm device may not have been decided yet. * The type is decided at the first table loading time. * To prevent problematic device stacking, clear the queue flag * for request stacking support until then. @@ -2150,7 +2206,15 @@ static void dm_init_md_queue(struct mapped_device *md) * This queue is new, so no concurrency on the queue_flags. */ queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); +} + +static void dm_init_old_md_queue(struct mapped_device *md) +{ + dm_init_md_queue(md); + /* + * Initialize aspects of queue that aren't relevant for blk-mq + */ md->queue->queuedata = md; md->queue->backing_dev_info.congested_fn = dm_any_congested; md->queue->backing_dev_info.congested_data = md; @@ -2273,6 +2337,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { int minor = MINOR(disk_devt(md->disk)); + bool using_blk_mq = !!md->queue->mq_ops; unlock_fs(md); destroy_workqueue(md->wq); @@ -2298,6 +2363,8 @@ static void free_dev(struct mapped_device *md) del_gendisk(md->disk); put_disk(md->disk); blk_cleanup_queue(md->queue); + if (using_blk_mq) + blk_mq_free_tag_set(&md->tag_set); bdput(md->bdev); free_minor(minor); @@ -2457,7 +2524,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, * This must be done before setting the queue restrictions, * because request-based dm may be run just after the setting. */ - if (dm_table_request_based(t) && !blk_queue_stopped(q)) + if (dm_table_request_based(t)) stop_queue(q); __bind_mempools(md, t); @@ -2539,14 +2606,6 @@ unsigned dm_get_md_type(struct mapped_device *md) return md->type; } -static bool dm_md_type_request_based(struct mapped_device *md) -{ - unsigned table_type = dm_get_md_type(md); - - return (table_type == DM_TYPE_REQUEST_BASED || - table_type == DM_TYPE_MQ_REQUEST_BASED); -} - struct target_type *dm_get_immutable_target_type(struct mapped_device *md) { return md->immutable_target_type; @@ -2563,6 +2622,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); +static void init_rq_based_worker_thread(struct mapped_device *md) +{ + /* Initialize the request-based DM worker thread */ + init_kthread_worker(&md->kworker); + md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, + "kdmwork-%s", dm_device_name(md)); +} + /* * Fully initialize a request-based queue (->elevator, ->request_fn, etc). */ @@ -2571,29 +2638,131 @@ static int dm_init_request_based_queue(struct mapped_device *md) struct request_queue *q = NULL; if (md->queue->elevator) - return 1; + return 0; /* Fully initialize the queue */ q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); if (!q) - return 0; + return -EINVAL; /* disable dm_request_fn's merge heuristic by default */ md->seq_rq_merge_deadline_usecs = 0; md->queue = q; - dm_init_md_queue(md); + dm_init_old_md_queue(md); blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); - /* Also initialize the request-based DM worker thread */ - init_kthread_worker(&md->kworker); - md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, - "kdmwork-%s", dm_device_name(md)); + init_rq_based_worker_thread(md); elv_register_queue(md->queue); - return 1; + return 0; +} + +static int dm_mq_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx, + unsigned int numa_node) +{ + struct mapped_device *md = data; + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); + + /* + * Must initialize md member of tio, otherwise it won't + * be available in dm_mq_queue_rq. + */ + tio->md = md; + + return 0; +} + +static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); + struct mapped_device *md = tio->md; + int srcu_idx; + struct dm_table *map = dm_get_live_table(md, &srcu_idx); + struct dm_target *ti; + sector_t pos; + + /* always use block 0 to find the target for flushes for now */ + pos = 0; + if (!(rq->cmd_flags & REQ_FLUSH)) + pos = blk_rq_pos(rq); + + ti = dm_table_find_target(map, pos); + if (!dm_target_is_valid(ti)) { + dm_put_live_table(md, srcu_idx); + DMERR_LIMIT("request attempted access beyond the end of device"); + /* + * Must perform setup, that rq_completed() requires, + * before returning BLK_MQ_RQ_QUEUE_ERROR + */ + dm_start_request(md, rq); + return BLK_MQ_RQ_QUEUE_ERROR; + } + dm_put_live_table(md, srcu_idx); + + if (ti->type->busy && ti->type->busy(ti)) + return BLK_MQ_RQ_QUEUE_BUSY; + + dm_start_request(md, rq); + + /* Init tio using md established in .init_request */ + init_tio(tio, rq, md); + + /* Establish tio->ti before queuing work (map_tio_request) */ + tio->ti = ti; + queue_kthread_work(&md->kworker, &tio->work); + + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_ops dm_mq_ops = { + .queue_rq = dm_mq_queue_rq, + .map_queue = blk_mq_map_queue, + .complete = dm_softirq_done, + .init_request = dm_mq_init_request, +}; + +static int dm_init_request_based_blk_mq_queue(struct mapped_device *md) +{ + struct request_queue *q; + int err; + + memset(&md->tag_set, 0, sizeof(md->tag_set)); + md->tag_set.ops = &dm_mq_ops; + md->tag_set.queue_depth = BLKDEV_MAX_RQ; + md->tag_set.numa_node = NUMA_NO_NODE; + md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + md->tag_set.nr_hw_queues = 1; + md->tag_set.cmd_size = sizeof(struct dm_rq_target_io); + md->tag_set.driver_data = md; + + err = blk_mq_alloc_tag_set(&md->tag_set); + if (err) + return err; + + q = blk_mq_init_allocated_queue(&md->tag_set, md->queue); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_tag_set; + } + md->queue = q; + dm_init_md_queue(md); + + /* backfill 'mq' sysfs registration normally done in blk_register_queue */ + blk_mq_register_disk(md->disk); + + init_rq_based_worker_thread(md); + + return 0; + +out_tag_set: + blk_mq_free_tag_set(&md->tag_set); + return err; } /* @@ -2601,15 +2770,29 @@ static int dm_init_request_based_queue(struct mapped_device *md) */ int dm_setup_md_queue(struct mapped_device *md) { - if (dm_md_type_request_based(md)) { - if (!dm_init_request_based_queue(md)) { + int r; + unsigned md_type = dm_get_md_type(md); + + switch (md_type) { + case DM_TYPE_REQUEST_BASED: + r = dm_init_request_based_queue(md); + if (r) { DMWARN("Cannot initialize queue for request-based mapped device"); - return -EINVAL; + return r; } - } else { - /* bio-based specific initialization */ + break; + case DM_TYPE_MQ_REQUEST_BASED: + r = dm_init_request_based_blk_mq_queue(md); + if (r) { + DMWARN("Cannot initialize queue for request-based blk-mq mapped device"); + return r; + } + break; + case DM_TYPE_BIO_BASED: + dm_init_old_md_queue(md); blk_queue_make_request(md->queue, dm_make_request); blk_queue_merge_bvec(md->queue, dm_merge_bvec); + break; } return 0; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 889f3a5b7b18..eac8c3641f39 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 30 +#define DM_VERSION_MINOR 31 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2014-12-22)" +#define DM_VERSION_EXTRA "-ioctl (2015-3-12)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From a166151cbe33b53221c24259e4a7201064b3ba79 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Apr 2015 12:55:45 -0700 Subject: bpf: fix bpf helpers to use skb->mac_header relative offsets For the short-term solution, lets fix bpf helper functions to use skb->mac_header relative offsets instead of skb->data in order to get the same eBPF programs with cls_bpf and act_bpf work on ingress and egress qdisc path. We need to ensure that mac_header is set before calling into programs. This is effectively the first option from below referenced discussion. More long term solution for LD_ABS|LD_IND instructions will be more intrusive but also more beneficial than this, and implemented later as it's too risky at this point in time. I.e., we plan to look into the option of moving skb_pull() out of eth_type_trans() and into netif_receive_skb() as has been suggested as second option. Meanwhile, this solution ensures ingress can be used with eBPF, too, and that we won't run into ABI troubles later. For dealing with negative offsets inside eBPF helper functions, we've implemented bpf_skb_clone_unwritable() to test for unwriteable headers. Reference: http://thread.gmane.org/gmane.linux.network/359129/focus=359694 Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- include/uapi/linux/filter.h | 7 +++++-- net/core/filter.c | 41 ++++++++++++++++++++++++++++++++--------- net/sched/act_bpf.c | 3 +++ net/sched/cls_bpf.c | 3 +++ samples/bpf/tcbpf1_kern.c | 16 ++++++---------- 6 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5c1cee11f777..a9ebdf5701e8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -177,7 +177,7 @@ enum bpf_func_id { /** * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet * @skb: pointer to skb - * @offset: offset within packet from skb->data + * @offset: offset within packet from skb->mac_header * @from: pointer where to copy bytes from * @len: number of bytes to store into packet * @flags: bit 0 - if true, recompute skb->csum diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 34c7936ca114..c97340e43dd6 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -79,8 +79,11 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_RANDOM 56 #define SKF_AD_VLAN_TPID 60 #define SKF_AD_MAX 64 -#define SKF_NET_OFF (-0x100000) -#define SKF_LL_OFF (-0x200000) +#define SKF_NET_OFF (-0x100000) +#define SKF_LL_OFF (-0x200000) + +#define BPF_NET_OFF SKF_NET_OFF +#define BPF_LL_OFF SKF_LL_OFF #endif /* _UAPI__LINUX_FILTER_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index b669e75d2b36..bf831a85c315 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +/** + * bpf_skb_clone_not_writable - is the header of a clone not writable + * @skb: buffer to check + * @len: length up to which to write, can be negative + * + * Returns true if modifying the header part of the cloned buffer + * does require the data to be copied. I.e. this version works with + * negative lengths needed for eBPF case! + */ +static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len) +{ + return skb_header_cloned(skb) || + (int) skb_headroom(skb) + len > skb->hdr_len; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; + int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; char buf[16]; @@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) * * so check for invalid 'offset' and too large 'len' */ - if (unlikely(offset > 0xffff || len > sizeof(buf))) + if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len)) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + len))) return -EFAULT; ptr = skb_header_pointer(skb, offset, len, buf); @@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) -static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); @@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags) +static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags); + int offset = (int) r2; __sum16 sum, *ptr; - if (unlikely(offset > 0xffff)) + if (unlikely((u32) offset > 0xffff)) return -EFAULT; - if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum))) + offset -= skb->data - skb_mac_header(skb); + if (unlikely(skb_cloned(skb) && + bpf_skb_clone_unwritable(skb, offset + sizeof(sum)))) return -EFAULT; ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 4d2cede17468..dc6a2d324bd8 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_bpf *prog = act->priv; int action, filter_res; + if (unlikely(!skb_mac_header_was_set(skb))) + return TC_ACT_UNSPEC; + spin_lock(&prog->tcf_lock); prog->tcf_tm.lastuse = jiffies; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5c4171c5d2bd..91bd9c19471d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; + if (unlikely(!skb_mac_header_was_set(skb))) + return -1; + /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 7cf3f42a6e39..7c27710f8296 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -4,6 +4,8 @@ #include #include #include +#include + #include "bpf_helpers.h" /* compiler workaround */ @@ -14,18 +16,12 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac) bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1); } -/* use 1 below for ingress qdisc and 0 for egress */ -#if 0 -#undef ETH_HLEN -#define ETH_HLEN 0 -#endif - #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check)) #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos)) static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) { - __u8 old_tos = load_byte(skb, TOS_OFF); + __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF); bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2); bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0); @@ -38,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos) static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) { - __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF)); + __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip)); bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip)); @@ -48,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip) #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest)) static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) { - __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF)); + __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF)); bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port)); bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0); @@ -57,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port) SEC("classifier") int bpf_prog1(struct __sk_buff *skb) { - __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); + __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol)); long *value; if (proto == IPPROTO_TCP) { -- cgit v1.2.3 From e15f431fe2d53cd4673510736da7d4fa1090e096 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Apr 2015 12:44:47 -0700 Subject: errno.h: Improve ENOSYS's comment ENOSYS is the mechanism used by user code to detect whether the running kernel implements a given system call. It should not be returned by anything except an unimplemented system call. Unfortunately, it is rather frequently used in the kernel to indicate that various new functions of existing system calls are not implemented. This should be discouraged. Improve the comment in errno.h to help clarify ENOSYS's purpose. Signed-off-by: Andy Lutomirski Cc: Pavel Machek Cc: Michael Kerrisk Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/errno.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h index 1e1ea6e6e7a5..88e0914cf2d9 100644 --- a/include/uapi/asm-generic/errno.h +++ b/include/uapi/asm-generic/errno.h @@ -6,7 +6,16 @@ #define EDEADLK 35 /* Resource deadlock would occur */ #define ENAMETOOLONG 36 /* File name too long */ #define ENOLCK 37 /* No record locks available */ -#define ENOSYS 38 /* Function not implemented */ + +/* + * This error code is special: arch syscall entry code will return + * -ENOSYS if users try to call a syscall that doesn't exist. To keep + * failures of syscalls that really do exist distinguishable from + * failures due to attempts to use a nonexistent syscall, syscall + * implementations should refrain from returning -ENOSYS. + */ +#define ENOSYS 38 /* Invalid system call number */ + #define ENOTEMPTY 39 /* Directory not empty */ #define ELOOP 40 /* Too many symbolic links encountered */ #define EWOULDBLOCK EAGAIN /* Operation would block */ -- cgit v1.2.3 From 43c499dc2778b96e21ed14e1a71e7b117a0b406f Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 17 Apr 2015 22:53:32 +0530 Subject: ALSA: asound.h - use SNDRV_CTL_ELEM_ID_NAME_MAXLEN we have defined SNDRV_CTL_ELEM_ID_NAME_MAXLEN as size of name array so use this define instead of numeric value Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 46145a5277fe..a45be6bdcf5b 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -864,7 +864,7 @@ struct snd_ctl_elem_id { snd_ctl_elem_iface_t iface; /* interface identifier */ unsigned int device; /* device/client number */ unsigned int subdevice; /* subdevice (substream) number */ - unsigned char name[44]; /* ASCII name of item */ + unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; /* ASCII name of item */ unsigned int index; /* index of item */ }; -- cgit v1.2.3 From cec32a47010647e8b0603726ebb75b990a4057a4 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 17 Apr 2015 19:12:41 +0200 Subject: media-bus: Fixup RGB444_1X12, RGB565_1X16, and YUV8_1X24 media bus format Change the constant values for RGB444_1X12, RGB565_1X16, and YUV8_1X24 media bus formats in anticipation of a merge conflict with the media tree, where the old values are already taken by RBG888_1X24, RGB888_1X32_PADHI, and VUY8_1X24, respectively. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- Documentation/DocBook/media/v4l/subdev-formats.xml | 6 +++--- include/uapi/linux/media-bus-format.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 18b71aff48c9..553a38024745 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -196,7 +196,7 @@ see . MEDIA_BUS_FMT_RGB444_1X12 - 0x100e + 0x1016 &dash-ent-20; r3 @@ -326,7 +326,7 @@ see . MEDIA_BUS_FMT_RGB565_1X16 - 0x100f + 0x1017 &dash-ent-16; r4 @@ -3049,7 +3049,7 @@ see . MEDIA_BUS_FMT_YUV8_1X24 - 0x2024 + 0x2025 - - diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 83ea46f4be51..73c78f18a328 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -33,13 +33,13 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1016 */ -#define MEDIA_BUS_FMT_RGB444_1X12 0x100e +/* RGB - next is 0x1018 */ +#define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE 0x1003 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE 0x1004 -#define MEDIA_BUS_FMT_RGB565_1X16 0x100f +#define MEDIA_BUS_FMT_RGB565_1X16 0x1017 #define MEDIA_BUS_FMT_BGR565_2X8_BE 0x1005 #define MEDIA_BUS_FMT_BGR565_2X8_LE 0x1006 #define MEDIA_BUS_FMT_RGB565_2X8_BE 0x1007 @@ -56,7 +56,7 @@ #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA 0x1012 #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d -/* YUV (including grey) - next is 0x2025 */ +/* YUV (including grey) - next is 0x2026 */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 #define MEDIA_BUS_FMT_UV8_1X8 0x2015 #define MEDIA_BUS_FMT_UYVY8_1_5X8 0x2002 @@ -82,7 +82,7 @@ #define MEDIA_BUS_FMT_VYUY10_1X20 0x201b #define MEDIA_BUS_FMT_YUYV10_1X20 0x200d #define MEDIA_BUS_FMT_YVYU10_1X20 0x200e -#define MEDIA_BUS_FMT_YUV8_1X24 0x2024 +#define MEDIA_BUS_FMT_YUV8_1X24 0x2025 #define MEDIA_BUS_FMT_YUV10_1X30 0x2016 #define MEDIA_BUS_FMT_AYUV8_1X32 0x2017 #define MEDIA_BUS_FMT_UYVY12_2X12 0x201c -- cgit v1.2.3 From 0ad46af8a618fc38e0cdc3927cfa9f7b42cc9423 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 14 Apr 2015 17:30:04 -0700 Subject: target: Version 2 of TCMU ABI The initial version of TCMU (in 3.18) does not properly handle bidirectional SCSI commands -- those with both an in and out buffer. In looking to fix this it also became clear that TCMU's support for adding new types of entries (opcodes) to the command ring was broken. We need to fix this now, so that future issues can be handled properly by adding new opcodes. We make the most of this ABI break by enabling bidi cmd handling within TCMP_OP_CMD opcode. Add an iov_bidi_cnt field to tcmu_cmd_entry.req. This enables TCMU to describe bidi commands, but further kernel work is needed for full bidi support. Enlarge tcmu_cmd_entry_hdr by 32 bits by pulling in cmd_id and __pad1. Turn __pad1 into two 8 bit flags fields, for kernel-set and userspace-set flags, "kflags" and "uflags" respectively. Update version fields so userspace can tell the interface is changed. Update tcmu-design.txt with details of how new stuff works: - Specify an additional requirement for userspace to set UNKNOWN_OP (bit 0) in hdr.uflags for unknown/unhandled opcodes. - Define how Data-In and Data-Out fields are described in req.iov[] Changed in v2: - Change name of SKIPPED bit to UNKNOWN bit - PAD op does not set the bit any more - Change len_op helper functions to take just len_op, not the whole struct - Change version to 2 in missed spots, and use defines - Add 16 unused bytes to cmd_entry.req, in case additional SAM cmd parameters need to be included - Add iov_dif_cnt field to specify buffers used for DIF info in iov[] - Rearrange fields to naturally align cdb_off - Handle if userspace sets UNKNOWN_OP by indicating failure of the cmd - Wrap some overly long UPDATE_HEAD lines (Add missing req.iov_bidi_cnt + req.iov_dif_cnt zeroing - Ilias) Signed-off-by: Andy Grover Reviewed-by: Ilias Tsitsimpis Signed-off-by: Nicholas Bellinger --- Documentation/target/tcmu-design.txt | 43 ++++++++++++++++++++------------ drivers/target/target_core_user.c | 46 ++++++++++++++++++++++++++--------- include/uapi/linux/target_core_user.h | 44 ++++++++++++++++++++------------- 3 files changed, 89 insertions(+), 44 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index 5518465290bf..43e94ea6d2ca 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -138,27 +138,40 @@ signals the kernel via a 4-byte write(). When cmd_head equals cmd_tail, the ring is empty -- no commands are currently waiting to be processed by userspace. -TCMU commands start with a common header containing "len_op", a 32-bit -value that stores the length, as well as the opcode in the lowest -unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and -TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it -should skip ahead by the bytes in "length". (The kernel inserts PAD -entries to ensure each CMD entry fits contigously into the circular -buffer.) - -When userspace handles a CMD, it finds the SCSI CDB (Command Data -Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the -start of the overall shared memory region, not the entry. The data -in/out buffers are accessible via tht req.iov[] array. Note that -each iov.iov_base is also an offset from the start of the region. - -TCMU currently does not support BIDI operations. +TCMU commands are 8-byte aligned. They start with a common header +containing "len_op", a 32-bit value that stores the length, as well as +the opcode in the lowest unused bits. It also contains cmd_id and +flags fields for setting by the kernel (kflags) and userspace +(uflags). + +Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD. + +When the opcode is CMD, the entry in the command ring is a struct +tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via +tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the +overall shared memory region, not the entry. The data in/out buffers +are accessible via tht req.iov[] array. iov_cnt contains the number of +entries in iov[] needed to describe either the Data-In or Data-Out +buffers. For bidirectional commands, iov_cnt specifies how many iovec +entries cover the Data-Out area, and iov_bidi_count specifies how many +iovec entries immediately after that in iov[] cover the Data-In +area. Just like other fields, iov.iov_base is an offset from the start +of the region. When completing a command, userspace sets rsp.scsi_status, and rsp.sense_buffer if necessary. Userspace then increments mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the kernel via the UIO method, a 4-byte write to the file descriptor. +When the opcode is PAD, userspace only updates cmd_tail as above -- +it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry +is contiguous within the command ring.) + +More opcodes may be added in the future. If userspace encounters an +opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in +hdr.uflags, update cmd_tail, and proceed with processing additional +commands, if any. + The Data Area: This is shared-memory space after the command ring. The organization diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1fbf304a9491..dbc872a6c981 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -344,8 +344,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) entry = (void *) mb + CMDR_OFF + cmd_head; tcmu_flush_dcache_range(entry, sizeof(*entry)); - tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD); - tcmu_hdr_set_len(&entry->hdr, pad_size); + tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD); + tcmu_hdr_set_len(&entry->hdr.len_op, pad_size); + entry->hdr.cmd_id = 0; /* not used for PAD */ + entry->hdr.kflags = 0; + entry->hdr.uflags = 0; UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size); @@ -355,9 +358,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) entry = (void *) mb + CMDR_OFF + cmd_head; tcmu_flush_dcache_range(entry, sizeof(*entry)); - tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD); - tcmu_hdr_set_len(&entry->hdr, command_size); - entry->cmd_id = tcmu_cmd->cmd_id; + tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD); + tcmu_hdr_set_len(&entry->hdr.len_op, command_size); + entry->hdr.cmd_id = tcmu_cmd->cmd_id; + entry->hdr.kflags = 0; + entry->hdr.uflags = 0; /* * Fix up iovecs, and handle if allocation in data ring wrapped. @@ -407,6 +412,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) kunmap_atomic(from); } entry->req.iov_cnt = iov_cnt; + entry->req.iov_bidi_cnt = 0; + entry->req.iov_dif_cnt = 0; /* All offsets relative to mb_addr, not start of entry! */ cdb_off = CMDR_OFF + cmd_head + base_command_size; @@ -464,6 +471,17 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * return; } + if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) { + UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); + pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", + cmd->se_cmd); + transport_generic_request_failure(cmd->se_cmd, + TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); + cmd->se_cmd = NULL; + kmem_cache_free(tcmu_cmd_cache, cmd); + return; + } + if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer, se_cmd->scsi_sense_length); @@ -542,14 +560,16 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) tcmu_flush_dcache_range(entry, sizeof(*entry)); - if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) { - UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size); + if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) { + UPDATE_HEAD(udev->cmdr_last_cleaned, + tcmu_hdr_get_len(entry->hdr.len_op), + udev->cmdr_size); continue; } - WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD); + WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD); spin_lock(&udev->commands_lock); - cmd = idr_find(&udev->commands, entry->cmd_id); + cmd = idr_find(&udev->commands, entry->hdr.cmd_id); if (cmd) idr_remove(&udev->commands, cmd->cmd_id); spin_unlock(&udev->commands_lock); @@ -562,7 +582,9 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) tcmu_handle_completion(cmd, entry); - UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size); + UPDATE_HEAD(udev->cmdr_last_cleaned, + tcmu_hdr_get_len(entry->hdr.len_op), + udev->cmdr_size); handled++; } @@ -840,14 +862,14 @@ static int tcmu_configure_device(struct se_device *dev) udev->data_size = TCMU_RING_SIZE - CMDR_SIZE; mb = udev->mb_addr; - mb->version = 1; + mb->version = TCMU_MAILBOX_VERSION; mb->cmdr_off = CMDR_OFF; mb->cmdr_size = udev->cmdr_size; WARN_ON(!PAGE_ALIGNED(udev->data_off)); WARN_ON(udev->data_size % PAGE_SIZE); - info->version = "1"; + info->version = xstr(TCMU_MAILBOX_VERSION); info->mem[0].name = "tcm-user command & data buffer"; info->mem[0].addr = (phys_addr_t) udev->mb_addr; diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index b483d1909d3e..b67f99d3c520 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -6,7 +6,7 @@ #include #include -#define TCMU_VERSION "1.0" +#define TCMU_VERSION "2.0" /* * Ring Design @@ -39,9 +39,13 @@ * should process the next packet the same way, and so on. */ -#define TCMU_MAILBOX_VERSION 1 +#define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ +/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */ +#define xstr(s) str(s) +#define str(s) #s + struct tcmu_mailbox { __u16 version; __u16 flags; @@ -64,31 +68,36 @@ enum tcmu_opcode { * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode. */ struct tcmu_cmd_entry_hdr { - __u32 len_op; + __u32 len_op; + __u16 cmd_id; + __u8 kflags; +#define TCMU_UFLAG_UNKNOWN_OP 0x1 + __u8 uflags; + } __packed; #define TCMU_OP_MASK 0x7 -static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr) +static inline enum tcmu_opcode tcmu_hdr_get_op(__u32 len_op) { - return hdr->len_op & TCMU_OP_MASK; + return len_op & TCMU_OP_MASK; } -static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op) +static inline void tcmu_hdr_set_op(__u32 *len_op, enum tcmu_opcode op) { - hdr->len_op &= ~TCMU_OP_MASK; - hdr->len_op |= (op & TCMU_OP_MASK); + *len_op &= ~TCMU_OP_MASK; + *len_op |= (op & TCMU_OP_MASK); } -static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr) +static inline __u32 tcmu_hdr_get_len(__u32 len_op) { - return hdr->len_op & ~TCMU_OP_MASK; + return len_op & ~TCMU_OP_MASK; } -static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len) +static inline void tcmu_hdr_set_len(__u32 *len_op, __u32 len) { - hdr->len_op &= TCMU_OP_MASK; - hdr->len_op |= len; + *len_op &= TCMU_OP_MASK; + *len_op |= len; } /* Currently the same as SCSI_SENSE_BUFFERSIZE */ @@ -97,13 +106,14 @@ static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len) struct tcmu_cmd_entry { struct tcmu_cmd_entry_hdr hdr; - uint16_t cmd_id; - uint16_t __pad1; - union { struct { + uint32_t iov_cnt; + uint32_t iov_bidi_cnt; + uint32_t iov_dif_cnt; uint64_t cdb_off; - uint64_t iov_cnt; + uint64_t __pad1; + uint64_t __pad2; struct iovec iov[0]; } req; struct { -- cgit v1.2.3 From e928e9cb3601ce240189bfea05b67ebd391c85ae Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 20 Mar 2015 20:39:41 +1100 Subject: KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation. Some PowerNV systems include a hardware random-number generator. This HWRNG is present on POWER7+ and POWER8 chips and is capable of generating one 64-bit random number every microsecond. The random numbers are produced by sampling a set of 64 unstable high-frequency oscillators and are almost completely entropic. PAPR defines an H_RANDOM hypercall which guests can use to obtain one 64-bit random sample from the HWRNG. This adds a real-mode implementation of the H_RANDOM hypercall. This hypercall was implemented in real mode because the latency of reading the HWRNG is generally small compared to the latency of a guest exit and entry for all the threads in the same virtual core. Userspace can detect the presence of the HWRNG and the H_RANDOM implementation by querying the KVM_CAP_PPC_HWRNG capability. The H_RANDOM hypercall implementation will only be invoked when the guest does an H_RANDOM hypercall if userspace first enables the in-kernel H_RANDOM implementation using the KVM_CAP_PPC_ENABLE_HCALL capability. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 17 +++++ arch/powerpc/include/asm/archrandom.h | 11 ++- arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/kvm/book3s_hv_builtin.c | 15 +++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 115 ++++++++++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 3 + arch/powerpc/platforms/powernv/rng.c | 29 ++++++++ include/uapi/linux/kvm.h | 1 + 8 files changed, 191 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bc9f6fe44e27..9fa2bf8c3f6f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3573,3 +3573,20 @@ struct { @ar - access register number KVM handlers should exit to userspace with rc = -EREMOTE. + + +8. Other capabilities. +---------------------- + +This section lists capabilities that give information about other +features of the KVM implementation. + +8.1 KVM_CAP_PPC_HWRNG + +Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that that the kernel has an implementation of the +H_RANDOM hypercall backed by a hardware random-number generator. +If present, the kernel H_RANDOM handler can be enabled for guest use +with the KVM_CAP_PPC_ENABLE_HCALL capability. diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index bde531103638..0cc6eedc4780 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -30,8 +30,6 @@ static inline int arch_has_random(void) return !!ppc_md.get_random_long; } -int powernv_get_random_long(unsigned long *v); - static inline int arch_get_random_seed_long(unsigned long *v) { return 0; @@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void) #endif /* CONFIG_ARCH_RANDOM */ +#ifdef CONFIG_PPC_POWERNV +int powernv_hwrng_present(void); +int powernv_get_random_long(unsigned long *v); +int powernv_get_random_real_mode(unsigned long *v); +#else +static inline int powernv_hwrng_present(void) { return 0; } +static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; } +#endif + #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 46bf652c9169..b8475daad884 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) return kvm->arch.kvm_ops == kvmppc_hv_ops; } +extern int kvmppc_hwrng_present(void); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 1f083ff8a61a..1954a1c4b1f9 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -21,6 +21,7 @@ #include #include #include +#include #define KVM_CMA_CHUNK_ORDER 18 @@ -169,3 +170,17 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) return 0; } EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); + +int kvmppc_hwrng_present(void) +{ + return powernv_hwrng_present(); +} +EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); + +long kvmppc_h_random(struct kvm_vcpu *vcpu) +{ + if (powernv_get_random_real_mode(&vcpu->arch.gpr[4])) + return H_SUCCESS; + + return H_HARDWARE; +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6cbf1630cb70..0814ca122fcd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1839,6 +1839,121 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .long 0 /* 0x138 */ + .long 0 /* 0x13c */ + .long 0 /* 0x140 */ + .long 0 /* 0x144 */ + .long 0 /* 0x148 */ + .long 0 /* 0x14c */ + .long 0 /* 0x150 */ + .long 0 /* 0x154 */ + .long 0 /* 0x158 */ + .long 0 /* 0x15c */ + .long 0 /* 0x160 */ + .long 0 /* 0x164 */ + .long 0 /* 0x168 */ + .long 0 /* 0x16c */ + .long 0 /* 0x170 */ + .long 0 /* 0x174 */ + .long 0 /* 0x178 */ + .long 0 /* 0x17c */ + .long 0 /* 0x180 */ + .long 0 /* 0x184 */ + .long 0 /* 0x188 */ + .long 0 /* 0x18c */ + .long 0 /* 0x190 */ + .long 0 /* 0x194 */ + .long 0 /* 0x198 */ + .long 0 /* 0x19c */ + .long 0 /* 0x1a0 */ + .long 0 /* 0x1a4 */ + .long 0 /* 0x1a8 */ + .long 0 /* 0x1ac */ + .long 0 /* 0x1b0 */ + .long 0 /* 0x1b4 */ + .long 0 /* 0x1b8 */ + .long 0 /* 0x1bc */ + .long 0 /* 0x1c0 */ + .long 0 /* 0x1c4 */ + .long 0 /* 0x1c8 */ + .long 0 /* 0x1cc */ + .long 0 /* 0x1d0 */ + .long 0 /* 0x1d4 */ + .long 0 /* 0x1d8 */ + .long 0 /* 0x1dc */ + .long 0 /* 0x1e0 */ + .long 0 /* 0x1e4 */ + .long 0 /* 0x1e8 */ + .long 0 /* 0x1ec */ + .long 0 /* 0x1f0 */ + .long 0 /* 0x1f4 */ + .long 0 /* 0x1f8 */ + .long 0 /* 0x1fc */ + .long 0 /* 0x200 */ + .long 0 /* 0x204 */ + .long 0 /* 0x208 */ + .long 0 /* 0x20c */ + .long 0 /* 0x210 */ + .long 0 /* 0x214 */ + .long 0 /* 0x218 */ + .long 0 /* 0x21c */ + .long 0 /* 0x220 */ + .long 0 /* 0x224 */ + .long 0 /* 0x228 */ + .long 0 /* 0x22c */ + .long 0 /* 0x230 */ + .long 0 /* 0x234 */ + .long 0 /* 0x238 */ + .long 0 /* 0x23c */ + .long 0 /* 0x240 */ + .long 0 /* 0x244 */ + .long 0 /* 0x248 */ + .long 0 /* 0x24c */ + .long 0 /* 0x250 */ + .long 0 /* 0x254 */ + .long 0 /* 0x258 */ + .long 0 /* 0x25c */ + .long 0 /* 0x260 */ + .long 0 /* 0x264 */ + .long 0 /* 0x268 */ + .long 0 /* 0x26c */ + .long 0 /* 0x270 */ + .long 0 /* 0x274 */ + .long 0 /* 0x278 */ + .long 0 /* 0x27c */ + .long 0 /* 0x280 */ + .long 0 /* 0x284 */ + .long 0 /* 0x288 */ + .long 0 /* 0x28c */ + .long 0 /* 0x290 */ + .long 0 /* 0x294 */ + .long 0 /* 0x298 */ + .long 0 /* 0x29c */ + .long 0 /* 0x2a0 */ + .long 0 /* 0x2a4 */ + .long 0 /* 0x2a8 */ + .long 0 /* 0x2ac */ + .long 0 /* 0x2b0 */ + .long 0 /* 0x2b4 */ + .long 0 /* 0x2b8 */ + .long 0 /* 0x2bc */ + .long 0 /* 0x2c0 */ + .long 0 /* 0x2c4 */ + .long 0 /* 0x2c8 */ + .long 0 /* 0x2cc */ + .long 0 /* 0x2d0 */ + .long 0 /* 0x2d4 */ + .long 0 /* 0x2d8 */ + .long 0 /* 0x2dc */ + .long 0 /* 0x2e0 */ + .long 0 /* 0x2e4 */ + .long 0 /* 0x2e8 */ + .long 0 /* 0x2ec */ + .long 0 /* 0x2f0 */ + .long 0 /* 0x2f4 */ + .long 0 /* 0x2f8 */ + .long 0 /* 0x2fc */ + .long DOTSYM(kvmppc_h_random) - hcall_real_table .globl hcall_real_table_end hcall_real_table_end: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 24bfe401373e..55a4763d6d11 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_RMA: r = 0; break; + case KVM_CAP_PPC_HWRNG: + r = kvmppc_hwrng_present(); + break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 80db43944afe..6eb808ff637e 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -24,12 +24,22 @@ struct powernv_rng { void __iomem *regs; + void __iomem *regs_real; unsigned long mask; }; static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); +int powernv_hwrng_present(void) +{ + struct powernv_rng *rng; + + rng = get_cpu_var(powernv_rng); + put_cpu_var(rng); + return rng != NULL; +} + static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) { unsigned long parity; @@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) return val; } +int powernv_get_random_real_mode(unsigned long *v) +{ + struct powernv_rng *rng; + + rng = raw_cpu_read(powernv_rng); + + *v = rng_whiten(rng, in_rm64(rng->regs_real)); + + return 1; +} + int powernv_get_random_long(unsigned long *v) { struct powernv_rng *rng; @@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng, static __init int rng_create(struct device_node *dn) { struct powernv_rng *rng; + struct resource res; unsigned long val; rng = kzalloc(sizeof(*rng), GFP_KERNEL); if (!rng) return -ENOMEM; + if (of_address_to_resource(dn, 0, &res)) { + kfree(rng); + return -ENXIO; + } + + rng->regs_real = (void __iomem *)res.start; + rng->regs = of_iomap(dn, 0); if (!rng->regs) { kfree(rng); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f574d7be7631..4b60056776d1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -813,6 +813,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MIPS_MSA 112 #define KVM_CAP_S390_INJECT_IRQ 113 #define KVM_CAP_S390_IRQ_STATE 114 +#define KVM_CAP_PPC_HWRNG 115 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 135dd002c23054aaa056ea3162c1e0356905c195 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 6 Apr 2015 09:46:00 -0400 Subject: nfsd: eliminate NFSD_DEBUG Commit f895b252d4edf ("sunrpc: eliminate RPC_DEBUG") introduced use of IS_ENABLED() in a uapi header which leads to a build failure for userspace apps trying to use : linux/nfsd/debug.h:18:15: error: missing binary operator before token "(" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) ^ Since this was only used to define NFSD_DEBUG if CONFIG_SUNRPC_DEBUG is enabled, replace instances of NFSD_DEBUG with CONFIG_SUNRPC_DEBUG. Cc: stable@vger.kernel.org Fixes: f895b252d4edf "sunrpc: eliminate RPC_DEBUG" Signed-off-by: Mark Salter Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svcsubs.c | 2 +- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/nfsd.h | 2 +- include/uapi/linux/nfsd/debug.h | 8 -------- 4 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 665ef5a05183..a563ddbc19e6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -31,7 +31,7 @@ static struct hlist_head nlm_files[FILE_NRHASH]; static DEFINE_MUTEX(nlm_file_mutex); -#ifdef NFSD_DEBUG +#ifdef CONFIG_SUNRPC_DEBUG static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) { u32 *fhp = (u32*)f->data; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fd875b6107ba..693495551bb2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid) return sid->sequence % SESSION_HASH_SIZE; } -#ifdef NFSD_DEBUG +#ifdef CONFIG_SUNRPC_DEBUG static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 565c4da1a9eb..cf980523898b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,7 +24,7 @@ #include "export.h" #undef ifdebug -#ifdef NFSD_DEBUG +#ifdef CONFIG_SUNRPC_DEBUG # define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) #else # define ifdebug(flag) if (0) diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h index 0bf130a1c58d..28ec6c9c421a 100644 --- a/include/uapi/linux/nfsd/debug.h +++ b/include/uapi/linux/nfsd/debug.h @@ -11,14 +11,6 @@ #include -/* - * Enable debugging for nfsd. - * Requires RPC_DEBUG. - */ -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define NFSD_DEBUG 1 -#endif - /* * knfsd debug flags */ -- cgit v1.2.3 From bff175238a2416110e2258989c462234baeb5f46 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 Mar 2015 17:50:00 +0100 Subject: uapi: Remove kernel internal declaration The enum nfs4_acl_whotype is only used in nfs4d's internal nfs4 acl representation. No longer expose it to user space. Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 7 +++++++ include/uapi/linux/nfs4.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ed43cb74b11d..32201c269890 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -16,6 +16,13 @@ #include #include +enum nfs4_acl_whotype { + NFS4_ACL_WHO_NAMED = 0, + NFS4_ACL_WHO_OWNER, + NFS4_ACL_WHO_GROUP, + NFS4_ACL_WHO_EVERYONE, +}; + struct nfs4_ace { uint32_t type; uint32_t flag; diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 35f5f4c6c260..adc0aff83fbb 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -162,13 +162,6 @@ */ #define NFS4_MAX_BACK_CHANNEL_OPS 2 -enum nfs4_acl_whotype { - NFS4_ACL_WHO_NAMED = 0, - NFS4_ACL_WHO_OWNER, - NFS4_ACL_WHO_GROUP, - NFS4_ACL_WHO_EVERYONE, -}; - #endif /* _UAPI_LINUX_NFS4_H */ /* -- cgit v1.2.3 From 40c64c26a43494ba9982fd1b87dc54e3819566fc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 15 Apr 2015 13:00:05 -0400 Subject: NFS: Move nfs_idmap.h into fs/nfs/ This file is only used internally to the NFS v4 module, so it doesn't need to be in the global include path. I also renamed it from nfs_idmap.h to nfs4idmap.h to emphasize that it's an NFSv4-only include file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- fs/nfs/idmap.c | 2 +- fs/nfs/nfs4client.c | 2 +- fs/nfs/nfs4idmap.h | 68 ++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4super.c | 2 +- fs/nfs/nfs4sysctl.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_idmap.h | 68 ---------------------------------- include/uapi/linux/nfs_idmap.h | 2 +- 11 files changed, 77 insertions(+), 77 deletions(-) create mode 100644 fs/nfs/nfs4idmap.h delete mode 100644 include/linux/nfs_idmap.h (limited to 'include/uapi') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f3ff66e4fb6a..7d05089e52d6 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -11,10 +11,10 @@ #include #include -#include #include "flexfilelayout.h" #include "../nfs4session.h" +#include "../nfs4idmap.h" #include "../internal.h" #include "../delegation.h" #include "../nfs4trace.h" diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 857e2a99acc8..2e1737c40a29 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -49,6 +48,7 @@ #include "internal.h" #include "netns.h" +#include "nfs4idmap.h" #include "nfs4trace.h" #define NFS_UINT_MAXLEN 11 diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 86d6214ea022..43ca5a7ac1da 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -4,7 +4,6 @@ */ #include #include -#include #include #include #include @@ -15,6 +14,7 @@ #include "callback.h" #include "delegation.h" #include "nfs4session.h" +#include "nfs4idmap.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/nfs4idmap.h b/fs/nfs/nfs4idmap.h new file mode 100644 index 000000000000..de44d7330ab3 --- /dev/null +++ b/fs/nfs/nfs4idmap.h @@ -0,0 +1,68 @@ +/* + * fs/nfs/nfs4idmap.h + * + * UID and GID to name mapping for clients. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef NFS_IDMAP_H +#define NFS_IDMAP_H + +#include +#include + + +/* Forward declaration to make this header independent of others */ +struct nfs_client; +struct nfs_server; +struct nfs_fattr; +struct nfs4_string; + +int nfs_idmap_init(void); +void nfs_idmap_quit(void); +int nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); + +void nfs_fattr_init_names(struct nfs_fattr *fattr, + struct nfs4_string *owner_name, + struct nfs4_string *group_name); +void nfs_fattr_free_names(struct nfs_fattr *); +void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *); + +int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *); +int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *); +int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); +int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); + +int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); + +extern unsigned int nfs_idmap_cache_timeout; +#endif /* NFS_IDMAP_H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d48f37cfe8a..60396330044f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -63,6 +62,7 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f95e3b58bbc3..935a6ffe8643 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -57,6 +56,7 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "nfs4idmap.h" #include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index d91898193b2f..6fb7cb6b3f4b 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -3,12 +3,12 @@ */ #include #include -#include #include #include #include "delegation.h" #include "internal.h" #include "nfs4_fs.h" +#include "nfs4idmap.h" #include "dns_resolve.h" #include "pnfs.h" #include "nfs.h" diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index b6ebe7e445f6..0fbd3ab1be22 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -6,10 +6,10 @@ * Copyright (c) 2006 Trond Myklebust */ #include -#include #include #include "nfs4_fs.h" +#include "nfs4idmap.h" #include "callback.h" static const int nfs_set_port_min = 0; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a26880cb8ce8..0aea97841d30 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,10 +52,10 @@ #include #include #include -#include #include "nfs4_fs.h" #include "internal.h" +#include "nfs4idmap.h" #include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h deleted file mode 100644 index daaf3ea5e9d8..000000000000 --- a/include/linux/nfs_idmap.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * include/linux/nfs_idmap.h - * - * UID and GID to name mapping for clients. - * - * Copyright (c) 2002 The Regents of the University of Michigan. - * All rights reserved. - * - * Marius Aamodt Eriksen - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef NFS_IDMAP_H -#define NFS_IDMAP_H - -#include -#include - - -/* Forward declaration to make this header independent of others */ -struct nfs_client; -struct nfs_server; -struct nfs_fattr; -struct nfs4_string; - -int nfs_idmap_init(void); -void nfs_idmap_quit(void); -int nfs_idmap_new(struct nfs_client *); -void nfs_idmap_delete(struct nfs_client *); - -void nfs_fattr_init_names(struct nfs_fattr *fattr, - struct nfs4_string *owner_name, - struct nfs4_string *group_name); -void nfs_fattr_free_names(struct nfs_fattr *); -void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *); - -int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *); -int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *); -int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); -int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); - -int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); - -extern unsigned int nfs_idmap_cache_timeout; -#endif /* NFS_IDMAP_H */ diff --git a/include/uapi/linux/nfs_idmap.h b/include/uapi/linux/nfs_idmap.h index 8d4b1c7b24d4..038e36c96669 100644 --- a/include/uapi/linux/nfs_idmap.h +++ b/include/uapi/linux/nfs_idmap.h @@ -1,5 +1,5 @@ /* - * include/linux/nfs_idmap.h + * include/uapi/linux/nfs_idmap.h * * UID and GID to name mapping for clients. * -- cgit v1.2.3 From 0df48c26d8418c5c9fba63fac15b660d70ca2f1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 15:28:17 -0700 Subject: tcp: add tcpi_bytes_acked to tcp_info This patch tracks total number of bytes acked for a TCP socket. This is the sum of all changes done to tp->snd_una, and allows for precise tracking of delivered data. RFC4898 named this : tcpEStatsAppHCThruOctetsAcked This is a 64bit field, and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->bytes_acked was placed near tp->snd_una for best data locality and minimal performance impact. Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Cc: Matt Mathis Cc: Eric Salo Cc: Martin Lau Cc: Chris Rapier Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ include/net/tcp.h | 2 +- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 6 +++++- net/ipv4/tcp_input.c | 13 +++++++++++-- 5 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0caa3a2d4106..0f73b43171da 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -150,6 +150,10 @@ struct tcp_sock { u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ + u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 051dc5c2802d..dd7b4ea6a10c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -576,7 +576,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) } /* tcp.c */ -void tcp_get_info(const struct sock *, struct tcp_info *); +void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 3b9718328d8b..6666e98a0af9 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -189,6 +189,7 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8c5cd9efebbc..4bf0e8ca7b5b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2592,7 +2592,7 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(const struct sock *sk, struct tcp_info *info) +void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2663,6 +2663,10 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) rate = READ_ONCE(sk->sk_max_pacing_rate); info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + + spin_lock_bh(&sk->sk_lock.slock); + info->tcpi_bytes_acked = tp->bytes_acked; + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3a4d9b34bed4..378d3f4d4dc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3280,6 +3280,15 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); } +/* If we update tp->snd_una, also update tp->bytes_acked */ +static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) +{ + u32 delta = ack - tp->snd_una; + + tp->bytes_acked += delta; + tp->snd_una = ack; +} + /* Update our send window. * * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 @@ -3315,7 +3324,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } } - tp->snd_una = ack; + tcp_snd_una_update(tp, ack); return flag; } @@ -3497,7 +3506,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * Note, we use the fact that SND.UNA>=SND.WL2. */ tcp_update_wl(tp, ack_seq); - tp->snd_una = ack; + tcp_snd_una_update(tp, ack); flag |= FLAG_WIN_UPDATE; tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); -- cgit v1.2.3 From bdd1f9edacb5f5835d1e6276571bbbe5b88ded48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 15:28:18 -0700 Subject: tcp: add tcpi_bytes_received to tcp_info This patch tracks total number of payload bytes received on a TCP socket. This is the sum of all changes done to tp->rcv_nxt RFC4898 named this : tcpEStatsAppHCThruOctetsReceived This is a 64bit field, and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->bytes_received was placed near tp->rcv_nxt for best data locality and minimal performance impact. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Matt Mathis Cc: Eric Salo Cc: Martin Lau Cc: Chris Rapier Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 1 + net/ipv4/tcp_fastopen.c | 1 + net/ipv4/tcp_input.c | 17 +++++++++++++---- 5 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0f73b43171da..3b2911502a8c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -145,6 +145,10 @@ struct tcp_sock { * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. */ + u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6666e98a0af9..a48f93f3207b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -190,6 +190,7 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bf0e8ca7b5b..99fcc0b22c92 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2666,6 +2666,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) spin_lock_bh(&sk->sk_lock.slock); info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e3d87aca6be8..3c673d5e6cff 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -206,6 +206,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, skb_set_owner_r(skb2, child); __skb_queue_tail(&child->sk_receive_queue, skb2); tp->syn_data_acked = 1; + tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1; } else { end_seq = TCP_SKB_CB(skb)->seq + 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 378d3f4d4dc3..7e6962bcfc30 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3289,6 +3289,15 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) tp->snd_una = ack; } +/* If we update tp->rcv_nxt, also update tp->bytes_received */ +static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +{ + u32 delta = seq - tp->rcv_nxt; + + tp->bytes_received += delta; + tp->rcv_nxt = seq; +} + /* Update our send window. * * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 @@ -4245,7 +4254,7 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@ -4413,7 +4422,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; - tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -4506,7 +4515,7 @@ queue_and_out: eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); } - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@ -5254,7 +5263,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); __skb_pull(skb, tcp_header_len); - tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); eaten = 1; } -- cgit v1.2.3 From 64f40ff5bbdb1b679fb3c4dbc8230d6517d2b8dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 16:23:48 -0700 Subject: tcp: prepare CC get_info() access from getsockopt() We would like that optional info provided by Congestion Control modules using netlink can also be read using getsockopt() This patch changes get_info() to put this information in a buffer, instead of skb, like tcp_get_info(), so that following patch can reuse this common infrastructure. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Daniel Borkmann Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 5 ++++- include/uapi/linux/inet_diag.h | 4 ++++ net/ipv4/inet_diag.c | 8 +++++--- net/ipv4/tcp_dctcp.c | 20 ++++++++++---------- net/ipv4/tcp_illinois.c | 21 +++++++++++---------- net/ipv4/tcp_vegas.c | 19 ++++++++++--------- net/ipv4/tcp_vegas.h | 3 ++- 7 files changed, 46 insertions(+), 34 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tcp.h b/include/net/tcp.h index dd7b4ea6a10c..6d204f3f9df8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -804,6 +804,8 @@ enum tcp_ca_ack_event_flags { /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 +union tcp_cc_info; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -829,7 +831,8 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us); /* get info for inet_diag (optional) */ - int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); + size_t (*get_info)(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info); char name[TCP_CA_NAME_MAX]; struct module *owner; diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index d65c0a09efd3..c7093c75bdd6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,4 +143,8 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +union tcp_cc_info { + struct tcpvegas_info vegas; + struct tcp_dctcp_info dctcp; +}; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bb77ebdae3b3..4d32262c7502 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler->idiag_get_info(sk, r, info); if (sk->sk_state < TCP_TIME_WAIT) { - int err = 0; + union tcp_cc_info info; + size_t sz = 0; + int attr; rcu_read_lock(); ca_ops = READ_ONCE(icsk->icsk_ca_ops); if (ca_ops && ca_ops->get_info) - err = ca_ops->get_info(sk, ext, skb); + sz = ca_ops->get_info(sk, ext, &attr, &info); rcu_read_unlock(); - if (err < 0) + if (sz && nla_put(skb, attr, sz, &info) < 0) goto errout; } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 4376016f7fa5..4c41c1287197 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) } } -static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); @@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) */ if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) || ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcp_dctcp_info info; - - memset(&info, 0, sizeof(info)); + memset(info, 0, sizeof(struct tcp_dctcp_info)); if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) { - info.dctcp_enabled = 1; - info.dctcp_ce_state = (u16) ca->ce_state; - info.dctcp_alpha = ca->dctcp_alpha; - info.dctcp_ab_ecn = ca->acked_bytes_ecn; - info.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_enabled = 1; + info->dctcp.dctcp_ce_state = (u16) ca->ce_state; + info->dctcp.dctcp_alpha = ca->dctcp_alpha; + info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; + info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; } - return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); + *attr = INET_DIAG_DCTCPINFO; + return sizeof(*info); } return 0; } diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 67476f085e48..f71002e4db0b 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) } /* Extract info for Tcp socket info provided via netlink. */ -static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct illinois *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info info = { - .tcpv_enabled = 1, - .tcpv_rttcnt = ca->cnt_rtt, - .tcpv_minrtt = ca->base_rtt, - }; + info->vegas.tcpv_enabled = 1; + info->vegas.tcpv_rttcnt = ca->cnt_rtt; + info->vegas.tcpv_minrtt = ca->base_rtt; + info->vegas.tcpv_rtt = 0; - if (info.tcpv_rttcnt > 0) { + if (info->vegas.tcpv_rttcnt > 0) { u64 t = ca->sum_rtt; - do_div(t, info.tcpv_rttcnt); - info.tcpv_rtt = t; + do_div(t, info->vegas.tcpv_rttcnt); + info->vegas.tcpv_rtt = t; } - return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + *attr = INET_DIAG_VEGASINFO; + return sizeof(struct tcpvegas_info); } return 0; } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c71a1b8f7bde..a6cea1d5e20d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } /* Extract info for Tcp socket info provided via netlink. */ -int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) { const struct vegas *ca = inet_csk_ca(sk); + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcpvegas_info info = { - .tcpv_enabled = ca->doing_vegas_now, - .tcpv_rttcnt = ca->cntRTT, - .tcpv_rtt = ca->baseRTT, - .tcpv_minrtt = ca->minRTT, - }; - - return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + info->vegas.tcpv_enabled = ca->doing_vegas_now, + info->vegas.tcpv_rttcnt = ca->cntRTT, + info->vegas.tcpv_rtt = ca->baseRTT, + info->vegas.tcpv_minrtt = ca->minRTT, + + *attr = INET_DIAG_VEGASINFO; + return sizeof(struct tcpvegas_info); } return 0; } diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index e8a6b33cc61d..ef9da5306c68 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); -int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); +size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info); #endif /* __TCP_VEGAS_H */ -- cgit v1.2.3 From 6e9250f59ef9efb932c84850cd221f22c2a03c4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 16:23:49 -0700 Subject: tcp: add TCP_CC_INFO socket option Some Congestion Control modules can provide per flow information, but current way to get this information is to use netlink. Like TCP_INFO, let's add TCP_CC_INFO so that applications can issue a getsockopt() if they have a socket file descriptor, instead of playing complex netlink games. Sample usage would be : union tcp_cc_info info; socklen_t len = sizeof(info); if (getsockopt(fd, SOL_TCP, TCP_CC_INFO, &info, &len) == -1) Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Daniel Borkmann Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index a48f93f3207b..faa72f4fa547 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -112,6 +112,7 @@ enum { #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 99fcc0b22c92..46efa03d2b11 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -252,6 +252,7 @@ #include #include #include +#include #include #include #include @@ -2739,6 +2740,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_CC_INFO: { + const struct tcp_congestion_ops *ca_ops; + union tcp_cc_info info; + size_t sz = 0; + int attr; + + if (get_user(len, optlen)) + return -EFAULT; + + ca_ops = icsk->icsk_ca_ops; + if (ca_ops && ca_ops->get_info) + sz = ca_ops->get_info(sk, ~0U, &attr, &info); + + len = min_t(unsigned int, len, sz); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) + return -EFAULT; + return 0; + } case TCP_QUICKACK: val = !icsk->icsk_ack.pingpong; break; -- cgit v1.2.3 From e412d3a32badcf17541d7443b033769fdf39b545 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sat, 2 May 2015 08:42:29 +0930 Subject: virtio: fix typo in vring_need_event() doc comment Here the "other side" refers to the guest or host. Signed-off-by: Stefan Hajnoczi Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/uapi/linux/virtio_ring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index a3318f31e8e7..915980ac68df 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -155,7 +155,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) } /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ -/* Assuming a given event_idx value from the other size, if +/* Assuming a given event_idx value from the other side, if * we have just incremented index from old to new_idx, * should we trigger an event? */ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) -- cgit v1.2.3 From 6eec177461751f0fe191cf9977cde692b9481d0a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Apr 2015 16:28:10 -0400 Subject: RDMA/core: Enable the iWarp Port Mapper to provide the actual address of the connecting peer to its clients Add functionality to enable the port mapper on the passive side to provide to its clients the actual (non-mapped) ip/tcp address information of the connecting peer 1) Adding remote_info_cb() to process the address info of the connecting peer The address info is provided by the user space port mapper service when the connection is initiated by the peer 2) Adding a hash list to store the remote address info 3) Adding functionality to add/remove the remote address info After the info has been provided to the port mapper client, it is removed from the hash list Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 73 ++++++++++++- drivers/infiniband/core/iwpm_util.c | 208 ++++++++++++++++++++++++++++++------ drivers/infiniband/core/iwpm_util.h | 15 +++ include/rdma/iw_portmap.h | 25 +++++ include/uapi/rdma/rdma_netlink.h | 1 + 5 files changed, 288 insertions(+), 34 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index b85ddbc979e0..ab081702566f 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -468,7 +468,8 @@ add_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_mapping_cb); -/* netlink attribute policy for the response to add and query mapping request */ +/* netlink attribute policy for the response to add and query mapping request + * and response with remote address info */ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, @@ -559,6 +560,76 @@ query_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); +/* + * iwpm_remote_info_cb - Process a port mapper message, containing + * the remote connecting peer address info + */ +int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr, *remote_sockaddr; + struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; + struct iwpm_remote_info *rem_info; + const char *msg_type; + u8 nl_client; + int ret = -EINVAL; + + msg_type = "Remote Mapping info"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, + resp_query_policy, nltb, msg_type)) + return ret; + + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + remote_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + mapped_loc_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); + mapped_rem_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + + if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || + mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + return ret; + } + rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); + if (!rem_info) { + pr_err("%s: Unable to allocate a remote info\n", __func__); + ret = -ENOMEM; + return ret; + } + memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->remote_sockaddr, remote_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr, + sizeof(struct sockaddr_storage)); + rem_info->nl_client = nl_client; + + iwpm_add_remote_info(rem_info); + + iwpm_print_sockaddr(local_sockaddr, + "remote_info: Local sockaddr:"); + iwpm_print_sockaddr(mapped_loc_sockaddr, + "remote_info: Mapped local sockaddr:"); + iwpm_print_sockaddr(remote_sockaddr, + "remote_info: Remote sockaddr:"); + iwpm_print_sockaddr(mapped_rem_sockaddr, + "remote_info: Mapped remote sockaddr:"); + return ret; +} +EXPORT_SYMBOL(iwpm_remote_info_cb); + /* netlink attribute policy for the received request for mapping info */ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 69e9f84c1605..a626795bf9c7 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -33,8 +33,10 @@ #include "iwpm_util.h" -#define IWPM_HASH_BUCKET_SIZE 512 -#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1) +#define IWPM_MAPINFO_HASH_SIZE 512 +#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) +#define IWPM_REMINFO_HASH_SIZE 64 +#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); static struct hlist_head *iwpm_hash_bucket; static DEFINE_SPINLOCK(iwpm_mapinfo_lock); +static struct hlist_head *iwpm_reminfo_bucket; +static DEFINE_SPINLOCK(iwpm_reminfo_lock); + static DEFINE_MUTEX(iwpm_admin_lock); static struct iwpm_admin_data iwpm_admin; int iwpm_init(u8 nl_client) { + int ret = 0; if (iwpm_valid_client(nl_client)) return -EINVAL; mutex_lock(&iwpm_admin_lock); if (atomic_read(&iwpm_admin.refcount) == 0) { - iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * + iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { - mutex_unlock(&iwpm_admin_lock); + ret = -ENOMEM; pr_err("%s Unable to create mapinfo hash table\n", __func__); - return -ENOMEM; + goto init_exit; + } + iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * + sizeof(struct hlist_head), GFP_KERNEL); + if (!iwpm_reminfo_bucket) { + kfree(iwpm_hash_bucket); + ret = -ENOMEM; + pr_err("%s Unable to create reminfo hash table\n", __func__); + goto init_exit; } } atomic_inc(&iwpm_admin.refcount); +init_exit: mutex_unlock(&iwpm_admin_lock); - iwpm_set_valid(nl_client, 1); - return 0; + if (!ret) { + iwpm_set_valid(nl_client, 1); + pr_debug("%s: Mapinfo and reminfo tables are created\n", + __func__); + } + return ret; } EXPORT_SYMBOL(iwpm_init); static void free_hash_bucket(void); +static void free_reminfo_bucket(void); int iwpm_exit(u8 nl_client) { @@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client) } if (atomic_dec_and_test(&iwpm_admin.refcount)) { free_hash_bucket(); - pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); + free_reminfo_bucket(); + pr_debug("%s: Resources are destroyed\n", __func__); } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); @@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client) } EXPORT_SYMBOL(iwpm_exit); -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, @@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct hlist_head *hash_bucket_head; struct iwpm_mapping_info *map_info; unsigned long flags; + int ret = -EINVAL; if (!iwpm_valid_client(nl_client)) - return -EINVAL; + return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); if (!map_info) { pr_err("%s: Unable to allocate a mapping info\n", __func__); @@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( &map_info->local_sockaddr, &map_info->mapped_sockaddr); - hlist_add_head(&map_info->hlist_node, hash_bucket_head); + if (hash_bucket_head) { + hlist_add_head(&map_info->hlist_node, hash_bucket_head); + ret = 0; + } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); - return 0; + return ret; } EXPORT_SYMBOL(iwpm_create_mapinfo); @@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( local_sockaddr, mapped_local_addr); + if (!hash_bucket_head) + goto remove_mapinfo_exit; + hlist_for_each_entry_safe(map_info, tmp_hlist_node, hash_bucket_head, hlist_node) { @@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, } } } +remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } @@ -166,7 +195,7 @@ static void free_hash_bucket(void) /* remove all the mapinfo data from the list */ spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(map_info, tmp_hlist_node, &iwpm_hash_bucket[i], hlist_node) { @@ -180,6 +209,96 @@ static void free_hash_bucket(void) spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); } +static void free_reminfo_bucket(void) +{ + struct hlist_node *tmp_hlist_node; + struct iwpm_remote_info *rem_info; + unsigned long flags; + int i; + + /* remove all the remote info from the list */ + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + &iwpm_reminfo_bucket[i], hlist_node) { + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + } + } + /* free the hash list */ + kfree(iwpm_reminfo_bucket); + iwpm_reminfo_bucket = NULL; + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *, + struct sockaddr_storage *); + +void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) +{ + struct hlist_head *hash_bucket_head; + unsigned long flags; + + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + &rem_info->mapped_loc_sockaddr, + &rem_info->mapped_rem_sockaddr); + if (hash_bucket_head) + hlist_add_head(&rem_info->hlist_node, hash_bucket_head); + } + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) +{ + struct hlist_node *tmp_hlist_node; + struct hlist_head *hash_bucket_head; + struct iwpm_remote_info *rem_info = NULL; + unsigned long flags; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid client = %d\n", __func__, nl_client); + return ret; + } + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + mapped_loc_addr, + mapped_rem_addr); + if (!hash_bucket_head) + goto get_remote_info_exit; + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + hash_bucket_head, hlist_node) { + + if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr, + mapped_loc_addr) && + !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr, + mapped_rem_addr)) { + + memcpy(remote_addr, &rem_info->remote_sockaddr, + sizeof(struct sockaddr_storage)); + iwpm_print_sockaddr(remote_addr, + "get_remote_info: Remote sockaddr:"); + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + ret = 0; + break; + } + } + } +get_remote_info_exit: + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); + return ret; +} +EXPORT_SYMBOL(iwpm_get_remote_info); + struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { @@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) return hash; } -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage - *local_sockaddr, - struct sockaddr_storage - *mapped_sockaddr) +static int get_hash_bucket(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr, u32 *hash) { - u32 local_hash, mapped_hash, hash; + u32 a_hash, b_hash; - if (local_sockaddr->ss_family == AF_INET) { - local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); - mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + if (a_sockaddr->ss_family == AF_INET) { + a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr); + b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr); - } else if (local_sockaddr->ss_family == AF_INET6) { - local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); - mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); + } else if (a_sockaddr->ss_family == AF_INET6) { + a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr); + b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr); } else { pr_err("%s: Invalid sockaddr family\n", __func__); - return NULL; + return -EINVAL; } - if (local_hash == mapped_hash) /* if port mapper isn't available */ - hash = local_hash; + if (a_hash == b_hash) /* if port mapper isn't available */ + *hash = a_hash; else - hash = jhash_2words(local_hash, mapped_hash, 0); + *hash = jhash_2words(a_hash, b_hash, 0); + return 0; +} + +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage + *local_sockaddr, struct sockaddr_storage + *mapped_sockaddr) +{ + u32 hash; + int ret; - return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; + ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK]; +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage + *mapped_loc_sockaddr, struct sockaddr_storage + *mapped_rem_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK]; } static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) @@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) } skb_num++; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], hlist_node) { if (map_info->nl_client != nl_client) @@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void) spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { if (!hlist_empty(&iwpm_hash_bucket[i])) { full_bucket = 1; break; diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 9777c869a140..ee2d9ff095be 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -76,6 +76,14 @@ struct iwpm_mapping_info { u8 nl_client; }; +struct iwpm_remote_info { + struct hlist_node hlist_node; + struct sockaddr_storage remote_sockaddr; + struct sockaddr_storage mapped_loc_sockaddr; + struct sockaddr_storage mapped_rem_sockaddr; + u8 nl_client; +}; + struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; @@ -127,6 +135,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); */ int iwpm_get_nlmsg_seq(void); +/** + * iwpm_add_reminfo - Add remote address info of the connecting peer + * to the remote info hash table + * @reminfo: The remote info to be added + */ +void iwpm_add_remote_info(struct iwpm_remote_info *reminfo); + /** * iwpm_valid_client - Check if the port mapper client is valid * @nl_client: The index of the netlink client diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index 928b2775e992..fda31673a562 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -147,6 +147,16 @@ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_remote_info_cb - Process remote connecting peer address info, which + * the port mapper has received from the connecting peer + * + * @cb: Contains the received message (payload and netlink header) + * + * Stores the IPv4/IPv6 address info in a hash table + */ +int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *); + /** * iwpm_mapping_error_cb - Process port mapper notification for error * @@ -174,6 +184,21 @@ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_get_remote_info - Get the remote connecting peer address info + * + * @mapped_loc_addr: Mapped local address of the listening peer + * @mapped_rem_addr: Mapped remote address of the connecting peer + * @remote_addr: To store the remote address of the connecting peer + * @nl_client: The index of the netlink client + * + * The remote address info is retrieved and provided to the client in + * the remote_addr. After that it is removed from the hash table + */ +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, u8 nl_client); + /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index de69170a30ce..6e4bb4270ca2 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -37,6 +37,7 @@ enum { RDMA_NL_IWPM_ADD_MAPPING, RDMA_NL_IWPM_QUERY_MAPPING, RDMA_NL_IWPM_REMOVE_MAPPING, + RDMA_NL_IWPM_REMOTE_INFO, RDMA_NL_IWPM_HANDLE_ERR, RDMA_NL_IWPM_MAPINFO, RDMA_NL_IWPM_MAPINFO_NUM, -- cgit v1.2.3 From c967a0873a7836c7a77bf611f1c7d3f47c554c45 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 May 2015 09:06:30 -0700 Subject: mpls: Move reserved label definitions Move to include/uapi/linux/mpls.h to be externally visibile. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/mpls.h | 10 ++++++++++ net/mpls/af_mpls.c | 18 +++++++++--------- net/mpls/internal.h | 10 ---------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h index bc9abfe88c9a..0fe6ea5a41d5 100644 --- a/include/uapi/linux/mpls.h +++ b/include/uapi/linux/mpls.h @@ -31,4 +31,14 @@ struct mpls_label { #define MPLS_LS_TTL_MASK 0x000000FF #define MPLS_LS_TTL_SHIFT 0 +/* Reserved labels */ +#define MPLS_LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ +#define MPLS_LABEL_ROUTER_ALERT 1 /* RFC3032 */ +#define MPLS_LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ +#define MPLS_LABEL_IMPLICIT_NULL 3 /* RFC3032 */ +#define MPLS_LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ +#define MPLS_LABEL_GAL 13 /* RFC5586 */ +#define MPLS_LABEL_OAM_ALERT 14 /* RFC3429 */ +#define MPLS_LABEL_EXTENSION 15 /* RFC7274 */ + #endif /* _UAPI_MPLS_H */ diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 954810c76a86..b6eb7615960a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -647,7 +647,7 @@ int nla_get_labels(const struct nlattr *nla, return -EINVAL; switch (dec.label) { - case LABEL_IMPLICIT_NULL: + case MPLS_LABEL_IMPLICIT_NULL: /* RFC3032: This is a label that an LSR may * assign and distribute, but which never * actually appears in the encapsulation. @@ -935,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* In case the predefined labels need to be populated */ - if (limit > LABEL_IPV4_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL) { struct net_device *lo = net->loopback_dev; rt0 = mpls_rt_alloc(lo->addr_len); if (!rt0) @@ -945,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt0->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); } - if (limit > LABEL_IPV6_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL) { struct net_device *lo = net->loopback_dev; rt2 = mpls_rt_alloc(lo->addr_len); if (!rt2) @@ -973,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit) memcpy(labels, old, cp_size); /* If needed set the predefined labels */ - if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && - (limit > LABEL_IPV6_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); + if ((old_limit <= MPLS_LABEL_IPV6_EXPLICIT_NULL) && + (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6_EXPLICIT_NULL], rt2); rt2 = NULL; } - if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && - (limit > LABEL_IPV4_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); + if ((old_limit <= MPLS_LABEL_IPV4_EXPLICIT_NULL) && + (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4_EXPLICIT_NULL], rt0); rt0 = NULL; } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 693877d69606..b064c345042c 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,16 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H -#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ -#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */ -#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ -#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */ -#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ -#define LABEL_GAL 13 /* RFC5586 */ -#define LABEL_OAM_ALERT 14 /* RFC3429 */ -#define LABEL_EXTENSION 15 /* RFC7274 */ - - struct mpls_shim_hdr { __be32 label_stack_entry; }; -- cgit v1.2.3 From 78f5b899195019f71f7593c604d75ca61658eae3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 7 May 2015 08:08:51 -0700 Subject: mpls: Change reserved label names to be consistent with netbsd Since these are now visible to userspace it is nice to be consistent with BSD (sys/netmpls/mpls.h in netBSD). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/mpls.h | 12 ++++++------ net/mpls/af_mpls.c | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h index 0fe6ea5a41d5..139d4dd1cab8 100644 --- a/include/uapi/linux/mpls.h +++ b/include/uapi/linux/mpls.h @@ -32,13 +32,13 @@ struct mpls_label { #define MPLS_LS_TTL_SHIFT 0 /* Reserved labels */ -#define MPLS_LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */ -#define MPLS_LABEL_ROUTER_ALERT 1 /* RFC3032 */ -#define MPLS_LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */ -#define MPLS_LABEL_IMPLICIT_NULL 3 /* RFC3032 */ -#define MPLS_LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */ +#define MPLS_LABEL_IPV4NULL 0 /* RFC3032 */ +#define MPLS_LABEL_RTALERT 1 /* RFC3032 */ +#define MPLS_LABEL_IPV6NULL 2 /* RFC3032 */ +#define MPLS_LABEL_IMPLNULL 3 /* RFC3032 */ +#define MPLS_LABEL_ENTROPY 7 /* RFC6790 */ #define MPLS_LABEL_GAL 13 /* RFC5586 */ -#define MPLS_LABEL_OAM_ALERT 14 /* RFC3429 */ +#define MPLS_LABEL_OAMALERT 14 /* RFC3429 */ #define MPLS_LABEL_EXTENSION 15 /* RFC7274 */ #endif /* _UAPI_MPLS_H */ diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b6eb7615960a..7b3f732269e4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -647,7 +647,7 @@ int nla_get_labels(const struct nlattr *nla, return -EINVAL; switch (dec.label) { - case MPLS_LABEL_IMPLICIT_NULL: + case MPLS_LABEL_IMPLNULL: /* RFC3032: This is a label that an LSR may * assign and distribute, but which never * actually appears in the encapsulation. @@ -935,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* In case the predefined labels need to be populated */ - if (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; rt0 = mpls_rt_alloc(lo->addr_len); if (!rt0) @@ -945,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt0->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); } - if (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL) { + if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; rt2 = mpls_rt_alloc(lo->addr_len); if (!rt2) @@ -973,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit) memcpy(labels, old, cp_size); /* If needed set the predefined labels */ - if ((old_limit <= MPLS_LABEL_IPV6_EXPLICIT_NULL) && - (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6_EXPLICIT_NULL], rt2); + if ((old_limit <= MPLS_LABEL_IPV6NULL) && + (limit > MPLS_LABEL_IPV6NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2); rt2 = NULL; } - if ((old_limit <= MPLS_LABEL_IPV4_EXPLICIT_NULL) && - (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL)) { - RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4_EXPLICIT_NULL], rt0); + if ((old_limit <= MPLS_LABEL_IPV4NULL) && + (limit > MPLS_LABEL_IPV4NULL)) { + RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0); rt0 = NULL; } -- cgit v1.2.3