From b0e878759452314676fbdd71df4ac67e7d08de5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Aug 2015 14:06:07 +0200 Subject: perf/abi: Document some more aspects of the perf ABI Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 105 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 092a0e8a479a..d61ee4459eee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -140,27 +140,60 @@ struct hw_perf_event { }; #endif }; + /* + * If the event is a per task event, this will point to the task in + * question. See the comment in perf_event_alloc(). + */ struct task_struct *target; + +/* + * hw_perf_event::state flags; used to track the PERF_EF_* state. + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + int state; + + /* + * The last observed hardware counter value, updated with a + * local64_cmpxchg() such that pmu::read() can be called nested. + */ local64_t prev_count; + + /* + * The period to start the next sample with. + */ u64 sample_period; + + /* + * The period we started this sample with. + */ u64 last_period; + + /* + * However much is left of the current period; note that this is + * a full 64bit value and allows for generation of periods longer + * than hardware might allow. + */ local64_t period_left; + + /* + * State for throttling the event, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 interrupts_seq; u64 interrupts; + /* + * State for freq target events, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; -/* - * hw_perf_event::state flags - */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 - struct perf_event; /* @@ -210,7 +243,19 @@ struct pmu { /* * Try and initialize the event for this PMU. - * Should return -ENOENT when the @event doesn't match this PMU. + * + * Returns: + * -ENOENT -- @event is not for this PMU + * + * -ENODEV -- @event is for this PMU but PMU not present + * -EBUSY -- @event is for this PMU but PMU temporarily unavailable + * -EINVAL -- @event is for this PMU but @event is not valid + * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported + * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges + * + * 0 -- @event is for this PMU and valid + * + * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); @@ -221,27 +266,61 @@ struct pmu { void (*event_mapped) (struct perf_event *event); /*optional*/ void (*event_unmapped) (struct perf_event *event); /*optional*/ + /* + * Flags for ->add()/->del()/ ->start()/->stop(). There are + * matching hw_perf_event::state flags. + */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Adds/Removes a counter to/from the PMU, can be done inside - * a transaction, see the ->*_txn() methods. + * Adds/Removes a counter to/from the PMU, can be done inside a + * transaction, see the ->*_txn() methods. + * + * The add/del callbacks will reserve all hardware resources required + * to service the event, this includes any counter constraint + * scheduling etc. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on. + * + * ->add() called without PERF_EF_START should result in the same state + * as ->add() followed by ->stop(). + * + * ->del() must always PERF_EF_UPDATE stop an event. If it calls + * ->stop() that must deal with already being stopped without + * PERF_EF_UPDATE. */ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* - * Starts/Stops a counter present on the PMU. The PMI handler - * should stop the counter when perf_event_overflow() returns - * !0. ->start() will be used to continue. + * Starts/Stops a counter present on the PMU. + * + * The PMI handler should stop the counter when perf_event_overflow() + * returns !0. ->start() will be used to continue. + * + * Also used to change the sample period. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on -- will be called from NMI context with the PMU generates + * NMIs. + * + * ->stop() with PERF_EF_UPDATE will read the counter and update + * period/count values like ->read() would. + * + * ->start() with PERF_EF_RELOAD will reprogram the the counter + * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. + * + * For sampling capable PMUs this will also update the software period + * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); -- cgit v1.2.3 From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:45 -0700 Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leveage this functionality, extend the transaction interface to support a "transaction type". The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu [peterz: s390 compile fix] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/perf/core-book3s.c | 30 +++++++++++++++++++++++++++++- arch/s390/kernel/perf_cpum_cf.c | 30 +++++++++++++++++++++++++++++- arch/sparc/kernel/perf_event.c | 25 ++++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event.h | 1 + include/linux/perf_event.h | 14 +++++++++++--- kernel/events/core.c | 31 ++++++++++++++++++++++++++++--- 7 files changed, 153 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0382f3f1095..c84074185c80 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -49,6 +49,7 @@ struct cpu_hw_events { unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned int group_flag; + unsigned int txn_flags; int n_txn_start; /* BHRB bits */ @@ -1586,11 +1587,21 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; @@ -1604,6 +1615,14 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1621,7 +1640,15 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1633,6 +1660,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 56fdad479115..19138be412d6 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned int flags; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -572,11 +574,21 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; @@ -589,8 +601,16 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + WARN_ON(cpuhw->tx_state != cpuhw->state); cpuhw->flags &= ~PERF_EVENT_TXN; @@ -607,6 +627,13 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + /* check if the updated state can be scheduled */ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); state >>= CPUMF_LCCTL_ENABLE_SHIFT; @@ -614,6 +641,7 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) return -EPERM; cpuhw->flags &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index e3f89c7e5062..2c0984d146ec 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -109,6 +109,7 @@ struct cpu_hw_events { int enabled; unsigned int group_flag; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1494,10 +1495,16 @@ static int sparc_pmu_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1510,6 +1517,14 @@ static void sparc_pmu_start_txn(struct pmu *pmu) static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1528,6 +1543,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (!sparc_pmu) return -EINVAL; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; @@ -1535,6 +1557,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66dd3fe99b82..dc77ef470e0e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1748,9 +1748,21 @@ static inline void x86_pmu_read(struct perf_event *event) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void x86_pmu_start_txn(struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ + + cpuc->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); @@ -1763,6 +1775,16 @@ static void x86_pmu_start_txn(struct pmu *pmu) */ static void x86_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + txn_flags = cpuc->txn_flags; + cpuc->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* * Truncate collected array by the number of events added in this @@ -1786,6 +1808,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu) int assign[X86_PMC_IDX_MAX]; int n, ret; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (!x86_pmu_initialized()) @@ -1802,6 +1831,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..c99c93b9e348 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -196,6 +196,7 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ unsigned int group_flag; + unsigned int txn_flags; int is_fake; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d61ee4459eee..ea3b5dd21a4c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -201,6 +201,8 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ + /** * pmu::capabilities flags */ @@ -331,20 +333,26 @@ struct pmu { * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. + * + * Optional. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. + * + * Optional. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. + * + * Optional. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, diff --git a/kernel/events/core.c b/kernel/events/core.c index 76e64be9bfb5..c80cee82959f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1914,7 +1914,7 @@ group_sched_in(struct perf_event *group_event, if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - pmu->start_txn(pmu); + pmu->start_txn(pmu, PERF_PMU_TXN_ADD); if (event_sched_in(group_event, cpuctx, ctx)) { pmu->cancel_txn(pmu); @@ -7267,24 +7267,49 @@ static void perf_pmu_nop_void(struct pmu *pmu) { } +static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags) +{ +} + static int perf_pmu_nop_int(struct pmu *pmu) { return 0; } -static void perf_pmu_start_txn(struct pmu *pmu) +DEFINE_PER_CPU(unsigned int, nop_txn_flags); + +static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) { + __this_cpu_write(nop_txn_flags, flags); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); } static int perf_pmu_commit_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return 0; + perf_pmu_enable(pmu); return 0; } static void perf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int flags = __this_cpu_read(nop_txn_flags); + + __this_cpu_write(nop_txn_flags, 0); + + if (flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_enable(pmu); } @@ -7523,7 +7548,7 @@ got_cpu_context: pmu->commit_txn = perf_pmu_commit_txn; pmu->cancel_txn = perf_pmu_cancel_txn; } else { - pmu->start_txn = perf_pmu_nop_void; + pmu->start_txn = perf_pmu_nop_txn; pmu->commit_txn = perf_pmu_nop_int; pmu->cancel_txn = perf_pmu_nop_void; } -- cgit v1.2.3 From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:51 -0700 Subject: perf/core: Define PERF_PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu->start_txn() // Initialize before first event for each event in group pmu->read(event); // Queue each event to be read rc = pmu->commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the ->read() operation to _queue_ the counters to be read and use ->commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and ->commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + kernel/events/core.c | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea3b5dd21a4c..b83cea932f74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -202,6 +202,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags diff --git a/kernel/events/core.c b/kernel/events/core.c index ade04dfab368..55b0f7cf2614 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3199,6 +3199,7 @@ static void __perf_event_read(void *info) struct perf_event *sub, *event = data->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct pmu *pmu = event->pmu; /* * If this is a task context, we need to check whether it is @@ -3217,18 +3218,31 @@ static void __perf_event_read(void *info) } update_event_times(event); - if (event->state == PERF_EVENT_STATE_ACTIVE) - event->pmu->read(event); + if (event->state != PERF_EVENT_STATE_ACTIVE) + goto unlock; - if (!data->group) + if (!data->group) { + pmu->read(event); + data->ret = 0; goto unlock; + } + + pmu->start_txn(pmu, PERF_PMU_TXN_READ); + + pmu->read(event); list_for_each_entry(sub, &event->sibling_list, group_entry) { update_event_times(sub); - if (sub->state == PERF_EVENT_STATE_ACTIVE) + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + /* + * Use sibling's PMU rather than @event's since + * sibling could be on different (eg: software) PMU. + */ sub->pmu->read(sub); + } } - data->ret = 0; + + data->ret = pmu->commit_txn(pmu); unlock: raw_spin_unlock(&ctx->lock); -- cgit v1.2.3 From 8f3e5684d3fbd91ead283916676fa3dac22615e5 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:53 -0700 Subject: perf/core: Drop PERF_EVENT_TXN We currently use PERF_EVENT_TXN flag to determine if we are in the middle of a transaction. If in a transaction, we defer the schedulability checks from pmu->add() operation to the pmu->commit() operation. Now that we have "transaction types" (PERF_PMU_TXN_ADD, PERF_PMU_TXN_READ) we can use the type to determine if we are in a transaction and drop the PERF_EVENT_TXN flag. When PERF_EVENT_TXN is dropped, the cpuhw->group_flag on some architectures becomes unused, so drop that field as well. This is an extension of the Powerpc patch from Peter Zijlstra to s390, Sparc and x86 architectures. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-11-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/perf/core-book3s.c | 6 +----- arch/s390/kernel/perf_cpum_cf.c | 5 +---- arch/sparc/kernel/perf_event.c | 6 +----- arch/x86/kernel/cpu/perf_event.c | 7 ++----- arch/x86/kernel/cpu/perf_event.h | 1 - include/linux/perf_event.h | 2 -- 6 files changed, 5 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c84074185c80..d1e65ce545b3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -48,7 +48,6 @@ struct cpu_hw_events { unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; - unsigned int group_flag; unsigned int txn_flags; int n_txn_start; @@ -1442,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN) + if (cpuhw->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -1603,7 +1602,6 @@ static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -1624,7 +1622,6 @@ static void power_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1659,7 +1656,6 @@ static int power_pmu_commit_txn(struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuhw->group_flag &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 19138be412d6..cb774ff6e749 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -536,7 +536,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) * For group events transaction, the authorization check is * done in cpumf_pmu_commit_txn(). */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) if (validate_ctr_auth(&event->hw)) return -EPERM; @@ -590,7 +590,6 @@ static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; } @@ -613,7 +612,6 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) WARN_ON(cpuhw->tx_state != cpuhw->state); - cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -640,7 +638,6 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) if ((state & cpuhw->info.auth_ctl) != state) return -EPERM; - cpuhw->flags &= ~PERF_EVENT_TXN; cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2c0984d146ec..b0da5aedb336 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -108,7 +108,6 @@ struct cpu_hw_events { /* Enabled/disable state. */ int enabled; - unsigned int group_flag; unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1380,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1506,7 +1505,6 @@ static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1526,7 +1524,6 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1556,7 +1553,6 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; - cpuc->group_flag &= ~PERF_EVENT_TXN; cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dc77ef470e0e..4562cf070c27 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * XXX assumes any ->del() called during a TXN will only be on * an event added during that same TXN. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) return; /* @@ -1764,7 +1764,6 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) return; perf_pmu_disable(pmu); - __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); } @@ -1785,7 +1784,6 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) if (txn_flags & ~PERF_PMU_TXN_ADD) return; - __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* * Truncate collected array by the number of events added in this * transaction. See x86_pmu_add() and x86_pmu_*_txn(). @@ -1830,7 +1828,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - cpuc->group_flag &= ~PERF_EVENT_TXN; cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c99c93b9e348..953a0e4e3284 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -195,7 +195,6 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ - unsigned int group_flag; unsigned int txn_flags; int is_fake; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b83cea932f74..d841d33bcdc9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -199,8 +199,6 @@ struct perf_event; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_EVENT_TXN 0x1 - #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ -- cgit v1.2.3 From b9511cd761faafca7a1acc059e792c1399f9d7c6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 16 Oct 2015 16:24:05 +0300 Subject: perf/x86: Fix time_shift in perf_event_mmap_page Commit: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock") allowed the time_shift value in perf_event_mmap_page to be as much as 32. Unfortunately the documented algorithms for using time_shift have it shifting an integer, whereas to work correctly with the value 32, the type must be u64. In the case of perf tools, Intel PT decodes correctly but the timestamps that are output (for example by perf script) have lost 32-bits of granularity so they look like they are not changing at all. Fix by limiting the shift to 31 and adjusting the multiplier accordingly. Also update the documentation of perf_event_mmap_page so that new code based on it will be more future-proof. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock") Link: http://lkml.kernel.org/r/1445001845-13688-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 11 +++++++++++ include/uapi/linux/perf_event.h | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 69b84a26ea17..c7c4d9c51e99 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -259,6 +259,17 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, NSEC_PER_MSEC, 0); + /* + * cyc2ns_shift is exported via arch_perf_update_userpage() where it is + * not expected to be greater than 31 due to the original published + * conversion algorithm shifting a 32-bit value (now specifies a 64-bit + * value) - refer perf_event_mmap_page documentation in perf_event.h. + */ + if (data->cyc2ns_shift == 32) { + data->cyc2ns_shift = 31; + data->cyc2ns_mul >>= 1; + } + data->cyc2ns_offset = ns_now - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2881145cda86..6c72e72e975c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -476,7 +476,7 @@ struct perf_event_mmap_page { * u64 delta; * * quot = (cyc >> time_shift); - * rem = cyc & ((1 << time_shift) - 1); + * rem = cyc & (((u64)1 << time_shift) - 1); * delta = time_offset + quot * time_mult + * ((rem * time_mult) >> time_shift); * @@ -507,7 +507,7 @@ struct perf_event_mmap_page { * And vice versa: * * quot = cyc >> time_shift; - * rem = cyc & ((1 << time_shift) - 1); + * rem = cyc & (((u64)1 << time_shift) - 1); * timestamp = time_zero + quot * time_mult + * ((rem * time_mult) >> time_shift); */ -- cgit v1.2.3 From c229bf9dc179d2023e185c0f705bdf68484c1e73 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 13 Oct 2015 09:09:08 +0200 Subject: perf: Add PERF_SAMPLE_BRANCH_CALL Add a new branch sample type to cover only call branches (function calls). The current ANY_CALL included direct, indirect calls and far jumps. We want to be able to differentiate indirect from direct calls. Therefore we introduce PERF_SAMPLE_BRANCH_CALL. The implementation is up to each architecture. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: khandual@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1444720151-10275-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 6c72e72e975c..651221334f49 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -168,6 +168,7 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ + PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -188,6 +189,7 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; -- cgit v1.2.3