aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/scripts/python/stackcollapse.py
blob: 5a605f70ef32268fb92a749c970cba321cba11d1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# stackcollapse.py - format perf samples with one line per distinct call stack
#
# This script's output has two space-separated fields.  The first is a semicolon
# separated stack including the program name (from the "comm" field) and the
# function names from the call stack.  The second is a count:
#
#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
#
# The file is sorted according to the first field.
#
# Input may be created and processed using:
#
#  perf record -a -g -F 99 sleep 60
#  perf script report stackcollapse > out.stacks-folded
#
# (perf script record stackcollapse works too).
#
# Written by Paolo Bonzini <pbonzini@redhat.com>
# Based on Brendan Gregg's stackcollapse-perf.pl script.

import os
import sys
from collections import defaultdict
from optparse import OptionParser, make_option

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
                '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *
from Core import *
from EventClass import *

# command line parsing

option_list = [
    # formatting options for the bottom entry of the stack
    make_option("--include-tid", dest="include_tid",
                 action="store_true", default=False,
                 help="include thread id in stack"),
    make_option("--include-pid", dest="include_pid",
                 action="store_true", default=False,
                 help="include process id in stack"),
    make_option("--no-comm", dest="include_comm",
                 action="store_false", default=True,
                 help="do not separate stacks according to comm"),
    make_option("--tidy-java", dest="tidy_java",
                 action="store_true", default=False,
                 help="beautify Java signatures"),
    make_option("--kernel", dest="annotate_kernel",
                 action="store_true", default=False,
                 help="annotate kernel functions with _[k]")
]

parser = OptionParser(option_list=option_list)
(opts, args) = parser.parse_args()

if len(args) != 0:
    parser.error("unexpected command line argument")
if opts.include_tid and not opts.include_comm:
    parser.error("requesting tid but not comm is invalid")
if opts.include_pid and not opts.include_comm:
    parser.error("requesting pid but not comm is invalid")

# event handlers

lines = defaultdict(lambda: 0)

def process_event(param_dict):
    def tidy_function_name(sym, dso):
        if sym is None:
            sym = '[unknown]'

        sym = sym.replace(';', ':')
        if opts.tidy_java:
            # the original stackcollapse-perf.pl script gives the
            # example of converting this:
            #    Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
            # to this:
            #    org/mozilla/javascript/MemberBox:.init
            sym = sym.replace('<', '')
            sym = sym.replace('>', '')
            if sym[0] == 'L' and sym.find('/'):
                sym = sym[1:]
            try:
                sym = sym[:sym.index('(')]
            except ValueError:
                pass

        if opts.annotate_kernel and dso == '[kernel.kallsyms]':
            return sym + '_[k]'
        else:
            return sym

    stack = list()
    if 'callchain' in param_dict:
        for entry in param_dict['callchain']:
            entry.setdefault('sym', dict())
            entry['sym'].setdefault('name', None)
            entry.setdefault('dso', None)
            stack.append(tidy_function_name(entry['sym']['name'],
                                            entry['dso']))
    else:
        param_dict.setdefault('symbol', None)
        param_dict.setdefault('dso', None)
        stack.append(tidy_function_name(param_dict['symbol'],
                                        param_dict['dso']))

    if opts.include_comm:
        comm = param_dict["comm"].replace(' ', '_')
        sep = "-"
        if opts.include_pid:
            comm = comm + sep + str(param_dict['sample']['pid'])
            sep = "/"
        if opts.include_tid:
            comm = comm + sep + str(param_dict['sample']['tid'])
        stack.append(comm)

    stack_string = ';'.join(reversed(stack))
    lines[stack_string] = lines[stack_string] + 1

def trace_end():
    list = lines.keys()
    list.sort()
    for stack in list:
        print "%s %d" % (stack, lines[stack])
-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com> Link: https://lore.kernel.org/all/20211223153139.7661-2-repk@triplefau.lt/ Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2021-12-23net: stmmac: ptp: fix potentially overflowing expressionXiaoliang Yang1-1/+1 Convert the u32 variable to type u64 in a context where expression of type u64 is required to avoid potential overflow. Fixes: e9e3720002f6 ("net: stmmac: ptp: update tas basetime after ptp adjust") Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com> Link: https://lore.kernel.org/r/20211223073928.37371-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2021-12-23net: dsa: tag_ocelot: use traffic class to map priority on injected headerXiaoliang Yang1-1/+5 For Ocelot switches, the CPU injected frames have an injection header where it can specify the QoS class of the packet and the DSA tag, now it uses the SKB priority to set that. If a traffic class to priority mapping is configured on the netdevice (with mqprio for example ...), it won't be considered for CPU injected headers. This patch make the QoS class aligned to the priority to traffic class mapping if it exists. Fixes: 8dce89aa5f32 ("net: dsa: ocelot: add tagger for Ocelot/Felix switches") Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com> Signed-off-by: Marouen Ghodhbane <marouen.ghodhbane@nxp.com> Link: https://lore.kernel.org/r/20211223072211.33130-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2021-12-23veth: ensure skb entering GRO are not cloned.Paolo Abeni1-2/+6 After commit d3256efd8e8b ("veth: allow enabling NAPI even without XDP"), if GRO is enabled on a veth device and TSO is disabled on the peer device, TCP skbs will go through the NAPI callback. If there is no XDP program attached, the veth code does not perform any share check, and shared/cloned skbs could enter the GRO engine. Ignat reported a BUG triggered later-on due to the above condition: [ 53.970529][ C1] kernel BUG at net/core/skbuff.c:3574! [ 53.981755][ C1] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 53.982634][ C1] CPU: 1 PID: 19 Comm: ksoftirqd/1 Not tainted 5.16.0-rc5+ #25 [ 53.982634][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 53.982634][ C1] RIP: 0010:skb_shift+0x13ef/0x23b0 [ 53.982634][ C1] Code: ea 03 0f b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 41 0c 00 00 41 80 7f 02 00 4d 8d b5 d0 00 00 00 0f 85 74 f5 ff ff <0f> 0b 4d 8d 77 20 be 04 00 00 00 4c 89 44 24 78 4c 89 f7 4c 89 8c [ 53.982634][ C1] RSP: 0018:ffff8881008f7008 EFLAGS: 00010246 [ 53.982634][ C1] RAX: 0000000000000000 RBX: ffff8881180b4c80 RCX: 0000000000000000 [ 53.982634][ C1] RDX: 0000000000000002 RSI: ffff8881180b4d3c RDI: ffff88810bc9cac2 [ 53.982634][ C1] RBP: ffff8881008f70b8 R08: ffff8881180b4cf4 R09: ffff8881180b4cf0 [ 53.982634][ C1] R10: ffffed1022999e5c R11: 0000000000000002 R12: 0000000000000590 [ 53.982634][ C1] R13: ffff88810f940c80 R14: ffff88810f940d50 R15: ffff88810bc9cac0 [ 53.982634][ C1] FS: 0000000000000000(0000) GS:ffff888235880000(0000) knlGS:0000000000000000 [ 53.982634][ C1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.982634][ C1] CR2: 00007ff5f9b86680 CR3: 0000000108ce8004 CR4: 0000000000170ee0 [ 53.982634][ C1] Call Trace: [ 53.982634][ C1] <TASK> [ 53.982634][ C1] tcp_sacktag_walk+0xaba/0x18e0 [ 53.982634][ C1] tcp_sacktag_write_queue+0xe7b/0x3460 [ 53.982634][ C1] tcp_ack+0x2666/0x54b0 [ 53.982634][ C1] tcp_rcv_established+0x4d9/0x20f0 [ 53.982634][ C1] tcp_v4_do_rcv+0x551/0x810 [ 53.982634][ C1] tcp_v4_rcv+0x22ed/0x2ed0 [ 53.982634][ C1] ip_protocol_deliver_rcu+0x96/0xaf0 [ 53.982634][ C1] ip_local_deliver_finish+0x1e0/0x2f0 [ 53.982634][ C1] ip_sublist_rcv_finish+0x211/0x440 [ 53.982634][ C1] ip_list_rcv_finish.constprop.0+0x424/0x660 [ 53.982634][ C1] ip_list_rcv+0x2c8/0x410 [ 53.982634][ C1] __netif_receive_skb_list_core+0x65c/0x910 [ 53.982634][ C1] netif_receive_skb_list_internal+0x5f9/0xcb0 [ 53.982634][ C1] napi_complete_done+0x188/0x6e0 [ 53.982634][ C1] gro_cell_poll+0x10c/0x1d0 [ 53.982634][ C1] __napi_poll+0xa1/0x530 [ 53.982634][ C1] net_rx_action+0x567/0x1270 [ 53.982634][ C1] __do_softirq+0x28a/0x9ba [ 53.982634][ C1] run_ksoftirqd+0x32/0x60 [ 53.982634][ C1] smpboot_thread_fn+0x559/0x8c0 [ 53.982634][ C1] kthread+0x3b9/0x490 [ 53.982634][ C1] ret_from_fork+0x22/0x30 [ 53.982634][ C1] </TASK> Address the issue by skipping the GRO stage for shared or cloned skbs. To reduce the chance of OoO, try to unclone the skbs before giving up. v1 -> v2: - use avoid skb_copy and fallback to netif_receive_skb - Eric Reported-by: Ignat Korchagin <ignat@cloudflare.com> Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Signed-off-by: Paolo Abeni <pabeni@redhat.com> Tested-by: Ignat Korchagin <ignat@cloudflare.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/b5f61c5602aab01bac8d711d8d1bfab0a4817db7.1640197544.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> 2021-12-23selftests: mlxsw: devlink_trap_tunnel_vxlan: Fix 'decap_error' caseAmit Cohen1-2/+5 Change the case that sends packets with "too short inner packet" to include part of ethernet header, to make the trap to be triggered due to the correct reason. According to ASIC arch, the trap is triggered if overlay packet length is less than 18B, and the minimum inner packet should include source MAC and destination MAC. Till now the case passed because one of the reserved bits in VxLAN header was used. This issue was found while adding an equivalent test for IPv6. Signed-off-by: Amit Cohen <amcohen@nvidia.com> Reviewed-by: Petr Machata <petrm@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>