From 378831e4daec75fbba6d3612bcf3b4dd00ddbf08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 May 2018 21:25:46 +0100 Subject: afs: Fix directory permissions check Doing faccessat("/afs/some/directory", 0) triggers a BUG in the permissions check code. Fix this by just removing the BUG section. If no permissions are asked for, just return okay if the file exists. Also: (1) Split up the directory check so that it has separate if-statements rather than if-else-if (e.g. checking for MAY_EXEC shouldn't skip the check for MAY_READ and MAY_WRITE). (2) Check for MAY_CHDIR as MAY_EXEC. Without the main fix, the following BUG may occur: kernel BUG at fs/afs/security.c:386! invalid opcode: 0000 [#1] SMP PTI ... RIP: 0010:afs_permission+0x19d/0x1a0 [kafs] ... Call Trace: ? inode_permission+0xbe/0x180 ? do_faccessat+0xdc/0x270 ? do_syscall_64+0x60/0x1f0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 00d3b7a4533e ("[AFS]: Add security support.") Reported-by: Jonathan Billings Signed-off-by: David Howells --- fs/afs/security.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 1992b0ffa543..81dfedb7879f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -372,18 +372,14 @@ int afs_permission(struct inode *inode, int mask) mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file"); if (S_ISDIR(inode->i_mode)) { - if (mask & MAY_EXEC) { + if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; - } else if (mask & MAY_READ) { - if (!(access & AFS_ACE_LOOKUP)) - goto permission_denied; - } else if (mask & MAY_WRITE) { + } + if (mask & MAY_WRITE) { if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */ AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */ goto permission_denied; - } else { - BUG(); } } else { if (!(access & AFS_ACE_LOOKUP)) -- cgit v1.2.3 From 1fba5868eed82766fb374c7d367166706f9269d5 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 16 May 2018 11:04:23 -0300 Subject: afs: Fix mounting of backup volumes In theory the AFS_VLSF_BACKVOL flag for a server in a vldb entry would indicate the presence of a backup volume on that server. In practice however, this flag is never set, and the presence of a backup volume is implied by the entry having AFS_VLF_BACKEXISTS set, for the server that hosts the read-write volume (has AFS_VLSF_RWVOL). Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/vlclient.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 1ed7e2fd2f35..c3b740813fc7 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -23,7 +23,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uvldbentry__xdr *uvldb; struct afs_vldb_entry *entry; bool new_only = false; - u32 tmp, nr_servers; + u32 tmp, nr_servers, vlflags; int i, ret; _enter(""); @@ -55,6 +55,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) new_only = true; } + vlflags = ntohl(uvldb->flags); for (i = 0; i < nr_servers; i++) { struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; @@ -64,12 +65,13 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) if (tmp & AFS_VLSF_DONTUSE || (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) continue; - if (tmp & AFS_VLSF_RWVOL) + if (tmp & AFS_VLSF_RWVOL) { entry->fs_mask[i] |= AFS_VOL_VTM_RW; + if (vlflags & AFS_VLF_BACKEXISTS) + entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + } if (tmp & AFS_VLSF_ROVOL) entry->fs_mask[i] |= AFS_VOL_VTM_RO; - if (tmp & AFS_VLSF_BACKVOL) - entry->fs_mask[i] |= AFS_VOL_VTM_BAK; if (!entry->fs_mask[i]) continue; @@ -89,15 +91,14 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) for (i = 0; i < AFS_MAXTYPES; i++) entry->vid[i] = ntohl(uvldb->volumeId[i]); - tmp = ntohl(uvldb->flags); - if (tmp & AFS_VLF_RWEXISTS) + if (vlflags & AFS_VLF_RWEXISTS) __set_bit(AFS_VLDB_HAS_RW, &entry->flags); - if (tmp & AFS_VLF_ROEXISTS) + if (vlflags & AFS_VLF_ROEXISTS) __set_bit(AFS_VLDB_HAS_RO, &entry->flags); - if (tmp & AFS_VLF_BACKEXISTS) + if (vlflags & AFS_VLF_BACKEXISTS) __set_bit(AFS_VLDB_HAS_BAK, &entry->flags); - if (!(tmp & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { + if (!(vlflags & (AFS_VLF_RWEXISTS | AFS_VLF_ROEXISTS | AFS_VLF_BACKEXISTS))) { entry->error = -ENOMEDIUM; __set_bit(AFS_VLDB_QUERY_ERROR, &entry->flags); } -- cgit v1.2.3 From d8de7565735af5a95057c83eedb38e36cfaa04b5 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 16 May 2018 14:06:32 +0200 Subject: s390/purgatory: Fix endless interrupt loop New compilers use the floating-point registers as spill registers when there is high register pressure. In the purgatory however, the afp control bit is not set. This leads to an exception whenever a floating-point instruction is used, which again causes an interrupt loop. Forbid the compiler to use floating-point instructions by adding -msoft-float to KBUILD_CFLAGS. Signed-off-by: Philipp Rudo Fixes: 840798a1f529 (s390/kexec_file: Add purgatory) Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index e9525bc1b4a6..1ace023cbdce 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -21,7 +21,7 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE -- cgit v1.2.3 From aeaa7af744fadf5fa03bdea2d12ecc0fa2a41542 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 11:06:31 +0800 Subject: nds32: lib: To use generic lib instead of libgcc to prevent the symbol undefined issue. We can use the generic lib to fix these error because the symbol of libgcc in toolchain is not exported. ERROR: "__ucmpdi2" [fs/xfs/xfs.ko] undefined! ERROR: "__ashrdi3" [fs/xfs/xfs.ko] undefined! ERROR: "__lshrdi3" [fs/xfs/xfs.ko] undefined! ERROR: "__ashldi3" [fs/ntfs/ntfs.ko] undefined! ... Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/Kconfig | 6 ++++++ arch/nds32/Makefile | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 249f38d3388f..98e05f997f91 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -9,6 +9,12 @@ config NDS32 select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK + select GENERIC_ASHLDI3 + select GENERIC_ASHRDI3 + select GENERIC_LSHRDI3 + select GENERIC_CMPDI2 + select GENERIC_MULDI3 + select GENERIC_UCMPDI2 select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 91f933d5a962..20edf34e70ce 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -23,9 +23,6 @@ export TEXTADDR # If we have a machine-specific directory, then include it in the build. core-y += arch/nds32/kernel/ arch/nds32/mm/ libs-y += arch/nds32/lib/ -LIBGCC_PATH := \ - $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) -libs-y += $(LIBGCC_PATH) ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""' BUILTIN_DTB := y -- cgit v1.2.3 From 6ca4b262dbc94b3ed5fd4c8c1ad4d86616286779 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 11:17:35 +0800 Subject: nds32: Fix building error when CONFIG_FREEZE is enabled. To include kernel/Kconfig.freezer to make sure the dependency between CONFIG_CGROUP_FREEZER and CONFIG_FREEZER It will cause building error when I make allmodconfig. kernel/cgroup/freezer.c: In function 'freezer_css_online': kernel/cgroup/freezer.c:116:15: error: 'system_freezing_cnt' undeclared (first use in this function) atomic_inc(&system_freezing_cnt); ^~~~~~~~~~~~~~~~~~~ kernel/cgroup/freezer.c:116:15: note: each undeclared identifier is reported only once for each function it appears in kernel/cgroup/freezer.c: In function 'freezer_css_offline': kernel/cgroup/freezer.c:137:15: error: 'system_freezing_cnt' undeclared (first use in this function) atomic_dec(&system_freezing_cnt); ^~~~~~~~~~~~~~~~~~~ kernel/cgroup/freezer.c: In function 'freezer_attach': kernel/cgroup/freezer.c:181:4: error: implicit declaration of function 'freeze_task' [-Werror=implicit-function-declaration] freeze_task(task); ^~~~~~~~~~~ kernel/cgroup/freezer.c: In function 'freezer_apply_state': kernel/cgroup/freezer.c:360:16: error: 'system_freezing_cnt' undeclared (first use in this function) atomic_inc(&system_freezing_cnt); ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 98e05f997f91..b7404f2dcf5b 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -88,6 +88,7 @@ endmenu menu "Kernel Features" source "kernel/Kconfig.preempt" +source "kernel/Kconfig.freezer" source "mm/Kconfig" source "kernel/Kconfig.hz" endmenu -- cgit v1.2.3 From 81560e011a3420db03f7b021d1bc37a2242634f7 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 11:27:59 +0800 Subject: nds32: Fix building error of crypto/xor.c by adding xor.h When I compiled with allmodconfig, it caused this building failed. crypto/xor.c:25:21: fatal error: asm/xor.h: No such file or directory #include ^ compilation terminated. Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index 06bdf8167f5a..a64e87cc8b49 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -49,6 +49,7 @@ generic-y += switch_to.h generic-y += timex.h generic-y += topology.h generic-y += trace_clock.h +generic-y += xor.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h -- cgit v1.2.3 From 8cedb78fdf82468020a2f3b24ef63ccd397662a2 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 11:33:01 +0800 Subject: nds32: Fix drivers/gpu/drm/udl/udl_fb.c building error by defining PAGE_SHARED It broke the 'allmodconfig' build. drivers/gpu/drm/udl/udl_fb.c: In function 'udl_fb_mmap': drivers/gpu/drm/udl/udl_fb.c:183:52: error: 'PAGE_SHARED' undeclared (first use in this function) if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) ^~~~~~~~~~~ drivers/gpu/drm/udl/udl_fb.c:183:52: note: each undeclared identifier is reported only once for each function it appears in make[4]: *** [drivers/gpu/drm/udl/udl_fb.o] Error 1 Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6783937edbeb..d3e19a55cf53 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -152,6 +152,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_CACHE_L1 __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE) #define PAGE_MEMORY __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) #define PAGE_KERNEL __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) +#define PAGE_SHARED __pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D | _PAGE_CACHE_SHRD) #define PAGE_DEVICE __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 03969d0b3a3f6a7cd2c731eed1c4ccf76da88586 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 15:38:58 +0800 Subject: nds32: Fix xfs_buf built failed by export invalidate_kernel_vmap_range and flush_kernel_vmap_range It broke the 'allmodconfig' build. fs/xfs/xfs_buf.c: In function 'xfs_buf_bio_end_io': fs/xfs/xfs_buf.c:1242:3: error: implicit declaration of function 'invalidate_kernel_vmap_range' [-Werror=implicit-function-declaration] invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/xfs/xfs_buf.c: In function 'xfs_buf_ioapply_map': fs/xfs/xfs_buf.c:1312:4: error: implicit declaration of function 'flush_kernel_vmap_range' [-Werror=implicit-function-declaration] flush_kernel_vmap_range(bp->b_addr, ^~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/include/asm/cacheflush.h | 2 ++ arch/nds32/mm/cacheflush.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 1240f148ec0f..10b48f0d8e85 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -32,6 +32,8 @@ void flush_anon_page(struct vm_area_struct *vma, #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page(struct page *page); +void flush_kernel_vmap_range(void *addr, int size); +void invalidate_kernel_vmap_range(void *addr, int size); void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_page(struct vm_area_struct *vma, struct page *page); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 6eb786a399a2..bd52918d5923 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -273,6 +273,24 @@ void flush_kernel_dcache_page(struct page *page) local_irq_restore(flags); } +void flush_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_wb_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_inval_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); + void flush_icache_range(unsigned long start, unsigned long end) { unsigned long line_size, flags; -- cgit v1.2.3 From e3f4624388731eb475ef7494b43440b3ad9269e1 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 15:45:45 +0800 Subject: nds32: Fix the symbols undefined issue by exporting them. It broke the 'allmodconfig' build. LD vmlinux SYSMAP System.map Building modules, stage 2. MODPOST 5028 modules ERROR: "flush_dcache_page" [net/sunrpc/xprtrdma/rpcrdma.ko] undefined! ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined! ERROR: "save_stack_trace" [kernel/backtracetest.ko] undefined! ERROR: "clear_page" [fs/ocfs2/dlm/ocfs2_dlm.ko] undefined! ERROR: "copy_page" [fs/nilfs2/nilfs2.ko] undefined! ... Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/include/asm/Kbuild | 1 + arch/nds32/include/asm/page.h | 3 +++ arch/nds32/kernel/stacktrace.c | 2 ++ arch/nds32/lib/copy_page.S | 3 +++ arch/nds32/mm/cacheflush.c | 22 ++++++++++++++++++++++ arch/nds32/mm/init.c | 1 + 6 files changed, 32 insertions(+) diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index a64e87cc8b49..142e612aa639 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += dma.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += export.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index e27365c097b6..947f0491c9a7 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -27,6 +27,9 @@ extern void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma); extern void clear_user_highpage(struct page *page, unsigned long vaddr); +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to); +void clear_user_page(void *addr, unsigned long vaddr, struct page *page); #define __HAVE_ARCH_COPY_USER_HIGHPAGE #define clear_user_highpage clear_user_highpage #else diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index bc70113c0e84..8b231e910ea6 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -9,6 +9,7 @@ void save_stack_trace(struct stack_trace *trace) { save_stack_trace_tsk(current, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { @@ -45,3 +46,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) fpn = (unsigned long *)fpp; } } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/nds32/lib/copy_page.S b/arch/nds32/lib/copy_page.S index 4a2ff85f17ee..f8701ed161a8 100644 --- a/arch/nds32/lib/copy_page.S +++ b/arch/nds32/lib/copy_page.S @@ -2,6 +2,7 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include +#include #include .text @@ -16,6 +17,7 @@ ENTRY(copy_page) popm $r2, $r10 ret ENDPROC(copy_page) +EXPORT_SYMBOL(copy_page) ENTRY(clear_page) pushm $r1, $r9 @@ -35,3 +37,4 @@ ENTRY(clear_page) popm $r1, $r9 ret ENDPROC(clear_page) +EXPORT_SYMBOL(clear_page) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index bd52918d5923..ee44ad96b6ed 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -147,6 +147,25 @@ void flush_cache_vunmap(unsigned long start, unsigned long end) cpu_icache_inval_all(); } +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + copy_page(vto, vfrom); + cpu_dcache_wbinval_page((unsigned long)vto); + cpu_icache_inval_page((unsigned long)vto); +} + +void clear_user_page(void *addr, unsigned long vaddr, struct page *page) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + clear_page(addr); + cpu_dcache_wbinval_page((unsigned long)addr); + cpu_icache_inval_page((unsigned long)addr); +} + void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { @@ -213,6 +232,7 @@ void flush_dcache_page(struct page *page) local_irq_restore(flags); } } +EXPORT_SYMBOL(flush_dcache_page); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len) @@ -272,6 +292,7 @@ void flush_kernel_dcache_page(struct page *page) cpu_dcache_wbinval_page((unsigned long)page_address(page)); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_kernel_dcache_page); void flush_kernel_vmap_range(void *addr, int size) { @@ -301,6 +322,7 @@ void flush_icache_range(unsigned long start, unsigned long end) cpu_cache_wbinval_range(start, end, 1); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_icache_range); void flush_icache_page(struct vm_area_struct *vma, struct page *page) { diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 93ee0160720b..c713d2ad55dc 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -30,6 +30,7 @@ extern unsigned long phys_initrd_size; * zero-initialized data and COW. */ struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); static void __init zone_sizes_init(void) { -- cgit v1.2.3 From bb912671e36552e11352a6b749dcf68b0df6ad01 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 15:59:38 +0800 Subject: nds32: Fix the unknown type u8 issue. It broke the 'allmodconfig' build. We need to include to make sure the type is defined before using it. Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h index 966e71b3c960..71cd226d6863 100644 --- a/arch/nds32/include/asm/io.h +++ b/arch/nds32/include/asm/io.h @@ -4,6 +4,8 @@ #ifndef __ASM_NDS32_IO_H #define __ASM_NDS32_IO_H +#include + extern void iounmap(volatile void __iomem *addr); #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) -- cgit v1.2.3 From c8c20f9c162da25ea0893d5c5ded3a89382a2b5e Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 16:04:36 +0800 Subject: nds32: Fix build failed because arch_trace_hardirqs_off is changed to trace_hardirqs_off. It broke the 'allmodconfig' build when CONFIG_TRACE_IRQFLAGS is enabled. Signed-off-by: Nick Chun-Ming Hu Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann --- arch/nds32/kernel/ex-entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index a72e83d804f5..b8ae4e9a6b93 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -118,7 +118,7 @@ common_exception_handler: /* interrupt */ 2: #ifdef CONFIG_TRACE_IRQFLAGS - jal arch_trace_hardirqs_off + jal trace_hardirqs_off #endif move $r0, $sp sethi $lp, hi20(ret_from_intr) -- cgit v1.2.3 From b3b112bb06eb26ce44af63d599aaf5fe54de0699 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 19 Apr 2018 16:26:43 +0800 Subject: nds32: Fix the allmodconfig build. To make sure CONFIG_CPU_LITTLE_ENDIAN is default y This way we can build kernel with CONFIG_CPU_LITTLE_ENDIAN=y. Build allmodconfig and allnoconfig are available too. It also fixes the endian mismatch issue because AFLAGS and LDFLAGS is not passed correctly. Signed-off-by: Vincent Ren-Wei Chen Signed-off-by: Greentime Hu --- arch/nds32/Kconfig.cpu | 5 +++-- arch/nds32/Makefile | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index ba44cc539da9..b8c8984d1456 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -1,10 +1,11 @@ comment "Processor Features" config CPU_BIG_ENDIAN - bool "Big endian" + def_bool !CPU_LITTLE_ENDIAN config CPU_LITTLE_ENDIAN - def_bool !CPU_BIG_ENDIAN + bool "Little endian" + default y config HWZOL bool "hardware zero overhead loop support" diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 20edf34e70ce..513bb2e9baf9 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -32,8 +32,12 @@ endif ifdef CONFIG_CPU_LITTLE_ENDIAN KBUILD_CFLAGS += $(call cc-option, -EL) +KBUILD_AFLAGS += $(call cc-option, -EL) +LDFLAGS += $(call cc-option, -EL) else KBUILD_CFLAGS += $(call cc-option, -EB) +KBUILD_AFLAGS += $(call cc-option, -EB) +LDFLAGS += $(call cc-option, -EB) endif boot := arch/nds32/boot -- cgit v1.2.3 From 3ff2228dcea4e5c9b517375bd3aa9e3a1a3f29a0 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 30 Apr 2018 11:21:18 +0800 Subject: nds32: Fix the virtual address may map too much range by tlbop issue. We use tlbop to map virtual address in the first beginning, however it may map too much if DRAM size is not that big. We have to invalidate the mapping when the page table is created. Signed-off-by: Greentime Hu --- arch/nds32/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index ba910e9e4ecb..2f5b2ccebe47 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -293,6 +293,9 @@ void __init setup_arch(char **cmdline_p) /* paging_init() sets up the MMU and marks all pages as reserved */ paging_init(); + /* invalidate all TLB entries because the new mapping is created */ + __nds32__tlbop_flua(); + /* use generic way to parse */ parse_early_param(); -- cgit v1.2.3 From 8769c223491a7fbb345021e7004cbdffe024eaf8 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 30 Apr 2018 11:32:47 +0800 Subject: nds32: To refine readability of INT_MASK_INITAIAL_VAL Refine readability of INT_MASK_INITAIAL_VAL with meaningful macro instead of magic number. Signed-off-by: Greentime Hu --- arch/nds32/include/asm/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index c73f71d67744..28b7d797fd59 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -336,7 +336,7 @@ #define INT_MASK_mskIDIVZE ( 0x1 << INT_MASK_offIDIVZE ) #define INT_MASK_mskDSSIM ( 0x1 << INT_MASK_offDSSIM ) -#define INT_MASK_INITAIAL_VAL 0x10003 +#define INT_MASK_INITAIAL_VAL (INT_MASK_mskDSSIM|INT_MASK_mskIDIVZE) /****************************************************************************** * ir15: INT_PEND (Interrupt Pending Register) -- cgit v1.2.3 From abb90a24eade1f612324de0e6920041ef64795cb Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Mon, 30 Apr 2018 15:02:27 +0800 Subject: nds32: To fix a cache inconsistency issue by setting correct cacheability of NTC The nds32 architecture will use physical memory when interrupt or exception comes and it will use the setting of NTC0-4. The original implementation didn't consider the DRAM start address may start from 1GB, 2GB or 3GB to cause this issue. It will write the data to DRAM if it is running in physical address however kernel will read the data with virtaul address through data cache. In this case, the data of DRAM is latest. This fix will set the correct cacheability to let kernel write/read the latest data in cache instead of DRAM. Signed-off-by: Greentime Hu --- arch/nds32/include/asm/bitfield.h | 1 + arch/nds32/kernel/head.S | 28 +++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 28b7d797fd59..8e84fc385b94 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -396,6 +396,7 @@ #define MMU_CTL_D8KB 1 #define MMU_CTL_UNA ( 0x1 << MMU_CTL_offUNA ) +#define MMU_CTL_CACHEABLE_NON 0 #define MMU_CTL_CACHEABLE_WB 2 #define MMU_CTL_CACHEABLE_WT 3 diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S index 71f57bd70f3b..c5fdae174ced 100644 --- a/arch/nds32/kernel/head.S +++ b/arch/nds32/kernel/head.S @@ -57,14 +57,32 @@ _nodtb: isb mtsr $r4, $L1_PPTB ! load page table pointer\n" -/* set NTC0 cacheable/writeback, mutliple page size in use */ +#ifdef CONFIG_CPU_DCACHE_DISABLE + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_NON +#else + #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WT + #else + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WB + #endif +#endif + +/* set NTC cacheability, mutliple page size in use */ mfsr $r3, $MMU_CTL - li $r0, #~MMU_CTL_mskNTC0 - and $r3, $r3, $r0 +#if CONFIG_MEMORY_START >= 0xc0000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC3) +#elif CONFIG_MEMORY_START >= 0x80000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC2) +#elif CONFIG_MEMORY_START >= 0x40000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC1) +#else + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC0) +#endif + #ifdef CONFIG_ANDES_PAGE_SIZE_4KB - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)) + ori $r3, $r3, #(MMU_CTL_mskMPZIU) #else - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)|MMU_CTL_D8KB) + ori $r3, $r3, #(MMU_CTL_mskMPZIU|MMU_CTL_D8KB) #endif #ifdef CONFIG_HW_SUPPORT_UNALIGNMENT_ACCESS li $r0, #MMU_CTL_UNA -- cgit v1.2.3 From b3a75846a5cfbea137e2810b2bf9407141e70feb Mon Sep 17 00:00:00 2001 From: Nickhu Date: Fri, 4 May 2018 16:40:11 +0800 Subject: nds32: Renaming the file for unaligned access Change the name of the file '/proc/sys/nds32/unaligned_acess' to '/proc/sys/nds32/unaligned_access' Signed-off-by: Nickhu Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/mm/alignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index b96a01b10ca7..e515f6f3d247 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -552,7 +552,7 @@ static struct ctl_table alignment_tbl[3] = { static struct ctl_table nds32_sysctl_table[2] = { { - .procname = "unaligned_acess", + .procname = "unaligned_access", .mode = 0555, .child = alignment_tbl}, {} -- cgit v1.2.3 From 1613de8a785d21b3aac73d2a2e640b66d514393b Mon Sep 17 00:00:00 2001 From: Nickhu Date: Thu, 3 May 2018 10:15:56 +0800 Subject: nds32: Fix the unaligned access handler If the kernel config 'CONFIG_ALIGNMENT_TRAP' and the file '/proc/sys/nds32/unaligned_access/enable' are set, the kernel unaligned access handler does not handle correctly when the value of immediate field is negative. This commit fixes the unaligned access handler in kernel. Signed-off-by: Nickhu Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/mm/alignment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index e515f6f3d247..e1aed9dc692d 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -19,7 +19,7 @@ #define RA(inst) (((inst) >> 15) & 0x1FUL) #define RB(inst) (((inst) >> 10) & 0x1FUL) #define SV(inst) (((inst) >> 8) & 0x3UL) -#define IMM(inst) (((inst) >> 0) & 0x3FFFUL) +#define IMM(inst) (((inst) >> 0) & 0x7FFFUL) #define RA3(inst) (((inst) >> 3) & 0x7UL) #define RT3(inst) (((inst) >> 6) & 0x7UL) @@ -28,6 +28,9 @@ #define RA5(inst) (((inst) >> 0) & 0x1FUL) #define RT4(inst) (((inst) >> 5) & 0xFUL) +#define GET_IMMSVAL(imm_value) \ + (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value) + #define __get8_data(val,addr,err) \ __asm__( \ "1: lbi.bi %1, [%2], #1\n" \ @@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) } if (imm) - shift = IMM(inst) * len; + shift = GET_IMMSVAL(IMM(inst)) * len; else shift = *idx_to_addr(regs, RB(inst)) << SV(inst); -- cgit v1.2.3 From efcc4ea872edaeded28245d9b2ca8b9d8181b7cf Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 24 Apr 2018 15:08:46 +0800 Subject: nds32: Correct flush_dcache_page function 1. Disable local irq before d-cache write-back and invalidate. The cpu_dcache_wbinval_page function is composed of d-cache write-back and invalidate. If the local irq is enabled when calling cpu_dcache_wbinval_page, the content of d-cache is possibly updated between write-back and invalidate. In this case, the updated data will be dropped due to the following d-cache invalidation. Therefore, we disable the local irq before calling cpu_dcache_wbinval_page. 2. Correct the data write-back for page aliasing case. Only the page whose (page->index << PAGE_SHIFT) is located at the same page color as page_address(page) needs to execute data write-back in flush_dcache_page function. Signed-off-by: Vincent Chen Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/mm/cacheflush.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ee44ad96b6ed..02f25ee39c1d 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -217,17 +217,20 @@ void flush_dcache_page(struct page *page) if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else { - int i, pc; - unsigned long vto, kaddr, flags; + unsigned long kaddr, flags; + kaddr = (unsigned long)page_address(page); - cpu_dcache_wbinval_page(kaddr); - pc = CACHE_SET(DCACHE) * CACHE_LINE_SIZE(DCACHE) / PAGE_SIZE; local_irq_save(flags); - for (i = 0; i < pc; i++) { - vto = - kremap0(kaddr + i * PAGE_SIZE, page_to_phys(page)); - cpu_dcache_wbinval_page(vto); - kunmap01(vto); + cpu_dcache_wbinval_page(kaddr); + if (mapping) { + unsigned long vaddr, kto; + + vaddr = page->index << PAGE_SHIFT; + if (aliasing(vaddr, kaddr)) { + kto = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(kto); + kunmap01(kto); + } } local_irq_restore(flags); } -- cgit v1.2.3 From 5b9f95699b2166e06b7d7820c8173fdbff0ddf35 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 14 May 2018 16:56:53 +0800 Subject: nds32: Flush the cache of the page at vmaddr instead of kaddr in flush_anon_page According to Documentation/cachetlb.txt, the cache of the page at vmaddr shall be flushed in flush_anon_page instead of the cache of the page at page_address(page). Signed-off-by: Vincent Chen Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/mm/cacheflush.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 02f25ee39c1d..ae31cd2c21be 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -274,7 +274,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr) { - unsigned long flags; + unsigned long kaddr, flags, ktmp; if (!PageAnon(page)) return; @@ -284,7 +284,12 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_save(flags); if (vma->vm_flags & VM_EXEC) cpu_icache_inval_page(vaddr & PAGE_MASK); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); + kaddr = (unsigned long)page_address(page); + if (aliasing(vaddr, kaddr)) { + ktmp = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(ktmp); + kunmap01(ktmp); + } local_irq_restore(flags); } -- cgit v1.2.3 From aaaaba57509526ce924f997d8690ee9e0c93945a Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 14 May 2018 18:56:35 +0800 Subject: nds32: Disable local irq before calling cpu_dcache_wb_page in copy_user_highpage In order to ensure that all data in source page has been written back to memory before copy_page, the local irq shall be disabled before calling cpu_dcache_wb_page(). In addition, removing unneeded page invalidation for 'to' page. Signed-off-by: Vincent Chen Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/mm/cacheflush.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ae31cd2c21be..ce8fd34497bf 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -175,11 +175,9 @@ void copy_user_highpage(struct page *to, struct page *from, pto = page_to_phys(to); pfrom = page_to_phys(from); + local_irq_save(flags); if (aliasing(vaddr, (unsigned long)kfrom)) cpu_dcache_wb_page((unsigned long)kfrom); - if (aliasing(vaddr, (unsigned long)kto)) - cpu_dcache_inval_page((unsigned long)kto); - local_irq_save(flags); vto = kremap0(vaddr, pto); vfrom = kremap1(vaddr, pfrom); copy_page((void *)vto, (void *)vfrom); -- cgit v1.2.3 From a30e7d1e37e8acc37c25420d93af218166cca3ae Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 21 May 2018 14:36:22 +0800 Subject: nds32: Fix compiler warning, Wstringop-overflow, in vdso.c Getting a compiler warning, Wstringop-overflow, in arch/nds32/kernel/vdso.c when kernel is built by gcc-8. Declaring vdso_start and vdso_end as a pointer to fix this compiler warning. Signed-off-by: Vincent Chen Reviewed-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/kernel/vdso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c index f1198d7a5654..016f15891f6d 100644 --- a/arch/nds32/kernel/vdso.c +++ b/arch/nds32/kernel/vdso.c @@ -23,7 +23,7 @@ #include #include extern struct cache_info L1_cache_info[2]; -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; static unsigned long timer_mapping_base; @@ -66,16 +66,16 @@ static int __init vdso_init(void) int i; struct page **vdso_pagelist; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } /* Creat a timer io mapping to get clock cycles counter */ get_timer_node_info(); - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist */ vdso_pagelist = kcalloc(vdso_pages, sizeof(struct page *), GFP_KERNEL); @@ -83,7 +83,7 @@ static int __init vdso_init(void) return -ENOMEM; for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_spec[1].pages = &vdso_pagelist[0]; return 0; -- cgit v1.2.3 From f0f59a2fab8e52b9d582b39da39f22230ca80aee Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 15 May 2018 14:05:13 +0200 Subject: s390/dasd: use blk_mq_rq_from_pdu for per request data Dasd uses completion_data from struct request to store per request private data - this is problematic since this member is part of a union which is also used by IO schedulers. Let the block layer maintain space for per request data behind each struct request. Fixes crashes on block layer timeouts like this one: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000001308007 R3:00000000fffc8007 S:00000000fffcc000 P:000000000000013d Oops: 0004 ilc:2 [#1] PREEMPT SMP Modules linked in: [...] CPU: 0 PID: 1480 Comm: kworker/0:2H Not tainted 4.17.0-rc4-00046-gaa3bcd43b5af #203 Hardware name: IBM 3906 M02 702 (LPAR) Workqueue: kblockd blk_mq_timeout_work Krnl PSW : 0000000067ac406b 00000000b6960308 (do_raw_spin_trylock+0x30/0x78) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000c00 0000000000000000 0000000000000000 0000000000000001 0000000000b9d3c8 0000000000000000 0000000000000001 00000000cf9639d8 0000000000000000 0700000000000000 0000000000000000 000000000099f09e 0000000000000000 000000000076e9d0 000000006247bb08 000000006247bae0 Krnl Code: 00000000001c159c: b90400c2 lgr %r12,%r2 00000000001c15a0: a7180000 lhi %r1,0 #00000000001c15a4: 583003a4 l %r3,932 >00000000001c15a8: ba132000 cs %r1,%r3,0(%r2) 00000000001c15ac: a7180001 lhi %r1,1 00000000001c15b0: a784000b brc 8,1c15c6 00000000001c15b4: c0e5004e72aa brasl %r14,b8fb08 00000000001c15ba: 1812 lr %r1,%r2 Call Trace: ([<0700000000000000>] 0x700000000000000) [<0000000000b9d3d2>] _raw_spin_lock_irqsave+0x7a/0xb8 [<000000000099f09e>] dasd_times_out+0x46/0x278 [<000000000076ea6e>] blk_mq_terminate_expired+0x9e/0x108 [<000000000077497a>] bt_for_each+0x102/0x130 [<0000000000774e54>] blk_mq_queue_tag_busy_iter+0x74/0xd8 [<000000000076fea0>] blk_mq_timeout_work+0x260/0x320 [<0000000000169dd4>] process_one_work+0x3bc/0x708 [<000000000016a382>] worker_thread+0x262/0x408 [<00000000001723a8>] kthread+0x160/0x178 [<0000000000b9e73a>] kernel_thread_starter+0x6/0xc [<0000000000b9e734>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000b9d3cc>] _raw_spin_lock_irqsave+0x74/0xb8 Kernel panic - not syncing: Fatal exception: panic_on_oops Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 04143c08bd6e..02c03e418c27 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3034,7 +3034,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, cqr->callback_data = req; cqr->status = DASD_CQR_FILLED; cqr->dq = dq; - req->completion_data = cqr; + *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr; + blk_mq_start_request(req); spin_lock(&block->queue_lock); list_add_tail(&cqr->blocklist, &block->ccw_queue); @@ -3058,12 +3059,13 @@ out: */ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) { - struct dasd_ccw_req *cqr = req->completion_data; struct dasd_block *block = req->q->queuedata; struct dasd_device *device; + struct dasd_ccw_req *cqr; unsigned long flags; int rc = 0; + cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)); if (!cqr) return BLK_EH_NOT_HANDLED; @@ -3169,6 +3171,7 @@ static int dasd_alloc_queue(struct dasd_block *block) int rc; block->tag_set.ops = &dasd_mq_ops; + block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *); block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; -- cgit v1.2.3 From 9b2071028f8def49971a3b213ab6efd02a7e56e8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Aug 2017 12:08:13 -0700 Subject: Input: synaptics - Lenovo Carbon X1 Gen5 (2017) devices should use RMI The touchpad on Lenovo Carbon X1 Gen 5 (2017 - Kabylake) is accessible over SMBUS/RMI, so let's activate it by default. Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 60f2c463d1cc..096586f12e4a 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,6 +172,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN200f", /* T450s */ NULL }; -- cgit v1.2.3 From 15e2cffec3aa0d47a8d75ae80e1b136bfb5dff30 Mon Sep 17 00:00:00 2001 From: Edvard Holst Date: Sat, 3 Feb 2018 11:46:15 -0800 Subject: Input: synaptics - Lenovo Thinkpad X1 Carbon G5 (2017) with Elantech trackpoints should use RMI Lenovo use two different trackpoints in the fifth generation Thinkpad X1 Carbon. Both are accessible over SMBUS/RMI but the pnpIDs are missing. This patch is for the Elantech trackpoint specifically which also reports SMB version 3 so rmi_smbus needs to be updated in order to handle it. For the record, I was not the first one to come up with this patch as it has been floating around the internet for a while now. However, I have spent significant time with testing and my efforts to find the original author of the patch have been unsuccessful. Signed-off-by: Edvard Holst Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 096586f12e4a..6b9db57faa80 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0046", /* X250 */ "LEN004a", /* W541 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ + "LEN0073", /* X1 Carbon G5 (Elantech) */ "LEN200f", /* T450s */ NULL }; -- cgit v1.2.3 From 5717a09aeaf62d197deba1fc7ccd6bc45f3a9dcc Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sat, 3 Feb 2018 11:49:22 -0800 Subject: Input: synaptics - add Intertouch support on X1 Carbon 6th and X280 Synaptics devices reported it has Intertouch support, and it fails via PS/2 as following logs: psmouse serio2: Failed to reset mouse on synaptics-pt/serio0 psmouse serio2: Failed to enable mouse on synaptics-pt/serio0 Set these new devices to use SMBus to fix this issue, then they report SMBus version 3 is using, patch: https://patchwork.kernel.org/patch/9989547/ enabled SMBus ver 3 and makes synaptics devices work fine on SMBus mode. Signed-off-by: Aaron Ma Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 6b9db57faa80..a88bb312f5f8 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -174,6 +174,8 @@ static const char * const smbus_pnp_ids[] = { "LEN004a", /* W541 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ + "LEN0092", /* X1 Carbon 6 */ + "LEN0096", /* X280 */ "LEN200f", /* T450s */ NULL }; -- cgit v1.2.3 From ad8fb554f04e38f155c9bc34bbf521fc592ceee7 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 22 May 2018 17:16:08 -0700 Subject: Input: synaptics - add Lenovo 80 series ids to SMBus This time, Lenovo decided to go with different pieces in its latest series of Thinkpads. For those we have been able to test: - the T480 is using Synaptics with an IBM trackpoint -> it behaves properly with or without intertouch, there is no point not using RMI4 - the X1 Carbon 6th gen is using Synaptics with an IBM trackpoint -> the touchpad doesn't behave properly under PS/2 so we have to switch it to RMI4 if we do not want to have disappointed users - the X280 is using Synaptics with an ALPS trackpoint -> the recent fixes in the trackpoint handling fixed it so upstream now works fine with or without RMI4, and there is no point not using RMI4 - the T480s is using an Elan touchpad, so that's a different story Cc: # v4.14.x, v4.15.x, v4.16.x Signed-off-by: Benjamin Tissoires Acked-by: KT Liao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index a88bb312f5f8..a9591d278145 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,10 +172,12 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ "LEN0092", /* X1 Carbon 6 */ "LEN0096", /* X280 */ + "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ NULL }; -- cgit v1.2.3 From 40f7090bb1b4ec327ea1e1402ff5783af5b35195 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 22 May 2018 17:19:57 -0700 Subject: Input: elan_i2c_smbus - fix corrupted stack New ICs (like the one on the Lenovo T480s) answer to ETP_SMBUS_IAP_VERSION_CMD 4 bytes instead of 3. This corrupts the stack as i2c_smbus_read_block_data() uses the values returned by the i2c device to know how many data it need to return. i2c_smbus_read_block_data() can read up to 32 bytes (I2C_SMBUS_BLOCK_MAX) and there is no safeguard on how many bytes are provided in the return value. Ensure we always have enough space for any future firmware. Also 0-initialize the values to prevent any access to uninitialized memory. Cc: # v4.4.x, v4.9.x, v4.14.x, v4.15.x, v4.16.x Signed-off-by: Benjamin Tissoires Acked-by: KT Liao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_smbus.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index 29f99529b187..cfcb32559925 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -130,7 +130,7 @@ static int elan_smbus_get_baseline_data(struct i2c_client *client, bool max_baseline, u8 *value) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, max_baseline ? @@ -149,7 +149,7 @@ static int elan_smbus_get_version(struct i2c_client *client, bool iap, u8 *version) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_IAP_VERSION_CMD : @@ -170,7 +170,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *clickpad) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_SM_VERSION_CMD, val); @@ -188,7 +188,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_UNIQUEID_CMD, val); @@ -205,7 +205,7 @@ static int elan_smbus_get_checksum(struct i2c_client *client, bool iap, u16 *csum) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_FW_CHECKSUM_CMD : @@ -226,7 +226,7 @@ static int elan_smbus_get_max(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val); if (ret != 3) { @@ -246,7 +246,7 @@ static int elan_smbus_get_resolution(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RESOLUTION_CMD, val); if (ret != 3) { @@ -267,7 +267,7 @@ static int elan_smbus_get_num_traces(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_XY_TRACENUM_CMD, val); if (ret != 3) { @@ -294,7 +294,7 @@ static int elan_smbus_iap_get_mode(struct i2c_client *client, { int error; u16 constant; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val); if (error < 0) { @@ -345,7 +345,7 @@ static int elan_smbus_prepare_fw_update(struct i2c_client *client) int len; int error; enum tp_mode mode; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06}; u16 password; @@ -419,7 +419,7 @@ static int elan_smbus_write_fw_block(struct i2c_client *client, struct device *dev = &client->dev; int error; u16 result; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* * Due to the limitation of smbus protocol limiting -- cgit v1.2.3 From bf4407f081f33466ef0b06e213c04264d356aa35 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 17 May 2018 15:22:14 +0200 Subject: crypto: inside-secure - do not use memset on MMIO This patch fixes the Inside Secure driver which uses a memtset() call to set an MMIO area from the cryptographic engine to 0. This is wrong as memset() isn't guaranteed to work on MMIO for many reasons. This led to kernel paging request panics in certain cases. Use memset_io() instead. Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Reported-by: Ofer Heifetz Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index d4a81be0d7d2..b6be62025325 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -152,8 +152,8 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv) EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS; writel(val, EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL); - memset(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0, - EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32)); + memset_io(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM, 0, + EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32)); eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL, EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN); -- cgit v1.2.3 From 86b389ff22bd6ad8fd3cb98e41cd271886c6d023 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 27 May 2018 20:54:44 -0400 Subject: tracing: Fix crash when freeing instances with event triggers If a instance has an event trigger enabled when it is freed, it could cause an access of free memory. Here's the case that crashes: # cd /sys/kernel/tracing # mkdir instances/foo # echo snapshot > instances/foo/events/initcall/initcall_start/trigger # rmdir instances/foo Would produce: general protection fault: 0000 [#1] PREEMPT SMP PTI Modules linked in: tun bridge ... CPU: 5 PID: 6203 Comm: rmdir Tainted: G W 4.17.0-rc4-test+ #933 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:clear_event_triggers+0x3b/0x70 RSP: 0018:ffffc90003783de0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b2b RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800c7130ba0 RBP: ffffc90003783e00 R08: ffff8801131993f8 R09: 0000000100230016 R10: ffffc90003783d80 R11: 0000000000000000 R12: ffff8800c7130ba0 R13: ffff8800c7130bd8 R14: ffff8800cc093768 R15: 00000000ffffff9c FS: 00007f6f4aa86700(0000) GS:ffff88011eb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6f4a5aed60 CR3: 00000000cd552001 CR4: 00000000001606e0 Call Trace: event_trace_del_tracer+0x2a/0xc5 instance_rmdir+0x15c/0x200 tracefs_syscall_rmdir+0x52/0x90 vfs_rmdir+0xdb/0x160 do_rmdir+0x16d/0x1c0 __x64_sys_rmdir+0x17/0x20 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe This was due to the call the clears out the triggers when an instance is being deleted not removing the trigger from the link list. Cc: stable@vger.kernel.org Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_trigger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d251cabcf69a..9be317d388c5 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -483,9 +483,10 @@ clear_event_triggers(struct trace_array *tr) struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { - struct event_trigger_data *data; - list_for_each_entry_rcu(data, &file->triggers, list) { + struct event_trigger_data *data, *n; + list_for_each_entry_safe(data, n, &file->triggers, list) { trace_event_trigger_enable_disable(file, 0); + list_del_rcu(&data->list); if (data->ops->free) data->ops->free(data->ops, data); } -- cgit v1.2.3 From 2824f5033248600673e3e126a4d135363cbfd9ac Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 28 May 2018 10:56:36 -0400 Subject: tracing: Make the snapshot trigger work with instances The snapshot trigger currently only affects the main ring buffer, even when it is used by the instances. This can be confusing as the snapshot trigger is listed in the instance. > # cd /sys/kernel/tracing > # mkdir instances/foo > # echo snapshot > instances/foo/events/syscalls/sys_enter_fchownat/trigger > # echo top buffer > trace_marker > # echo foo buffer > instances/foo/trace_marker > # touch /tmp/bar > # chown rostedt /tmp/bar > # cat instances/foo/snapshot # tracer: nop # # # * Snapshot is freed * # # Snapshot commands: # echo 0 > snapshot : Clears and frees snapshot buffer # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated. # Takes a snapshot of the main buffer. # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free) # (Doesn't have to be '2' works with any number that # is not a '0' or '1') > # cat snapshot # tracer: nop # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | bash-1189 [000] .... 111.488323: tracing_mark_write: top buffer Not only did the snapshot occur in the top level buffer, but the instance snapshot buffer should have been allocated, and it is still free. Cc: stable@vger.kernel.org Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 12 ++++++------ kernel/trace/trace.h | 11 +++++++++++ kernel/trace/trace_events_trigger.c | 10 ++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 414d7210b2ec..bcd93031d042 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -893,7 +893,7 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -static void tracing_snapshot_instance(struct trace_array *tr) +void tracing_snapshot_instance(struct trace_array *tr) { struct tracer *tracer = tr->current_trace; unsigned long flags; @@ -949,7 +949,7 @@ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, struct trace_buffer *size_buf, int cpu_id); static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); -static int alloc_snapshot(struct trace_array *tr) +int tracing_alloc_snapshot_instance(struct trace_array *tr) { int ret; @@ -995,7 +995,7 @@ int tracing_alloc_snapshot(void) struct trace_array *tr = &global_trace; int ret; - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); WARN_ON(ret < 0); return ret; @@ -5408,7 +5408,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; } @@ -6451,7 +6451,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, } #endif if (!tr->allocated_snapshot) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; } @@ -7179,7 +7179,7 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, return ret; out_reg: - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6fb46a06c9dc..507954b4e058 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1817,6 +1817,17 @@ static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif +#ifdef CONFIG_TRACER_SNAPSHOT +void tracing_snapshot_instance(struct trace_array *tr); +int tracing_alloc_snapshot_instance(struct trace_array *tr); +#else +static inline void tracing_snapshot_instance(struct trace_array *tr) { } +static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) +{ + return 0; +} +#endif + extern struct trace_iterator *tracepoint_print_iter; #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 9be317d388c5..8b5bdcf64871 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -643,6 +643,7 @@ event_trigger_callback(struct event_command *cmd_ops, trigger_data->count = -1; trigger_data->ops = trigger_ops; trigger_data->cmd_ops = cmd_ops; + trigger_data->private_data = file; INIT_LIST_HEAD(&trigger_data->list); INIT_LIST_HEAD(&trigger_data->named_list); @@ -1054,7 +1055,12 @@ static void snapshot_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - tracing_snapshot(); + struct trace_event_file *file = data->private_data; + + if (file) + tracing_snapshot_instance(file->tr); + else + tracing_snapshot(); } static void @@ -1077,7 +1083,7 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, { int ret = register_trigger(glob, ops, data, file); - if (ret > 0 && tracing_alloc_snapshot() != 0) { + if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) { unregister_trigger(glob, ops, data, file); ret = 0; } -- cgit v1.2.3 From c97f414c54a255f4f05a50a2625efaeee406e134 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 27 May 2018 18:50:10 +0300 Subject: nvme: fix extended data LBA supported setting This value depands on the metadata support value, so reorder the initialization to fit. Fixes: b5be3b392 ("nvme: always unregister the integrity profile in __nvme_revalidate_disk") Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 99b857e5a7a9..b9ca782fe82d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1447,8 +1447,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->lba_shift == 0) ns->lba_shift = 9; ns->noiob = le16_to_cpu(id->noiob); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; -- cgit v1.2.3 From efe3de79e0b52ca281ef6691480c8c68c82a4657 Mon Sep 17 00:00:00 2001 From: Sachin Grover Date: Fri, 25 May 2018 14:01:39 +0530 Subject: selinux: KASAN: slab-out-of-bounds in xattr_getsecurity Call trace: [] dump_backtrace+0x0/0x428 [] show_stack+0x28/0x38 [] dump_stack+0xd4/0x124 [] print_address_description+0x68/0x258 [] kasan_report.part.2+0x228/0x2f0 [] kasan_report+0x5c/0x70 [] check_memory_region+0x12c/0x1c0 [] memcpy+0x34/0x68 [] xattr_getsecurity+0xe0/0x160 [] vfs_getxattr+0xc8/0x120 [] getxattr+0x100/0x2c8 [] SyS_fgetxattr+0x64/0xa0 [] el0_svc_naked+0x24/0x28 If user get root access and calls security.selinux setxattr() with an embedded NUL on a file and then if some process performs a getxattr() on that file with a length greater than the actual length of the string, it would result in a panic. To fix this, add the actual length of the string to the security context instead of the length passed by the userspace process. Signed-off-by: Sachin Grover Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 8057e19dc15f..3ce225e3f142 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1494,7 +1494,7 @@ static int security_context_to_sid_core(struct selinux_state *state, scontext_len, &context, def_sid); if (rc == -EINVAL && force) { context.str = str; - context.len = scontext_len; + context.len = strlen(str) + 1; str = NULL; } else if (rc) goto out_unlock; -- cgit v1.2.3 From 829bc787c1a0403e4d886296dd4d90c5f9c1744a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 May 2018 19:43:53 -0700 Subject: fs: clear writeback errors in inode_init_always In inode_init_always(), we clear the inode mapping flags, which clears any retained error (AS_EIO, AS_ENOSPC) bits. Unfortunately, we do not also clear wb_err, which means that old mapping errors can leak through to new inodes. This is crucial for the XFS inode allocation path because we recycle old in-core inodes and we do not want error state from an old file to leak into the new file. This bug was discovered by running generic/036 and generic/047 in a loop and noticing that the EIOs generated by the collision of direct and buffered writes in generic/036 would survive the remount between 036 and 047, and get reported to the fsyncs (on different files!) in generic/047. Signed-off-by: Darrick J. Wong Reviewed-by: Jeff Layton Reviewed-by: Brian Foster --- fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/inode.c b/fs/inode.c index 13ceb98c3bd3..3b55391072f3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -178,6 +178,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; -- cgit v1.2.3 From 32ffd6e8d1f6cef94bedca15dfcdebdeb590499d Mon Sep 17 00:00:00 2001 From: João Paulo Rechi Vita Date: Tue, 22 May 2018 14:30:15 -0700 Subject: platform/x86: asus-wmi: Fix NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not perform the rfkill cleanup routine when (asus->driver->wlan_ctrl_by_user && ashs_present()) is true, since nothing is registered with the rfkill subsystem in that case. Doing so leads to the following kernel NULL pointer dereference: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __mutex_lock_slowpath+0x98/0x120 PGD 1a3aa8067 PUD 1a3b3d067 PMD 0 Oops: 0002 [#1] PREEMPT SMP Modules linked in: bnep ccm binfmt_misc uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core hid_a4tech videodev x86_pkg_temp_thermal intel_powerclamp coretemp ath3k btusb btrtl btintel bluetooth kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic irqbypass crc32c_intel arc4 i915 snd_hda_intel snd_hda_codec ath9k ath9k_common ath9k_hw ath i2c_algo_bit snd_hwdep mac80211 ghash_clmulni_intel snd_hda_core snd_pcm snd_timer cfg80211 ehci_pci xhci_pci drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm xhci_hcd ehci_hcd asus_nb_wmi(-) asus_wmi sparse_keymap r8169 rfkill mxm_wmi serio_raw snd mii mei_me lpc_ich i2c_i801 video soundcore mei i2c_smbus wmi i2c_core mfd_core CPU: 3 PID: 3275 Comm: modprobe Not tainted 4.9.34-gentoo #34 Hardware name: ASUSTeK COMPUTER INC. K56CM/K56CM, BIOS K56CM.206 08/21/2012 task: ffff8801a639ba00 task.stack: ffffc900014cc000 RIP: 0010:[] [] __mutex_lock_slowpath+0x98/0x120 RSP: 0018:ffffc900014cfce0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8801a54315b0 RCX: 00000000c0000100 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8801a54315b4 RBP: ffffc900014cfd30 R08: 0000000000000000 R09: 0000000000000002 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801a54315b4 R13: ffff8801a639ba00 R14: 00000000ffffffff R15: ffff8801a54315b8 FS: 00007faa254fb700(0000) GS:ffff8801aef80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001a3b1b000 CR4: 00000000001406e0 Stack: ffff8801a54315b8 0000000000000000 ffffffff814733ae ffffc900014cfd28 ffffffff8146a28c ffff8801a54315b0 0000000000000000 ffff8801a54315b0 ffff8801a66f3820 0000000000000000 ffffc900014cfd48 ffffffff816c73e7 Call Trace: [] ? acpi_ut_release_mutex+0x5d/0x61 [] ? acpi_ns_get_node+0x49/0x52 [] mutex_lock+0x17/0x30 [] asus_rfkill_hotplug+0x24/0x1a0 [asus_wmi] [] asus_wmi_rfkill_exit+0x61/0x150 [asus_wmi] [] asus_wmi_remove+0x61/0xb0 [asus_wmi] [] platform_drv_remove+0x28/0x40 [] __device_release_driver+0xa1/0x160 [] device_release_driver+0x23/0x30 [] bus_remove_device+0xfd/0x170 [] device_del+0x139/0x270 [] platform_device_del+0x28/0x90 [] platform_device_unregister+0x12/0x30 [] asus_wmi_unregister_driver+0x19/0x30 [asus_wmi] [] asus_nb_wmi_exit+0x10/0xf26 [asus_nb_wmi] [] SyS_delete_module+0x192/0x270 [] ? exit_to_usermode_loop+0x92/0xa0 [] entry_SYSCALL_64_fastpath+0x13/0x94 Code: e8 5e 30 00 00 8b 03 83 f8 01 0f 84 93 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 41 be ff ff ff ff 4c 89 3c 24 48 89 44 24 08 <48> 89 20 4c 89 6c 24 10 eb 1d 4c 89 e7 49 c7 45 08 02 00 00 00 RIP [] __mutex_lock_slowpath+0x98/0x120 RSP CR2: 0000000000000000 ---[ end trace 8d484233fa7cb512 ]--- note: modprobe[3275] exited with preempt_count 2 https://bugzilla.kernel.org/show_bug.cgi?id=196467 Reported-by: red.f0xyz@gmail.com Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-wmi.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a32c5c00e0e7..ffffb9909ae1 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -163,6 +163,16 @@ MODULE_LICENSE("GPL"); static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + struct bios_args { u32 arg0; u32 arg1; @@ -1025,6 +1035,9 @@ static int asus_new_rfkill(struct asus_wmi *asus, static void asus_wmi_rfkill_exit(struct asus_wmi *asus) { + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + return; + asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6"); asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7"); @@ -2121,16 +2134,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } -static bool ashs_present(void) -{ - int i = 0; - while (ashs_ids[i]) { - if (acpi_dev_found(ashs_ids[i++])) - return true; - } - return false; -} - /* * WMI Driver */ -- cgit v1.2.3 From 41a233dcbe7dfac00da74d703d4e5f6fe17c5f63 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 27 Jan 2018 20:09:16 -0500 Subject: MAINTAINERS: add turbostat utility Signed-off-by: Len Brown --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ca4afd68530c..0d00b6c5370b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14371,6 +14371,15 @@ S: Maintained F: drivers/tc/ F: include/linux/tc.h +TURBOSTAT UTILITY +M: "Len Brown" +L: linux-pm@vger.kernel.org +B: https://bugzilla.kernel.org +Q: https://patchwork.kernel.org/project/linux-pm/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat +S: Supported +F: tools/power/x86/turbostat/ + TW5864 VIDEO4LINUX DRIVER M: Bluecherry Maintainers M: Anton Sviridenko -- cgit v1.2.3 From 2085e12441dc90a126a5f279b3ef03daade901cf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 4 Aug 2017 18:42:23 +0300 Subject: tools/power turbostat: fix Skylake Xeon package C-state display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turbostat neglects to display all package C-states for some Skylake Xeon BIOS configurations. This is due to a typo in the table decoding MSR_PKG_CST_CONFIG_CONTROL (0x000000e2) Here we fix that typo, according to Intel SDM, vol 4, Table 2-41 - "MSRs Supported by Intel® Xeon® Processor Scalable Family with DisplayFamily_DisplayModel 06_55H". Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bd9c6b31a504..4c2d9f3b40ce 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1769,7 +1769,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void -- cgit v1.2.3 From 3f44a5c62be2f2b15317942fa7bc4a810d2420aa Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Oct 2017 15:42:56 -0400 Subject: tools/power turbostat: add --enable Time_Of_Day_Seconds Add a Time_Of_Day_Seconds column showing when measurement for each row was completed. Units are [sec.subsec] since Epoch, as reported by gettimeofday(2). While useful to correlate turbostat output with other tools, this built-in column is disabled, by default. Add the "--enable" option to enable such disabled-by-default built-in columns: "--enable Time_Of_Day_Seconds" "--enable usec" "--enable all", will enable all disabled-by-defauilt built-in counters. When "--debug" is used, all disabled-by-default columns are enabled, unless explicitly skipped using "--hide" Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 9 +- tools/power/x86/turbostat/turbostat.c | 191 ++++++++++++++++++++-------------- 2 files changed, 118 insertions(+), 82 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ccf2a69365cc..04262ab1b78b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -54,9 +54,12 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP -\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. +\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP -\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. +\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". +The column name "all" can be used to enable all disabled-by-default built-in counters. +.PP +\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. .PP \fB--Dump\fP displays the raw counter values. .PP @@ -86,6 +89,8 @@ displays the statistics gathered since it was forked. The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. .SH COLUMN DESCRIPTIONS .nf +\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. +\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. \fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBPackage\fP processor package number -- not present on systems with a single processor package. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4c2d9f3b40ce..627e5749d7d1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -50,6 +50,7 @@ int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int quiet; +unsigned int shown; unsigned int sums_need_wide_columns; unsigned int rapl_joules; unsigned int summary_only; @@ -346,6 +347,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) * Thus, strings that are proper sub-sets must follow their more specific peers. */ struct msr_counter bic[] = { + { 0x0, "usec" }, + { 0x0, "Time_Of_Day_Seconds" }, { 0x0, "Package" }, { 0x0, "Avg_MHz" }, { 0x0, "Bzy_MHz" }, @@ -394,57 +397,63 @@ struct msr_counter bic[] = { #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_Package (1ULL << 0) -#define BIC_Avg_MHz (1ULL << 1) -#define BIC_Bzy_MHz (1ULL << 2) -#define BIC_TSC_MHz (1ULL << 3) -#define BIC_IRQ (1ULL << 4) -#define BIC_SMI (1ULL << 5) -#define BIC_Busy (1ULL << 6) -#define BIC_CPU_c1 (1ULL << 7) -#define BIC_CPU_c3 (1ULL << 8) -#define BIC_CPU_c6 (1ULL << 9) -#define BIC_CPU_c7 (1ULL << 10) -#define BIC_ThreadC (1ULL << 11) -#define BIC_CoreTmp (1ULL << 12) -#define BIC_CoreCnt (1ULL << 13) -#define BIC_PkgTmp (1ULL << 14) -#define BIC_GFX_rc6 (1ULL << 15) -#define BIC_GFXMHz (1ULL << 16) -#define BIC_Pkgpc2 (1ULL << 17) -#define BIC_Pkgpc3 (1ULL << 18) -#define BIC_Pkgpc6 (1ULL << 19) -#define BIC_Pkgpc7 (1ULL << 20) -#define BIC_Pkgpc8 (1ULL << 21) -#define BIC_Pkgpc9 (1ULL << 22) -#define BIC_Pkgpc10 (1ULL << 23) -#define BIC_PkgWatt (1ULL << 24) -#define BIC_CorWatt (1ULL << 25) -#define BIC_GFXWatt (1ULL << 26) -#define BIC_PkgCnt (1ULL << 27) -#define BIC_RAMWatt (1ULL << 28) -#define BIC_PKG__ (1ULL << 29) -#define BIC_RAM__ (1ULL << 30) -#define BIC_Pkg_J (1ULL << 31) -#define BIC_Cor_J (1ULL << 32) -#define BIC_GFX_J (1ULL << 33) -#define BIC_RAM_J (1ULL << 34) -#define BIC_Core (1ULL << 35) -#define BIC_CPU (1ULL << 36) -#define BIC_Mod_c6 (1ULL << 37) -#define BIC_sysfs (1ULL << 38) -#define BIC_Totl_c0 (1ULL << 39) -#define BIC_Any_c0 (1ULL << 40) -#define BIC_GFX_c0 (1ULL << 41) -#define BIC_CPUGFX (1ULL << 42) - -unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; -unsigned long long bic_present = BIC_sysfs; +#define BIC_USEC (1ULL << 0) +#define BIC_TOD (1ULL << 1) +#define BIC_Package (1ULL << 2) +#define BIC_Avg_MHz (1ULL << 3) +#define BIC_Bzy_MHz (1ULL << 4) +#define BIC_TSC_MHz (1ULL << 5) +#define BIC_IRQ (1ULL << 6) +#define BIC_SMI (1ULL << 7) +#define BIC_Busy (1ULL << 8) +#define BIC_CPU_c1 (1ULL << 9) +#define BIC_CPU_c3 (1ULL << 10) +#define BIC_CPU_c6 (1ULL << 11) +#define BIC_CPU_c7 (1ULL << 12) +#define BIC_ThreadC (1ULL << 13) +#define BIC_CoreTmp (1ULL << 14) +#define BIC_CoreCnt (1ULL << 15) +#define BIC_PkgTmp (1ULL << 16) +#define BIC_GFX_rc6 (1ULL << 17) +#define BIC_GFXMHz (1ULL << 18) +#define BIC_Pkgpc2 (1ULL << 19) +#define BIC_Pkgpc3 (1ULL << 20) +#define BIC_Pkgpc6 (1ULL << 21) +#define BIC_Pkgpc7 (1ULL << 22) +#define BIC_Pkgpc8 (1ULL << 23) +#define BIC_Pkgpc9 (1ULL << 24) +#define BIC_Pkgpc10 (1ULL << 25) +#define BIC_PkgWatt (1ULL << 26) +#define BIC_CorWatt (1ULL << 27) +#define BIC_GFXWatt (1ULL << 28) +#define BIC_PkgCnt (1ULL << 29) +#define BIC_RAMWatt (1ULL << 30) +#define BIC_PKG__ (1ULL << 31) +#define BIC_RAM__ (1ULL << 32) +#define BIC_Pkg_J (1ULL << 33) +#define BIC_Cor_J (1ULL << 34) +#define BIC_GFX_J (1ULL << 35) +#define BIC_RAM_J (1ULL << 36) +#define BIC_Core (1ULL << 37) +#define BIC_CPU (1ULL << 38) +#define BIC_Mod_c6 (1ULL << 39) +#define BIC_sysfs (1ULL << 40) +#define BIC_Totl_c0 (1ULL << 41) +#define BIC_Any_c0 (1ULL << 42) +#define BIC_GFX_c0 (1ULL << 43) +#define BIC_CPUGFX (1ULL << 44) + +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) + +unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) + #define MAX_DEFERRED 16 char *deferred_skip_names[MAX_DEFERRED]; int deferred_skip_index; @@ -496,6 +505,9 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (comma) *comma = '\0'; + if (!strcmp(name_list, "all")) + return ~0; + for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { retval |= (1ULL << i); @@ -532,8 +544,10 @@ void print_header(char *delim) struct msr_counter *mp; int printed = 0; - if (debug) - outp += sprintf(outp, "usec %s", delim); + if (DO_BIC(BIC_USEC)) + outp += sprintf(outp, "%susec", (printed++ ? delim : "")); + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -786,7 +800,7 @@ int format_counters(struct thread_data *t, struct core_data *c, (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) return 0; - if (debug) { + if (DO_BIC(BIC_USEC)) { /* on each row, print how many usec each timestamp took to gather */ struct timeval tv; @@ -794,6 +808,10 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); } + /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -1140,6 +1158,15 @@ delta_thread(struct thread_data *new, struct thread_data *old, int i; struct msr_counter *mp; + /* + * the timestamps from start of measurement interval are in "old" + * the timestamp from end of measurement interval are in "new" + * over-write old w/ new so we can print end of interval values + */ + + old->tv_begin = new->tv_begin; + old->tv_end = new->tv_end; + old->tsc = new->tsc - old->tsc; /* check for TSC < 1 Mcycles over interval */ @@ -1228,6 +1255,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data int i; struct msr_counter *mp; + t->tv_begin.tv_sec = 0; + t->tv_begin.tv_usec = 0; + t->tv_end.tv_sec = 0; + t->tv_end.tv_usec = 0; + t->tsc = 0; t->aperf = 0; t->mperf = 0; @@ -1286,6 +1318,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, int i; struct msr_counter *mp; + /* remember first tv_begin */ + if (average.threads.tv_begin.tv_sec == 0) + average.threads.tv_begin = t->tv_begin; + + /* remember last tv_end */ + average.threads.tv_end = t->tv_end; + average.threads.tsc += t->tsc; average.threads.aperf += t->aperf; average.threads.mperf += t->mperf; @@ -4960,34 +4999,6 @@ error: exit(-1); } -int shown; -/* - * parse_show_hide() - process cmdline to set default counter action - */ -void parse_show_hide(char *optarg, enum show_hide_mode new_mode) -{ - /* - * --show: show only those specified - * The 1st invocation will clear and replace the enabled mask - * subsequent invocations can add to it. - */ - if (new_mode == SHOW_LIST) { - if (shown == 0) - bic_enabled = bic_lookup(optarg, new_mode); - else - bic_enabled |= bic_lookup(optarg, new_mode); - shown = 1; - - return; - } - - /* - * --hide: do not show those specified - * multiple invocations simply clear more bits in enabled mask - */ - bic_enabled &= ~bic_lookup(optarg, new_mode); - -} void cmdline(int argc, char **argv) { @@ -4998,6 +5009,7 @@ void cmdline(int argc, char **argv) {"cpu", required_argument, 0, 'c'}, {"Dump", no_argument, 0, 'D'}, {"debug", no_argument, 0, 'd'}, /* internal, not documented */ + {"enable", required_argument, 0, 'e'}, {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help @@ -5014,7 +5026,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jo:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5026,11 +5038,20 @@ void cmdline(int argc, char **argv) case 'D': dump_only++; break; + case 'e': + /* --enable specified counter */ + bic_enabled |= bic_lookup(optarg, SHOW_LIST); + break; case 'd': debug++; + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); break; case 'H': - parse_show_hide(optarg, HIDE_LIST); + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); break; case 'h': default: @@ -5054,6 +5075,7 @@ void cmdline(int argc, char **argv) rapl_joules++; break; case 'l': + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); list_header_only++; quiet++; break; @@ -5064,7 +5086,16 @@ void cmdline(int argc, char **argv) quiet = 1; break; case 's': - parse_show_hide(optarg, SHOW_LIST); + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (shown == 0) + bic_enabled = bic_lookup(optarg, SHOW_LIST); + else + bic_enabled |= bic_lookup(optarg, SHOW_LIST); + shown = 1; break; case 'S': summary_only++; -- cgit v1.2.3 From 8aa2ed0b2839718b9147bb0119a8012217d25a8b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 15 Jul 2017 14:57:37 -0400 Subject: tools/power turbostat: on SIGINT: sample, print and exit When running in interval-mode, catch interrupts and print a final data record before exiting. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 ++++ tools/power/x86/turbostat/turbostat.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 04262ab1b78b..2dafba4900ab 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -267,6 +267,10 @@ CPU PRF_CTRL .fi +.SH SIGNALS + +SIGINT will interrupt interval-mode. +The end-of-interval data will be collected and displayed before turbostat exits. .SH NOTES .B "turbostat " diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 627e5749d7d1..d9703b728fbb 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2600,11 +2600,37 @@ int snapshot_proc_sysfs_files(void) return 0; } +int exit_requested; + +static void signal_handler (int signal) +{ + switch (signal) { + case SIGINT: + exit_requested = 1; + if (debug) + fprintf(stderr, " SIGINT\n"); + break; + } +} + +void setup_signal_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = &signal_handler; + + if (sigaction(SIGINT, &sa, NULL) < 0) + err(1, "sigaction SIGINT"); +} void turbostat_loop() { int retval; int restarted = 0; + setup_signal_handler(); + restart: restarted++; @@ -2646,6 +2672,8 @@ restart: compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); + if (exit_requested) + break; nanosleep(&interval_ts, NULL); if (snapshot_proc_sysfs_files()) goto restart; @@ -2665,6 +2693,8 @@ restart: compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); + if (exit_requested) + break; } } -- cgit v1.2.3 From 072119606a239557bf8532c70af9c211d1028dff Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 15 Jul 2017 15:51:26 -0400 Subject: tools/power turbostat: on SIGUSR1: sample, print and continue Interval-mode turbostat now catches and discards SIGUSR1. Thus, SIGUSR1 can be used to tell turbostat to cut short the current measurement interval. Turbostat will then start the next measurement interval using the regular interval length. This can be used to give turbostat variable intervals. Invoke turbostat with --interval LARGE_NUMBER_SEC and have a program that has permission to send it a SIGUSR1 always before LARGE_NUMBER_SEC expires. It may also be useful to use "--enable Time_Of_Day_Seconds" to observe the actual interval length. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 ++++ tools/power/x86/turbostat/turbostat.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 2dafba4900ab..f99bddf16b8b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -271,6 +271,10 @@ CPU PRF_CTRL SIGINT will interrupt interval-mode. The end-of-interval data will be collected and displayed before turbostat exits. + +SIGUSR1 will end current interval, +end-of-interval data will be collected and displayed before turbostat +starts a new interval. .SH NOTES .B "turbostat " diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d9703b728fbb..dd9b2efbbb2a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2610,6 +2610,10 @@ static void signal_handler (int signal) if (debug) fprintf(stderr, " SIGINT\n"); break; + case SIGUSR1: + if (debug > 1) + fprintf(stderr, "SIGUSR1\n"); + break; } } @@ -2623,6 +2627,8 @@ void setup_signal_handler(void) if (sigaction(SIGINT, &sa, NULL) < 0) err(1, "sigaction SIGINT"); + if (sigaction(SIGUSR1, &sa, NULL) < 0) + err(1, "sigaction SIGUSR1"); } void turbostat_loop() { -- cgit v1.2.3 From b9ad8ee0da54b2b4d2f8cd50fc9ce5200d61e7d1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 19 Jul 2017 19:28:37 -0400 Subject: tools/power turbostat: end current interval upon newline input In turbostat interval mode, a newline typed on standard input will now conclude the current interval. Data will immediately be collected and printed for that interval, and the next interval will be started. This is similar to the recently added SIGUSR1 feature. But that is for use by programs, while this is for interactive use. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 7 ++++++ tools/power/x86/turbostat/turbostat.c | 43 ++++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index f99bddf16b8b..4cffa4a5a2b7 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -267,6 +267,13 @@ CPU PRF_CTRL .fi +.SH INPUT + +For interval-mode, turbostat will immediately end the current interval +when it sees a newline on standard input. +turbostat will then start the next interval. +Control-C will be send a SIGINT to turbostat, +which will immediately abort the program with no further processing. .SH SIGNALS SIGINT will interrupt interval-mode. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd9b2efbbb2a..1d4ba4ade6fa 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,8 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; -struct timespec interval_ts = {5, 0}; +struct timeval interval_tv = {5, 0}; +struct timespec one_msec = {0, 1000000}; unsigned int debug; unsigned int quiet; unsigned int shown; @@ -478,7 +480,7 @@ void help(void) "--cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" "--quiet skip decoding system configuration header\n" - "--interval sec Override default 5-second measurement interval\n" + "--interval sec.subsec Override default 5-second measurement interval\n" "--help print this help message\n" "--list list column headers only\n" "--out file create or truncate \"file\" for all output\n" @@ -2615,6 +2617,8 @@ static void signal_handler (int signal) fprintf(stderr, "SIGUSR1\n"); break; } + /* make sure this manually-invoked interval is at least 1ms long */ + nanosleep(&one_msec, NULL); } void setup_signal_handler(void) @@ -2630,6 +2634,33 @@ void setup_signal_handler(void) if (sigaction(SIGUSR1, &sa, NULL) < 0) err(1, "sigaction SIGUSR1"); } + +int do_sleep(void) +{ + struct timeval select_timeout; + fd_set readfds; + int retval; + + FD_ZERO(&readfds); + FD_SET(0, &readfds); + + select_timeout = interval_tv; + + retval = select(1, &readfds, NULL, NULL, &select_timeout); + + if (retval == 1) { + + switch (getc(stdin)) { + case 'q': + exit_requested = 1; + break; + } + /* make sure this manually-invoked interval is at least 1ms long */ + nanosleep(&one_msec, NULL); + } + + return retval; +} void turbostat_loop() { int retval; @@ -2659,7 +2690,7 @@ restart: re_initialize(); goto restart; } - nanosleep(&interval_ts, NULL); + do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; retval = for_all_cpus(get_counters, ODD_COUNTERS); @@ -2680,7 +2711,7 @@ restart: flush_output_stdout(); if (exit_requested) break; - nanosleep(&interval_ts, NULL); + do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; retval = for_all_cpus(get_counters, EVEN_COUNTERS); @@ -5103,8 +5134,8 @@ void cmdline(int argc, char **argv) exit(2); } - interval_ts.tv_sec = interval; - interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; + interval_tv.tv_sec = interval; + interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; } break; case 'J': -- cgit v1.2.3 From 47936f944e7816296888fb4c135f40635083080b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 4 Oct 2017 15:01:47 +0300 Subject: tools/power turbostat: fix printing on input The recent patch that implements table printing on a keypress introduced a regression - turbostat prints the table almost continuously if it is run from a daemon program. The problem is also easy to reproduce like this: echo | turbostat The reason is that we cannot assume that stdin is always a TTY. It can be many things. This patch adds fixes the problem by limiting the new keypress functionality to TTYs only. If stdin is not a TTY, we just sleep for the full interval time. While on it, clean-up 'do_sleep()' to return no value, as callers do not expect that anyway. Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1d4ba4ade6fa..dd18aac6ccb2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -49,6 +49,7 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; struct timeval interval_tv = {5, 0}; +struct timespec interval_ts = {5, 0}; struct timespec one_msec = {0, 1000000}; unsigned int debug; unsigned int quiet; @@ -2635,7 +2636,7 @@ void setup_signal_handler(void) err(1, "sigaction SIGUSR1"); } -int do_sleep(void) +void do_sleep(void) { struct timeval select_timeout; fd_set readfds; @@ -2644,12 +2645,15 @@ int do_sleep(void) FD_ZERO(&readfds); FD_SET(0, &readfds); - select_timeout = interval_tv; + if (!isatty(fileno(stdin))) { + nanosleep(&interval_ts, NULL); + return; + } + select_timeout = interval_tv; retval = select(1, &readfds, NULL, NULL, &select_timeout); if (retval == 1) { - switch (getc(stdin)) { case 'q': exit_requested = 1; @@ -2658,9 +2662,8 @@ int do_sleep(void) /* make sure this manually-invoked interval is at least 1ms long */ nanosleep(&one_msec, NULL); } - - return retval; } + void turbostat_loop() { int retval; @@ -5134,8 +5137,9 @@ void cmdline(int argc, char **argv) exit(2); } - interval_tv.tv_sec = interval; + interval_tv.tv_sec = interval_ts.tv_sec = interval; interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; + interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; } break; case 'J': -- cgit v1.2.3 From fd3933ca7bcf500f7f204d16f670b64cceb11898 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 8 Nov 2017 23:15:42 -0500 Subject: tools/power turbostat: fix MSR_IA32_MISC_ENABLE MWAIT printout MSR_IA32_MISC_ENABLE[18] is the MWAIT ENABLE bit, not DISABLE bit... so MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO) should print as: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT PREFETCH TURBO) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd18aac6ccb2..32f3a33a9850 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4056,7 +4056,7 @@ void decode_misc_enable_msr(void) base_cpu, msr, msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", - msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } -- cgit v1.2.3 From 46c2797826cc6d1ae36fcbd966e76f9fa1907eef Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Dec 2017 17:38:17 -0500 Subject: tools/power turbostat: fix possible sprintf buffer overflow Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 32f3a33a9850..59742600b7eb 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1529,7 +1529,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { - char path[128]; + char path[128 + PATH_BYTES]; if (mp->flags & SYSFS_PERCPU) { sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", -- cgit v1.2.3 From 733ef0f8e76e323b5eae36691896fd48ab026056 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 2 Jan 2018 16:17:23 -0500 Subject: tools/power turbostat: do not hard-code 25MHz crystal on SKX Some SKX use a 24 MHz crystal, so do not hard code 25 MHz. Also, SKX crystal is not exact, because SKX uses an EMI reduction circuit that costs a fraction of a percent. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 59742600b7eb..af1f81053be9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4261,7 +4261,6 @@ void process_cpuid() case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ crystal_hz = 24000000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ crystal_hz = 25000000; /* 25.0 MHz */ break; -- cgit v1.2.3 From ac980e1357244299f4c929d9f2c45428b35eeb86 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Sep 2017 15:14:08 +0300 Subject: tools/power turbostat: dump BDX, SKX automatic C-state conversion bit BDX and SKX have a bit that tells them to PROMOTE shallow C-states requests to MWAIT(C6). It is generally a BIOS bug if this bit is set. As we have encountered that BIOS bug, let's print this bit in turbostat debug output. Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index af1f81053be9..14530de01e96 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -91,6 +91,7 @@ double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; unsigned int do_core_perf_limit_reasons; +unsigned int has_automatic_cstate_conversion; unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; @@ -2118,7 +2119,7 @@ dump_nhm_cst_cfg(void) fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); - fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", @@ -2126,6 +2127,15 @@ dump_nhm_cst_cfg(void) (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); + +#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) + if (has_automatic_cstate_conversion) { + fprintf(outf, ", automatic c-state conversion=%s", + (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); + } + + fprintf(outf, ")\n"); + return; } @@ -3632,6 +3642,12 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) } } +void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) +{ + if (is_skx(family, model) || is_bdx(family, model)) + has_automatic_cstate_conversion = 1; +} + int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; @@ -4370,6 +4386,7 @@ void process_cpuid() rapl_probe(family, model); perf_limit_reasons_probe(family, model); + automatic_cstate_conversion_probe(family, model); if (!quiet) dump_cstate_pstate_config_info(family, model); -- cgit v1.2.3 From 3e8b62bf0c9bf45649d5c456b5af02c0ee61d4ec Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Sep 2017 15:25:42 +0300 Subject: tools/power turbostat: a small C-states dump readability immprovement Improve readability a little bit by changing this output: MSR_PKG_CST_CONFIG_CONTROL: 0x00008407 (locked: pkg-cstate-limit=7: unlimited, automatic-c-state-conversion=off) with this output: MSR_PKG_CST_CONFIG_CONTROL: 0x00008407 (locked, pkg-cstate-limit=7 (unlimited), automatic-c-state-conversion=off) Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 14530de01e96..00a2a7ca5c47 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2119,7 +2119,7 @@ dump_nhm_cst_cfg(void) fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); - fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s", + fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", -- cgit v1.2.3 From 94d6ab4b1171ae58c11cf892fc008dc4e967de37 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 27 Jan 2018 22:39:21 -0500 Subject: tools/power turbostat: remove blank lines When the user reuests to collect and show columns that are not present on every row (eg. for every CPU) turbostat still prints an (empty) line for every CPU. Update so no blank lines are printed. old: # turbostat --quiet --show Pkg%pc6 Pkg%pc6 9.12 9.12 Pkg%pc6 9.12 9.12 new: # turbostat --quiet --show Pkg%pc6 Pkg%pc6 9.12 9.12 Pkg%pc6 9.12 9.12 Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 00a2a7ca5c47..29e9cd5fcd88 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1028,7 +1028,8 @@ int format_counters(struct thread_data *t, struct core_data *c, } done: - outp += sprintf(outp, "\n"); + if (*(outp - 1) != '\n') + outp += sprintf(outp, "\n"); return 0; } -- cgit v1.2.3 From e29dc460d6c5ec43967ea69cb28120a93a20e904 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 19 Dec 2017 20:02:31 -0800 Subject: tools/power turbostat: Don't make man pages executable rpm-lint flagged these as being executable: kernel-tools.x86_64: W: spurious-executable-perm /usr/share/man/man8/turbostat.8.gz kernel-tools.x86_64: W: spurious-executable-perm /usr/share/man/man8/x86_energy_perf_policy.8.gz Fix this Signed-off-by: Laura Abbott Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/x86_energy_perf_policy/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index a9bc914a8fe8..2ab25aa38263 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -25,4 +25,4 @@ install : turbostat install -d $(DESTDIR)$(PREFIX)/bin install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 2447b1bbaacf..f4534fb8b951 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -24,5 +24,5 @@ install : x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/bin install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 -- cgit v1.2.3 From be0e54c4ebbf106571292ee97b445fd25bc3525b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Jun 2018 12:35:53 -0400 Subject: tools/power turbostat: Build-in "Low Power Idle" counters support Linux 4.15 exports the ACPI Low Power Idle Table's counters in /sys/devices/system/cpu/cpuidle/ low_power_idle_cpu_residency_us Show this in the "CPU%LPI" column. Today this reflects the "North Complex" residency in PC10, so expect it to closely follow "Pk%pc10". low_power_idle_system_residency_us Show this in the "SYS%LPI" column. Today, this reflects the North is in PC10, plus the PCH is sufficiently quiescent to save additional power via the "S0ix" system state, as measured by the PCH SLP_S0 counter. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 95 +++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 29e9cd5fcd88..bfb0cec9618e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -84,6 +84,8 @@ unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; unsigned long long gfx_cur_rc6_ms; +unsigned long long cpuidle_cur_cpu_lpi_us; +unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; @@ -188,6 +190,8 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; + unsigned long long cpu_lpi; + unsigned long long sys_lpi; unsigned long long pkg_wtd_core_c0; unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; @@ -377,6 +381,8 @@ struct msr_counter bic[] = { { 0x0, "Pkg%pc8" }, { 0x0, "Pkg%pc9" }, { 0x0, "Pkg%pc10" }, + { 0x0, "CPU%LPI" }, + { 0x0, "SYS%LPI" }, { 0x0, "PkgWatt" }, { 0x0, "CorWatt" }, { 0x0, "GFXWatt" }, @@ -396,6 +402,7 @@ struct msr_counter bic[] = { { 0x0, "Any%C0" }, { 0x0, "GFX%C0" }, { 0x0, "CPUGFX%" }, + { 0x0, "Node%" }, }; @@ -427,6 +434,8 @@ struct msr_counter bic[] = { #define BIC_Pkgpc8 (1ULL << 23) #define BIC_Pkgpc9 (1ULL << 24) #define BIC_Pkgpc10 (1ULL << 25) +#define BIC_CPU_LPI (1ULL << 26) +#define BIC_SYS_LPI (1ULL << 27) #define BIC_PkgWatt (1ULL << 26) #define BIC_CorWatt (1ULL << 27) #define BIC_GFXWatt (1ULL << 28) @@ -653,6 +662,10 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_LPI)) + outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); + if (DO_BIC(BIC_SYS_LPI)) + outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) @@ -757,6 +770,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); outp += sprintf(outp, "pc10: %016llX\n", p->pc10); + outp += sprintf(outp, "pc10: %016llX\n", p->pc10); + outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); + outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); @@ -981,6 +997,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); + if (DO_BIC(BIC_CPU_LPI)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + if (DO_BIC(BIC_SYS_LPI)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); + /* * If measurement interval exceeds minimum RAPL Joule Counter range, * indicate that results are suspect by printing "**" in fraction place. @@ -1106,6 +1127,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; + old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; /* flag an error when rc6 counter resets/wraps */ @@ -1297,6 +1320,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->cpu_lpi = 0; + p->sys_lpi = 0; p->energy_pkg = 0; p->energy_dram = 0; @@ -1385,6 +1410,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.cpu_lpi = p->cpu_lpi; + average.packages.sys_lpi = p->sys_lpi; + average.packages.energy_pkg += p->energy_pkg; average.packages.energy_dram += p->energy_dram; average.packages.energy_cores += p->energy_cores; @@ -1728,6 +1756,11 @@ retry: if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) return -13; + if (DO_BIC(BIC_CPU_LPI)) + p->cpu_lpi = cpuidle_cur_cpu_lpi_us; + if (DO_BIC(BIC_SYS_LPI)) + p->sys_lpi = cpuidle_cur_sys_lpi_us; + if (do_rapl & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; @@ -2594,6 +2627,52 @@ int snapshot_gfx_mhz(void) return 0; } +/* + * snapshot_cpu_lpi() + * + * record snapshot of + * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_cpu_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); + if (retval != 1) + err(1, "CPU LPI"); + + fclose(fp); + + return 0; +} +/* + * snapshot_sys_lpi() + * + * record snapshot of + * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_sys_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); + if (retval != 1) + err(1, "SYS LPI"); + + fclose(fp); + + return 0; +} /* * snapshot /proc and /sys files * @@ -2611,6 +2690,12 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_CPU_LPI)) + snapshot_cpu_lpi_us(); + + if (DO_BIC(BIC_SYS_LPI)) + snapshot_sys_lpi_us(); + return 0; } @@ -4406,6 +4491,16 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) + BIC_PRESENT(BIC_CPU_LPI); + else + BIC_NOT_PRESENT(BIC_CPU_LPI); + + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK)) + BIC_PRESENT(BIC_SYS_LPI); + else + BIC_NOT_PRESENT(BIC_SYS_LPI); + if (!quiet) decode_misc_feature_control(); -- cgit v1.2.3 From 4bd1f8f21aceae12484a6b2cf3b935a715c75dec Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 28 Jan 2018 23:42:42 -0500 Subject: tools/power turbostat: Fix --hide Pk%pc10 The column header for PC10 residency is "Pk%pc10" This is missing the 'g' that others have, eg Pkg%pc6, to allow tab-delimited columns to fit into 8-columns. However, --hide Pk%pc10 did not work, it was still looking for the 'g'. This was confusing, because --list shows the correct "Pk%pc10" Reported-by: Wendy Wang Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bfb0cec9618e..76c70c2a6f18 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -380,7 +380,7 @@ struct msr_counter bic[] = { { 0x0, "Pkg%pc7" }, { 0x0, "Pkg%pc8" }, { 0x0, "Pkg%pc9" }, - { 0x0, "Pkg%pc10" }, + { 0x0, "Pk%pc10" }, { 0x0, "CPU%LPI" }, { 0x0, "SYS%LPI" }, { 0x0, "PkgWatt" }, -- cgit v1.2.3 From 0748eaf0cf44799563c2d1de4849c7314bd5b52e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Jun 2018 12:38:29 -0400 Subject: tools/power turbostat: add POLL and POLL% column Like the "C1" and "C1%" column, the new POLL and POLL% columns show invocations and residency% during the measurement interval. While it didn't seem important to track in the past, we've recently found some Linux cpuidle bugs related to POLL%. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 76c70c2a6f18..47af118a6ac9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -154,7 +154,8 @@ char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; -#define MAX_ADDED_COUNTERS 16 +#define MAX_ADDED_COUNTERS 8 +#define MAX_ADDED_THREAD_COUNTERS 24 struct thread_data { struct timeval tv_begin; @@ -169,7 +170,7 @@ struct thread_data { unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 - unsigned long long counter[MAX_ADDED_COUNTERS]; + unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -4882,7 +4883,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp->next = sys.tp; sys.tp = msrp; sys.added_thread_counters++; - if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { + if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); exit(-1); @@ -5041,7 +5042,7 @@ void probe_sysfs(void) if (!DO_BIC(BIC_sysfs)) return; - for (state = 10; state > 0; --state) { + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -5068,7 +5069,7 @@ void probe_sysfs(void) FORMAT_PERCENT, SYSFS_PERCPU); } - for (state = 10; state > 0; --state) { + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); -- cgit v1.2.3 From e0d34648b4d77ba715e13739d04e7b0692fe5eaa Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 13 Feb 2018 11:12:04 -0800 Subject: tools/power turbostat: Correct SNB_C1/C3_AUTO_UNDEMOTE defines According to the Intel Software Developers' Manual, Vol. 4, Order No. 335592, these macros have been reversed since they were added. Fixes: 889facbee3e6 ("tools/power turbostat: v3.0: monitor Watts and Temperature") Signed-off-by: Matt Turner Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 47af118a6ac9..64ad2b51ddd4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2149,8 +2149,8 @@ dump_nhm_cst_cfg(void) get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); -- cgit v1.2.3 From a00072a24a9f5b88cfc56f2dec6afe8ce3874e60 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 13 Feb 2018 11:12:05 -0800 Subject: x86: msr-index.h: Correct SNB_C1/C3_AUTO_UNDEMOTE defines According to the Intel Software Developers' Manual, Vol. 4, Order No. 335592, these macros have been reversed since they were added in the initial turbostat commit. The reversed definitions were presumably copied from turbostat.c to this file. Fixes: 9c63a650bb10 ("tools/power/x86/turbostat: share kernel MSR #defines") Signed-off-by: Matt Turner Acked-by: Ingo Molnar Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fda2114197b3..68b2c3150de1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -62,8 +62,8 @@ #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe -- cgit v1.2.3 From 9d4eab02a7fe5526850086326769aa1b7048fd7b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Jun 2018 13:04:18 -0400 Subject: tools/power turbostat: delete duplicate #defines The SNB_C1_AUTO_UNDEMOTE definition should have been deleted once it was copied into msr-index.h. One copy of the truth is better -- particularly when Matt needs to fix it:-) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 64ad2b51ddd4..a9085775a3de 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2149,9 +2149,6 @@ dump_nhm_cst_cfg(void) get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); -#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) - fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", -- cgit v1.2.3 From 997e53950e3e71ceb13a7cc220d430b7cb78ef8f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 31 May 2018 10:39:07 -0700 Subject: tools/power turbostat: Add Cannon Lake support All MSRs related to turbostat are same as Kabylake. Even though SDM claims that core C3 residency can be read from MSR 0x662, the read on this MSR fails on CNL platform. Hence disabled C3 MSR read and display. Signed-off-by: Srinivas Pandruvada Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a9085775a3de..1cb38c12a8b5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -62,6 +62,7 @@ unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_slm_cstates; +unsigned int do_cnl_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; @@ -604,7 +605,7 @@ void print_header(char *delim) if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); @@ -921,7 +922,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); @@ -1676,7 +1677,7 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } @@ -2943,6 +2944,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ pkg_cstate_limits = hsw_pkg_cstate_limits; has_misc_feature_control = 1; break; @@ -3148,6 +3150,7 @@ int has_config_tdp(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ @@ -3602,6 +3605,7 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); @@ -3937,6 +3941,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GEMINI_LAKE: @@ -3970,6 +3975,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GEMINI_LAKE: return 1; @@ -3995,6 +4001,7 @@ int has_skl_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ return 1; } return 0; @@ -4024,6 +4031,19 @@ int is_knl(unsigned int family, unsigned int model) return 0; } +int is_cnl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ + return 1; + } + + return 0; +} + unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) { if (is_knl(family, model)) @@ -4461,6 +4481,7 @@ void process_cpuid() } do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); + do_cnl_cstates = is_cnl(family, model); if (!quiet) decode_misc_pwr_mgmt_msr(); -- cgit v1.2.3 From 023fe0ac975e4dff592ce90bbd12747dedf7c598 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Thu, 26 Apr 2018 08:41:03 +0800 Subject: tools/power turbostat: if --num_iterations, print for specific number of iterations There's a use case during test to only print specific round of iterations if --num_iterations is specified, for example, with this patch applied: turbostat -i 5 -n 4 will capture 4 samples with 5 seconds interval. [lenb: renamed to --num_iterations from --iterations] Reviewed-by: Rafael J. Wysocki Signed-off-by: Chen Yu Reviewed-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 ++ tools/power/x86/turbostat/turbostat.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 4cffa4a5a2b7..ca9ef7017624 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -67,6 +67,8 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP +\fB--num_iterations num\fP number of the measurement iterations. +.PP \fB--out output_file\fP turbostat output is written to the specified output_file. The file is truncated if it already exists, and it is created if it does not exist. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1cb38c12a8b5..6910773e85a9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -51,6 +51,7 @@ int *fd_percpu; struct timeval interval_tv = {5, 0}; struct timespec interval_ts = {5, 0}; struct timespec one_msec = {0, 1000000}; +unsigned int num_iterations; unsigned int debug; unsigned int quiet; unsigned int shown; @@ -496,6 +497,7 @@ void help(void) "--interval sec.subsec Override default 5-second measurement interval\n" "--help print this help message\n" "--list list column headers only\n" + "--num_iterations num number of the measurement iterations\n" "--out file create or truncate \"file\" for all output\n" "--version print version information\n" "\n" @@ -2763,6 +2765,7 @@ void turbostat_loop() { int retval; int restarted = 0; + int done_iters = 0; setup_signal_handler(); @@ -2781,6 +2784,7 @@ restart: goto restart; } restarted = 0; + done_iters = 0; gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { @@ -2809,6 +2813,8 @@ restart: flush_output_stdout(); if (exit_requested) break; + if (num_iterations && ++done_iters >= num_iterations) + break; do_sleep(); if (snapshot_proc_sysfs_files()) goto restart; @@ -2830,6 +2836,8 @@ restart: flush_output_stdout(); if (exit_requested) break; + if (num_iterations && ++done_iters >= num_iterations) + break; } } @@ -5212,6 +5220,7 @@ void cmdline(int argc, char **argv) {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"enable", required_argument, 0, 'e'}, {"interval", required_argument, 0, 'i'}, + {"num_iterations", required_argument, 0, 'n'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, @@ -5227,7 +5236,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jo:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5287,6 +5296,15 @@ void cmdline(int argc, char **argv) case 'q': quiet = 1; break; + case 'n': + num_iterations = strtod(optarg, NULL); + + if (num_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", + num_iterations); + exit(2); + } + break; case 's': /* * --show: show only those specified -- cgit v1.2.3 From 843c57916dde0e260e500e4ae1a443a7d7fbe962 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:28 -0400 Subject: tools/power turbostat: set max_num_cpus equal to the cpumask length Future fixes will use sysfs files that contain cpumask output. The code needs to know the length of the cpumask in order to determine which cpus are set in a cpumask. Currently topo.max_cpu_num is the maximum cpu number. It can be increased the the maximum value of cpus represented in cpumasks. Set max_num_cpus to the length of a cpumask. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6910773e85a9..1684f4ec627e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2496,6 +2496,20 @@ void re_initialize(void) printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } +void set_max_cpu_num(void) +{ + FILE *filep; + unsigned long dummy; + + topo.max_cpu_num = 0; + filep = fopen_or_die( + "/sys/devices/system/cpu/cpu0/topology/thread_siblings", + "r"); + while (fscanf(filep, "%lx,", &dummy) == 1) + topo.max_cpu_num += 32; + fclose(filep); + topo.max_cpu_num--; /* 0 based */ +} /* * count_cpus() @@ -2503,10 +2517,7 @@ void re_initialize(void) */ int count_cpus(int cpu) { - if (topo.max_cpu_num < cpu) - topo.max_cpu_num = cpu; - - topo.num_cpus += 1; + topo.num_cpus++; return 0; } int mark_cpu_present(int cpu) @@ -4564,8 +4575,8 @@ void topology_probe() } *cpus; /* Initialize num_cpus, max_cpu_num */ + set_max_cpu_num(); topo.num_cpus = 0; - topo.max_cpu_num = 0; for_all_proc_cpus(count_cpus); if (!summary_only && topo.num_cpus > 1) BIC_PRESENT(BIC_CPU); -- cgit v1.2.3 From 0e2d8f058f9924c373ee7061064936cd582bcbe7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Jun 2018 22:08:58 -0400 Subject: tools/power turbostat: Fix node and siblings lookup data The turbostat code only looks at thread_siblings_list to determine if processing units/threads are on the same the core. This works well on Intel systems which have a shared L1 instruction and data cache. This does not work on AMD systems which have shared L1 instruction cache but separate L1 data caches. Other utilities also check sibling's core ID to determine if the processing unit shares the same core. Additionally, the cpu_topology *cpus list used in topology_probe() can be used elsewhere in the code to simplify things. Export *cpus to the entire turbostat code, and add Processing Unit/Thread IDs information to each cpu_topology struct. Confirm that the thread is on the same core as indicated by thread_siblings_list. [v2]: Fixup CPU_* usage that caused gcc malloc error. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 117 ++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 35 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1684f4ec627e..6bf426813797 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -158,6 +158,7 @@ cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 8 #define MAX_ADDED_THREAD_COUNTERS 24 +#define BITMASK_SIZE 32 struct thread_data { struct timeval tv_begin; @@ -256,6 +257,13 @@ struct system_summary { struct pkg_data packages; } average; +struct cpu_topology { + int physical_package_id; + int logical_cpu_id; + int node_id; + int physical_core_id; + cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ +} *cpus; struct topo_params { int num_packages; @@ -2271,6 +2279,8 @@ void free_fd_percpu(void) void free_all_buffers(void) { + int i; + CPU_FREE(cpu_present_set); cpu_present_set = NULL; cpu_present_setsize = 0; @@ -2303,6 +2313,12 @@ void free_all_buffers(void) free(irq_column_2_cpu); free(irqs_per_cpu); + + for (i = 0; i <= topo.max_cpu_num; ++i) { + if (cpus[i].put_ids) + CPU_FREE(cpus[i].put_ids); + } + free(cpus); } @@ -2383,35 +2399,60 @@ int get_core_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); } -int get_num_ht_siblings(int cpu) +int get_node_id(struct cpu_topology *thiscpu) { char path[80]; FILE *filep; - int sib1; - int matches = 0; - char character; - char str[100]; - char *ch; - - sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); - filep = fopen_or_die(path, "r"); + int i; + int cpu = thiscpu->logical_cpu_id; - /* - * file format: - * A ',' separated or '-' separated set of numbers - * (eg 1-2 or 1,3,4,5) - */ - fscanf(filep, "%d%c\n", &sib1, &character); - fseek(filep, 0, SEEK_SET); - fgets(str, 100, filep); - ch = strchr(str, character); - while (ch != NULL) { - matches++; - ch = strchr(ch+1, character); + for (i = 0; i <= topo.max_cpu_num; i++) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", + cpu, i); + filep = fopen(path, "r"); + if (!filep) + continue; + fclose(filep); + return i; } + return -1; +} +int get_thread_siblings(struct cpu_topology *thiscpu) +{ + char path[80], character; + FILE *filep; + unsigned long map; + int shift, sib_core; + int cpu = thiscpu->logical_cpu_id; + int offset = topo.max_cpu_num + 1; + size_t size; + + thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); + if (!thiscpu->put_ids) + return -1; + + size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(size, thiscpu->put_ids); + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + filep = fopen_or_die(path, "r"); + do { + offset -= BITMASK_SIZE; + fscanf(filep, "%lx%c", &map, &character); + for (shift = 0; shift < BITMASK_SIZE; shift++) { + if ((map >> shift) & 0x1) { + sib_core = get_core_id(shift + offset); + if (sib_core == thiscpu->physical_core_id) + CPU_SET_S(shift + offset, size, + thiscpu->put_ids); + } + } + } while (!strncmp(&character, ",", 1)); fclose(filep); - return matches+1; + + return CPU_COUNT_S(size, thiscpu->put_ids); } /* @@ -2506,7 +2547,7 @@ void set_max_cpu_num(void) "/sys/devices/system/cpu/cpu0/topology/thread_siblings", "r"); while (fscanf(filep, "%lx,", &dummy) == 1) - topo.max_cpu_num += 32; + topo.max_cpu_num += BITMASK_SIZE; fclose(filep); topo.max_cpu_num--; /* 0 based */ } @@ -4569,10 +4610,6 @@ void topology_probe() int max_core_id = 0; int max_package_id = 0; int max_siblings = 0; - struct cpu_topology { - int core_id; - int physical_package_id; - } *cpus; /* Initialize num_cpus, max_cpu_num */ set_max_cpu_num(); @@ -4629,20 +4666,32 @@ void topology_probe() fprintf(outf, "cpu%d NOT PRESENT\n", i); continue; } - cpus[i].core_id = get_core_id(i); - if (cpus[i].core_id > max_core_id) - max_core_id = cpus[i].core_id; + cpus[i].logical_cpu_id = i; + + /* get package information */ cpus[i].physical_package_id = get_physical_package_id(i); if (cpus[i].physical_package_id > max_package_id) max_package_id = cpus[i].physical_package_id; - siblings = get_num_ht_siblings(i); + /* get numa node information */ + cpus[i].node_id = get_node_id(&cpus[i]); + + /* get core information */ + cpus[i].physical_core_id = get_core_id(i); + if (cpus[i].physical_core_id > max_core_id) + max_core_id = cpus[i].physical_core_id; + + /* get thread information */ + siblings = get_thread_siblings(&cpus[i]); if (siblings > max_siblings) max_siblings = siblings; + if (debug > 1) - fprintf(outf, "cpu %d pkg %d core %d\n", - i, cpus[i].physical_package_id, cpus[i].core_id); + fprintf(outf, "cpu %d pkg %d node %d core %d\n", + i, cpus[i].physical_package_id, + cpus[i].node_id, + cpus[i].physical_core_id); } topo.num_cores_per_pkg = max_core_id + 1; if (debug > 1) @@ -4661,8 +4710,6 @@ void topology_probe() topo.num_threads_per_core = max_siblings; if (debug > 1) fprintf(outf, "max_siblings %d\n", max_siblings); - - free(cpus); } void -- cgit v1.2.3 From ef6057417a6f770dfc2f1f551763cedb4d93a9cf Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:30 -0400 Subject: tools/power turbostat: Calculate additional node information for a package The code currently assumes each package has exactly one node. This is not the case for AMD systems and Intel systems with COD. AMD systems also may re-enumerate each node's core IDs starting at 0 (for example, an AMD processor may have two nodes, each with core IDs from 0 to 7). In order to properly enumerate the cores we need to track both the physical and logical node IDs. Add physical_node_id to track the node ID assigned by the kernel, and logical_node_id used by turbostat to track the nodes per package ie) a 0-based count within the package. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 65 ++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6bf426813797..96390f7e4898 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -260,7 +260,8 @@ struct system_summary { struct cpu_topology { int physical_package_id; int logical_cpu_id; - int node_id; + int physical_node_id; + int logical_node_id; /* 0-based count within the package */ int physical_core_id; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -270,6 +271,8 @@ struct topo_params { int num_cpus; int num_cores; int max_cpu_num; + int max_node_num; + int num_nodes_per_pkg; int num_cores_per_pkg; int num_threads_per_core; } topo; @@ -2399,7 +2402,54 @@ int get_core_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); } -int get_node_id(struct cpu_topology *thiscpu) +void set_node_data(void) +{ + char path[80]; + FILE *filep; + int pkg, node, cpu; + + struct pkg_node_info { + int count; + int min; + } *pni; + + pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); + if (!pni) + err(1, "calloc pkg_node_count"); + + for (pkg = 0; pkg < topo.num_packages; pkg++) + pni[pkg].min = topo.num_cpus; + + for (node = 0; node <= topo.max_node_num; node++) { + /* find the "first" cpu in the node */ + sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); + filep = fopen(path, "r"); + if (!filep) + continue; + fscanf(filep, "%d", &cpu); + fclose(filep); + + pkg = cpus[cpu].physical_package_id; + pni[pkg].count++; + + if (node < pni[pkg].min) + pni[pkg].min = node; + } + + for (pkg = 0; pkg < topo.num_packages; pkg++) + if (pni[pkg].count > topo.num_nodes_per_pkg) + topo.num_nodes_per_pkg = pni[0].count; + + for (cpu = 0; cpu < topo.num_cpus; cpu++) { + pkg = cpus[cpu].physical_package_id; + node = cpus[cpu].physical_node_id; + cpus[cpu].logical_node_id = node - pni[pkg].min; + } + free(pni); + +} + +int get_physical_node_id(struct cpu_topology *thiscpu) { char path[80]; FILE *filep; @@ -4675,7 +4725,9 @@ void topology_probe() max_package_id = cpus[i].physical_package_id; /* get numa node information */ - cpus[i].node_id = get_node_id(&cpus[i]); + cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); + if (cpus[i].physical_node_id > topo.max_node_num) + topo.max_node_num = cpus[i].physical_node_id; /* get core information */ cpus[i].physical_core_id = get_core_id(i); @@ -4690,9 +4742,10 @@ void topology_probe() if (debug > 1) fprintf(outf, "cpu %d pkg %d node %d core %d\n", i, cpus[i].physical_package_id, - cpus[i].node_id, + cpus[i].physical_node_id, cpus[i].physical_core_id); } + topo.num_cores_per_pkg = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", @@ -4707,6 +4760,10 @@ void topology_probe() if (!summary_only && topo.num_packages > 1) BIC_PRESENT(BIC_Package); + set_node_data(); + if (debug > 1) + fprintf(outf, "num_nodes_per_pkg %d\n", topo.num_nodes_per_pkg); + topo.num_threads_per_core = max_siblings; if (debug > 1) fprintf(outf, "max_siblings %d\n", max_siblings); -- cgit v1.2.3 From 8cb48b32a5defbbf333adb9a3f2101c39d37af64 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:31 -0400 Subject: tools/power turbostat: track thread ID in cpu_topology The code can be simplified if the cpu_topology *cpus tracks the thread IDs. This removes an additional file lookup and simplifies the counter initialization code. Add thread ID to cpu_topology information and cleanup the counter initialization code. v2: prevent thread_id from being overwritten Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 105 +++++++++++++--------------------- 1 file changed, 39 insertions(+), 66 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 96390f7e4898..2ac9119dee96 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -263,6 +263,7 @@ struct cpu_topology { int physical_node_id; int logical_node_id; /* 0-based count within the package */ int physical_core_id; + int thread_id; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -2345,44 +2346,6 @@ int parse_int_file(const char *fmt, ...) return value; } -/* - * get_cpu_position_in_core(cpu) - * return the position of the CPU among its HT siblings in the core - * return -1 if the sibling is not in list - */ -int get_cpu_position_in_core(int cpu) -{ - char path[64]; - FILE *filep; - int this_cpu; - char character; - int i; - - sprintf(path, - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", - cpu); - filep = fopen(path, "r"); - if (filep == NULL) { - perror(path); - exit(1); - } - - for (i = 0; i < topo.num_threads_per_core; i++) { - fscanf(filep, "%d", &this_cpu); - if (this_cpu == cpu) { - fclose(filep); - return i; - } - - /* Account for no separator after last thread*/ - if (i != (topo.num_threads_per_core - 1)) - fscanf(filep, "%c", &character); - } - - fclose(filep); - return -1; -} - /* * cpu_is_first_core_in_package(cpu) * return 1 if given CPU is 1st core in package @@ -2473,12 +2436,15 @@ int get_thread_siblings(struct cpu_topology *thiscpu) char path[80], character; FILE *filep; unsigned long map; - int shift, sib_core; + int so, shift, sib_core; int cpu = thiscpu->logical_cpu_id; int offset = topo.max_cpu_num + 1; size_t size; + int thread_id = 0; thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); + if (thiscpu->thread_id < 0) + thiscpu->thread_id = thread_id++; if (!thiscpu->put_ids) return -1; @@ -2493,10 +2459,15 @@ int get_thread_siblings(struct cpu_topology *thiscpu) fscanf(filep, "%lx%c", &map, &character); for (shift = 0; shift < BITMASK_SIZE; shift++) { if ((map >> shift) & 0x1) { - sib_core = get_core_id(shift + offset); - if (sib_core == thiscpu->physical_core_id) - CPU_SET_S(shift + offset, size, - thiscpu->put_ids); + so = shift + offset; + sib_core = get_core_id(so); + if (sib_core == thiscpu->physical_core_id) { + CPU_SET_S(so, size, thiscpu->put_ids); + if ((so != cpu) && + (cpus[so].thread_id < 0)) + cpus[so].thread_id = + thread_id++; + } } } } while (!strncmp(&character, ",", 1)); @@ -2617,6 +2588,12 @@ int mark_cpu_present(int cpu) return 0; } +int init_thread_id(int cpu) +{ + cpus[cpu].thread_id = -1; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -4703,6 +4680,7 @@ void topology_probe() cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + for_all_proc_cpus(init_thread_id); /* * For online cpus @@ -4738,12 +4716,16 @@ void topology_probe() siblings = get_thread_siblings(&cpus[i]); if (siblings > max_siblings) max_siblings = siblings; + if (cpus[i].thread_id != -1) + topo.num_cores++; if (debug > 1) - fprintf(outf, "cpu %d pkg %d node %d core %d\n", + fprintf(outf, + "cpu %d pkg %d node %d core %d thread %d\n", i, cpus[i].physical_package_id, cpus[i].physical_node_id, - cpus[i].physical_core_id); + cpus[i].physical_core_id, + cpus[i].thread_id); } topo.num_cores_per_pkg = max_core_id + 1; @@ -4805,47 +4787,38 @@ error: /* * init_counter() * - * set cpu_id, core_num, pkg_num * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE - * - * increment topo.num_cores when 1st core in pkg seen */ void init_counter(struct thread_data *thread_base, struct core_data *core_base, - struct pkg_data *pkg_base, int thread_num, int core_num, - int pkg_num, int cpu_id) + struct pkg_data *pkg_base, int cpu_id) { + int pkg_id = cpus[cpu_id].physical_package_id; + int core_id = cpus[cpu_id].physical_core_id; + int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); - c = GET_CORE(core_base, core_num, pkg_num); - p = GET_PKG(pkg_base, pkg_num); + t = GET_THREAD(thread_base, thread_id, core_id, pkg_id); + c = GET_CORE(core_base, core_id, pkg_id); + p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; - if (thread_num == 0) { + if (thread_id == 0) { t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; if (cpu_is_first_core_in_package(cpu_id)) t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; } - c->core_id = core_num; - p->package_id = pkg_num; + c->core_id = core_id; + p->package_id = pkg_id; } int initialize_counters(int cpu_id) { - int my_thread_id, my_core_id, my_package_id; - - my_package_id = get_physical_package_id(cpu_id); - my_core_id = get_core_id(cpu_id); - my_thread_id = get_cpu_position_in_core(cpu_id); - if (!my_thread_id) - topo.num_cores++; - - init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); - init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); + init_counter(EVEN_COUNTERS, cpu_id); + init_counter(ODD_COUNTERS, cpu_id); return 0; } -- cgit v1.2.3 From 139dd0e07c1c116949aed75c2c067873c6f91e50 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:32 -0400 Subject: tools/power turbostat: rename num_cores_per_pkg to num_cores_per_node turbostat incorrectly assumes that there is one node per package. As a result num_cores_per_pkg is not correctly named and is actually num_cores_per_node. Rename num_cores_per_pkg to num_cores_per_node. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2ac9119dee96..154c55530671 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -217,11 +217,11 @@ struct pkg_data { #define EVEN_COUNTERS thread_even, core_even, package_even #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ - (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ + (thread_base + (pkg_no) * topo.num_cores_per_node * \ topo.num_threads_per_core + \ (core_no) * topo.num_threads_per_core + (thread_no)) #define GET_CORE(core_base, core_no, pkg_no) \ - (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) + (core_base + (pkg_no) * topo.num_cores_per_node + (core_no)) #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; @@ -274,7 +274,7 @@ struct topo_params { int max_cpu_num; int max_node_num; int num_nodes_per_pkg; - int num_cores_per_pkg; + int num_cores_per_node; int num_threads_per_core; } topo; @@ -300,7 +300,8 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { + for (core_no = 0; core_no < topo.num_cores_per_node; + ++core_no) { for (thread_no = 0; thread_no < topo.num_threads_per_core; ++thread_no) { struct thread_data *t; @@ -2491,7 +2492,8 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, int retval, pkg_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { + for (core_no = 0; core_no < topo.num_cores_per_node; + ++core_no) { for (thread_no = 0; thread_no < topo.num_threads_per_core; ++thread_no) { struct thread_data *t, *t2; @@ -4728,11 +4730,11 @@ void topology_probe() cpus[i].thread_id); } - topo.num_cores_per_pkg = max_core_id + 1; + topo.num_cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", - max_core_id, topo.num_cores_per_pkg); - if (!summary_only && topo.num_cores_per_pkg > 1) + max_core_id, topo.num_cores_per_node); + if (!summary_only && topo.num_cores_per_node > 1) BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; @@ -4756,21 +4758,21 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data { int i; - *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * + *t = calloc(topo.num_threads_per_core * topo.num_cores_per_node * topo.num_packages, sizeof(struct thread_data)); if (*t == NULL) goto error; for (i = 0; i < topo.num_threads_per_core * - topo.num_cores_per_pkg * topo.num_packages; i++) + topo.num_cores_per_node * topo.num_packages; i++) (*t)[i].cpu_id = -1; - *c = calloc(topo.num_cores_per_pkg * topo.num_packages, + *c = calloc(topo.num_cores_per_node * topo.num_packages, sizeof(struct core_data)); if (*c == NULL) goto error; - for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) + for (i = 0; i < topo.num_cores_per_node * topo.num_packages; i++) (*c)[i].core_id = -1; *p = calloc(topo.num_packages, sizeof(struct pkg_data)); -- cgit v1.2.3 From 70a9c6e8ed28d974f10251920ccc0ce22f69afa2 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:33 -0400 Subject: tools/power turbostat: remove num_ from cpu_topology struct Cleanup, remove num_ from num_nodes_per_pkg, num_cores_per_node, and num_threads_per_node. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 48 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 154c55530671..fa7e836cdfef 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -217,11 +217,11 @@ struct pkg_data { #define EVEN_COUNTERS thread_even, core_even, package_even #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ - (thread_base + (pkg_no) * topo.num_cores_per_node * \ - topo.num_threads_per_core + \ - (core_no) * topo.num_threads_per_core + (thread_no)) + (thread_base + (pkg_no) * topo.cores_per_node * \ + topo.threads_per_core + \ + (core_no) * topo.threads_per_core + (thread_no)) #define GET_CORE(core_base, core_no, pkg_no) \ - (core_base + (pkg_no) * topo.num_cores_per_node + (core_no)) + (core_base + (pkg_no) * topo.cores_per_node + (core_no)) #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; @@ -273,9 +273,9 @@ struct topo_params { int num_cores; int max_cpu_num; int max_node_num; - int num_nodes_per_pkg; - int num_cores_per_node; - int num_threads_per_core; + int nodes_per_pkg; + int cores_per_node; + int threads_per_core; } topo; struct timeval tv_even, tv_odd, tv_delta; @@ -300,10 +300,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int retval, pkg_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_node; - ++core_no) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < - topo.num_threads_per_core; ++thread_no) { + topo.threads_per_core; ++thread_no) { struct thread_data *t; struct core_data *c; struct pkg_data *p; @@ -2401,8 +2400,8 @@ void set_node_data(void) } for (pkg = 0; pkg < topo.num_packages; pkg++) - if (pni[pkg].count > topo.num_nodes_per_pkg) - topo.num_nodes_per_pkg = pni[0].count; + if (pni[pkg].count > topo.nodes_per_pkg) + topo.nodes_per_pkg = pni[0].count; for (cpu = 0; cpu < topo.num_cpus; cpu++) { pkg = cpus[cpu].physical_package_id; @@ -2492,10 +2491,9 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, int retval, pkg_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.num_cores_per_node; - ++core_no) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { for (thread_no = 0; thread_no < - topo.num_threads_per_core; ++thread_no) { + topo.threads_per_core; ++thread_no) { struct thread_data *t, *t2; struct core_data *c, *c2; struct pkg_data *p, *p2; @@ -4730,11 +4728,11 @@ void topology_probe() cpus[i].thread_id); } - topo.num_cores_per_node = max_core_id + 1; + topo.cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", - max_core_id, topo.num_cores_per_node); - if (!summary_only && topo.num_cores_per_node > 1) + max_core_id, topo.cores_per_node); + if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; @@ -4746,9 +4744,9 @@ void topology_probe() set_node_data(); if (debug > 1) - fprintf(outf, "num_nodes_per_pkg %d\n", topo.num_nodes_per_pkg); + fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); - topo.num_threads_per_core = max_siblings; + topo.threads_per_core = max_siblings; if (debug > 1) fprintf(outf, "max_siblings %d\n", max_siblings); } @@ -4758,21 +4756,21 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data { int i; - *t = calloc(topo.num_threads_per_core * topo.num_cores_per_node * + *t = calloc(topo.threads_per_core * topo.cores_per_node * topo.num_packages, sizeof(struct thread_data)); if (*t == NULL) goto error; - for (i = 0; i < topo.num_threads_per_core * - topo.num_cores_per_node * topo.num_packages; i++) + for (i = 0; i < topo.threads_per_core * + topo.cores_per_node * topo.num_packages; i++) (*t)[i].cpu_id = -1; - *c = calloc(topo.num_cores_per_node * topo.num_packages, + *c = calloc(topo.cores_per_node * topo.num_packages, sizeof(struct core_data)); if (*c == NULL) goto error; - for (i = 0; i < topo.num_cores_per_node * topo.num_packages; i++) + for (i = 0; i < topo.cores_per_node * topo.num_packages; i++) (*c)[i].core_id = -1; *p = calloc(topo.num_packages, sizeof(struct pkg_data)); -- cgit v1.2.3 From 40f5cfe7b886676f00e860b482c4bf7103413a24 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:34 -0400 Subject: tools/power turbostat: add node information into turbostat calculations The previous patches have added node information to turbostat, but the counters code does not take it into account. Add node information from cpu_topology calculations to turbostat counters. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 143 ++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 58 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa7e836cdfef..42273019da10 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -216,12 +216,21 @@ struct pkg_data { #define ODD_COUNTERS thread_odd, core_odd, package_odd #define EVEN_COUNTERS thread_even, core_even, package_even -#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ - (thread_base + (pkg_no) * topo.cores_per_node * \ - topo.threads_per_core + \ - (core_no) * topo.threads_per_core + (thread_no)) -#define GET_CORE(core_base, core_no, pkg_no) \ - (core_base + (pkg_no) * topo.cores_per_node + (core_no)) +#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ + ((thread_base) + \ + ((pkg_no) * \ + topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ + ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ + ((core_no) * topo.threads_per_core) + \ + (thread_no)) + +#define GET_CORE(core_base, core_no, node_no, pkg_no) \ + ((core_base) + \ + ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ + ((node_no) * topo.cores_per_node) + \ + (core_no)) + + #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; @@ -297,27 +306,33 @@ int cpu_is_not_present(int cpu) int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { - int retval, pkg_no, core_no, thread_no; + int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (thread_no = 0; thread_no < - topo.threads_per_core; ++thread_no) { - struct thread_data *t; - struct core_data *c; - struct pkg_data *p; - - t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); - - if (cpu_is_not_present(t->cpu_id)) - continue; - - c = GET_CORE(core_base, core_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - - retval = func(t, c, p); - if (retval) - return retval; + for (node_no = 0; node_no < topo.nodes_per_pkg; + node_no++) { + for (thread_no = 0; thread_no < + topo.threads_per_core; ++thread_no) { + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + t = GET_THREAD(thread_base, thread_no, + core_no, node_no, + pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + c = GET_CORE(core_base, core_no, + node_no, pkg_no); + p = GET_PKG(pkg_base, pkg_no); + + retval = func(t, c, p); + if (retval) + return retval; + } } } } @@ -2488,32 +2503,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) { - int retval, pkg_no, core_no, thread_no; + int retval, pkg_no, node_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (thread_no = 0; thread_no < - topo.threads_per_core; ++thread_no) { - struct thread_data *t, *t2; - struct core_data *c, *c2; - struct pkg_data *p, *p2; - - t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); - - if (cpu_is_not_present(t->cpu_id)) - continue; - - t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); - - c = GET_CORE(core_base, core_no, pkg_no); - c2 = GET_CORE(core_base2, core_no, pkg_no); - - p = GET_PKG(pkg_base, pkg_no); - p2 = GET_PKG(pkg_base2, pkg_no); - - retval = func(t, c, p, t2, c2, p2); - if (retval) - return retval; + for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { + for (core_no = 0; core_no < topo.cores_per_node; + ++core_no) { + for (thread_no = 0; thread_no < + topo.threads_per_core; ++thread_no) { + struct thread_data *t, *t2; + struct core_data *c, *c2; + struct pkg_data *p, *p2; + + t = GET_THREAD(thread_base, thread_no, + core_no, node_no, + pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + t2 = GET_THREAD(thread_base2, thread_no, + core_no, node_no, + pkg_no); + + c = GET_CORE(core_base, core_no, + node_no, pkg_no); + c2 = GET_CORE(core_base2, core_no, + node_no, + pkg_no); + + p = GET_PKG(pkg_base, pkg_no); + p2 = GET_PKG(pkg_base2, pkg_no); + + retval = func(t, c, p, t2, c2, p2); + if (retval) + return retval; + } } } } @@ -4752,25 +4777,26 @@ void topology_probe() } void -allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +allocate_counters(struct thread_data **t, struct core_data **c, + struct pkg_data **p) { int i; + int num_cores = topo.cores_per_node * topo.nodes_per_pkg * + topo.num_packages; + int num_threads = topo.threads_per_core * num_cores; - *t = calloc(topo.threads_per_core * topo.cores_per_node * - topo.num_packages, sizeof(struct thread_data)); + *t = calloc(num_threads, sizeof(struct thread_data)); if (*t == NULL) goto error; - for (i = 0; i < topo.threads_per_core * - topo.cores_per_node * topo.num_packages; i++) + for (i = 0; i < num_threads; i++) (*t)[i].cpu_id = -1; - *c = calloc(topo.cores_per_node * topo.num_packages, - sizeof(struct core_data)); + *c = calloc(num_cores, sizeof(struct core_data)); if (*c == NULL) goto error; - for (i = 0; i < topo.cores_per_node * topo.num_packages; i++) + for (i = 0; i < num_cores; i++) (*c)[i].core_id = -1; *p = calloc(topo.num_packages, sizeof(struct pkg_data)); @@ -4793,14 +4819,15 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) { int pkg_id = cpus[cpu_id].physical_package_id; + int node_id = cpus[cpu_id].logical_node_id; int core_id = cpus[cpu_id].physical_core_id; int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_id, core_id, pkg_id); - c = GET_CORE(core_base, core_id, pkg_id); + t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); + c = GET_CORE(core_base, core_id, node_id, pkg_id); p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; -- cgit v1.2.3 From 012350411b09a57e0f15eb438e3c9b877cdc66a1 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 1 Jun 2018 10:04:35 -0400 Subject: tools/power turbostat: Add Node in output Output a Node column if there is more than one node/socket. Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 42273019da10..0049154929f0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -485,6 +485,7 @@ struct msr_counter bic[] = { #define BIC_Any_c0 (1ULL << 42) #define BIC_GFX_c0 (1ULL << 43) #define BIC_CPUGFX (1ULL << 44) +#define BIC_Node (1ULL << 45) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) @@ -594,6 +595,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) @@ -871,6 +874,8 @@ int format_counters(struct thread_data *t, struct core_data *c, if (t == &average.threads) { if (DO_BIC(BIC_Package)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) @@ -882,6 +887,15 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_Node)) { + if (t) + outp += sprintf(outp, "%s%d", + (printed++ ? delim : ""), + cpus[t->cpu_id].physical_node_id); + else + outp += sprintf(outp, "%s-", + (printed++ ? delim : "")); + } if (DO_BIC(BIC_Core)) { if (c) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); @@ -4770,6 +4784,8 @@ void topology_probe() set_node_data(); if (debug > 1) fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); + if (!summary_only && topo.nodes_per_pkg > 1) + BIC_PRESENT(BIC_Node); topo.threads_per_core = max_siblings; if (debug > 1) -- cgit v1.2.3 From 201d4f50fef3c10856022b21cfd9fd81358a62ef Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 28 Jan 2018 21:54:28 -0500 Subject: tools/power turbostat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0049154929f0..d6cff3070ebd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5009,7 +5009,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 17.06.23" + fprintf(outf, "turbostat version 18.06.01" " - Len Brown \n"); } -- cgit v1.2.3