summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Leo Martins <loemra.dev@gmail.com> 2026-02-26 01:51:08 -0800
committer David Sterba <dsterba@suse.com> 2026-04-07 18:56:00 +0200
commit cc970d21c4f37b7cbedd73e043b69faf2c66a6fe (patch)
tree 06d63a845dda044e4fbeded1c57b210460fbf505
parent f9a48549a15aa369d42cebc08a6a72b71a53d547 (diff)
download linux-cc970d21c4f37b7cbedd73e043b69faf2c66a6fe.tar.gz
linux-cc970d21c4f37b7cbedd73e043b69faf2c66a6fe.zip
btrfs: add tracepoint for search slot restart tracking
Add a btrfs_search_slot_restart tracepoint that fires at each restart site in btrfs_search_slot(), recording the root, tree level, and reason for the restart. This enables tracking search slot restarts which contribute to COW amplification under memory pressure.

The four restart reasons are:

- write_lock: insufficient write lock level, need to restart with higher lock
- setup_nodes: node setup returned -EAGAIN
- slot_zero: insertion at slot 0 requires higher write lock level
- read_block: read_block_for_search returned -EAGAIN (block not cached or lock contention)

COW counts are already tracked by the existing trace_btrfs_cow_block() tracepoint. The per-restart-site tracepoint avoids counter overhead in the critical path when tracepoints are disabled, and provides richer per-event information that bpftrace scripts can aggregate into counts, histograms, and per-root breakdowns.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Leo Martins <loemra.dev@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/ctree.c 10
-rw-r--r-- include/trace/events/btrfs.h 24
2 files changed, 32 insertions, 2 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e8d260ecdcf6..71e7ada95477 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2102,6 +2102,7 @@ again:
p->nodes[level + 1])) {
write_lock_level = level + 1;
btrfs_release_path(p);
+ trace_btrfs_search_slot_restart(root, level, "write_lock");
goto again;
}
@@ -2164,8 +2165,10 @@ cow_done:
p->slots[level] = slot;
ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len,
&write_lock_level);
- if (ret2 == -EAGAIN)
+ if (ret2 == -EAGAIN) {
+ trace_btrfs_search_slot_restart(root, level, "setup_nodes");
goto again;
+ }
if (ret2) {
ret = ret2;
goto done;
@@ -2181,6 +2184,7 @@ cow_done:
if (slot == 0 && ins_len && write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
+ trace_btrfs_search_slot_restart(root, level, "slot_zero");
goto again;
}
@@ -2194,8 +2198,10 @@ cow_done:
}
ret2 = read_block_for_search(root, p, &b, slot, key);
- if (ret2 == -EAGAIN && !p->nowait)
+ if (ret2 == -EAGAIN && !p->nowait) {
+ trace_btrfs_search_slot_restart(root, level, "read_block");
goto again;
+ }
if (ret2) {
ret = ret2;
goto done;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0864700f76e0..8ad7a2d76c1d 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1113,6 +1113,30 @@ TRACE_EVENT(btrfs_cow_block,
__entry->cow_level)
);
+TRACE_EVENT(btrfs_search_slot_restart,
+
+ TP_PROTO(const struct btrfs_root *root, int level,
+ const char *reason),
+
+ TP_ARGS(root, level, reason),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, root_objectid )
+ __field( int, level )
+ __string( reason, reason )
+ ),
+
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->root_objectid = btrfs_root_id(root);
+ __entry->level = level;
+ __assign_str(reason);
+ ),
+
+ TP_printk_btrfs("root=%llu(%s) level=%d reason=%s",
+ show_root_type(__entry->root_objectid),
+ __entry->level, __get_str(reason))
+);
+
TRACE_EVENT(btrfs_space_reservation,
TP_PROTO(const struct btrfs_fs_info *fs_info, const char *type, u64 val,