summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/liveupdate/kexec_handover.c619
-rw-r--r--kernel/liveupdate/kexec_handover_debugfs.c3
2 files changed, 330 insertions, 292 deletions
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 4356f277b462..ad877926f3f6 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -5,6 +5,7 @@
* Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
* Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
* Copyright (C) 2025 Pasha Tatashin <pasha.tatashin@soleen.com>
+ * Copyright (C) 2026 Google LLC, Jason Miu <jasonmiu@google.com>
*/
#define pr_fmt(fmt) "KHO: " fmt
@@ -15,6 +16,7 @@
#include <linux/count_zeros.h>
#include <linux/kexec.h>
#include <linux/kexec_handover.h>
+#include <linux/kho_radix_tree.h>
#include <linux/kho/abi/kexec_handover.h>
#include <linux/libfdt.h>
#include <linux/list.h>
@@ -64,156 +66,308 @@ static int __init kho_parse_enable(char *p)
}
early_param("kho", kho_parse_enable);
-/*
- * Keep track of memory that is to be preserved across KHO.
- *
- * The serializing side uses two levels of xarrays to manage chunks of per-order
- * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
- * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
- * allocations each bitmap will cover 128M of address space. Thus, for 16G of
- * memory at most 512K of bitmap memory will be needed for order 0.
- *
- * This approach is fully incremental, as the serialization progresses folios
- * can continue be aggregated to the tracker. The final step, immediately prior
- * to kexec would serialize the xarray information into a linked list for the
- * successor kernel to parse.
- */
-
-#define PRESERVE_BITS (PAGE_SIZE * 8)
-
-struct kho_mem_phys_bits {
- DECLARE_BITMAP(preserve, PRESERVE_BITS);
-};
-
-static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
-
-struct kho_mem_phys {
- /*
- * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
- * to order.
- */
- struct xarray phys_bits;
-};
-
-struct kho_mem_track {
- /* Points to kho_mem_phys, each order gets its own bitmap tree */
- struct xarray orders;
-};
-
-struct khoser_mem_chunk;
-
struct kho_out {
void *fdt;
bool finalized;
struct mutex lock; /* protects KHO FDT finalization */
- struct kho_mem_track track;
+ struct kho_radix_tree radix_tree;
struct kho_debugfs dbg;
};
static struct kho_out kho_out = {
.lock = __MUTEX_INITIALIZER(kho_out.lock),
- .track = {
- .orders = XARRAY_INIT(kho_out.track.orders, 0),
+ .radix_tree = {
+ .lock = __MUTEX_INITIALIZER(kho_out.radix_tree.lock),
},
.finalized = false,
};
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
+/**
+ * kho_radix_encode_key - Encodes a physical address and order into a radix key.
+ * @phys: The physical address of the page.
+ * @order: The order of the page.
+ *
+ * This function combines a page's physical address and its order into a
+ * single unsigned long, which is used as a key for all radix tree
+ * operations.
+ *
+ * Return: The encoded unsigned long radix key.
+ */
+static unsigned long kho_radix_encode_key(phys_addr_t phys, unsigned int order)
{
- void *res = xa_load(xa, index);
+ /* Order bits part */
+ unsigned long h = 1UL << (KHO_ORDER_0_LOG2 - order);
+ /* Shifted physical address part */
+ unsigned long l = phys >> (PAGE_SHIFT + order);
- if (res)
- return res;
+ return h | l;
+}
- void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
+/**
+ * kho_radix_decode_key - Decodes a radix key back into a physical address and order.
+ * @key: The unsigned long key to decode.
+ * @order: An output parameter, a pointer to an unsigned int where the decoded
+ * page order will be stored.
+ *
+ * This function reverses the encoding performed by kho_radix_encode_key(),
+ * extracting the original physical address and page order from a given key.
+ *
+ * Return: The decoded physical address.
+ */
+static phys_addr_t kho_radix_decode_key(unsigned long key, unsigned int *order)
+{
+ unsigned int order_bit = fls64(key);
+ phys_addr_t phys;
- if (!elm)
- return ERR_PTR(-ENOMEM);
+ /* order_bit is numbered starting at 1 from fls64 */
+ *order = KHO_ORDER_0_LOG2 - order_bit + 1;
+ /* The order is discarded by the shift */
+ phys = key << (PAGE_SHIFT + *order);
- if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
- return ERR_PTR(-EINVAL);
+ return phys;
+}
- res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
- if (xa_is_err(res))
- return ERR_PTR(xa_err(res));
- else if (res)
- return res;
+static unsigned long kho_radix_get_bitmap_index(unsigned long key)
+{
+ return key % (1 << KHO_BITMAP_SIZE_LOG2);
+}
+
+static unsigned long kho_radix_get_table_index(unsigned long key,
+ unsigned int level)
+{
+ int s;
- return no_free_ptr(elm);
+ s = ((level - 1) * KHO_TABLE_SIZE_LOG2) + KHO_BITMAP_SIZE_LOG2;
+ return (key >> s) % (1 << KHO_TABLE_SIZE_LOG2);
}
-static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
- unsigned int order)
+/**
+ * kho_radix_add_page - Marks a page as preserved in the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to preserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree based on the key derived from @pfn
+ * and @order. It sets the corresponding bit in the leaf bitmap to mark the
+ * page for preservation. If intermediate nodes do not exist along the path,
+ * they are allocated and added to the tree.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int kho_radix_add_page(struct kho_radix_tree *tree,
+ unsigned long pfn, unsigned int order)
{
- struct kho_mem_phys_bits *bits;
- struct kho_mem_phys *physxa;
- const unsigned long pfn_high = pfn >> order;
+ /* Newly allocated nodes for error cleanup */
+ struct kho_radix_node *intermediate_nodes[KHO_TREE_MAX_DEPTH] = { 0 };
+ unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+ struct kho_radix_node *anchor_node = NULL;
+ struct kho_radix_node *node = tree->root;
+ struct kho_radix_node *new_node;
+ unsigned int i, idx, anchor_idx;
+ struct kho_radix_leaf *leaf;
+ int err = 0;
- physxa = xa_load(&track->orders, order);
- if (WARN_ON_ONCE(!physxa))
- return;
+ if (WARN_ON_ONCE(!tree->root))
+ return -EINVAL;
- bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (WARN_ON_ONCE(!bits))
- return;
+ might_sleep();
+
+ guard(mutex)(&tree->lock);
+
+ /* Go from high levels to low levels */
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ idx = kho_radix_get_table_index(key, i);
+
+ if (node->table[idx]) {
+ node = phys_to_virt(node->table[idx]);
+ continue;
+ }
+
+ /* Next node is empty, create a new node for it */
+ new_node = (struct kho_radix_node *)get_zeroed_page(GFP_KERNEL);
+ if (!new_node) {
+ err = -ENOMEM;
+ goto err_free_nodes;
+ }
+
+ node->table[idx] = virt_to_phys(new_node);
- clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+ /*
+ * Capture the node where the new branch starts for cleanup
+ * if allocation fails.
+ */
+ if (!anchor_node) {
+ anchor_node = node;
+ anchor_idx = idx;
+ }
+ intermediate_nodes[i] = new_node;
+
+ node = new_node;
+ }
+
+ /* Handle the leaf level bitmap (level 0) */
+ idx = kho_radix_get_bitmap_index(key);
+ leaf = (struct kho_radix_leaf *)node;
+ __set_bit(idx, leaf->bitmap);
+
+ return 0;
+
+err_free_nodes:
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ if (intermediate_nodes[i])
+ free_page((unsigned long)intermediate_nodes[i]);
+ }
+ if (anchor_node)
+ anchor_node->table[anchor_idx] = 0;
+
+ return err;
}
+EXPORT_SYMBOL_GPL(kho_radix_add_page);
-static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
- unsigned long end_pfn)
+/**
+ * kho_radix_del_page - Removes a page's preservation status from the radix tree.
+ * @tree: The KHO radix tree.
+ * @pfn: The page frame number of the page to unpreserve.
+ * @order: The order of the page.
+ *
+ * This function traverses the radix tree and clears the bit corresponding to
+ * the page, effectively removing its "preserved" status. It does not free
+ * the tree's intermediate nodes, even if they become empty.
+ */
+void kho_radix_del_page(struct kho_radix_tree *tree, unsigned long pfn,
+ unsigned int order)
{
- unsigned int order;
+ unsigned long key = kho_radix_encode_key(PFN_PHYS(pfn), order);
+ struct kho_radix_node *node = tree->root;
+ struct kho_radix_leaf *leaf;
+ unsigned int i, idx;
- while (pfn < end_pfn) {
- order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+ if (WARN_ON_ONCE(!tree->root))
+ return;
+
+ might_sleep();
- __kho_unpreserve_order(track, pfn, order);
+ guard(mutex)(&tree->lock);
- pfn += 1 << order;
+ /* Go from high levels to low levels */
+ for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+ idx = kho_radix_get_table_index(key, i);
+
+ /*
+ * Attempting to delete a page that has not been preserved,
+ * return with a warning.
+ */
+ if (WARN_ON(!node->table[idx]))
+ return;
+
+ node = phys_to_virt(node->table[idx]);
}
+
+ /* Handle the leaf level bitmap (level 0) */
+ leaf = (struct kho_radix_leaf *)node;
+ idx = kho_radix_get_bitmap_index(key);
+ __clear_bit(idx, leaf->bitmap);
}
+EXPORT_SYMBOL_GPL(kho_radix_del_page);
-static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
- unsigned int order)
+static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf,
+ unsigned long key,
+ kho_radix_tree_walk_callback_t cb)
{
- struct kho_mem_phys_bits *bits;
- struct kho_mem_phys *physxa, *new_physxa;
- const unsigned long pfn_high = pfn >> order;
+ unsigned long *bitmap = (unsigned long *)leaf;
+ unsigned int order;
+ phys_addr_t phys;
+ unsigned int i;
+ int err;
- might_sleep();
- physxa = xa_load(&track->orders, order);
- if (!physxa) {
- int err;
+ for_each_set_bit(i, bitmap, PAGE_SIZE * BITS_PER_BYTE) {
+ phys = kho_radix_decode_key(key | i, &order);
+ err = cb(phys, order);
+ if (err)
+ return err;
+ }
- new_physxa = kzalloc_obj(*physxa);
- if (!new_physxa)
- return -ENOMEM;
+ return 0;
+}
+
+static int __kho_radix_walk_tree(struct kho_radix_node *root,
+ unsigned int level, unsigned long start,
+ kho_radix_tree_walk_callback_t cb)
+{
+ struct kho_radix_node *node;
+ struct kho_radix_leaf *leaf;
+ unsigned long key, i;
+ unsigned int shift;
+ int err;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(phys_addr_t); i++) {
+ if (!root->table[i])
+ continue;
- xa_init(&new_physxa->phys_bits);
- physxa = xa_cmpxchg(&track->orders, order, NULL, new_physxa,
- GFP_KERNEL);
+ shift = ((level - 1) * KHO_TABLE_SIZE_LOG2) +
+ KHO_BITMAP_SIZE_LOG2;
+ key = start | (i << shift);
- err = xa_err(physxa);
- if (err || physxa) {
- xa_destroy(&new_physxa->phys_bits);
- kfree(new_physxa);
+ node = phys_to_virt(root->table[i]);
- if (err)
- return err;
+ if (level == 1) {
+ /*
+ * we are at level 1,
+ * node is pointing to the level 0 bitmap.
+ */
+ leaf = (struct kho_radix_leaf *)node;
+ err = kho_radix_walk_leaf(leaf, key, cb);
} else {
- physxa = new_physxa;
+ err = __kho_radix_walk_tree(node, level - 1,
+ key, cb);
}
+
+ if (err)
+ return err;
}
- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (IS_ERR(bits))
- return PTR_ERR(bits);
+ return 0;
+}
+
+/**
+ * kho_radix_walk_tree - Traverses the radix tree and calls a callback for each preserved page.
+ * @tree: A pointer to the KHO radix tree to walk.
+ * @cb: A callback function of type kho_radix_tree_walk_callback_t that will be
+ * invoked for each preserved page found in the tree. The callback receives
+ * the physical address and order of the preserved page.
+ *
+ * This function walks the radix tree, searching from the specified top level
+ * down to the lowest level (level 0). For each preserved page found, it invokes
+ * the provided callback, passing the page's physical address and order.
+ *
+ * Return: 0 if the walk completed the specified tree, or the non-zero return
+ * value from the callback that stopped the walk.
+ */
+int kho_radix_walk_tree(struct kho_radix_tree *tree,
+ kho_radix_tree_walk_callback_t cb)
+{
+ if (WARN_ON_ONCE(!tree->root))
+ return -EINVAL;
- set_bit(pfn_high % PRESERVE_BITS, bits->preserve);
+ guard(mutex)(&tree->lock);
- return 0;
+ return __kho_radix_walk_tree(tree->root, KHO_TREE_MAX_DEPTH - 1, 0, cb);
+}
+EXPORT_SYMBOL_GPL(kho_radix_walk_tree);
+
+static void __kho_unpreserve(struct kho_radix_tree *tree,
+ unsigned long pfn, unsigned long end_pfn)
+{
+ unsigned int order;
+
+ while (pfn < end_pfn) {
+ order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+
+ kho_radix_del_page(tree, pfn, order);
+
+ pfn += 1 << order;
+ }
}
/* For physically contiguous 0-order pages. */
@@ -318,161 +472,24 @@ struct page *kho_restore_pages(phys_addr_t phys, unsigned long nr_pages)
}
EXPORT_SYMBOL_GPL(kho_restore_pages);
-/* Serialize and deserialize struct kho_mem_phys across kexec
- *
- * Record all the bitmaps in a linked list of pages for the next kernel to
- * process. Each chunk holds bitmaps of the same order and each block of bitmaps
- * starts at a given physical address. This allows the bitmaps to be sparse. The
- * xarray is used to store them in a tree while building up the data structure,
- * but the KHO successor kernel only needs to process them once in order.
- *
- * All of this memory is normal kmalloc() memory and is not marked for
- * preservation. The successor kernel will remain isolated to the scratch space
- * until it completes processing this list. Once processed all the memory
- * storing these ranges will be marked as free.
- */
-
-struct khoser_mem_bitmap_ptr {
- phys_addr_t phys_start;
- DECLARE_KHOSER_PTR(bitmap, struct kho_mem_phys_bits *);
-};
-
-struct khoser_mem_chunk_hdr {
- DECLARE_KHOSER_PTR(next, struct khoser_mem_chunk *);
- unsigned int order;
- unsigned int num_elms;
-};
-
-#define KHOSER_BITMAP_SIZE \
- ((PAGE_SIZE - sizeof(struct khoser_mem_chunk_hdr)) / \
- sizeof(struct khoser_mem_bitmap_ptr))
-
-struct khoser_mem_chunk {
- struct khoser_mem_chunk_hdr hdr;
- struct khoser_mem_bitmap_ptr bitmaps[KHOSER_BITMAP_SIZE];
-};
-
-static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
-
-static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
- unsigned long order)
-{
- struct khoser_mem_chunk *chunk __free(free_page) = NULL;
-
- chunk = (void *)get_zeroed_page(GFP_KERNEL);
- if (!chunk)
- return ERR_PTR(-ENOMEM);
-
- if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
- return ERR_PTR(-EINVAL);
-
- chunk->hdr.order = order;
- if (cur_chunk)
- KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
- return no_free_ptr(chunk);
-}
-
-static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
-{
- struct khoser_mem_chunk *chunk = first_chunk;
-
- while (chunk) {
- struct khoser_mem_chunk *tmp = chunk;
-
- chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- free_page((unsigned long)tmp);
- }
-}
-
-/*
- * Update memory map property, if old one is found discard it via
- * kho_mem_ser_free().
- */
-static void kho_update_memory_map(struct khoser_mem_chunk *first_chunk)
-{
- void *ptr;
- u64 phys;
-
- ptr = fdt_getprop_w(kho_out.fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, NULL);
-
- /* Check and discard previous memory map */
- phys = get_unaligned((u64 *)ptr);
- if (phys)
- kho_mem_ser_free((struct khoser_mem_chunk *)phys_to_virt(phys));
-
- /* Update with the new value */
- phys = first_chunk ? (u64)virt_to_phys(first_chunk) : 0;
- put_unaligned(phys, (u64 *)ptr);
-}
-
-static int kho_mem_serialize(struct kho_out *kho_out)
+static int __init kho_preserved_memory_reserve(phys_addr_t phys,
+ unsigned int order)
{
- struct khoser_mem_chunk *first_chunk = NULL;
- struct khoser_mem_chunk *chunk = NULL;
- struct kho_mem_phys *physxa;
- unsigned long order;
- int err = -ENOMEM;
-
- xa_for_each(&kho_out->track.orders, order, physxa) {
- struct kho_mem_phys_bits *bits;
- unsigned long phys;
-
- chunk = new_chunk(chunk, order);
- if (IS_ERR(chunk)) {
- err = PTR_ERR(chunk);
- goto err_free;
- }
-
- if (!first_chunk)
- first_chunk = chunk;
-
- xa_for_each(&physxa->phys_bits, phys, bits) {
- struct khoser_mem_bitmap_ptr *elm;
-
- if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
- chunk = new_chunk(chunk, order);
- if (IS_ERR(chunk)) {
- err = PTR_ERR(chunk);
- goto err_free;
- }
- }
+ union kho_page_info info;
+ struct page *page;
+ u64 sz;
- elm = &chunk->bitmaps[chunk->hdr.num_elms];
- chunk->hdr.num_elms++;
- elm->phys_start = (phys * PRESERVE_BITS)
- << (order + PAGE_SHIFT);
- KHOSER_STORE_PTR(elm->bitmap, bits);
- }
- }
+ sz = 1 << (order + PAGE_SHIFT);
+ page = phys_to_page(phys);
- kho_update_memory_map(first_chunk);
+ /* Reserve the memory preserved in KHO in memblock */
+ memblock_reserve(phys, sz);
+ memblock_reserved_mark_noinit(phys, sz);
+ info.magic = KHO_PAGE_MAGIC;
+ info.order = order;
+ page->private = info.page_private;
return 0;
-
-err_free:
- kho_mem_ser_free(first_chunk);
- return err;
-}
-
-static void __init deserialize_bitmap(unsigned int order,
- struct khoser_mem_bitmap_ptr *elm)
-{
- struct kho_mem_phys_bits *bitmap = KHOSER_LOAD_PTR(elm->bitmap);
- unsigned long bit;
-
- for_each_set_bit(bit, bitmap->preserve, PRESERVE_BITS) {
- int sz = 1 << (order + PAGE_SHIFT);
- phys_addr_t phys =
- elm->phys_start + (bit << (order + PAGE_SHIFT));
- struct page *page = phys_to_page(phys);
- union kho_page_info info;
-
- memblock_reserve(phys, sz);
- memblock_reserved_mark_noinit(phys, sz);
- info.magic = KHO_PAGE_MAGIC;
- info.order = order;
- page->private = info.page_private;
- }
}
/* Returns physical address of the preserved memory map from FDT */
@@ -483,25 +500,13 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
if (!mem_ptr || len != sizeof(u64)) {
- pr_err("failed to get preserved memory bitmaps\n");
+ pr_err("failed to get preserved memory map\n");
return 0;
}
return get_unaligned((const u64 *)mem_ptr);
}
-static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
-{
- while (chunk) {
- unsigned int i;
-
- for (i = 0; i != chunk->hdr.num_elms; i++)
- deserialize_bitmap(chunk->hdr.order,
- &chunk->bitmaps[i]);
- chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
- }
-}
-
/*
* With KHO enabled, memory can become fragmented because KHO regions may
* be anywhere in physical address space. The scratch regions give us a
@@ -812,14 +817,14 @@ EXPORT_SYMBOL_GPL(kho_remove_subtree);
*/
int kho_preserve_folio(struct folio *folio)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.track;
if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
return -EINVAL;
- return __kho_preserve_order(track, pfn, order);
+ return kho_radix_add_page(tree, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_preserve_folio);
@@ -833,11 +838,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_folio);
*/
void kho_unpreserve_folio(struct folio *folio)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long pfn = folio_pfn(folio);
const unsigned int order = folio_order(folio);
- struct kho_mem_track *track = &kho_out.track;
- __kho_unpreserve_order(track, pfn, order);
+ kho_radix_del_page(tree, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
@@ -853,7 +858,7 @@ EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
*/
int kho_preserve_pages(struct page *page, unsigned long nr_pages)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn = start_pfn;
@@ -869,7 +874,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages)
const unsigned int order =
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
- err = __kho_preserve_order(track, pfn, order);
+ err = kho_radix_add_page(tree, pfn, order);
if (err) {
failed_pfn = pfn;
break;
@@ -879,7 +884,7 @@ int kho_preserve_pages(struct page *page, unsigned long nr_pages)
}
if (err)
- __kho_unpreserve(track, start_pfn, failed_pfn);
+ __kho_unpreserve(tree, start_pfn, failed_pfn);
return err;
}
@@ -897,11 +902,11 @@ EXPORT_SYMBOL_GPL(kho_preserve_pages);
*/
void kho_unpreserve_pages(struct page *page, unsigned long nr_pages)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const unsigned long start_pfn = page_to_pfn(page);
const unsigned long end_pfn = start_pfn + nr_pages;
- __kho_unpreserve(track, start_pfn, end_pfn);
+ __kho_unpreserve(tree, start_pfn, end_pfn);
}
EXPORT_SYMBOL_GPL(kho_unpreserve_pages);
@@ -960,14 +965,14 @@ err_free:
static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
unsigned short order)
{
- struct kho_mem_track *track = &kho_out.track;
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
- __kho_unpreserve(track, pfn, pfn + 1);
+ __kho_unpreserve(tree, pfn, pfn + 1);
for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
pfn = PHYS_PFN(chunk->phys[i]);
- __kho_unpreserve(track, pfn, pfn + (1 << order));
+ __kho_unpreserve(tree, pfn, pfn + (1 << order));
}
}
@@ -1238,16 +1243,10 @@ EXPORT_SYMBOL_GPL(kho_restore_free);
int kho_finalize(void)
{
- int ret;
-
if (!kho_enable)
return -EOPNOTSUPP;
guard(mutex)(&kho_out.lock);
- ret = kho_mem_serialize(&kho_out);
- if (ret)
- return ret;
-
kho_out.finalized = true;
return 0;
@@ -1262,7 +1261,6 @@ bool kho_finalized(void)
struct kho_in {
phys_addr_t fdt_phys;
phys_addr_t scratch_phys;
- phys_addr_t mem_map_phys;
struct kho_debugfs dbg;
};
@@ -1330,18 +1328,46 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
}
EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
+static int __init kho_mem_retrieve(const void *fdt)
+{
+ struct kho_radix_tree tree;
+ const phys_addr_t *mem;
+ int len;
+
+ /* Retrieve the KHO radix tree from passed-in FDT. */
+ mem = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
+
+ if (!mem || len != sizeof(*mem)) {
+ pr_err("failed to get preserved KHO memory tree\n");
+ return -ENOENT;
+ }
+
+ if (!*mem)
+ return -EINVAL;
+
+ tree.root = phys_to_virt(*mem);
+ mutex_init(&tree.lock);
+ return kho_radix_walk_tree(&tree, kho_preserved_memory_reserve);
+}
+
static __init int kho_out_fdt_setup(void)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
void *root = kho_out.fdt;
- u64 empty_mem_map = 0;
+ u64 preserved_mem_tree_pa;
int err;
err = fdt_create(root, PAGE_SIZE);
err |= fdt_finish_reservemap(root);
err |= fdt_begin_node(root, "");
err |= fdt_property_string(root, "compatible", KHO_FDT_COMPATIBLE);
- err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME, &empty_mem_map,
- sizeof(empty_mem_map));
+
+ preserved_mem_tree_pa = virt_to_phys(tree->root);
+
+ err |= fdt_property(root, KHO_FDT_MEMORY_MAP_PROP_NAME,
+ &preserved_mem_tree_pa,
+ sizeof(preserved_mem_tree_pa));
+
err |= fdt_end_node(root);
err |= fdt_finish(root);
@@ -1350,16 +1376,23 @@ static __init int kho_out_fdt_setup(void)
static __init int kho_init(void)
{
+ struct kho_radix_tree *tree = &kho_out.radix_tree;
const void *fdt = kho_get_fdt();
int err = 0;
if (!kho_enable)
return 0;
+ tree->root = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!tree->root) {
+ err = -ENOMEM;
+ goto err_free_scratch;
+ }
+
kho_out.fdt = kho_alloc_preserve(PAGE_SIZE);
if (IS_ERR(kho_out.fdt)) {
err = PTR_ERR(kho_out.fdt);
- goto err_free_scratch;
+ goto err_free_kho_radix_tree_root;
}
err = kho_debugfs_init();
@@ -1405,6 +1438,9 @@ static __init int kho_init(void)
err_free_fdt:
kho_unpreserve_free(kho_out.fdt);
+err_free_kho_radix_tree_root:
+ kfree(tree->root);
+ tree->root = NULL;
err_free_scratch:
kho_out.fdt = NULL;
for (int i = 0; i < kho_scratch_cnt; i++) {
@@ -1444,10 +1480,12 @@ static void __init kho_release_scratch(void)
void __init kho_memory_init(void)
{
- if (kho_in.mem_map_phys) {
+ if (kho_in.scratch_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
kho_release_scratch();
- kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
+
+ if (kho_mem_retrieve(kho_get_fdt()))
+ kho_in.fdt_phys = 0;
} else {
kho_reserve_scratch();
}
@@ -1525,7 +1563,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
- kho_in.mem_map_phys = mem_map_phys;
kho_scratch_cnt = scratch_cnt;
populated = true;
diff --git a/kernel/liveupdate/kexec_handover_debugfs.c b/kernel/liveupdate/kexec_handover_debugfs.c
index 2f93939168ab..548033fd8a62 100644
--- a/kernel/liveupdate/kexec_handover_debugfs.c
+++ b/kernel/liveupdate/kexec_handover_debugfs.c
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <linux/libfdt.h>
#include <linux/mm.h>
+#include <linux/kho/abi/kexec_handover.h>
#include "kexec_handover_internal.h"
static struct dentry *debugfs_root;
@@ -139,7 +140,7 @@ __init void kho_in_debugfs_init(struct kho_debugfs *dbg, const void *fdt)
const char *name = fdt_get_name(fdt, child, NULL);
const u64 *fdt_phys;
- fdt_phys = fdt_getprop(fdt, child, "fdt", &len);
+ fdt_phys = fdt_getprop(fdt, child, KHO_FDT_SUB_TREE_PROP_NAME, &len);
if (!fdt_phys)
continue;
if (len != sizeof(*fdt_phys)) {