diff options
Diffstat (limited to 'rust/kernel')
| -rw-r--r-- | rust/kernel/alloc/allocator.rs | 30 | ||||
| -rw-r--r-- | rust/kernel/alloc/allocator_test.rs | 11 | ||||
| -rw-r--r-- | rust/kernel/cred.rs | 6 | ||||
| -rw-r--r-- | rust/kernel/device.rs | 208 | ||||
| -rw-r--r-- | rust/kernel/devres.rs | 27 | ||||
| -rw-r--r-- | rust/kernel/driver.rs | 89 | ||||
| -rw-r--r-- | rust/kernel/drm/device.rs | 32 | ||||
| -rw-r--r-- | rust/kernel/faux.rs | 2 | ||||
| -rw-r--r-- | rust/kernel/fs.rs | 3 | ||||
| -rw-r--r-- | rust/kernel/fs/kiocb.rs | 68 | ||||
| -rw-r--r-- | rust/kernel/iov.rs | 314 | ||||
| -rw-r--r-- | rust/kernel/lib.rs | 1 | ||||
| -rw-r--r-- | rust/kernel/miscdevice.rs | 63 | ||||
| -rw-r--r-- | rust/kernel/page.rs | 6 | ||||
| -rw-r--r-- | rust/kernel/security.rs | 37 |
15 files changed, 841 insertions, 56 deletions
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index aa2dfa9dca4c..2692cf90c948 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -43,17 +43,6 @@ pub struct Vmalloc; /// For more details see [self]. pub struct KVmalloc; -/// Returns a proper size to alloc a new object aligned to `new_layout`'s alignment. -fn aligned_size(new_layout: Layout) -> usize { - // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. - let layout = new_layout.pad_to_align(); - - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` - // which together with the slab guarantees means the `krealloc` will return a properly aligned - // object (see comments in `kmalloc()` for more information). - layout.size() -} - /// # Invariants /// /// One of the following: `krealloc`, `vrealloc`, `kvrealloc`. @@ -88,7 +77,7 @@ impl ReallocFunc { old_layout: Layout, flags: Flags, ) -> Result<NonNull<[u8]>, AllocError> { - let size = aligned_size(layout); + let size = layout.size(); let ptr = match ptr { Some(ptr) => { if old_layout.size() == 0 { @@ -123,6 +112,17 @@ impl ReallocFunc { } } +impl Kmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information). + layout.pad_to_align() + } +} + // SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that // - memory remains valid until it is explicitly freed, // - passing a pointer to a valid memory allocation is OK, @@ -135,6 +135,8 @@ unsafe impl Allocator for Kmalloc { old_layout: Layout, flags: Flags, ) -> Result<NonNull<[u8]>, AllocError> { + let layout = Kmalloc::aligned_layout(layout); + // SAFETY: `ReallocFunc::call` has the same safety requirements as `Allocator::realloc`. unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags) } } @@ -176,6 +178,10 @@ unsafe impl Allocator for KVmalloc { old_layout: Layout, flags: Flags, ) -> Result<NonNull<[u8]>, AllocError> { + // `KVmalloc` may use the `Kmalloc` backend, hence we have to enforce a `Kmalloc` + // compatible layout. + let layout = Kmalloc::aligned_layout(layout); + // TODO: Support alignments larger than PAGE_SIZE. if layout.align() > bindings::PAGE_SIZE { pr_warn!("KVmalloc does not support alignments larger than PAGE_SIZE yet.\n"); diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index a3074480bd8d..90dd987d40e4 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -22,6 +22,17 @@ pub type Kmalloc = Cmalloc; pub type Vmalloc = Kmalloc; pub type KVmalloc = Kmalloc; +impl Cmalloc { + /// Returns a [`Layout`] that makes [`Kmalloc`] fulfill the requested size and alignment of + /// `layout`. + pub fn aligned_layout(layout: Layout) -> Layout { + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of + // `layout.align()` which together with the slab guarantees means that `Kmalloc` will return + // a properly aligned object (see comments in `kmalloc()` for more information). + layout.pad_to_align() + } +} + extern "C" { #[link_name = "aligned_alloc"] fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void; diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index 2599f01e8b28..3aa2e4c4a50c 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -54,6 +54,12 @@ impl Credential { unsafe { &*ptr.cast() } } + /// Returns a raw pointer to the inner credential. + #[inline] + pub fn as_ptr(&self) -> *const bindings::cred { + self.0.get() + } + /// Get the id for this security context. #[inline] pub fn get_secid(&self) -> u32 { diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index b8613289de8e..5902b3714a16 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -15,23 +15,130 @@ use crate::c_str; pub mod property; -/// A reference-counted device. +/// The core representation of a device in the kernel's driver model. /// -/// This structure represents the Rust abstraction for a C `struct device`. This implementation -/// abstracts the usage of an already existing C `struct device` within Rust code that we get -/// passed from the C side. +/// This structure represents the Rust abstraction for a C `struct device`. A [`Device`] can either +/// exist as temporary reference (see also [`Device::from_raw`]), which is only valid within a +/// certain scope or as [`ARef<Device>`], owning a dedicated reference count. /// -/// An instance of this abstraction can be obtained temporarily or permanent. +/// # Device Types /// -/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation. -/// A permanent instance is always reference-counted and hence not restricted by any lifetime -/// boundaries. +/// A [`Device`] can represent either a bus device or a class device. /// -/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem -/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in -/// `T::probe()`, such that a driver can store the `ARef<Device>` (equivalent to storing a -/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent -/// memory. +/// ## Bus Devices +/// +/// A bus device is a [`Device`] that is associated with a physical or virtual bus. Examples of +/// buses include PCI, USB, I2C, and SPI. Devices attached to a bus are registered with a specific +/// bus type, which facilitates matching devices with appropriate drivers based on IDs or other +/// identifying information. Bus devices are visible in sysfs under `/sys/bus/<bus-name>/devices/`. +/// +/// ## Class Devices +/// +/// A class device is a [`Device`] that is associated with a logical category of functionality +/// rather than a physical bus. Examples of classes include block devices, network interfaces, sound +/// cards, and input devices. Class devices are grouped under a common class and exposed to +/// userspace via entries in `/sys/class/<class-name>/`. +/// +/// # Device Context +/// +/// [`Device`] references are generic over a [`DeviceContext`], which represents the type state of +/// a [`Device`]. +/// +/// As the name indicates, this type state represents the context of the scope the [`Device`] +/// reference is valid in. For instance, the [`Bound`] context guarantees that the [`Device`] is +/// bound to a driver for the entire duration of the existence of a [`Device<Bound>`] reference. +/// +/// Other [`DeviceContext`] types besides [`Bound`] are [`Normal`], [`Core`] and [`CoreInternal`]. +/// +/// Unless selected otherwise [`Device`] defaults to the [`Normal`] [`DeviceContext`], which by +/// itself has no additional requirements. +/// +/// It is always up to the caller of [`Device::from_raw`] to select the correct [`DeviceContext`] +/// type for the corresponding scope the [`Device`] reference is created in. +/// +/// All [`DeviceContext`] types other than [`Normal`] are intended to be used with +/// [bus devices](#bus-devices) only. +/// +/// # Implementing Bus Devices +/// +/// This section provides a guideline to implement bus specific devices, such as [`pci::Device`] or +/// [`platform::Device`]. +/// +/// A bus specific device should be defined as follows. +/// +/// ```ignore +/// #[repr(transparent)] +/// pub struct Device<Ctx: device::DeviceContext = device::Normal>( +/// Opaque<bindings::bus_device_type>, +/// PhantomData<Ctx>, +/// ); +/// ``` +/// +/// Since devices are reference counted, [`AlwaysRefCounted`] should be implemented for `Device` +/// (i.e. `Device<Normal>`). Note that [`AlwaysRefCounted`] must not be implemented for any other +/// [`DeviceContext`], since all other device context types are only valid within a certain scope. +/// +/// In order to be able to implement the [`DeviceContext`] dereference hierarchy, bus device +/// implementations should call the [`impl_device_context_deref`] macro as shown below. +/// +/// ```ignore +/// // SAFETY: `Device` is a transparent wrapper of a type that doesn't depend on `Device`'s +/// // generic argument. +/// kernel::impl_device_context_deref!(unsafe { Device }); +/// ``` +/// +/// In order to convert from a any [`Device<Ctx>`] to [`ARef<Device>`], bus devices can implement +/// the following macro call. +/// +/// ```ignore +/// kernel::impl_device_context_into_aref!(Device); +/// ``` +/// +/// Bus devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl<Ctx: device::DeviceContext> AsRef<device::Device<Ctx>> for Device<Ctx> { +/// fn as_ref(&self) -> &device::Device<Ctx> { +/// ... +/// } +/// } +/// ``` +/// +/// # Implementing Class Devices +/// +/// Class device implementations require less infrastructure and depend slightly more on the +/// specific subsystem. +/// +/// An example implementation for a class device could look like this. +/// +/// ```ignore +/// #[repr(C)] +/// pub struct Device<T: class::Driver> { +/// dev: Opaque<bindings::class_device_type>, +/// data: T::Data, +/// } +/// ``` +/// +/// This class device uses the sub-classing pattern to embed the driver's private data within the +/// allocation of the class device. For this to be possible the class device is generic over the +/// class specific `Driver` trait implementation. +/// +/// Just like any device, class devices are reference counted and should hence implement +/// [`AlwaysRefCounted`] for `Device`. +/// +/// Class devices should also implement the following [`AsRef`] implementation, such that users can +/// easily derive a generic [`Device`] reference. +/// +/// ```ignore +/// impl<T: class::Driver> AsRef<device::Device> for Device<T> { +/// fn as_ref(&self) -> &device::Device { +/// ... +/// } +/// } +/// ``` +/// +/// An example for a class device implementation is [`drm::Device`]. /// /// # Invariants /// @@ -42,6 +149,12 @@ pub mod property; /// /// `bindings::device::release` is valid to be called from any thread, hence `ARef<Device>` can be /// dropped from any thread. +/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted +/// [`drm::Device`]: kernel::drm::Device +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref +/// [`pci::Device`]: kernel::pci::Device +/// [`platform::Device`]: kernel::platform::Device #[repr(transparent)] pub struct Device<Ctx: DeviceContext = Normal>(Opaque<bindings::device>, PhantomData<Ctx>); @@ -311,28 +424,75 @@ unsafe impl Send for Device {} // synchronization in `struct device`. unsafe impl Sync for Device {} -/// Marker trait for the context of a bus specific device. +/// Marker trait for the context or scope of a bus specific device. +/// +/// [`DeviceContext`] is a marker trait for types representing the context of a bus specific +/// [`Device`]. +/// +/// The specific device context types are: [`CoreInternal`], [`Core`], [`Bound`] and [`Normal`]. /// -/// Some functions of a bus specific device should only be called from a certain context, i.e. bus -/// callbacks, such as `probe()`. +/// [`DeviceContext`] types are hierarchical, which means that there is a strict hierarchy that +/// defines which [`DeviceContext`] type can be derived from another. For instance, any +/// [`Device<Core>`] can dereference to a [`Device<Bound>`]. /// -/// This is the marker trait for structures representing the context of a bus specific device. +/// The following enumeration illustrates the dereference hierarchy of [`DeviceContext`] types. +/// +/// - [`CoreInternal`] => [`Core`] => [`Bound`] => [`Normal`] +/// +/// Bus devices can automatically implement the dereference hierarchy by using +/// [`impl_device_context_deref`]. +/// +/// Note that the guarantee for a [`Device`] reference to have a certain [`DeviceContext`] comes +/// from the specific scope the [`Device`] reference is valid in. +/// +/// [`impl_device_context_deref`]: kernel::impl_device_context_deref pub trait DeviceContext: private::Sealed {} -/// The [`Normal`] context is the context of a bus specific device when it is not an argument of -/// any bus callback. +/// The [`Normal`] context is the default [`DeviceContext`] of any [`Device`]. +/// +/// The normal context does not indicate any specific context. Any `Device<Ctx>` is also a valid +/// [`Device<Normal>`]. It is the only [`DeviceContext`] for which it is valid to implement +/// [`AlwaysRefCounted`] for. +/// +/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted pub struct Normal; -/// The [`Core`] context is the context of a bus specific device when it is supplied as argument of -/// any of the bus callbacks, such as `probe()`. +/// The [`Core`] context is the context of a bus specific device when it appears as argument of +/// any bus specific callback, such as `probe()`. +/// +/// The core context indicates that the [`Device<Core>`] reference's scope is limited to the bus +/// callback it appears in. It is intended to be used for synchronization purposes. Bus device +/// implementations can implement methods for [`Device<Core>`], such that they can only be called +/// from bus callbacks. pub struct Core; -/// Semantically the same as [`Core`] but reserved for internal usage of the corresponding bus +/// Semantically the same as [`Core`], but reserved for internal usage of the corresponding bus /// abstraction. +/// +/// The internal core context is intended to be used in exactly the same way as the [`Core`] +/// context, with the difference that this [`DeviceContext`] is internal to the corresponding bus +/// abstraction. +/// +/// This context mainly exists to share generic [`Device`] infrastructure that should only be called +/// from bus callbacks with bus abstractions, but without making them accessible for drivers. pub struct CoreInternal; -/// The [`Bound`] context is the context of a bus specific device reference when it is guaranteed to -/// be bound for the duration of its lifetime. +/// The [`Bound`] context is the [`DeviceContext`] of a bus specific device when it is guaranteed to +/// be bound to a driver. +/// +/// The bound context indicates that for the entire duration of the lifetime of a [`Device<Bound>`] +/// reference, the [`Device`] is guaranteed to be bound to a driver. +/// +/// Some APIs, such as [`dma::CoherentAllocation`] or [`Devres`] rely on the [`Device`] to be bound, +/// which can be proven with the [`Bound`] device context. +/// +/// Any abstraction that can guarantee a scope where the corresponding bus device is bound, should +/// provide a [`Device<Bound>`] reference to its users for this scope. This allows users to benefit +/// from optimizations for accessing device resources, see also [`Devres::access`]. +/// +/// [`Devres`]: kernel::devres::Devres +/// [`Devres::access`]: kernel::devres::Devres::access +/// [`dma::CoherentAllocation`]: kernel::dma::CoherentAllocation pub struct Bound; mod private { diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index da18091143a6..d04e3fcebafb 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -115,10 +115,11 @@ pub struct Devres<T: Send> { /// Contains all the fields shared with [`Self::callback`]. // TODO: Replace with `UnsafePinned`, once available. // - // Subsequently, the `drop_in_place()` in `Devres::drop` and the explicit `Send` and `Sync' - // impls can be removed. + // Subsequently, the `drop_in_place()` in `Devres::drop` and `Devres::new` as well as the + // explicit `Send` and `Sync' impls can be removed. #[pin] inner: Opaque<Inner<T>>, + _add_action: (), } impl<T: Send> Devres<T> { @@ -140,7 +141,15 @@ impl<T: Send> Devres<T> { dev: dev.into(), callback, // INVARIANT: `inner` is properly initialized. - inner <- { + inner <- Opaque::pin_init(try_pin_init!(Inner { + devm <- Completion::new(), + revoke <- Completion::new(), + data <- Revocable::new(data), + })), + // TODO: Replace with "initializer code blocks" [1] once available. + // + // [1] https://github.com/Rust-for-Linux/pin-init/pull/69 + _add_action: { // SAFETY: `this` is a valid pointer to uninitialized memory. let inner = unsafe { &raw mut (*this.as_ptr()).inner }; @@ -152,13 +161,13 @@ impl<T: Send> Devres<T> { // live at least as long as the returned `impl PinInit<Self, Error>`. to_result(unsafe { bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) - })?; + }).inspect_err(|_| { + let inner = Opaque::cast_into(inner); - Opaque::pin_init(try_pin_init!(Inner { - devm <- Completion::new(), - revoke <- Completion::new(), - data <- Revocable::new(data), - })) + // SAFETY: `inner` is a valid pointer to an `Inner<T>` and valid for both reads + // and writes. + unsafe { core::ptr::drop_in_place(inner) }; + })?; }, }) } diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs index a8f2675ba7a7..279e3af20682 100644 --- a/rust/kernel/driver.rs +++ b/rust/kernel/driver.rs @@ -2,8 +2,93 @@ //! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.). //! -//! Each bus / subsystem is expected to implement [`RegistrationOps`], which allows drivers to -//! register using the [`Registration`] class. +//! This documentation describes how to implement a bus specific driver API and how to align it with +//! the design of (bus specific) devices. +//! +//! Note: Readers are expected to know the content of the documentation of [`Device`] and +//! [`DeviceContext`]. +//! +//! # Driver Trait +//! +//! The main driver interface is defined by a bus specific driver trait. For instance: +//! +//! ```ignore +//! pub trait Driver: Send { +//! /// The type holding information about each device ID supported by the driver. +//! type IdInfo: 'static; +//! +//! /// The table of OF device ids supported by the driver. +//! const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = None; +//! +//! /// The table of ACPI device ids supported by the driver. +//! const ACPI_ID_TABLE: Option<acpi::IdTable<Self::IdInfo>> = None; +//! +//! /// Driver probe. +//! fn probe(dev: &Device<device::Core>, id_info: &Self::IdInfo) -> Result<Pin<KBox<Self>>>; +//! +//! /// Driver unbind (optional). +//! fn unbind(dev: &Device<device::Core>, this: Pin<&Self>) { +//! let _ = (dev, this); +//! } +//! } +//! ``` +//! +//! For specific examples see [`auxiliary::Driver`], [`pci::Driver`] and [`platform::Driver`]. +//! +//! The `probe()` callback should return a `Result<Pin<KBox<Self>>>`, i.e. the driver's private +//! data. The bus abstraction should store the pointer in the corresponding bus device. The generic +//! [`Device`] infrastructure provides common helpers for this purpose on its +//! [`Device<CoreInternal>`] implementation. +//! +//! All driver callbacks should provide a reference to the driver's private data. Once the driver +//! is unbound from the device, the bus abstraction should take back the ownership of the driver's +//! private data from the corresponding [`Device`] and [`drop`] it. +//! +//! All driver callbacks should provide a [`Device<Core>`] reference (see also [`device::Core`]). +//! +//! # Adapter +//! +//! The adapter implementation of a bus represents the abstraction layer between the C bus +//! callbacks and the Rust bus callbacks. It therefore has to be generic over an implementation of +//! the [driver trait](#driver-trait). +//! +//! ```ignore +//! pub struct Adapter<T: Driver>; +//! ``` +//! +//! There's a common [`Adapter`] trait that can be implemented to inherit common driver +//! infrastructure, such as finding the ID info from an [`of::IdTable`] or [`acpi::IdTable`]. +//! +//! # Driver Registration +//! +//! In order to register C driver types (such as `struct platform_driver`) the [adapter](#adapter) +//! should implement the [`RegistrationOps`] trait. +//! +//! This trait implementation can be used to create the actual registration with the common +//! [`Registration`] type. +//! +//! Typically, bus abstractions want to provide a bus specific `module_bus_driver!` macro, which +//! creates a kernel module with exactly one [`Registration`] for the bus specific adapter. +//! +//! The generic driver infrastructure provides a helper for this with the [`module_driver`] macro. +//! +//! # Device IDs +//! +//! Besides the common device ID types, such as [`of::DeviceId`] and [`acpi::DeviceId`], most buses +//! may need to implement their own device ID types. +//! +//! For this purpose the generic infrastructure in [`device_id`] should be used. +//! +//! [`auxiliary::Driver`]: kernel::auxiliary::Driver +//! [`Core`]: device::Core +//! [`Device`]: device::Device +//! [`Device<Core>`]: device::Device<device::Core> +//! [`Device<CoreInternal>`]: device::Device<device::CoreInternal> +//! [`DeviceContext`]: device::DeviceContext +//! [`device_id`]: kernel::device_id +//! [`module_driver`]: kernel::module_driver +//! [`pci::Driver`]: kernel::pci::Driver +//! [`platform::Driver`]: kernel::platform::Driver use crate::error::{Error, Result}; use crate::{acpi, device, of, str::CStr, try_pin_init, types::Opaque, ThisModule}; diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 3bb7c83966cf..d29c477e89a8 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -5,6 +5,7 @@ //! C header: [`include/linux/drm/drm_device.h`](srctree/include/linux/drm/drm_device.h) use crate::{ + alloc::allocator::Kmalloc, bindings, device, drm, drm::driver::AllocImpl, error::from_err_ptr, @@ -12,7 +13,7 @@ use crate::{ prelude::*, types::{ARef, AlwaysRefCounted, Opaque}, }; -use core::{mem, ops::Deref, ptr, ptr::NonNull}; +use core::{alloc::Layout, mem, ops::Deref, ptr, ptr::NonNull}; #[cfg(CONFIG_DRM_LEGACY)] macro_rules! drm_legacy_fields { @@ -53,10 +54,8 @@ macro_rules! drm_legacy_fields { /// /// `self.dev` is a valid instance of a `struct device`. #[repr(C)] -#[pin_data] pub struct Device<T: drm::Driver> { dev: Opaque<bindings::drm_device>, - #[pin] data: T::Data, } @@ -96,6 +95,10 @@ impl<T: drm::Driver> Device<T> { /// Create a new `drm::Device` for a `drm::Driver`. pub fn new(dev: &device::Device, data: impl PinInit<T::Data, Error>) -> Result<ARef<Self>> { + // `__drm_dev_alloc` uses `kmalloc()` to allocate memory, hence ensure a `kmalloc()` + // compatible `Layout`. + let layout = Kmalloc::aligned_layout(Layout::new::<Self>()); + // SAFETY: // - `VTABLE`, as a `const` is pinned to the read-only section of the compilation, // - `dev` is valid by its type invarants, @@ -103,7 +106,7 @@ impl<T: drm::Driver> Device<T> { bindings::__drm_dev_alloc( dev.as_raw(), &Self::VTABLE, - mem::size_of::<Self>(), + layout.size(), mem::offset_of!(Self, dev), ) } @@ -117,9 +120,13 @@ impl<T: drm::Driver> Device<T> { // - `raw_data` is a valid pointer to uninitialized memory. // - `raw_data` will not move until it is dropped. unsafe { data.__pinned_init(raw_data) }.inspect_err(|_| { - // SAFETY: `__drm_dev_alloc()` was successful, hence `raw_drm` must be valid and the + // SAFETY: `raw_drm` is a valid pointer to `Self`, given that `__drm_dev_alloc` was + // successful. + let drm_dev = unsafe { Self::into_drm_device(raw_drm) }; + + // SAFETY: `__drm_dev_alloc()` was successful, hence `drm_dev` must be valid and the // refcount must be non-zero. - unsafe { bindings::drm_dev_put(ptr::addr_of_mut!((*raw_drm.as_ptr()).dev).cast()) }; + unsafe { bindings::drm_dev_put(drm_dev) }; })?; // SAFETY: The reference count is one, and now we take ownership of that reference as a @@ -140,6 +147,14 @@ impl<T: drm::Driver> Device<T> { unsafe { crate::container_of!(Opaque::cast_from(ptr), Self, dev) }.cast_mut() } + /// # Safety + /// + /// `ptr` must be a valid pointer to `Self`. + unsafe fn into_drm_device(ptr: NonNull<Self>) -> *mut bindings::drm_device { + // SAFETY: By the safety requirements of this function, `ptr` is a valid pointer to `Self`. + unsafe { &raw mut (*ptr.as_ptr()).dev }.cast() + } + /// Not intended to be called externally, except via declare_drm_ioctls!() /// /// # Safety @@ -189,8 +204,11 @@ unsafe impl<T: drm::Driver> AlwaysRefCounted for Device<T> { } unsafe fn dec_ref(obj: NonNull<Self>) { + // SAFETY: `obj` is a valid pointer to `Self`. + let drm_dev = unsafe { Self::into_drm_device(obj) }; + // SAFETY: The safety requirements guarantee that the refcount is non-zero. - unsafe { bindings::drm_dev_put(obj.cast().as_ptr()) }; + unsafe { bindings::drm_dev_put(drm_dev) }; } } diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 7a906099993f..7fe2dd197e37 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -4,7 +4,7 @@ //! //! This module provides bindings for working with faux devices in kernel modules. //! -//! C header: [`include/linux/device/faux.h`] +//! C header: [`include/linux/device/faux.h`](srctree/include/linux/device/faux.h) use crate::{bindings, device, error::code::*, prelude::*}; use core::ptr::{addr_of_mut, null, null_mut, NonNull}; diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs index 0121b38c59e6..6ba6bdf143cb 100644 --- a/rust/kernel/fs.rs +++ b/rust/kernel/fs.rs @@ -6,3 +6,6 @@ pub mod file; pub use self::file::{File, LocalFile}; + +mod kiocb; +pub use self::kiocb::Kiocb; diff --git a/rust/kernel/fs/kiocb.rs b/rust/kernel/fs/kiocb.rs new file mode 100644 index 000000000000..84c936cd69b0 --- /dev/null +++ b/rust/kernel/fs/kiocb.rs @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Kernel IO callbacks. +//! +//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) + +use core::marker::PhantomData; +use core::ptr::NonNull; +use kernel::types::ForeignOwnable; + +/// Wrapper for the kernel's `struct kiocb`. +/// +/// Currently this abstractions is incomplete and is essentially just a tuple containing a +/// reference to a file and a file position. +/// +/// The type `T` represents the filesystem or driver specific data associated with the file. +/// +/// # Invariants +/// +/// `inner` points at a valid `struct kiocb` whose file has the type `T` as its private data. +pub struct Kiocb<'a, T> { + inner: NonNull<bindings::kiocb>, + _phantom: PhantomData<&'a T>, +} + +impl<'a, T: ForeignOwnable> Kiocb<'a, T> { + /// Create a `Kiocb` from a raw pointer. + /// + /// # Safety + /// + /// The pointer must reference a valid `struct kiocb` for the duration of `'a`. The private + /// data of the file must be `T`. + pub unsafe fn from_raw(kiocb: *mut bindings::kiocb) -> Self { + Self { + // SAFETY: If a pointer is valid it is not null. + inner: unsafe { NonNull::new_unchecked(kiocb) }, + _phantom: PhantomData, + } + } + + /// Access the underlying `struct kiocb` directly. + pub fn as_raw(&self) -> *mut bindings::kiocb { + self.inner.as_ptr() + } + + /// Get the filesystem or driver specific data associated with the file. + pub fn file(&self) -> <T as ForeignOwnable>::Borrowed<'a> { + // SAFETY: We have shared access to this kiocb and hence the underlying file, so we can + // read the file's private data. + let private = unsafe { (*(*self.as_raw()).ki_filp).private_data }; + // SAFETY: The kiocb has shared access to the private data. + unsafe { <T as ForeignOwnable>::borrow(private) } + } + + /// Gets the current value of `ki_pos`. + pub fn ki_pos(&self) -> i64 { + // SAFETY: We have shared access to the kiocb, so we can read its `ki_pos` field. + unsafe { (*self.as_raw()).ki_pos } + } + + /// Gets a mutable reference to the `ki_pos` field. + pub fn ki_pos_mut(&mut self) -> &mut i64 { + // SAFETY: We have exclusive access to the kiocb, so we can write to `ki_pos`. + unsafe { &mut (*self.as_raw()).ki_pos } + } +} diff --git a/rust/kernel/iov.rs b/rust/kernel/iov.rs new file mode 100644 index 000000000000..43bae8923c46 --- /dev/null +++ b/rust/kernel/iov.rs @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! IO vectors. +//! +//! C headers: [`include/linux/iov_iter.h`](srctree/include/linux/iov_iter.h), +//! [`include/linux/uio.h`](srctree/include/linux/uio.h) + +use crate::{ + alloc::{Allocator, Flags}, + bindings, + prelude::*, + types::Opaque, +}; +use core::{marker::PhantomData, mem::MaybeUninit, ptr, slice}; + +const ITER_SOURCE: bool = bindings::ITER_SOURCE != 0; +const ITER_DEST: bool = bindings::ITER_DEST != 0; + +// Compile-time assertion for the above constants. +const _: () = { + build_assert!( + ITER_SOURCE != ITER_DEST, + "ITER_DEST and ITER_SOURCE should be different." + ); +}; + +/// An IO vector that acts as a source of data. +/// +/// The data may come from many different sources. This includes both things in kernel-space and +/// reading from userspace. It's not necessarily the case that the data source is immutable, so +/// rewinding the IO vector to read the same data twice is not guaranteed to result in the same +/// bytes. It's also possible that the data source is mapped in a thread-local manner using e.g. +/// `kmap_local_page()`, so this type is not `Send` to ensure that the mapping is read from the +/// right context in that scenario. +/// +/// # Invariants +/// +/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_SOURCE`. For the duration +/// of `'data`, it must be safe to read from this IO vector using the standard C methods for this +/// purpose. +#[repr(transparent)] +pub struct IovIterSource<'data> { + iov: Opaque<bindings::iov_iter>, + /// Represent to the type system that this value contains a pointer to readable data it does + /// not own. + _source: PhantomData<&'data [u8]>, +} + +impl<'data> IovIterSource<'data> { + /// Obtain an `IovIterSource` from a raw pointer. + /// + /// # Safety + /// + /// * The referenced `struct iov_iter` must be valid and must only be accessed through the + /// returned reference for the duration of `'iov`. + /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_SOURCE`. + /// * For the duration of `'data`, it must be safe to read from this IO vector using the + /// standard C methods for this purpose. + #[track_caller] + #[inline] + pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterSource<'data> { + // SAFETY: The caller ensures that `ptr` is valid. + let data_source = unsafe { (*ptr).data_source }; + assert_eq!(data_source, ITER_SOURCE); + + // SAFETY: The caller ensures the type invariants for the right durations, and + // `IovIterSource` is layout compatible with `struct iov_iter`. + unsafe { &mut *ptr.cast::<IovIterSource<'data>>() } + } + + /// Access this as a raw `struct iov_iter`. + #[inline] + pub fn as_raw(&mut self) -> *mut bindings::iov_iter { + self.iov.get() + } + + /// Returns the number of bytes available in this IO vector. + /// + /// Note that this may overestimate the number of bytes. For example, reading from userspace + /// memory could fail with `EFAULT`, which will be treated as the end of the IO vector. + #[inline] + pub fn len(&self) -> usize { + // SAFETY: We have shared access to this IO vector, so we can read its `count` field. + unsafe { + (*self.iov.get()) + .__bindgen_anon_1 + .__bindgen_anon_1 + .as_ref() + .count + } + } + + /// Returns whether there are any bytes left in this IO vector. + /// + /// This may return `true` even if there are no more bytes available. For example, reading from + /// userspace memory could fail with `EFAULT`, which will be treated as the end of the IO vector. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Advance this IO vector by `bytes` bytes. + /// + /// If `bytes` is larger than the size of this IO vector, it is advanced to the end. + #[inline] + pub fn advance(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector. + unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) }; + } + + /// Advance this IO vector backwards by `bytes` bytes. + /// + /// # Safety + /// + /// The IO vector must not be reverted to before its beginning. + #[inline] + pub unsafe fn revert(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller + // ensures that `bytes` is in bounds. + unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) }; + } + + /// Read data from this IO vector. + /// + /// Returns the number of bytes that have been copied. + #[inline] + pub fn copy_from_iter(&mut self, out: &mut [u8]) -> usize { + // SAFETY: `Self::copy_from_iter_raw` guarantees that it will not write any uninitialized + // bytes in the provided buffer, so `out` is still a valid `u8` slice after this call. + let out = unsafe { &mut *(ptr::from_mut(out) as *mut [MaybeUninit<u8>]) }; + + self.copy_from_iter_raw(out).len() + } + + /// Read data from this IO vector and append it to a vector. + /// + /// Returns the number of bytes that have been copied. + #[inline] + pub fn copy_from_iter_vec<A: Allocator>( + &mut self, + out: &mut Vec<u8, A>, + flags: Flags, + ) -> Result<usize> { + out.reserve(self.len(), flags)?; + let len = self.copy_from_iter_raw(out.spare_capacity_mut()).len(); + // SAFETY: + // - `len` is the length of a subslice of the spare capacity, so `len` is at most the + // length of the spare capacity. + // - `Self::copy_from_iter_raw` guarantees that the first `len` bytes of the spare capacity + // have been initialized. + unsafe { out.inc_len(len) }; + Ok(len) + } + + /// Read data from this IO vector into potentially uninitialized memory. + /// + /// Returns the sub-slice of the output that has been initialized. If the returned slice is + /// shorter than the input buffer, then the entire IO vector has been read. + /// + /// This will never write uninitialized bytes to the provided buffer. + #[inline] + pub fn copy_from_iter_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> &mut [u8] { + let capacity = out.len(); + let out = out.as_mut_ptr().cast::<u8>(); + + // GUARANTEES: The C API guarantees that it does not write uninitialized bytes to the + // provided buffer. + // SAFETY: + // * By the type invariants, it is still valid to read from this IO vector. + // * `out` is valid for writing for `capacity` bytes because it comes from a slice of + // that length. + let len = unsafe { bindings::_copy_from_iter(out.cast(), capacity, self.as_raw()) }; + + // SAFETY: The underlying C api guarantees that initialized bytes have been written to the + // first `len` bytes of the spare capacity. + unsafe { slice::from_raw_parts_mut(out, len) } + } +} + +/// An IO vector that acts as a destination for data. +/// +/// IO vectors support many different types of destinations. This includes both buffers in +/// kernel-space and writing to userspace. It's possible that the destination buffer is mapped in a +/// thread-local manner using e.g. `kmap_local_page()`, so this type is not `Send` to ensure that +/// the mapping is written to the right context in that scenario. +/// +/// # Invariants +/// +/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_DEST`. For the duration of +/// `'data`, it must be safe to write to this IO vector using the standard C methods for this +/// purpose. +#[repr(transparent)] +pub struct IovIterDest<'data> { + iov: Opaque<bindings::iov_iter>, + /// Represent to the type system that this value contains a pointer to writable data it does + /// not own. + _source: PhantomData<&'data mut [u8]>, +} + +impl<'data> IovIterDest<'data> { + /// Obtain an `IovIterDest` from a raw pointer. + /// + /// # Safety + /// + /// * The referenced `struct iov_iter` must be valid and must only be accessed through the + /// returned reference for the duration of `'iov`. + /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_DEST`. + /// * For the duration of `'data`, it must be safe to write to this IO vector using the + /// standard C methods for this purpose. + #[track_caller] + #[inline] + pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterDest<'data> { + // SAFETY: The caller ensures that `ptr` is valid. + let data_source = unsafe { (*ptr).data_source }; + assert_eq!(data_source, ITER_DEST); + + // SAFETY: The caller ensures the type invariants for the right durations, and + // `IovIterSource` is layout compatible with `struct iov_iter`. + unsafe { &mut *ptr.cast::<IovIterDest<'data>>() } + } + + /// Access this as a raw `struct iov_iter`. + #[inline] + pub fn as_raw(&mut self) -> *mut bindings::iov_iter { + self.iov.get() + } + + /// Returns the number of bytes available in this IO vector. + /// + /// Note that this may overestimate the number of bytes. For example, reading from userspace + /// memory could fail with EFAULT, which will be treated as the end of the IO vector. + #[inline] + pub fn len(&self) -> usize { + // SAFETY: We have shared access to this IO vector, so we can read its `count` field. + unsafe { + (*self.iov.get()) + .__bindgen_anon_1 + .__bindgen_anon_1 + .as_ref() + .count + } + } + + /// Returns whether there are any bytes left in this IO vector. + /// + /// This may return `true` even if there are no more bytes available. For example, reading from + /// userspace memory could fail with EFAULT, which will be treated as the end of the IO vector. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Advance this IO vector by `bytes` bytes. + /// + /// If `bytes` is larger than the size of this IO vector, it is advanced to the end. + #[inline] + pub fn advance(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector. + unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) }; + } + + /// Advance this IO vector backwards by `bytes` bytes. + /// + /// # Safety + /// + /// The IO vector must not be reverted to before its beginning. + #[inline] + pub unsafe fn revert(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller + // ensures that `bytes` is in bounds. + unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) }; + } + + /// Write data to this IO vector. + /// + /// Returns the number of bytes that were written. If this is shorter than the provided slice, + /// then no more bytes can be written. + #[inline] + pub fn copy_to_iter(&mut self, input: &[u8]) -> usize { + // SAFETY: + // * By the type invariants, it is still valid to write to this IO vector. + // * `input` is valid for `input.len()` bytes. + unsafe { bindings::_copy_to_iter(input.as_ptr().cast(), input.len(), self.as_raw()) } + } + + /// Utility for implementing `read_iter` given the full contents of the file. + /// + /// The full contents of the file being read from is represented by `contents`. This call will + /// write the appropriate sub-slice of `contents` and update the file position in `ppos` so + /// that the file will appear to contain `contents` even if takes multiple reads to read the + /// entire file. + #[inline] + pub fn simple_read_from_buffer(&mut self, ppos: &mut i64, contents: &[u8]) -> Result<usize> { + if *ppos < 0 { + return Err(EINVAL); + } + let Ok(pos) = usize::try_from(*ppos) else { + return Ok(0); + }; + if pos >= contents.len() { + return Ok(0); + } + + // BOUNDS: We just checked that `pos < contents.len()` above. + let num_written = self.copy_to_iter(&contents[pos..]); + + // OVERFLOW: `pos+num_written <= contents.len() <= isize::MAX <= i64::MAX`. + *ppos = (pos + num_written) as i64; + + Ok(num_written) + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index ed53169e795c..99dbb7b2812e 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -92,6 +92,7 @@ pub mod fs; pub mod init; pub mod io; pub mod ioctl; +pub mod iov; pub mod jump_label; #[cfg(CONFIG_KUNIT)] pub mod kunit; diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 6373fe183b27..35630fc63875 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -13,7 +13,8 @@ use crate::{ device::Device, error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, - fs::File, + fs::{File, Kiocb}, + iov::{IovIterDest, IovIterSource}, mm::virt::VmaNew, prelude::*, seq_file::SeqFile, @@ -141,6 +142,16 @@ pub trait MiscDevice: Sized { build_error!(VTABLE_DEFAULT_ERROR) } + /// Read from this miscdevice. + fn read_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterDest<'_>) -> Result<usize> { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Write to this miscdevice. + fn write_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterSource<'_>) -> Result<usize> { + build_error!(VTABLE_DEFAULT_ERROR) + } + /// Handler for ioctls. /// /// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`]. @@ -247,6 +258,46 @@ impl<T: MiscDevice> MiscdeviceVTable<T> { /// # Safety /// + /// `kiocb` must be correspond to a valid file that is associated with a + /// `MiscDeviceRegistration<T>`. `iter` must be a valid `struct iov_iter` for writing. + unsafe extern "C" fn read_iter( + kiocb: *mut bindings::kiocb, + iter: *mut bindings::iov_iter, + ) -> isize { + // SAFETY: The caller provides a valid `struct kiocb` associated with a + // `MiscDeviceRegistration<T>` file. + let kiocb = unsafe { Kiocb::from_raw(kiocb) }; + // SAFETY: This is a valid `struct iov_iter` for writing. + let iov = unsafe { IovIterDest::from_raw(iter) }; + + match T::read_iter(kiocb, iov) { + Ok(res) => res as isize, + Err(err) => err.to_errno() as isize, + } + } + + /// # Safety + /// + /// `kiocb` must be correspond to a valid file that is associated with a + /// `MiscDeviceRegistration<T>`. `iter` must be a valid `struct iov_iter` for writing. + unsafe extern "C" fn write_iter( + kiocb: *mut bindings::kiocb, + iter: *mut bindings::iov_iter, + ) -> isize { + // SAFETY: The caller provides a valid `struct kiocb` associated with a + // `MiscDeviceRegistration<T>` file. + let kiocb = unsafe { Kiocb::from_raw(kiocb) }; + // SAFETY: This is a valid `struct iov_iter` for reading. + let iov = unsafe { IovIterSource::from_raw(iter) }; + + match T::write_iter(kiocb, iov) { + Ok(res) => res as isize, + Err(err) => err.to_errno() as isize, + } + } + + /// # Safety + /// /// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`. /// `vma` must be a vma that is currently being mmap'ed with this file. unsafe extern "C" fn mmap( @@ -341,6 +392,16 @@ impl<T: MiscDevice> MiscdeviceVTable<T> { open: Some(Self::open), release: Some(Self::release), mmap: if T::HAS_MMAP { Some(Self::mmap) } else { None }, + read_iter: if T::HAS_READ_ITER { + Some(Self::read_iter) + } else { + None + }, + write_iter: if T::HAS_WRITE_ITER { + Some(Self::write_iter) + } else { + None + }, unlocked_ioctl: if T::HAS_IOCTL { Some(Self::ioctl) } else { diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs index 7c1b17246ed5..811fe30e8e6f 100644 --- a/rust/kernel/page.rs +++ b/rust/kernel/page.rs @@ -85,6 +85,12 @@ impl Page { self.page.as_ptr() } + /// Get the node id containing this page. + pub fn nid(&self) -> i32 { + // SAFETY: Always safe to call with a valid page. + unsafe { bindings::page_to_nid(self.as_ptr()) } + } + /// Runs a piece of code with this page mapped to an address. /// /// The page is unmapped when this call returns. diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs index 0c63e9e7e564..9d271695265f 100644 --- a/rust/kernel/security.rs +++ b/rust/kernel/security.rs @@ -8,9 +8,46 @@ use crate::{ bindings, + cred::Credential, error::{to_result, Result}, + fs::File, }; +/// Calls the security modules to determine if the given task can become the manager of a binder +/// context. +#[inline] +pub fn binder_set_context_mgr(mgr: &Credential) -> Result { + // SAFETY: `mrg.0` is valid because the shared reference guarantees a nonzero refcount. + to_result(unsafe { bindings::security_binder_set_context_mgr(mgr.as_ptr()) }) +} + +/// Calls the security modules to determine if binder transactions are allowed from task `from` to +/// task `to`. +#[inline] +pub fn binder_transaction(from: &Credential, to: &Credential) -> Result { + // SAFETY: `from` and `to` are valid because the shared references guarantee nonzero refcounts. + to_result(unsafe { bindings::security_binder_transaction(from.as_ptr(), to.as_ptr()) }) +} + +/// Calls the security modules to determine if task `from` is allowed to send binder objects +/// (owned by itself or other processes) to task `to` through a binder transaction. +#[inline] +pub fn binder_transfer_binder(from: &Credential, to: &Credential) -> Result { + // SAFETY: `from` and `to` are valid because the shared references guarantee nonzero refcounts. + to_result(unsafe { bindings::security_binder_transfer_binder(from.as_ptr(), to.as_ptr()) }) +} + +/// Calls the security modules to determine if task `from` is allowed to send the given file to +/// task `to` (which would get its own file descriptor) through a binder transaction. +#[inline] +pub fn binder_transfer_file(from: &Credential, to: &Credential, file: &File) -> Result { + // SAFETY: `from`, `to` and `file` are valid because the shared references guarantee nonzero + // refcounts. + to_result(unsafe { + bindings::security_binder_transfer_file(from.as_ptr(), to.as_ptr(), file.as_ptr()) + }) +} + /// A security context string. /// /// # Invariants |
