diff options
Diffstat (limited to 'rust/kernel')
32 files changed, 1936 insertions, 190 deletions
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs new file mode 100644 index 000000000000..531b5e471cb1 --- /dev/null +++ b/rust/kernel/alloc.rs @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Extensions to the [`alloc`] crate. + +#[cfg(not(test))] +#[cfg(not(testlib))] +mod allocator; +pub mod box_ext; +pub mod vec_ext; + +/// Indicates an allocation error. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct AllocError; + +/// Flags to be used when allocating memory. +/// +/// They can be combined with the operators `|`, `&`, and `!`. +/// +/// Values can be used from the [`flags`] module. +#[derive(Clone, Copy)] +pub struct Flags(u32); + +impl core::ops::BitOr for Flags { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +impl core::ops::BitAnd for Flags { + type Output = Self; + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } +} + +impl core::ops::Not for Flags { + type Output = Self; + fn not(self) -> Self::Output { + Self(!self.0) + } +} + +/// Allocation flags. +/// +/// These are meant to be used in functions that can allocate memory. +pub mod flags { + use super::Flags; + + /// Zeroes out the allocated memory. + /// + /// This is normally or'd with other flags. + pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO); + + /// Users can not sleep and need the allocation to succeed. + /// + /// A lower watermark is applied to allow access to "atomic reserves". The current + /// implementation doesn't support NMI and few other strict non-preemptive contexts (e.g. + /// raw_spin_lock). The same applies to [`GFP_NOWAIT`]. + pub const GFP_ATOMIC: Flags = Flags(bindings::GFP_ATOMIC); + + /// Typical for kernel-internal allocations. The caller requires ZONE_NORMAL or a lower zone + /// for direct access but can direct reclaim. 
+ pub const GFP_KERNEL: Flags = Flags(bindings::GFP_KERNEL); + + /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg. + pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT); + + /// For kernel allocations that should not stall for direct reclaim, start physical IO or + /// use any filesystem callback. It is very likely to fail to allocate memory, even for very + /// small allocations. + pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT); +} diff --git a/rust/kernel/allocator.rs b/rust/kernel/alloc/allocator.rs index 01ad139e19bc..e6ea601f38c6 100644 --- a/rust/kernel/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -2,11 +2,10 @@ //! Allocator support. +use super::{flags::*, Flags}; use core::alloc::{GlobalAlloc, Layout}; use core::ptr; -use crate::bindings; - struct KernelAllocator; /// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment. @@ -15,35 +14,28 @@ struct KernelAllocator; /// /// - `ptr` can be either null or a pointer which has been allocated by this allocator. /// - `new_layout` must have a non-zero size. -unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 { +pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: Flags) -> *mut u8 { // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. let layout = new_layout.pad_to_align(); - let mut size = layout.size(); - - if layout.align() > bindings::ARCH_SLAB_MINALIGN { - // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size - // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for - // more information). - // - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of - // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee. 
- size = size.next_power_of_two(); - } + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` + // which together with the slab guarantees means the `krealloc` will return a properly aligned + // object (see comments in `kmalloc()` for more information). + let size = layout.size(); // SAFETY: // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the // function safety requirement. - // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero - // according to the function safety requirement) or a result from `next_power_of_two()`. - unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 } + // - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according + // to the function safety requirement) + unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 } } unsafe impl GlobalAlloc for KernelAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety // requirement. - unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) } + unsafe { krealloc_aligned(ptr::null_mut(), layout, GFP_KERNEL) } } unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { @@ -64,19 +56,13 @@ unsafe impl GlobalAlloc for KernelAllocator { // requirement. // - the size of `layout` is not zero because `new_size` is not zero by the function safety // requirement. - unsafe { krealloc_aligned(ptr, layout, bindings::GFP_KERNEL) } + unsafe { krealloc_aligned(ptr, layout, GFP_KERNEL) } } unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety // requirement. 
- unsafe { - krealloc_aligned( - ptr::null_mut(), - layout, - bindings::GFP_KERNEL | bindings::__GFP_ZERO, - ) - } + unsafe { krealloc_aligned(ptr::null_mut(), layout, GFP_KERNEL | __GFP_ZERO) } } } diff --git a/rust/kernel/alloc/box_ext.rs b/rust/kernel/alloc/box_ext.rs new file mode 100644 index 000000000000..829cb1c1cf9e --- /dev/null +++ b/rust/kernel/alloc/box_ext.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Extensions to [`Box`] for fallible allocations. + +use super::{AllocError, Flags}; +use alloc::boxed::Box; +use core::mem::MaybeUninit; + +/// Extensions to [`Box`]. +pub trait BoxExt<T>: Sized { + /// Allocates a new box. + /// + /// The allocation may fail, in which case an error is returned. + fn new(x: T, flags: Flags) -> Result<Self, AllocError>; + + /// Allocates a new uninitialised box. + /// + /// The allocation may fail, in which case an error is returned. + fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError>; +} + +impl<T> BoxExt<T> for Box<T> { + fn new(x: T, flags: Flags) -> Result<Self, AllocError> { + let b = <Self as BoxExt<_>>::new_uninit(flags)?; + Ok(Box::write(b, x)) + } + + #[cfg(any(test, testlib))] + fn new_uninit(_flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError> { + Ok(Box::new_uninit()) + } + + #[cfg(not(any(test, testlib)))] + fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError> { + let ptr = if core::mem::size_of::<MaybeUninit<T>>() == 0 { + core::ptr::NonNull::<_>::dangling().as_ptr() + } else { + let layout = core::alloc::Layout::new::<MaybeUninit<T>>(); + + // SAFETY: Memory is being allocated (first arg is null). The only other source of + // safety issues is sleeping on atomic context, which is addressed by klint. Lastly, + // the type is not a ZST (checked above). 
+ let ptr = + unsafe { super::allocator::krealloc_aligned(core::ptr::null_mut(), layout, flags) }; + if ptr.is_null() { + return Err(AllocError); + } + + ptr.cast::<MaybeUninit<T>>() + }; + + // SAFETY: For non-zero-sized types, we allocate above using the global allocator. For + // zero-sized types, we use `NonNull::dangling`. + Ok(unsafe { Box::from_raw(ptr) }) + } +} diff --git a/rust/kernel/alloc/vec_ext.rs b/rust/kernel/alloc/vec_ext.rs new file mode 100644 index 000000000000..1297a4be32e8 --- /dev/null +++ b/rust/kernel/alloc/vec_ext.rs @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Extensions to [`Vec`] for fallible allocations. + +use super::{AllocError, Flags}; +use alloc::vec::Vec; + +/// Extensions to [`Vec`]. +pub trait VecExt<T>: Sized { + /// Creates a new [`Vec`] instance with at least the given capacity. + /// + /// # Examples + /// + /// ``` + /// let v = Vec::<u32>::with_capacity(20, GFP_KERNEL)?; + /// + /// assert!(v.capacity() >= 20); + /// # Ok::<(), Error>(()) + /// ``` + fn with_capacity(capacity: usize, flags: Flags) -> Result<Self, AllocError>; + + /// Appends an element to the back of the [`Vec`] instance. + /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// v.push(1, GFP_KERNEL)?; + /// assert_eq!(&v, &[1]); + /// + /// v.push(2, GFP_KERNEL)?; + /// assert_eq!(&v, &[1, 2]); + /// # Ok::<(), Error>(()) + /// ``` + fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError>; + + /// Pushes clones of the elements of slice into the [`Vec`] instance. 
+ /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// v.push(1, GFP_KERNEL)?; + /// + /// v.extend_from_slice(&[20, 30, 40], GFP_KERNEL)?; + /// assert_eq!(&v, &[1, 20, 30, 40]); + /// + /// v.extend_from_slice(&[50, 60], GFP_KERNEL)?; + /// assert_eq!(&v, &[1, 20, 30, 40, 50, 60]); + /// # Ok::<(), Error>(()) + /// ``` + fn extend_from_slice(&mut self, other: &[T], flags: Flags) -> Result<(), AllocError> + where + T: Clone; + + /// Ensures that the capacity exceeds the length by at least `additional` elements. + /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// v.push(1, GFP_KERNEL)?; + /// + /// v.reserve(10, GFP_KERNEL)?; + /// let cap = v.capacity(); + /// assert!(cap >= 10); + /// + /// v.reserve(10, GFP_KERNEL)?; + /// let new_cap = v.capacity(); + /// assert_eq!(new_cap, cap); + /// + /// # Ok::<(), Error>(()) + /// ``` + fn reserve(&mut self, additional: usize, flags: Flags) -> Result<(), AllocError>; +} + +impl<T> VecExt<T> for Vec<T> { + fn with_capacity(capacity: usize, flags: Flags) -> Result<Self, AllocError> { + let mut v = Vec::new(); + <Self as VecExt<_>>::reserve(&mut v, capacity, flags)?; + Ok(v) + } + + fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError> { + <Self as VecExt<_>>::reserve(self, 1, flags)?; + let s = self.spare_capacity_mut(); + s[0].write(v); + + // SAFETY: We just initialised the first spare entry, so it is safe to increase the length + // by 1. We also know that the new length is <= capacity because of the previous call to + // `reserve` above. 
+ unsafe { self.set_len(self.len() + 1) }; + Ok(()) + } + + fn extend_from_slice(&mut self, other: &[T], flags: Flags) -> Result<(), AllocError> + where + T: Clone, + { + <Self as VecExt<_>>::reserve(self, other.len(), flags)?; + for (slot, item) in core::iter::zip(self.spare_capacity_mut(), other) { + slot.write(item.clone()); + } + + // SAFETY: We just initialised the `other.len()` spare entries, so it is safe to increase + // the length by the same amount. We also know that the new length is <= capacity because + // of the previous call to `reserve` above. + unsafe { self.set_len(self.len() + other.len()) }; + Ok(()) + } + + #[cfg(any(test, testlib))] + fn reserve(&mut self, additional: usize, _flags: Flags) -> Result<(), AllocError> { + Vec::reserve(self, additional); + Ok(()) + } + + #[cfg(not(any(test, testlib)))] + fn reserve(&mut self, additional: usize, flags: Flags) -> Result<(), AllocError> { + let len = self.len(); + let cap = self.capacity(); + + if cap - len >= additional { + return Ok(()); + } + + if core::mem::size_of::<T>() == 0 { + // The capacity is already `usize::MAX` for ZSTs, we can't go higher. + return Err(AllocError); + } + + // We know cap is <= `isize::MAX` because `Layout::array` fails if the resulting byte size + // is greater than `isize::MAX`. So the multiplication by two won't overflow. + let new_cap = core::cmp::max(cap * 2, len.checked_add(additional).ok_or(AllocError)?); + let layout = core::alloc::Layout::array::<T>(new_cap).map_err(|_| AllocError)?; + + let (old_ptr, len, cap) = destructure(self); + + // We need to make sure that `ptr` is either NULL or comes from a previous call to + // `krealloc_aligned`. A `Vec<T>`'s `ptr` value is not guaranteed to be NULL and might be + // dangling after being created with `Vec::new`. Instead, we can rely on `Vec<T>`'s capacity + // to be zero if no memory has been allocated yet. 
+ let ptr = if cap == 0 { + core::ptr::null_mut() + } else { + old_ptr + }; + + // SAFETY: `ptr` is valid because it's either NULL or comes from a previous call to + // `krealloc_aligned`. We also verified that the type is not a ZST. + let new_ptr = unsafe { super::allocator::krealloc_aligned(ptr.cast(), layout, flags) }; + if new_ptr.is_null() { + // SAFETY: We are just rebuilding the existing `Vec` with no changes. + unsafe { rebuild(self, old_ptr, len, cap) }; + Err(AllocError) + } else { + // SAFETY: `ptr` has been reallocated with the layout for `new_cap` elements. New cap + // is greater than `cap`, so it continues to be >= `len`. + unsafe { rebuild(self, new_ptr.cast::<T>(), len, new_cap) }; + Ok(()) + } + } +} + +#[cfg(not(any(test, testlib)))] +fn destructure<T>(v: &mut Vec<T>) -> (*mut T, usize, usize) { + let mut tmp = Vec::new(); + core::mem::swap(&mut tmp, v); + let mut tmp = core::mem::ManuallyDrop::new(tmp); + let len = tmp.len(); + let cap = tmp.capacity(); + (tmp.as_mut_ptr(), len, cap) +} + +/// Rebuilds a `Vec` from a pointer, length, and capacity. +/// +/// # Safety +/// +/// The same as [`Vec::from_raw_parts`]. +#[cfg(not(any(test, testlib)))] +unsafe fn rebuild<T>(v: &mut Vec<T>, ptr: *mut T, len: usize, cap: usize) { + // SAFETY: The safety requirements from this function satisfy those of `from_raw_parts`. + let mut tmp = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + core::mem::swap(&mut tmp, v); +} diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs new file mode 100644 index 000000000000..150f710efe5b --- /dev/null +++ b/rust/kernel/block.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Types for working with the block layer. + +pub mod mq; diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs new file mode 100644 index 000000000000..fb0f393c1cea --- /dev/null +++ b/rust/kernel/block/mq.rs @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! 
This module provides types for implementing block drivers that interface the +//! blk-mq subsystem. +//! +//! To implement a block device driver, a Rust module must do the following: +//! +//! - Implement [`Operations`] for a type `T`. +//! - Create a [`TagSet<T>`]. +//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`]. +//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in +//! the `TagSet` reference. +//! +//! The types available in this module that have direct C counterparts are: +//! +//! - The [`TagSet`] type that abstracts the C type `struct tag_set`. +//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`. +//! - The [`Request`] type that abstracts the C type `struct request`. +//! +//! The kernel will interface with the block device driver by calling the method +//! implementations of the `Operations` trait. +//! +//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`] +//! instances. The `Request` type is a wrapper around the C `struct request`. +//! The driver must mark end of processing by calling one of the +//! `Request::end` methods. Failure to do so can lead to deadlock or timeout +//! errors. Please note that the C function `blk_mq_start_request` is implicitly +//! called when the request is queued with the driver. +//! +//! The `TagSet` is responsible for creating and maintaining a mapping between +//! `Request`s and integer ids as well as carrying a pointer to the vtable +//! generated by `Operations`. This mapping is useful for associating +//! completions from hardware with the correct `Request` instance. The `TagSet` +//! determines the maximum queue depth by setting the number of `Request` +//! instances available to the driver, and it determines the number of queues to +//! instantiate for the driver. If possible, a driver should allocate one queue +//! per core, to keep queue data local to a core. +//! +//! 
One `TagSet` instance can be shared between multiple `GenDisk` instances. +//! This can be useful when implementing drivers where one piece of hardware +//! with one set of IO resources is represented to the user as multiple disks. +//! +//! One significant difference between block device drivers implemented with +//! these Rust abstractions and drivers implemented in C is that the Rust +//! drivers have to own a reference count on the `Request` type when the IO is +//! in flight. This is to ensure that the C `struct request` instances backing +//! the Rust `Request` instances are live while the Rust driver holds a +//! reference to the `Request`. In addition, the conversion of an integer tag to +//! a `Request` via the `TagSet` would not be sound without this bookkeeping. +//! +//! [`GenDisk`]: gen_disk::GenDisk +//! [`GenDisk<T>`]: gen_disk::GenDisk +//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder +//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build +//! +//! # Example +//! +//! ```rust +//! use kernel::{ +//! alloc::flags, +//! block::mq::*, +//! new_mutex, +//! prelude::*, +//! sync::{Arc, Mutex}, +//! types::{ARef, ForeignOwnable}, +//! }; +//! +//! struct MyBlkDevice; +//! +//! #[vtable] +//! impl Operations for MyBlkDevice { +//! +//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result { +//! Request::end_ok(rq); +//! Ok(()) +//! } +//! +//! fn commit_rqs() {} +//! } +//! +//! let tagset: Arc<TagSet<MyBlkDevice>> = +//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; +//! let mut disk = gen_disk::GenDiskBuilder::new() +//! .capacity_sectors(4096) +//! .build(format_args!("myblk"), tagset)?; +//! +//! # Ok::<(), kernel::error::Error>(()) +//! 
``` + +pub mod gen_disk; +mod operations; +mod raw_writer; +mod request; +mod tag_set; + +pub use operations::Operations; +pub use request::Request; +pub use tag_set::TagSet; diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs new file mode 100644 index 000000000000..f548a6199847 --- /dev/null +++ b/rust/kernel/block/mq/gen_disk.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic disk abstraction. +//! +//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h) +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; +use crate::error; +use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; +use core::fmt::{self, Write}; + +/// A builder for [`GenDisk`]. +/// +/// Use this struct to configure and add new [`GenDisk`] to the VFS. +pub struct GenDiskBuilder { + rotational: bool, + logical_block_size: u32, + physical_block_size: u32, + capacity_sectors: u64, +} + +impl Default for GenDiskBuilder { + fn default() -> Self { + Self { + rotational: false, + logical_block_size: bindings::PAGE_SIZE as u32, + physical_block_size: bindings::PAGE_SIZE as u32, + capacity_sectors: 0, + } + } +} + +impl GenDiskBuilder { + /// Create a new instance. + pub fn new() -> Self { + Self::default() + } + + /// Set the rotational media attribute for the device to be built. + pub fn rotational(mut self, rotational: bool) -> Self { + self.rotational = rotational; + self + } + + /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`, + /// and that it is a power of two. + fn validate_block_size(size: u32) -> Result<()> { + if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() { + Err(error::code::EINVAL) + } else { + Ok(()) + } + } + + /// Set the logical block size of the device to be built. 
+ /// + /// This method will check that block size is a power of two and between 512 + /// and `PAGE_SIZE`. If not, an error is returned and the block size is not set. + /// + /// This is the smallest unit the storage device can address. It is + /// typically 4096 bytes. + pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> { + Self::validate_block_size(block_size)?; + self.logical_block_size = block_size; + Ok(self) + } + + /// Set the physical block size of the device to be built. + /// + /// This method will check that block size is a power of two and between 512 + /// and `PAGE_SIZE`. If not, an error is returned and the block size is not set. + /// + /// This is the smallest unit a physical storage device can write + /// atomically. It is usually the same as the logical block size but may be + /// bigger. One example is SATA drives with 4096 byte physical block size + /// that expose a 512 byte logical block size to the operating system. + pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> { + Self::validate_block_size(block_size)?; + self.physical_block_size = block_size; + Ok(self) + } + + /// Set the capacity of the device to be built, in sectors (512 bytes). + pub fn capacity_sectors(mut self, capacity: u64) -> Self { + self.capacity_sectors = capacity; + self + } + + /// Build a new `GenDisk` and add it to the VFS. + pub fn build<T: Operations>( + self, + name: fmt::Arguments<'_>, + tagset: Arc<TagSet<T>>, + ) -> Result<GenDisk<T>> { + let lock_class_key = crate::sync::LockClassKey::new(); + + // SAFETY: `bindings::queue_limits` contains only fields that are valid when zeroed. 
+ let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() }; + + lim.logical_block_size = self.logical_block_size; + lim.physical_block_size = self.physical_block_size; + if self.rotational { + lim.features = bindings::BLK_FEAT_ROTATIONAL; + } + + // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set + let gendisk = from_err_ptr(unsafe { + bindings::__blk_mq_alloc_disk( + tagset.raw_tag_set(), + &mut lim, + core::ptr::null_mut(), + lock_class_key.as_ptr(), + ) + })?; + + const TABLE: bindings::block_device_operations = bindings::block_device_operations { + submit_bio: None, + open: None, + release: None, + ioctl: None, + compat_ioctl: None, + check_events: None, + unlock_native_capacity: None, + getgeo: None, + set_read_only: None, + swap_slot_free_notify: None, + report_zones: None, + devnode: None, + alternative_gpt_sector: None, + get_unique_id: None, + // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to + // be merged (unstable in rustc 1.78 which is staged for linux 6.10) + // https://github.com/rust-lang/rust/issues/119618 + owner: core::ptr::null_mut(), + pr_ops: core::ptr::null_mut(), + free_disk: None, + poll_bio: None, + }; + + // SAFETY: `gendisk` is a valid pointer as we initialized it above + unsafe { (*gendisk).fops = &TABLE }; + + let mut raw_writer = RawWriter::from_array( + // SAFETY: `gendisk` points to a valid and initialized instance. We + // have exclusive access, since the disk is not added to the VFS + // yet. + unsafe { &mut (*gendisk).disk_name }, + )?; + raw_writer.write_fmt(name)?; + raw_writer.write_char('\0')?; + + // SAFETY: `gendisk` points to a valid and initialized instance of + // `struct gendisk`. `set_capacity` takes a lock to synchronize this + // operation, so we will not race. + unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) }; + + crate::error::to_result( + // SAFETY: `gendisk` points to a valid and initialized instance of + // `struct gendisk`. 
+ unsafe { + bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut()) + }, + )?; + + // INVARIANT: `gendisk` was initialized above. + // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above. + Ok(GenDisk { + _tagset: tagset, + gendisk, + }) + } +} + +/// A generic block device. +/// +/// # Invariants +/// +/// - `gendisk` must always point to an initialized and valid `struct gendisk`. +/// - `gendisk` was added to the VFS through a call to +/// `bindings::device_add_disk`. +pub struct GenDisk<T: Operations> { + _tagset: Arc<TagSet<T>>, + gendisk: *mut bindings::gendisk, +} + +// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a +// `TagSet` It is safe to send this to other threads as long as T is Send. +unsafe impl<T: Operations + Send> Send for GenDisk<T> {} + +impl<T: Operations> Drop for GenDisk<T> { + fn drop(&mut self) { + // SAFETY: By type invariant, `self.gendisk` points to a valid and + // initialized instance of `struct gendisk`, and it was previously added + // to the VFS. + unsafe { bindings::del_gendisk(self.gendisk) }; + } +} diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs new file mode 100644 index 000000000000..9ba7fdfeb4b2 --- /dev/null +++ b/rust/kernel/block/mq/operations.rs @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides an interface for blk-mq drivers to implement. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::{ + bindings, + block::mq::request::RequestDataWrapper, + block::mq::Request, + error::{from_result, Result}, + types::ARef, +}; +use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}; + +/// Implement this trait to interface blk-mq as block devices. +/// +/// To implement a block device driver, implement this trait as described in the +/// [module level documentation]. 
The kernel will use the implementation of the +/// functions defined in this trait to interface a block device driver. Note: +/// There is no need for an exit_request() implementation, because the `drop` +/// implementation of the [`Request`] type will be invoked automatically by +/// the C/Rust glue logic. +/// +/// [module level documentation]: kernel::block::mq +#[macros::vtable] +pub trait Operations: Sized { + /// Called by the kernel to queue a request with the driver. If `is_last` is + /// `false`, the driver is allowed to defer committing the request. + fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result; + + /// Called by the kernel to indicate that queued requests should be submitted. + fn commit_rqs(); + + /// Called by the kernel to poll the device for completed requests. Only + /// used for poll queues. + fn poll() -> bool { + crate::build_error(crate::error::VTABLE_DEFAULT_ERROR) + } +} + +/// A vtable for blk-mq to interact with a block device driver. +/// +/// A `bindings::blk_mq_ops` vtable is constructed from pointers to the `extern +/// "C"` functions of this struct, exposed through the `OperationsVTable::VTABLE`. +/// +/// For general documentation of these methods, see the kernel source +/// documentation related to `struct blk_mq_ops` in +/// [`include/linux/blk-mq.h`]. +/// +/// [`include/linux/blk-mq.h`]: srctree/include/linux/blk-mq.h +pub(crate) struct OperationsVTable<T: Operations>(PhantomData<T>); + +impl<T: Operations> OperationsVTable<T> { + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - The caller of this function must ensure that the pointee of `bd` is + /// valid for reads for the duration of this function. + /// - This function must be called for an initialized and live `hctx`. That + /// is, `Self::init_hctx_callback` was called and + /// `Self::exit_hctx_callback()` was not yet called. 
+ /// - `(*bd).rq` must point to an initialized and live `bindings::request`. + /// That is, `Self::init_request_callback` was called but + /// `Self::exit_request_callback` was not yet called for the request. + /// - `(*bd).rq` must be owned by the driver. That is, the block layer must + /// promise to not access the request until the driver calls + /// `bindings::blk_mq_end_request` for the request. + unsafe extern "C" fn queue_rq_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + bd: *const bindings::blk_mq_queue_data, + ) -> bindings::blk_status_t { + // SAFETY: `bd.rq` is valid as required by the safety requirement for + // this function. + let request = unsafe { &*(*bd).rq.cast::<Request<T>>() }; + + // One refcount for the ARef, one for being in flight + request.wrapper_ref().refcount().store(2, Ordering::Relaxed); + + // SAFETY: + // - We own a refcount that we took above. We pass that to `ARef`. + // - By the safety requirements of this function, `request` is a valid + // `struct request` and the private data is properly initialized. + // - `rq` will be alive until `blk_mq_end_request` is called and is + // reference counted by `ARef` until then. + let rq = unsafe { Request::aref_from_raw((*bd).rq) }; + + // SAFETY: We have exclusive access and we just set the refcount above. + unsafe { Request::start_unchecked(&rq) }; + + let ret = T::queue_rq( + rq, + // SAFETY: `bd` is valid as required by the safety requirement for + // this function. + unsafe { (*bd).last }, + ); + + if let Err(e) = ret { + e.to_blk_status() + } else { + bindings::BLK_STS_OK as _ + } + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) { + T::commit_rqs() + } + + /// This function is called by the C kernel. 
It is not currently + /// implemented, and there is no way to exercise this code path. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {} + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn poll_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _iob: *mut bindings::io_comp_batch, + ) -> core::ffi::c_int { + T::poll().into() + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. This + /// function may only be called once before `exit_hctx_callback` is called + /// for the same context. + unsafe extern "C" fn init_hctx_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _tagset_data: *mut core::ffi::c_void, + _hctx_idx: core::ffi::c_uint, + ) -> core::ffi::c_int { + from_result(|| Ok(0)) + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn exit_hctx_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _hctx_idx: core::ffi::c_uint, + ) { + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - This function may only be called by blk-mq C infrastructure. + /// - `_set` must point to an initialized `TagSet<T>`. + /// - `rq` must point to an initialized `bindings::request`. 
+ /// - The allocation pointed to by `rq` must be at the size of `Request` + /// plus the size of `RequestDataWrapper`. + unsafe extern "C" fn init_request_callback( + _set: *mut bindings::blk_mq_tag_set, + rq: *mut bindings::request, + _hctx_idx: core::ffi::c_uint, + _numa_node: core::ffi::c_uint, + ) -> core::ffi::c_int { + from_result(|| { + // SAFETY: By the safety requirements of this function, `rq` points + // to a valid allocation. + let pdu = unsafe { Request::wrapper_ptr(rq.cast::<Request<T>>()) }; + + // SAFETY: The refcount field is allocated but not initialized, so + // it is valid for writes. + unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) }; + + Ok(0) + }) + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - This function may only be called by blk-mq C infrastructure. + /// - `_set` must point to an initialized `TagSet<T>`. + /// - `rq` must point to an initialized and valid `Request`. + unsafe extern "C" fn exit_request_callback( + _set: *mut bindings::blk_mq_tag_set, + rq: *mut bindings::request, + _hctx_idx: core::ffi::c_uint, + ) { + // SAFETY: The tagset invariants guarantee that all requests are allocated with extra memory + // for the request data. + let pdu = unsafe { bindings::blk_mq_rq_to_pdu(rq) }.cast::<RequestDataWrapper>(); + + // SAFETY: `pdu` is valid for read and write and is properly initialised. 
+ unsafe { core::ptr::drop_in_place(pdu) }; + } + + const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops { + queue_rq: Some(Self::queue_rq_callback), + queue_rqs: None, + commit_rqs: Some(Self::commit_rqs_callback), + get_budget: None, + put_budget: None, + set_rq_budget_token: None, + get_rq_budget_token: None, + timeout: None, + poll: if T::HAS_POLL { + Some(Self::poll_callback) + } else { + None + }, + complete: Some(Self::complete_callback), + init_hctx: Some(Self::init_hctx_callback), + exit_hctx: Some(Self::exit_hctx_callback), + init_request: Some(Self::init_request_callback), + exit_request: Some(Self::exit_request_callback), + cleanup_rq: None, + busy: None, + map_queues: None, + #[cfg(CONFIG_BLK_DEBUG_FS)] + show_rq: None, + }; + + pub(crate) const fn build() -> &'static bindings::blk_mq_ops { + &Self::VTABLE + } +} diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs new file mode 100644 index 000000000000..9222465d670b --- /dev/null +++ b/rust/kernel/block/mq/raw_writer.rs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::fmt::{self, Write}; + +use crate::error::Result; +use crate::prelude::EINVAL; + +/// A mutable reference to a byte buffer where a string can be written into. +/// +/// # Invariants +/// +/// `buffer` is always null terminated. +pub(crate) struct RawWriter<'a> { + buffer: &'a mut [u8], + pos: usize, +} + +impl<'a> RawWriter<'a> { + /// Create a new `RawWriter` instance. + fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> { + *(buffer.last_mut().ok_or(EINVAL)?) = 0; + + // INVARIANT: We null terminated the buffer above. + Ok(Self { buffer, pos: 0 }) + } + + pub(crate) fn from_array<const N: usize>( + a: &'a mut [core::ffi::c_char; N], + ) -> Result<RawWriter<'a>> { + Self::new( + // SAFETY: the buffer of `a` is valid for read and write as `u8` for + // at least `N` bytes. 
+ unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) }, + ) + } +} + +impl Write for RawWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + let bytes = s.as_bytes(); + let len = bytes.len(); + + // We do not want to overwrite our null terminator + if self.pos + len > self.buffer.len() - 1 { + return Err(fmt::Error); + } + + // INVARIANT: We are not overwriting the last byte + self.buffer[self.pos..self.pos + len].copy_from_slice(bytes); + + self.pos += len; + + Ok(()) + } +} diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs new file mode 100644 index 000000000000..a0e22827f3f4 --- /dev/null +++ b/rust/kernel/block/mq/request.rs @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides a wrapper for the C `struct request` type. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::{ + bindings, + block::mq::Operations, + error::Result, + types::{ARef, AlwaysRefCounted, Opaque}, +}; +use core::{ + marker::PhantomData, + ptr::{addr_of_mut, NonNull}, + sync::atomic::{AtomicU64, Ordering}, +}; + +/// A wrapper around a blk-mq `struct request`. This represents an IO request. +/// +/// # Implementation details +/// +/// There are four states for a request that the Rust bindings care about: +/// +/// A) Request is owned by block layer (refcount 0) +/// B) Request is owned by driver but with zero `ARef`s in existence +/// (refcount 1) +/// C) Request is owned by driver with exactly one `ARef` in existence +/// (refcount 2) +/// D) Request is owned by driver with more than one `ARef` in existence +/// (refcount > 2) +/// +/// +/// We need to track A and B to ensure we fail tag to request conversions for +/// requests that are not owned by the driver. +/// +/// We need to track C and D to ensure that it is safe to end the request and hand +/// back ownership to the block layer. 
+/// +/// The states are tracked through the private `refcount` field of +/// `RequestDataWrapper`. This structure lives in the private data area of the C +/// `struct request`. +/// +/// # Invariants +/// +/// * `self.0` is a valid `struct request` created by the C portion of the kernel. +/// * The private data area associated with this request must be an initialized +/// and valid `RequestDataWrapper<T>`. +/// * `self` is reference counted by atomic modification of +/// self.wrapper_ref().refcount(). +/// +#[repr(transparent)] +pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>); + +impl<T: Operations> Request<T> { + /// Create an `ARef<Request>` from a `struct request` pointer. + /// + /// # Safety + /// + /// * The caller must own a refcount on `ptr` that is transferred to the + /// returned `ARef`. + /// * The type invariants for `Request` must hold for the pointee of `ptr`. + pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> { + // INVARIANT: By the safety requirements of this function, invariants are upheld. + // SAFETY: By the safety requirement of this function, we own a + // reference count that we can pass to `ARef`. + unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) } + } + + /// Notify the block layer that a request is going to be processed now. + /// + /// The block layer uses this hook to do proper initializations such as + /// starting the timeout timer. It is a requirement that block device + /// drivers call this function when starting to process a request. + /// + /// # Safety + /// + /// The caller must have exclusive ownership of `self`, that is + /// `self.wrapper_ref().refcount() == 2`. + pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) { + // SAFETY: By type invariant, `self.0` is a valid `struct request` and + // we have exclusive access. 
+ unsafe { bindings::blk_mq_start_request(this.0.get()) }; + } + + /// Try to take exclusive ownership of `this` by dropping the refcount to 0. + /// This fails if `this` is not the only `ARef` pointing to the underlying + /// `Request`. + /// + /// If the operation is successful, `Ok` is returned with a pointer to the + /// C `struct request`. If the operation fails, `this` is returned in the + /// `Err` variant. + fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> { + // We can race with `TagSet::tag_to_rq` + if let Err(_old) = this.wrapper_ref().refcount().compare_exchange( + 2, + 0, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + return Err(this); + } + + let request_ptr = this.0.get(); + core::mem::forget(this); + + Ok(request_ptr) + } + + /// Notify the block layer that the request has been completed without errors. + /// + /// This function will return `Err` if `this` is not the only `ARef` + /// referencing the request. + pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> { + let request_ptr = Self::try_set_end(this)?; + + // SAFETY: By type invariant, `this.0` was a valid `struct request`. The + // success of the call to `try_set_end` guarantees that there are no + // `ARef`s pointing to this request. Therefore it is safe to hand it + // back to the block layer. + unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) }; + + Ok(()) + } + + /// Return a pointer to the `RequestDataWrapper` stored in the private area + /// of the request structure. + /// + /// # Safety + /// + /// - `this` must point to a valid allocation of size at least size of + /// `Self` plus size of `RequestDataWrapper`. + pub(crate) unsafe fn wrapper_ptr(this: *mut Self) -> NonNull<RequestDataWrapper> { + let request_ptr = this.cast::<bindings::request>(); + // SAFETY: By safety requirements for this function, `this` is a + // valid allocation. 
+ let wrapper_ptr = + unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() }; + // SAFETY: By C API contract, wrapper_ptr points to a valid allocation + // and is not null. + unsafe { NonNull::new_unchecked(wrapper_ptr) } + } + + /// Return a reference to the `RequestDataWrapper` stored in the private + /// area of the request structure. + pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper { + // SAFETY: By type invariant, `self.0` is a valid allocation. Further, + // the private data associated with this request is initialized and + // valid. The existence of `&self` guarantees that the private data is + // valid as a shared reference. + unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() } + } +} + +/// A wrapper around data stored in the private area of the C `struct request`. +pub(crate) struct RequestDataWrapper { + /// The Rust request refcount has the following states: + /// + /// - 0: The request is owned by C block layer. + /// - 1: The request is owned by Rust abstractions but there are no ARef references to it. + /// - 2+: There are `ARef` references to the request. + refcount: AtomicU64, +} + +impl RequestDataWrapper { + /// Return a reference to the refcount of the request that is embedding + /// `self`. + pub(crate) fn refcount(&self) -> &AtomicU64 { + &self.refcount + } + + /// Return a pointer to the refcount of the request that is embedding the + /// pointee of `this`. + /// + /// # Safety + /// + /// - `this` must point to a live allocation of at least the size of `Self`. + pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { + // SAFETY: Because of the safety requirements of this function, the + // field projection is safe. + unsafe { addr_of_mut!((*this).refcount) } + } +} + +// SAFETY: Exclusive access is thread-safe for `Request`. `Request` has no `&mut +// self` methods and `&self` methods that mutate `self` are internally +// synchronized. 
+unsafe impl<T: Operations> Send for Request<T> {} + +// SAFETY: Shared access is thread-safe for `Request`. `&self` methods that +// mutate `self` are internally synchronized. +unsafe impl<T: Operations> Sync for Request<T> {} + +/// Store the result of `op(target.load())` in `target`, returning the new value of +/// `target`. +fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 { + let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x))); + + // SAFETY: Because the operation passed to `fetch_update` above always + // returns `Some`, `old` will always be `Ok`. + let old = unsafe { old.unwrap_unchecked() }; + + op(old) +} + +/// Store the result of `op(target.load())` in `target` if `target.load() != +/// pred`, returning true if the target was updated. +fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool { + target + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { + if x == pred { + None + } else { + Some(op(x)) + } + }) + .is_ok() +} + +// SAFETY: All instances of `Request<T>` are reference counted. This +// implementation of `AlwaysRefCounted` ensures that increments to the ref count +// keep the object alive in memory at least until a matching reference count +// decrement is executed. +unsafe impl<T: Operations> AlwaysRefCounted for Request<T> { + fn inc_ref(&self) { + let refcount = &self.wrapper_ref().refcount(); + + #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] + let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0); + + #[cfg(CONFIG_DEBUG_MISC)] + if !updated { + panic!("Request refcount zero on clone") + } + } + + unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) { + // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid + // for read. + let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() }; + // SAFETY: The type invariant of `Request` guarantees that the private + // data area is initialized and valid. 
+ let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) }; + + #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] + let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1); + + #[cfg(CONFIG_DEBUG_MISC)] + if new_refcount == 0 { + panic!("Request reached refcount zero in Rust abstractions"); + } + } +} diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs new file mode 100644 index 000000000000..f9a1ca655a35 --- /dev/null +++ b/rust/kernel/block/mq/tag_set.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides the `TagSet` struct to wrap the C `struct blk_mq_tag_set`. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use core::pin::Pin; + +use crate::{ + bindings, + block::mq::{operations::OperationsVTable, request::RequestDataWrapper, Operations}, + error, + prelude::PinInit, + try_pin_init, + types::Opaque, +}; +use core::{convert::TryInto, marker::PhantomData}; +use macros::{pin_data, pinned_drop}; + +/// A wrapper for the C `struct blk_mq_tag_set`. +/// +/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned. +/// +/// # Invariants +/// +/// - `inner` is initialized and valid. +#[pin_data(PinnedDrop)] +#[repr(transparent)] +pub struct TagSet<T: Operations> { + #[pin] + inner: Opaque<bindings::blk_mq_tag_set>, + _p: PhantomData<T>, +} + +impl<T: Operations> TagSet<T> { + /// Try to create a new tag set + pub fn new( + nr_hw_queues: u32, + num_tags: u32, + num_maps: u32, + ) -> impl PinInit<Self, error::Error> { + // SAFETY: `blk_mq_tag_set` only contains integers and pointers, which + // all are allowed to be 0. 
+ let tag_set: bindings::blk_mq_tag_set = unsafe { core::mem::zeroed() }; + let tag_set = core::mem::size_of::<RequestDataWrapper>() + .try_into() + .map(|cmd_size| { + bindings::blk_mq_tag_set { + ops: OperationsVTable::<T>::build(), + nr_hw_queues, + timeout: 0, // 0 selects the C default, which is 30 * HZ jiffies (30 seconds) + numa_node: bindings::NUMA_NO_NODE, + queue_depth: num_tags, + cmd_size, + flags: bindings::BLK_MQ_F_SHOULD_MERGE, + driver_data: core::ptr::null_mut::<core::ffi::c_void>(), + nr_maps: num_maps, + ..tag_set + } + }); + + try_pin_init!(TagSet { + inner <- PinInit::<_, error::Error>::pin_chain(Opaque::new(tag_set?), |tag_set| { + // SAFETY: we do not move out of `tag_set`. + let tag_set = unsafe { Pin::get_unchecked_mut(tag_set) }; + // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`. + error::to_result( unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get())}) + }), + _p: PhantomData, + }) + } + + /// Return the pointer to the wrapped `struct blk_mq_tag_set` + pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set { + self.inner.get() + } +} + +#[pinned_drop] +impl<T: Operations> PinnedDrop for TagSet<T> { + fn drop(self: Pin<&mut Self>) { + // SAFETY: By type invariant `inner` is valid and has been properly + // initialized during construction. + unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) }; + } +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs new file mode 100644 index 000000000000..851018eef885 --- /dev/null +++ b/rust/kernel/device.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic devices that are part of the kernel's driver model. +//! +//! C header: [`include/linux/device.h`](srctree/include/linux/device.h) + +use crate::{ + bindings, + types::{ARef, Opaque}, +}; +use core::ptr; + +/// A reference-counted device. +/// +/// This structure represents the Rust abstraction for a C `struct device`. 
This implementation +/// abstracts the usage of an already existing C `struct device` within Rust code that we get +/// passed from the C side. +/// +/// An instance of this abstraction can be obtained temporarily or permanently. +/// +/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation. +/// A permanent instance is always reference-counted and hence not restricted by any lifetime +/// boundaries. +/// +/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem +/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in +/// `T::probe()`, such that a driver can store the `ARef<Device>` (equivalent to storing a +/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent +/// memory. +/// +/// # Invariants +/// +/// A `Device` instance represents a valid `struct device` created by the C portion of the kernel. +/// +/// Instances of this type are always reference-counted, that is, a call to `get_device` ensures +/// that the allocation remains valid at least until the matching call to `put_device`. +/// +/// `bindings::device::release` is valid to be called from any thread, hence `ARef<Device>` can be +/// dropped from any thread. +#[repr(transparent)] +pub struct Device(Opaque<bindings::device>); + +impl Device { + /// Creates a new reference-counted abstraction instance of an existing `struct device` pointer. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count, + /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to + /// can't drop to zero, for the duration of this function call. + /// + /// It must also be ensured that `bindings::device::release` can be called from any thread. + /// While not officially documented, this should be the case for any `struct device`. 
+ pub unsafe fn from_raw(ptr: *mut bindings::device) -> ARef<Self> { + // SAFETY: By the safety requirements, ptr is valid. + // Initially increase the reference count by one to compensate for the final decrement once + // this newly created `ARef<Device>` instance is dropped. + unsafe { bindings::get_device(ptr) }; + + // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::device`. + let ptr = ptr.cast::<Self>(); + + // SAFETY: `ptr` is valid by the safety requirements of this function. By the above call to + // `bindings::get_device` we also own a reference to the underlying `struct device`. + unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr)) } + } + + /// Obtain the raw `struct device *`. + pub(crate) fn as_raw(&self) -> *mut bindings::device { + self.0.get() + } + + /// Convert a raw C `struct device` pointer to a `&'a Device`. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count, + /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to + /// can't drop to zero, for the duration of this function call and the entire duration when the + /// returned reference exists. + pub unsafe fn as_ref<'a>(ptr: *mut bindings::device) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + unsafe { &*ptr.cast() } + } +} + +// SAFETY: Instances of `Device` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Device { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::get_device(self.as_raw()) }; + } + + unsafe fn dec_ref(obj: ptr::NonNull<Self>) { + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::put_device(obj.cast().as_ptr()) } + } +} + +// SAFETY: As by the type invariant `Device` can be sent to any thread. 
+unsafe impl Send for Device {} + +// SAFETY: `Device` can be shared among threads because all immutable methods are protected by the +// synchronization in `struct device`. +unsafe impl Sync for Device {} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 4786d3ee1e92..145f5c397009 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -4,14 +4,10 @@ //! //! C header: [`include/uapi/asm-generic/errno-base.h`](srctree/include/uapi/asm-generic/errno-base.h) -use crate::str::CStr; +use crate::{alloc::AllocError, str::CStr}; -use alloc::{ - alloc::{AllocError, LayoutError}, - collections::TryReserveError, -}; +use alloc::alloc::LayoutError; -use core::convert::From; use core::fmt; use core::num::TryFromIntError; use core::str::Utf8Error; @@ -130,6 +126,12 @@ impl Error { self.0 } + #[cfg(CONFIG_BLOCK)] + pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { + // SAFETY: `self.0` is a valid error due to its invariant. + unsafe { bindings::errno_to_blk_status(self.0) } + } + /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr<T>(self) -> *mut T { @@ -192,12 +194,6 @@ impl From<Utf8Error> for Error { } } -impl From<TryReserveError> for Error { - fn from(_: TryReserveError) -> Error { - code::ENOMEM - } -} - impl From<LayoutError> for Error { fn from(_: LayoutError) -> Error { code::ENOMEM diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs new file mode 100644 index 000000000000..2ba03af9f036 --- /dev/null +++ b/rust/kernel/firmware.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware abstraction +//! +//! 
C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") + +use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use core::ptr::NonNull; + +/// # Invariants +/// +/// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, +/// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. +struct FwFunc( + unsafe extern "C" fn(*mut *const bindings::firmware, *const i8, *mut bindings::device) -> i32, +); + +impl FwFunc { + fn request() -> Self { + Self(bindings::request_firmware) + } + + fn request_nowarn() -> Self { + Self(bindings::firmware_request_nowarn) + } +} + +/// Abstraction around a C `struct firmware`. +/// +/// This is a simple abstraction around the C firmware API. Just like with the C API, firmware can +/// be requested. Once requested the abstraction provides direct access to the firmware buffer as +/// `&[u8]`. The firmware is released once [`Firmware`] is dropped. +/// +/// # Invariants +/// +/// The pointer is valid, and has ownership over the instance of `struct firmware`. +/// +/// The `Firmware`'s backing buffer is not modified. +/// +/// # Examples +/// +/// ```no_run +/// # use kernel::{c_str, device::Device, firmware::Firmware}; +/// +/// # fn no_run() -> Result<(), Error> { +/// # // SAFETY: *NOT* safe, just for the example to get an `ARef<Device>` instance +/// # let dev = unsafe { Device::from_raw(core::ptr::null_mut()) }; +/// +/// let fw = Firmware::request(c_str!("path/to/firmware.bin"), &dev)?; +/// let blob = fw.data(); +/// +/// # Ok(()) +/// # } +/// ``` +pub struct Firmware(NonNull<bindings::firmware>); + +impl Firmware { + fn request_internal(name: &CStr, dev: &Device, func: FwFunc) -> Result<Self> { + let mut fw: *mut bindings::firmware = core::ptr::null_mut(); + let pfw: *mut *mut bindings::firmware = &mut fw; + + // SAFETY: `pfw` is a valid pointer to a NULL initialized `bindings::firmware` pointer. 
+ // `name` and `dev` are valid as by their type invariants. + let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) }; + if ret != 0 { + return Err(Error::from_errno(ret)); + } + + // SAFETY: `func` not bailing out with a non-zero error code, guarantees that `fw` is a + // valid pointer to `bindings::firmware`. + Ok(Firmware(unsafe { NonNull::new_unchecked(fw) })) + } + + /// Send a firmware request and wait for it. See also `bindings::request_firmware`. + pub fn request(name: &CStr, dev: &Device) -> Result<Self> { + Self::request_internal(name, dev, FwFunc::request()) + } + + /// Send a request for an optional firmware module. See also + /// `bindings::firmware_request_nowarn`. + pub fn request_nowarn(name: &CStr, dev: &Device) -> Result<Self> { + Self::request_internal(name, dev, FwFunc::request_nowarn()) + } + + fn as_raw(&self) -> *mut bindings::firmware { + self.0.as_ptr() + } + + /// Returns the size of the requested firmware in bytes. + pub fn size(&self) -> usize { + // SAFETY: `self.as_raw()` is valid by the type invariant. + unsafe { (*self.as_raw()).size } + } + + /// Returns the requested firmware as `&[u8]`. + pub fn data(&self) -> &[u8] { + // SAFETY: `self.as_raw()` is valid by the type invariant. Additionally, + // `bindings::firmware` guarantees, if successfully requested, that + // `bindings::firmware::data` has a size of `bindings::firmware::size` bytes. + unsafe { core::slice::from_raw_parts((*self.as_raw()).data, self.size()) } + } +} + +impl Drop for Firmware { + fn drop(&mut self) { + // SAFETY: `self.as_raw()` is valid by the type invariant. + unsafe { bindings::release_firmware(self.as_raw()) }; + } +} + +// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, which is safe to be used from +// any thread. +unsafe impl Send for Firmware {} + +// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, references to which are safe to +// be used from any thread. 
+unsafe impl Sync for Firmware {} diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 424257284d16..68605b633e73 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -68,7 +68,7 @@ //! # a <- new_mutex!(42, "Foo::a"), //! # b: 24, //! # }); -//! let foo: Result<Pin<Box<Foo>>> = Box::pin_init(foo); +//! let foo: Result<Pin<Box<Foo>>> = Box::pin_init(foo, GFP_KERNEL); //! ``` //! //! For more information see the [`pin_init!`] macro. @@ -80,14 +80,15 @@ //! //! ```rust //! # use kernel::sync::{new_mutex, Arc, Mutex}; -//! let mtx: Result<Arc<Mutex<usize>>> = Arc::pin_init(new_mutex!(42, "example::mtx")); +//! let mtx: Result<Arc<Mutex<usize>>> = +//! Arc::pin_init(new_mutex!(42, "example::mtx"), GFP_KERNEL); //! ``` //! //! To declare an init macro/function you just return an [`impl PinInit<T, E>`]: //! //! ```rust //! # #![allow(clippy::disallowed_names)] -//! # use kernel::{sync::Mutex, prelude::*, new_mutex, init::PinInit, try_pin_init}; +//! # use kernel::{sync::Mutex, new_mutex, init::PinInit, try_pin_init}; //! #[pin_data] //! struct DriverData { //! #[pin] @@ -99,7 +100,7 @@ //! fn new() -> impl PinInit<Self, Error> { //! try_pin_init!(Self { //! status <- new_mutex!(0, "DriverData::status"), -//! buffer: Box::init(kernel::init::zeroed())?, +//! buffer: Box::init(kernel::init::zeroed(), GFP_KERNEL)?, //! }) //! } //! } @@ -121,7 +122,7 @@ //! //! ```rust //! # #![allow(unreachable_pub, clippy::disallowed_names)] -//! use kernel::{prelude::*, init, types::Opaque}; +//! use kernel::{init, types::Opaque}; //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { //! # #![allow(non_camel_case_types)] @@ -210,13 +211,13 @@ //! [`pin_init!`]: crate::pin_init! 
use crate::{ + alloc::{box_ext::BoxExt, AllocError, Flags}, error::{self, Error}, sync::UniqueArc, types::{Opaque, ScopeGuard}, }; use alloc::boxed::Box; use core::{ - alloc::AllocError, cell::UnsafeCell, convert::Infallible, marker::PhantomData, @@ -305,9 +306,9 @@ macro_rules! stack_pin_init { /// /// stack_try_pin_init!(let foo: Result<Pin<&mut Foo>, AllocError> = pin_init!(Foo { /// a <- new_mutex!(42), -/// b: Box::try_new(Bar { +/// b: Box::new(Bar { /// x: 64, -/// })?, +/// }, GFP_KERNEL)?, /// })); /// let foo = foo.unwrap(); /// pr_info!("a: {}", &*foo.a.lock()); @@ -331,9 +332,9 @@ macro_rules! stack_pin_init { /// /// stack_try_pin_init!(let foo: Pin<&mut Foo> =? pin_init!(Foo { /// a <- new_mutex!(42), -/// b: Box::try_new(Bar { +/// b: Box::new(Bar { /// x: 64, -/// })?, +/// }, GFP_KERNEL)?, /// })); /// pr_info!("a: {}", &*foo.a.lock()); /// # Ok::<_, AllocError>(()) @@ -390,7 +391,7 @@ macro_rules! stack_try_pin_init { /// }, /// }); /// # initializer } -/// # Box::pin_init(demo()).unwrap(); +/// # Box::pin_init(demo(), GFP_KERNEL).unwrap(); /// ``` /// /// Arbitrary Rust expressions can be used to set the value of a variable. @@ -412,7 +413,7 @@ macro_rules! stack_try_pin_init { /// /// ```rust /// # #![allow(clippy::disallowed_names)] -/// # use kernel::{init, pin_init, prelude::*, init::*}; +/// # use kernel::{init, pin_init, init::*}; /// # use core::pin::Pin; /// # #[pin_data] /// # struct Foo { @@ -460,7 +461,7 @@ macro_rules! stack_try_pin_init { /// # }) /// # } /// # } -/// let foo = Box::pin_init(Foo::new()); +/// let foo = Box::pin_init(Foo::new(), GFP_KERNEL); /// ``` /// /// They can also easily embed it into their own `struct`s: @@ -600,7 +601,7 @@ macro_rules! pin_init { /// impl BigBuf { /// fn new() -> impl PinInit<Self, Error> { /// try_pin_init!(Self { -/// big: Box::init(init::zeroed())?, +/// big: Box::init(init::zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// ptr: core::ptr::null_mut(), /// }? 
Error) @@ -701,7 +702,7 @@ macro_rules! init { /// impl BigBuf { /// fn new() -> impl Init<Self, Error> { /// try_init!(Self { -/// big: Box::init(zeroed())?, +/// big: Box::init(zeroed(), GFP_KERNEL)?, /// small: [0; 1024 * 1024], /// }? Error) /// } @@ -1013,7 +1014,7 @@ pub fn uninit<T, E>() -> impl Init<MaybeUninit<T>, E> { /// /// ```rust /// use kernel::{error::Error, init::init_array_from_fn}; -/// let array: Box<[usize; 1_000]> = Box::init::<Error>(init_array_from_fn(|i| i)).unwrap(); +/// let array: Box<[usize; 1_000]> = Box::init::<Error>(init_array_from_fn(|i| i), GFP_KERNEL).unwrap(); /// assert_eq!(array.len(), 1_000); /// ``` pub fn init_array_from_fn<I, const N: usize, T, E>( @@ -1057,7 +1058,7 @@ where /// ```rust /// use kernel::{sync::{Arc, Mutex}, init::pin_init_array_from_fn, new_mutex}; /// let array: Arc<[Mutex<usize>; 1_000]> = -/// Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i))).unwrap(); +/// Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i)), GFP_KERNEL).unwrap(); /// assert_eq!(array.len(), 1_000); /// ``` pub fn pin_init_array_from_fn<I, const N: usize, T, E>( @@ -1115,7 +1116,7 @@ pub trait InPlaceInit<T>: Sized { /// type. /// /// If `T: !Unpin` it will not be able to move afterwards. - fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> where E: From<AllocError>; @@ -1123,7 +1124,7 @@ pub trait InPlaceInit<T>: Sized { /// type. /// /// If `T: !Unpin` it will not be able to move afterwards. 
- fn pin_init<E>(init: impl PinInit<T, E>) -> error::Result<Pin<Self>> + fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Pin<Self>> where Error: From<E>, { @@ -1131,16 +1132,16 @@ pub trait InPlaceInit<T>: Sized { let init = unsafe { pin_init_from_closure(|slot| init.__pinned_init(slot).map_err(|e| Error::from(e))) }; - Self::try_pin_init(init) + Self::try_pin_init(init, flags) } /// Use the given initializer to in-place initialize a `T`. - fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E> + fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E> where E: From<AllocError>; /// Use the given initializer to in-place initialize a `T`. - fn init<E>(init: impl Init<T, E>) -> error::Result<Self> + fn init<E>(init: impl Init<T, E>, flags: Flags) -> error::Result<Self> where Error: From<E>, { @@ -1148,17 +1149,17 @@ pub trait InPlaceInit<T>: Sized { let init = unsafe { init_from_closure(|slot| init.__pinned_init(slot).map_err(|e| Error::from(e))) }; - Self::try_init(init) + Self::try_init(init, flags) } } impl<T> InPlaceInit<T> for Box<T> { #[inline] - fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> where E: From<AllocError>, { - let mut this = Box::try_new_uninit()?; + let mut this = <Box<_> as BoxExt<_>>::new_uninit(flags)?; let slot = this.as_mut_ptr(); // SAFETY: When init errors/panics, slot will get deallocated but not dropped, // slot is valid and will not be moved, because we pin it later. 
@@ -1168,11 +1169,11 @@ impl<T> InPlaceInit<T> for Box<T> { } #[inline] - fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E> + fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E> where E: From<AllocError>, { - let mut this = Box::try_new_uninit()?; + let mut this = <Box<_> as BoxExt<_>>::new_uninit(flags)?; let slot = this.as_mut_ptr(); // SAFETY: When init errors/panics, slot will get deallocated but not dropped, // slot is valid. @@ -1184,11 +1185,11 @@ impl<T> InPlaceInit<T> for Box<T> { impl<T> InPlaceInit<T> for UniqueArc<T> { #[inline] - fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> where E: From<AllocError>, { - let mut this = UniqueArc::try_new_uninit()?; + let mut this = UniqueArc::new_uninit(flags)?; let slot = this.as_mut_ptr(); // SAFETY: When init errors/panics, slot will get deallocated but not dropped, // slot is valid and will not be moved, because we pin it later. @@ -1198,11 +1199,11 @@ impl<T> InPlaceInit<T> for UniqueArc<T> { } #[inline] - fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E> + fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E> where E: From<AllocError>, { - let mut this = UniqueArc::try_new_uninit()?; + let mut this = UniqueArc::new_uninit(flags)?; let slot = this.as_mut_ptr(); // SAFETY: When init errors/panics, slot will get deallocated but not dropped, // slot is valid. @@ -1292,8 +1293,15 @@ impl_zeroable! { i8, i16, i32, i64, i128, isize, f32, f64, - // SAFETY: These are ZSTs, there is nothing to zero. - {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, Infallible, (), + // Note: do not add uninhabited types (such as `!` or `core::convert::Infallible`) to this list; + // creating an instance of an uninhabited type is immediate undefined behavior. 
For more on + // uninhabited/empty types, consult The Rustonomicon: + // <https://doc.rust-lang.org/stable/nomicon/exotic-sizes.html#empty-types>. The Rust Reference + // also has information on undefined behavior: + // <https://doc.rust-lang.org/stable/reference/behavior-considered-undefined.html>. + // + // SAFETY: These are inhabited ZSTs; there is nothing to zero and a valid value exists. + {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, (), // SAFETY: Type is allowed to take any value, including all zeros. {<T>} MaybeUninit<T>, diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index cb6e61b6c50b..02ecedc4ae7a 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -250,7 +250,7 @@ //! // error type is `Infallible`) we will need to drop this field if there //! // is an error later. This `DropGuard` will drop the field when it gets //! // dropped and has not yet been forgotten. -//! let t = unsafe { +//! let __t_guard = unsafe { //! ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t)) //! }; //! // Expansion of `x: 0,`: @@ -261,14 +261,14 @@ //! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) }; //! } //! // We again create a `DropGuard`. -//! let x = unsafe { +//! let __x_guard = unsafe { //! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x)) //! }; //! // Since initialization has successfully completed, we can now forget //! // the guards. This is not `mem::forget`, since we only have //! // `&DropGuard`. -//! ::core::mem::forget(x); -//! ::core::mem::forget(t); +//! ::core::mem::forget(__x_guard); +//! ::core::mem::forget(__t_guard); //! // Here we use the type checker to ensure that every field has been //! // initialized exactly once, since this is `if false` it will never get //! // executed, but still type-checked. @@ -461,16 +461,16 @@ //! { //! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) }; //! } -//! let a = unsafe { +//! 
let __a_guard = unsafe { //! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a)) //! }; //! let init = Bar::new(36); //! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? }; -//! let b = unsafe { +//! let __b_guard = unsafe { //! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b)) //! }; -//! ::core::mem::forget(b); -//! ::core::mem::forget(a); +//! ::core::mem::forget(__b_guard); +//! ::core::mem::forget(__a_guard); //! #[allow(unreachable_code, clippy::diverging_sub_expression)] //! let _ = || { //! unsafe { @@ -538,6 +538,7 @@ macro_rules! __pin_data { ), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @body({ $($fields:tt)* }), ) => { // We now use token munching to iterate through all of the fields. While doing this we @@ -560,6 +561,9 @@ macro_rules! __pin_data { @impl_generics($($impl_generics)*), // The 'ty generics', the generics that will need to be specified on the impl blocks. @ty_generics($($ty_generics)*), + // The 'decl generics', the generics that need to be specified on the struct + // definition. + @decl_generics($($decl_generics)*), // The where clause of any impl block and the declaration. @where($($($whr)*)?), // The remaining fields tokens that need to be processed. @@ -585,6 +589,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found a PhantomPinned field, this should generally be pinned! @fields_munch($field:ident : $($($(::)?core::)?marker::)?PhantomPinned, $($rest:tt)*), @@ -607,6 +612,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: ::core::marker::PhantomPinned,), @@ -623,6 +629,7 @@ macro_rules! 
__pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -640,6 +647,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: $type,), @@ -656,6 +664,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -673,6 +682,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -689,6 +699,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found the `#[pin]` attr. @fields_munch(#[pin] $($rest:tt)*), @@ -705,6 +716,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), // We do not include `#[pin]` in the list of attributes, since it is not actually an @@ -724,6 +736,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration with visibility, for simplicity we only munch the // visibility and put it into `$accum`. @@ -741,6 +754,7 @@ macro_rules! 
__pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($field $($rest)*), @pinned($($pinned)*), @@ -757,6 +771,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // Some other attribute, just put it into `$accum`. @fields_munch(#[$($attr:tt)*] $($rest:tt)*), @@ -773,6 +788,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -789,6 +805,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the end of the fields, plus an optional additional comma, since we added one // before and the user is also allowed to put a trailing comma. @@ -802,7 +819,7 @@ macro_rules! __pin_data { ) => { // Declare the struct with all fields in the correct order. $($struct_attrs)* - $vis struct $name <$($impl_generics)*> + $vis struct $name <$($decl_generics)*> where $($whr)* { $($fields)* @@ -1192,14 +1209,14 @@ macro_rules! __init_internal { // We use `paste!` to create new hygiene for `$field`. ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. - let [<$field>] = unsafe { + let [< __ $field _guard >] = unsafe { $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; $crate::__init_internal!(init_slot($use_data): @data($data), @slot($slot), - @guards([<$field>], $($guards,)*), + @guards([< __ $field _guard >], $($guards,)*), @munch_fields($($rest)*), ); } @@ -1223,14 +1240,14 @@ macro_rules! 
__init_internal { // We use `paste!` to create new hygiene for `$field`. ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. - let [<$field>] = unsafe { + let [< __ $field _guard >] = unsafe { $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; $crate::__init_internal!(init_slot(): @data($data), @slot($slot), - @guards([<$field>], $($guards,)*), + @guards([< __ $field _guard >], $($guards,)*), @munch_fields($($rest)*), ); } @@ -1255,14 +1272,14 @@ macro_rules! __init_internal { // We use `paste!` to create new hygiene for `$field`. ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. - let [<$field>] = unsafe { + let [< __ $field _guard >] = unsafe { $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; $crate::__init_internal!(init_slot($($use_data)?): @data($data), @slot($slot), - @guards([<$field>], $($guards,)*), + @guards([< __ $field _guard >], $($guards,)*), @munch_fields($($rest)*), ); } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index be68d5e567b1..e6b7d3a80bbc 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -12,11 +12,9 @@ //! do so first instead of bypassing this crate. #![no_std] -#![feature(allocator_api)] #![feature(coerce_unsized)] #![feature(dispatch_from_dyn)] #![feature(new_uninit)] -#![feature(offset_of)] #![feature(receiver_trait)] #![feature(unsize)] @@ -28,11 +26,14 @@ compile_error!("Missing kernel configuration for conditional compilation"); // Allow proc-macros to refer to `::kernel` inside the `kernel` crate (this crate). 
extern crate self as kernel; -#[cfg(not(test))] -#[cfg(not(testlib))] -mod allocator; +pub mod alloc; +#[cfg(CONFIG_BLOCK)] +pub mod block; mod build_assert; +pub mod device; pub mod error; +#[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] +pub mod firmware; pub mod init; pub mod ioctl; #[cfg(CONFIG_KUNIT)] @@ -65,7 +66,7 @@ const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; /// The top level entrypoint to implementing a kernel module. /// /// For any teardown or cleanup operations, your type may implement [`Drop`]. -pub trait Module: Sized + Sync { +pub trait Module: Sized + Sync + Send { /// Called at module initialization time. /// /// Use this method to perform whatever setup or registration your module @@ -92,6 +93,13 @@ impl ThisModule { pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule { ThisModule(ptr) } + + /// Access the raw pointer for this module. + /// + /// It is up to the user to use it correctly. + pub const fn as_ptr(&self) -> *mut bindings::module { + self.0 + } } #[cfg(not(any(testlib, test)))] diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 96e09c6e8530..fd40b703d224 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -6,7 +6,7 @@ //! //! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h). -use crate::{bindings, error::*, prelude::*, str::CStr, types::Opaque}; +use crate::{error::*, prelude::*, types::Opaque}; use core::marker::PhantomData; @@ -640,6 +640,10 @@ pub struct Registration { drivers: Pin<&'static mut [DriverVTable]>, } +// SAFETY: The only action allowed in a `Registration` instance is dropping it, which is safe to do +// from any thread because `phy_drivers_unregister` can be called from any thread context. +unsafe impl Send for Registration {} + impl Registration { /// Registers a PHY driver. 
pub fn register( diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index ae21600970b3..b37a0b3180fb 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -14,6 +14,8 @@ #[doc(no_inline)] pub use core::pin::Pin; +pub use crate::alloc::{box_ext::BoxExt, flags::*, vec_ext::VecExt}; + #[doc(no_inline)] pub use alloc::{boxed::Box, vec::Vec}; diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index 9b13aca832c2..a78aa3514a0a 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -13,9 +13,6 @@ use core::{ use crate::str::RawFormatter; -#[cfg(CONFIG_PRINTK)] -use crate::bindings; - // Called from `vsprintf` with format specifier `%pA`. #[no_mangle] unsafe extern "C" fn rust_fmt_argument( @@ -35,8 +32,6 @@ unsafe extern "C" fn rust_fmt_argument( /// Public but hidden since it should only be used from public macros. #[doc(hidden)] pub mod format_strings { - use crate::bindings; - /// The length we copy from the `KERN_*` kernel prefixes. const LENGTH_PREFIX: usize = 2; diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs index 388d6a5147a2..39679a960c1a 100644 --- a/rust/kernel/std_vendor.rs +++ b/rust/kernel/std_vendor.rs @@ -146,15 +146,16 @@ macro_rules! dbg { // `$val` expression could be a block (`{ .. }`), in which case the `pr_info!` // will be malformed. () => { - $crate::pr_info!("[{}:{}]\n", ::core::file!(), ::core::line!()) + $crate::pr_info!("[{}:{}:{}]\n", ::core::file!(), ::core::line!(), ::core::column!()) }; ($val:expr $(,)?) 
=> { // Use of `match` here is intentional because it affects the lifetimes // of temporaries - https://stackoverflow.com/a/48732525/1063961 match $val { tmp => { - $crate::pr_info!("[{}:{}] {} = {:#?}\n", - ::core::file!(), ::core::line!(), ::core::stringify!($val), &tmp); + $crate::pr_info!("[{}:{}:{}] {} = {:#?}\n", + ::core::file!(), ::core::line!(), ::core::column!(), + ::core::stringify!($val), &tmp); tmp } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 925ced8fdc61..bb8d4f41475b 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -2,15 +2,12 @@ //! String representations. -use alloc::alloc::AllocError; +use crate::alloc::{flags::*, vec_ext::VecExt, AllocError}; use alloc::vec::Vec; use core::fmt::{self, Write}; -use core::ops::{self, Deref, Index}; +use core::ops::{self, Deref, DerefMut, Index}; -use crate::{ - bindings, - error::{code::*, Error}, -}; +use crate::error::{code::*, Error}; /// Byte string without UTF-8 validity guarantee. #[repr(transparent)] @@ -236,6 +233,19 @@ impl CStr { unsafe { core::mem::transmute(bytes) } } + /// Creates a mutable [`CStr`] from a `[u8]` without performing any + /// additional checks. + /// + /// # Safety + /// + /// `bytes` *must* end with a `NUL` byte, and should only have a single + /// `NUL` byte (or the string will be truncated). + #[inline] + pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr { + // SAFETY: Properties of `bytes` guaranteed by the safety precondition. + unsafe { &mut *(bytes as *mut [u8] as *mut CStr) } + } + /// Returns a C pointer to the string. #[inline] pub const fn as_char_ptr(&self) -> *const core::ffi::c_char { @@ -299,6 +309,70 @@ impl CStr { pub fn to_cstring(&self) -> Result<CString, AllocError> { CString::try_from(self) } + + /// Converts this [`CStr`] to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. 
+ /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase()`]. + /// + /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase + pub fn make_ascii_lowercase(&mut self) { + // INVARIANT: This doesn't introduce or remove NUL bytes in the C + // string. + self.0.make_ascii_lowercase(); + } + + /// Converts this [`CStr`] to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase()`]. + /// + /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase + pub fn make_ascii_uppercase(&mut self) { + // INVARIANT: This doesn't introduce or remove NUL bytes in the C + // string. + self.0.make_ascii_uppercase(); + } + + /// Returns a [`CString`] copy of this [`CStr`] where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: str::make_ascii_lowercase + pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> { + let mut s = self.to_cstring()?; + + s.make_ascii_lowercase(); + + Ok(s) + } + + /// Returns a [`CString`] copy of this [`CStr`] where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 
+ /// + /// [`make_ascii_uppercase`]: str::make_ascii_uppercase + pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> { + let mut s = self.to_cstring()?; + + s.make_ascii_uppercase(); + + Ok(s) + } } impl fmt::Display for CStr { @@ -729,7 +803,7 @@ impl CString { let size = f.bytes_written(); // Allocate a vector with the required number of bytes, and write to it. - let mut buf = Vec::try_with_capacity(size)?; + let mut buf = <Vec<_> as VecExt<_>>::with_capacity(size, GFP_KERNEL)?; // SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes. let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) }; f.write_fmt(args)?; @@ -764,13 +838,21 @@ impl Deref for CString { } } +impl DerefMut for CString { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: A `CString` is always NUL-terminated and contains no other + // NUL bytes. + unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) } + } +} + impl<'a> TryFrom<&'a CStr> for CString { type Error = AllocError; fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> { let mut buf = Vec::new(); - buf.try_extend_from_slice(cstr.as_bytes_with_nul()) + <Vec<_> as VecExt<_>>::extend_from_slice(&mut buf, cstr.as_bytes_with_nul(), GFP_KERNEL) .map_err(|_| AllocError)?; // INVARIANT: The `CStr` and `CString` types have the same invariants for diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index c983f63fd56e..0ab20975a3b5 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -37,6 +37,12 @@ impl LockClassKey { } } +impl Default for LockClassKey { + fn default() -> Self { + Self::new() + } +} + /// Defines a new static lock class and returns a pointer to it. #[doc(hidden)] #[macro_export] diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 7d4c4bf58388..3673496c2363 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -16,7 +16,7 @@ //! 
[`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html use crate::{ - bindings, + alloc::{box_ext::BoxExt, AllocError, Flags}, error::{self, Error}, init::{self, InPlaceInit, Init, PinInit}, try_init, @@ -24,7 +24,7 @@ use crate::{ }; use alloc::boxed::Box; use core::{ - alloc::{AllocError, Layout}, + alloc::Layout, fmt, marker::{PhantomData, Unsize}, mem::{ManuallyDrop, MaybeUninit}, @@ -57,7 +57,7 @@ mod std_vendor; /// } /// /// // Create a refcounted instance of `Example`. -/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?; /// /// // Get a new pointer to `obj` and increment the refcount. /// let cloned = obj.clone(); @@ -96,7 +96,7 @@ mod std_vendor; /// } /// } /// -/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?; /// obj.use_reference(); /// obj.take_over(); /// # Ok::<(), Error>(()) @@ -119,7 +119,7 @@ mod std_vendor; /// impl MyTrait for Example {} /// /// // `obj` has type `Arc<Example>`. -/// let obj: Arc<Example> = Arc::try_new(Example)?; +/// let obj: Arc<Example> = Arc::new(Example, GFP_KERNEL)?; /// /// // `coerced` has type `Arc<dyn MyTrait>`. /// let coerced: Arc<dyn MyTrait> = obj; @@ -137,6 +137,39 @@ struct ArcInner<T: ?Sized> { data: T, } +impl<T: ?Sized> ArcInner<T> { + /// Converts a pointer to the contents of an [`Arc`] into a pointer to the [`ArcInner`]. + /// + /// # Safety + /// + /// `ptr` must have been returned by a previous call to [`Arc::into_raw`], and the `Arc` must + /// not yet have been destroyed. + unsafe fn container_of(ptr: *const T) -> NonNull<ArcInner<T>> { + let refcount_layout = Layout::new::<bindings::refcount_t>(); + // SAFETY: The caller guarantees that the pointer is valid. 
+ let val_layout = Layout::for_value(unsafe { &*ptr }); + // SAFETY: We're computing the layout of a real struct that existed when compiling this + // binary, so its layout is not so large that it can trigger arithmetic overflow. + let val_offset = unsafe { refcount_layout.extend(val_layout).unwrap_unchecked().1 }; + + // Pointer casts leave the metadata unchanged. This is okay because the metadata of `T` and + // `ArcInner<T>` is the same since `ArcInner` is a struct with `T` as its last field. + // + // This is documented at: + // <https://doc.rust-lang.org/std/ptr/trait.Pointee.html>. + let ptr = ptr as *const ArcInner<T>; + + // SAFETY: The pointer is in-bounds of an allocation both before and after offsetting the + // pointer, since it originates from a previous call to `Arc::into_raw` on an `Arc` that is + // still valid. + let ptr = unsafe { ptr.byte_sub(val_offset) }; + + // SAFETY: The pointer can't be null since you can't have an `ArcInner<T>` value at the null + // address. + unsafe { NonNull::new_unchecked(ptr.cast_mut()) } + } +} + // This is to allow [`Arc`] (and variants) to be used as the type of `self`. impl<T: ?Sized> core::ops::Receiver for Arc<T> {} @@ -162,7 +195,7 @@ unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {} impl<T> Arc<T> { /// Constructs a new reference counted instance of `T`. - pub fn try_new(contents: T) -> Result<Self, AllocError> { + pub fn new(contents: T, flags: Flags) -> Result<Self, AllocError> { // INVARIANT: The refcount is initialised to a non-zero value. let value = ArcInner { // SAFETY: There are no safety requirements for this FFI call. @@ -170,7 +203,7 @@ impl<T> Arc<T> { data: contents, }; - let inner = Box::try_new(value)?; + let inner = <Box<_> as BoxExt<_>>::new(value, flags)?; // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new // `Arc` object. @@ -181,22 +214,22 @@ impl<T> Arc<T> { /// /// If `T: !Unpin` it will not be able to move afterwards. 
#[inline] - pub fn pin_init<E>(init: impl PinInit<T, E>) -> error::Result<Self> + pub fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Self> where Error: From<E>, { - UniqueArc::pin_init(init).map(|u| u.into()) + UniqueArc::pin_init(init, flags).map(|u| u.into()) } /// Use the given initializer to in-place initialize a `T`. /// /// This is equivalent to [`Arc<T>::pin_init`], since an [`Arc`] is always pinned. #[inline] - pub fn init<E>(init: impl Init<T, E>) -> error::Result<Self> + pub fn init<E>(init: impl Init<T, E>, flags: Flags) -> error::Result<Self> where Error: From<E>, { - UniqueArc::init(init).map(|u| u.into()) + UniqueArc::init(init, flags).map(|u| u.into()) } } @@ -232,27 +265,13 @@ impl<T: ?Sized> Arc<T> { /// `ptr` must have been returned by a previous call to [`Arc::into_raw`]. Additionally, it /// must not be called more than once for each previous call to [`Arc::into_raw`]. pub unsafe fn from_raw(ptr: *const T) -> Self { - let refcount_layout = Layout::new::<bindings::refcount_t>(); - // SAFETY: The caller guarantees that the pointer is valid. - let val_layout = Layout::for_value(unsafe { &*ptr }); - // SAFETY: We're computing the layout of a real struct that existed when compiling this - // binary, so its layout is not so large that it can trigger arithmetic overflow. - let val_offset = unsafe { refcount_layout.extend(val_layout).unwrap_unchecked().1 }; - - // Pointer casts leave the metadata unchanged. This is okay because the metadata of `T` and - // `ArcInner<T>` is the same since `ArcInner` is a struct with `T` as its last field. - // - // This is documented at: - // <https://doc.rust-lang.org/std/ptr/trait.Pointee.html>. - let ptr = ptr as *const ArcInner<T>; - - // SAFETY: The pointer is in-bounds of an allocation both before and after offsetting the - // pointer, since it originates from a previous call to `Arc::into_raw` and is still valid. 
- let ptr = unsafe { ptr.byte_sub(val_offset) }; + // SAFETY: The caller promises that this pointer originates from a call to `into_raw` on an + // `Arc` that is still valid. + let ptr = unsafe { ArcInner::container_of(ptr) }; // SAFETY: By the safety requirements we know that `ptr` came from `Arc::into_raw`, so the // reference count held then will be owned by the new `Arc` object. - unsafe { Self::from_inner(NonNull::new_unchecked(ptr.cast_mut())) } + unsafe { Self::from_inner(ptr) } } /// Returns an [`ArcBorrow`] from the given [`Arc`]. @@ -271,6 +290,68 @@ impl<T: ?Sized> Arc<T> { pub fn ptr_eq(this: &Self, other: &Self) -> bool { core::ptr::eq(this.ptr.as_ptr(), other.ptr.as_ptr()) } + + /// Converts this [`Arc`] into a [`UniqueArc`], or destroys it if it is not unique. + /// + /// When this destroys the `Arc`, it does so while properly avoiding races. This means that + /// this method will never call the destructor of the value. + /// + /// # Examples + /// + /// ``` + /// use kernel::sync::{Arc, UniqueArc}; + /// + /// let arc = Arc::new(42, GFP_KERNEL)?; + /// let unique_arc = arc.into_unique_or_drop(); + /// + /// // The above conversion should succeed since refcount of `arc` is 1. + /// assert!(unique_arc.is_some()); + /// + /// assert_eq!(*(unique_arc.unwrap()), 42); + /// + /// # Ok::<(), Error>(()) + /// ``` + /// + /// ``` + /// use kernel::sync::{Arc, UniqueArc}; + /// + /// let arc = Arc::new(42, GFP_KERNEL)?; + /// let another = arc.clone(); + /// + /// let unique_arc = arc.into_unique_or_drop(); + /// + /// // The above conversion should fail since refcount of `arc` is >1. + /// assert!(unique_arc.is_none()); + /// + /// # Ok::<(), Error>(()) + /// ``` + pub fn into_unique_or_drop(self) -> Option<Pin<UniqueArc<T>>> { + // We will manually manage the refcount in this method, so we disable the destructor. + let me = ManuallyDrop::new(self); + // SAFETY: We own a refcount, so the pointer is still valid. 
+ let refcount = unsafe { me.ptr.as_ref() }.refcount.get(); + + // If the refcount reaches a non-zero value, then we have destroyed this `Arc` and will + // return without further touching the `Arc`. If the refcount reaches zero, then there are + // no other arcs, and we can create a `UniqueArc`. + // + // SAFETY: We own a refcount, so the pointer is not dangling. + let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) }; + if is_zero { + // SAFETY: We have exclusive access to the arc, so we can perform unsynchronized + // accesses to the refcount. + unsafe { core::ptr::write(refcount, bindings::REFCOUNT_INIT(1)) }; + + // INVARIANT: We own the only refcount to this arc, so we may create a `UniqueArc`. We + // must pin the `UniqueArc` because the value was previously in an `Arc`, and `Arc`s pin + // their values. + Some(Pin::from(UniqueArc { + inner: ManuallyDrop::into_inner(me), + })) + } else { + None + } + } } impl<T: 'static> ForeignOwnable for Arc<T> { @@ -387,7 +468,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> { /// e.into() /// } /// -/// let obj = Arc::try_new(Example)?; +/// let obj = Arc::new(Example, GFP_KERNEL)?; /// let cloned = do_something(obj.as_arc_borrow()); /// /// // Assert that both `obj` and `cloned` point to the same underlying object. @@ -411,7 +492,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> { /// } /// } /// -/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?; /// obj.as_arc_borrow().use_reference(); /// # Ok::<(), Error>(()) /// ``` @@ -453,6 +534,27 @@ impl<T: ?Sized> ArcBorrow<'_, T> { _p: PhantomData, } } + + /// Creates an [`ArcBorrow`] to an [`Arc`] that has previously been deconstructed with + /// [`Arc::into_raw`]. + /// + /// # Safety + /// + /// * The provided pointer must originate from a call to [`Arc::into_raw`]. + /// * For the duration of the lifetime annotated on this `ArcBorrow`, the reference count must + /// not hit zero. 
+ /// * For the duration of the lifetime annotated on this `ArcBorrow`, there must not be a + /// [`UniqueArc`] reference to this value. + pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: The caller promises that this pointer originates from a call to `into_raw` on an + // `Arc` that is still valid. + let ptr = unsafe { ArcInner::container_of(ptr) }; + + // SAFETY: The caller promises that the value remains valid since the reference count must + // not hit zero, and no mutable reference will be created since that would involve a + // `UniqueArc`. + unsafe { Self::new(ptr) } + } } impl<T: ?Sized> From<ArcBorrow<'_, T>> for Arc<T> { @@ -499,7 +601,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> { /// } /// /// fn test() -> Result<Arc<Example>> { -/// let mut x = UniqueArc::try_new(Example { a: 10, b: 20 })?; +/// let mut x = UniqueArc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?; /// x.a += 1; /// x.b += 1; /// Ok(x.into()) @@ -522,7 +624,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> { /// } /// /// fn test() -> Result<Arc<Example>> { -/// let x = UniqueArc::try_new_uninit()?; +/// let x = UniqueArc::new_uninit(GFP_KERNEL)?; /// Ok(x.write(Example { a: 10, b: 20 }).into()) /// } /// @@ -542,7 +644,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> { /// } /// /// fn test() -> Result<Arc<Example>> { -/// let mut pinned = Pin::from(UniqueArc::try_new(Example { a: 10, b: 20 })?); +/// let mut pinned = Pin::from(UniqueArc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?); /// // We can modify `pinned` because it is `Unpin`. /// pinned.as_mut().a += 1; /// Ok(pinned.into()) @@ -556,21 +658,24 @@ pub struct UniqueArc<T: ?Sized> { impl<T> UniqueArc<T> { /// Tries to allocate a new [`UniqueArc`] instance. - pub fn try_new(value: T) -> Result<Self, AllocError> { + pub fn new(value: T, flags: Flags) -> Result<Self, AllocError> { Ok(Self { // INVARIANT: The newly-created object has a refcount of 1. 
- inner: Arc::try_new(value)?, + inner: Arc::new(value, flags)?, }) } /// Tries to allocate a new [`UniqueArc`] instance whose contents are not initialised yet. - pub fn try_new_uninit() -> Result<UniqueArc<MaybeUninit<T>>, AllocError> { + pub fn new_uninit(flags: Flags) -> Result<UniqueArc<MaybeUninit<T>>, AllocError> { // INVARIANT: The refcount is initialised to a non-zero value. - let inner = Box::try_init::<AllocError>(try_init!(ArcInner { - // SAFETY: There are no safety requirements for this FFI call. - refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }), - data <- init::uninit::<T, AllocError>(), - }? AllocError))?; + let inner = Box::try_init::<AllocError>( + try_init!(ArcInner { + // SAFETY: There are no safety requirements for this FFI call. + refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }), + data <- init::uninit::<T, AllocError>(), + }? AllocError), + flags, + )?; Ok(UniqueArc { // INVARIANT: The newly-created object has a refcount of 1. // SAFETY: The pointer from the `Box` is valid. diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index 0c3671caffeb..2b306afbe56d 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -7,7 +7,6 @@ use super::{lock::Backend, lock::Guard, LockClassKey}; use crate::{ - bindings, init::PinInit, pin_init, str::CStr, @@ -75,7 +74,7 @@ pub use new_condvar; /// Box::pin_init(pin_init!(Example { /// value <- new_mutex!(0), /// value_changed <- new_condvar!(), -/// })) +/// }), GFP_KERNEL) /// } /// ``` /// diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs index 5b5c8efe427a..f6c34ca4d819 100644 --- a/rust/kernel/sync/lock.rs +++ b/rust/kernel/sync/lock.rs @@ -6,7 +6,7 @@ //! spinlocks, raw spinlocks) to be provided with minimal effort. 
use super::LockClassKey; -use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard}; +use crate::{init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard}; use core::{cell::UnsafeCell, marker::PhantomData, marker::PhantomPinned}; use macros::pin_data; diff --git a/rust/kernel/sync/lock/mutex.rs b/rust/kernel/sync/lock/mutex.rs index ef4c4634d294..30632070ee67 100644 --- a/rust/kernel/sync/lock/mutex.rs +++ b/rust/kernel/sync/lock/mutex.rs @@ -4,8 +4,6 @@ //! //! This module allows Rust code to use the kernel's `struct mutex`. -use crate::bindings; - /// Creates a [`Mutex`] initialiser with the given name and a newly-created lock class. /// /// It uses the name if one is given, otherwise it generates one based on the file name and line @@ -60,7 +58,7 @@ pub use new_mutex; /// } /// /// // Allocate a boxed `Example`. -/// let e = Box::pin_init(Example::new())?; +/// let e = Box::pin_init(Example::new(), GFP_KERNEL)?; /// assert_eq!(e.c, 10); /// assert_eq!(e.d.lock().a, 20); /// assert_eq!(e.d.lock().b, 30); diff --git a/rust/kernel/sync/lock/spinlock.rs b/rust/kernel/sync/lock/spinlock.rs index 0b22c635634f..ea5c5bc1ce12 100644 --- a/rust/kernel/sync/lock/spinlock.rs +++ b/rust/kernel/sync/lock/spinlock.rs @@ -4,8 +4,6 @@ //! //! This module allows Rust code to use the kernel's `spinlock_t`. -use crate::bindings; - /// Creates a [`SpinLock`] initialiser with the given name and a newly-created lock class. /// /// It uses the name if one is given, otherwise it generates one based on the file name and line @@ -58,7 +56,7 @@ pub use new_spinlock; /// } /// /// // Allocate a boxed `Example`. 
-/// let e = Box::pin_init(Example::new())?; +/// let e = Box::pin_init(Example::new(), GFP_KERNEL)?; /// assert_eq!(e.c, 10); /// assert_eq!(e.d.lock().a, 20); /// assert_eq!(e.d.lock().b, 30); diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index ca6e7e31d71c..55dff7e088bf 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). -use crate::{bindings, types::Opaque}; +use crate::types::Opaque; use core::{ ffi::{c_int, c_long, c_uint}, marker::PhantomData, diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs index 25a896eed468..e3bb5e89f88d 100644 --- a/rust/kernel/time.rs +++ b/rust/kernel/time.rs @@ -4,6 +4,12 @@ //! //! This module contains the kernel APIs related to time and timers that //! have been ported or wrapped for usage by Rust code in the kernel. +//! +//! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h). +//! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h). + +/// The number of nanoseconds per millisecond. +pub const NSEC_PER_MSEC: i64 = bindings::NSEC_PER_MSEC as i64; /// The time unit of Linux kernel. One jiffy equals (1/HZ) second. pub type Jiffies = core::ffi::c_ulong; @@ -18,3 +24,60 @@ pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies { // matter what the argument is. unsafe { bindings::__msecs_to_jiffies(msecs) } } + +/// A Rust wrapper around a `ktime_t`. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct Ktime { + inner: bindings::ktime_t, +} + +impl Ktime { + /// Create a `Ktime` from a raw `ktime_t`. + #[inline] + pub fn from_raw(inner: bindings::ktime_t) -> Self { + Self { inner } + } + + /// Get the current time using `CLOCK_MONOTONIC`. + #[inline] + pub fn ktime_get() -> Self { + // SAFETY: It is always safe to call `ktime_get` outside of NMI context. + Self::from_raw(unsafe { bindings::ktime_get() }) + } + + /// Divide the number of nanoseconds by a compile-time constant. 
+ #[inline] + fn divns_constant<const DIV: i64>(self) -> i64 { + self.to_ns() / DIV + } + + /// Returns the number of nanoseconds. + #[inline] + pub fn to_ns(self) -> i64 { + self.inner + } + + /// Returns the number of milliseconds. + #[inline] + pub fn to_ms(self) -> i64 { + self.divns_constant::<NSEC_PER_MSEC>() + } +} + +/// Returns the number of milliseconds between two ktimes. +#[inline] +pub fn ktime_ms_delta(later: Ktime, earlier: Ktime) -> i64 { + (later - earlier).to_ms() +} + +impl core::ops::Sub for Ktime { + type Output = Ktime; + + #[inline] + fn sub(self, other: Ktime) -> Ktime { + Self { + inner: self.inner - other.inner, + } + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index aa77bad9bce4..2e7c9008621f 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -157,11 +157,11 @@ impl ForeignOwnable for () { /// let mut vec = /// ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len())); /// -/// vec.try_push(10u8)?; +/// vec.push(10u8, GFP_KERNEL)?; /// if arg { /// return Ok(()); /// } -/// vec.try_push(20u8)?; +/// vec.push(20u8, GFP_KERNEL)?; /// Ok(()) /// } /// @@ -270,7 +270,7 @@ impl<T> Opaque<T> { } /// Returns a raw pointer to the opaque data. - pub fn get(&self) -> *mut T { + pub const fn get(&self) -> *mut T { UnsafeCell::get(&self.value).cast::<T>() } diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 480cb292e7c2..1cec63a2aea8 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -33,7 +33,6 @@ //! we do not need to specify ids for the fields. //! //! ``` -//! use kernel::prelude::*; //! use kernel::sync::Arc; //! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem}; //! @@ -53,7 +52,7 @@ //! Arc::pin_init(pin_init!(MyStruct { //! value, //! work <- new_work!("MyStruct::work"), -//! })) +//! }), GFP_KERNEL) //! } //! } //! @@ -75,7 +74,6 @@ //! The following example shows how multiple `work_struct` fields can be used: //! //! 
``` -//! use kernel::prelude::*; //! use kernel::sync::Arc; //! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem}; //! @@ -101,7 +99,7 @@ //! value_2, //! work_1 <- new_work!("MyStruct::work_1"), //! work_2 <- new_work!("MyStruct::work_2"), -//! })) +//! }), GFP_KERNEL) //! } //! } //! @@ -132,11 +130,9 @@ //! //! C header: [`include/linux/workqueue.h`](srctree/include/linux/workqueue.h) -use crate::{bindings, prelude::*, sync::Arc, sync::LockClassKey, types::Opaque}; -use alloc::alloc::AllocError; -use alloc::boxed::Box; +use crate::alloc::{AllocError, Flags}; +use crate::{prelude::*, sync::Arc, sync::LockClassKey, types::Opaque}; use core::marker::PhantomData; -use core::pin::Pin; /// Creates a [`Work`] initialiser with the given name and a newly-created lock class. #[macro_export] @@ -210,13 +206,17 @@ impl Queue { /// Tries to spawn the given function or closure as a work item. /// /// This method can fail because it allocates memory to store the work item. - pub fn try_spawn<T: 'static + Send + FnOnce()>(&self, func: T) -> Result<(), AllocError> { + pub fn try_spawn<T: 'static + Send + FnOnce()>( + &self, + flags: Flags, + func: T, + ) -> Result<(), AllocError> { let init = pin_init!(ClosureWork { work <- new_work!("Queue::try_spawn"), func: Some(func), }); - self.enqueue(Box::pin_init(init).map_err(|_| AllocError)?); + self.enqueue(Box::pin_init(init, flags).map_err(|_| AllocError)?); Ok(()) } } @@ -346,8 +346,10 @@ pub trait WorkItem<const ID: u64 = 0> { /// This is a helper type used to associate a `work_struct` with the [`WorkItem`] that uses it. 
/// /// [`run`]: WorkItemPointer::run +#[pin_data] #[repr(transparent)] pub struct Work<T: ?Sized, const ID: u64 = 0> { + #[pin] work: Opaque<bindings::work_struct>, _inner: PhantomData<T>, } @@ -369,21 +371,22 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> { where T: WorkItem<ID>, { - // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as the work - // item function. - unsafe { - kernel::init::pin_init_from_closure(move |slot| { - let slot = Self::raw_get(slot); - bindings::init_work_with_key( - slot, - Some(T::Pointer::run), - false, - name.as_char_ptr(), - key.as_ptr(), - ); - Ok(()) - }) - } + pin_init!(Self { + work <- Opaque::ffi_init(|slot| { + // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as + // the work item function. + unsafe { + bindings::init_work_with_key( + slot, + Some(T::Pointer::run), + false, + name.as_char_ptr(), + key.as_ptr(), + ) + } + }), + _inner: PhantomData, + }) } /// Get a pointer to the inner `work_struct`. @@ -408,7 +411,6 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> { /// like this: /// /// ```no_run -/// use kernel::prelude::*; /// use kernel::workqueue::{impl_has_work, Work}; /// /// struct MyWorkItem { |