diff options
Diffstat (limited to 'rust')
-rw-r--r-- | rust/Makefile | 76 | ||||
-rw-r--r-- | rust/bindings/bindings_helper.h | 7 | ||||
-rw-r--r-- | rust/bindings/lib.rs | 1 | ||||
-rw-r--r-- | rust/helpers.c | 51 | ||||
-rw-r--r-- | rust/kernel/alloc.rs | 17 | ||||
-rw-r--r-- | rust/kernel/alloc/allocator.rs | 19 | ||||
-rw-r--r-- | rust/kernel/block.rs | 5 | ||||
-rw-r--r-- | rust/kernel/block/mq.rs | 98 | ||||
-rw-r--r-- | rust/kernel/block/mq/gen_disk.rs | 198 | ||||
-rw-r--r-- | rust/kernel/block/mq/operations.rs | 245 | ||||
-rw-r--r-- | rust/kernel/block/mq/raw_writer.rs | 55 | ||||
-rw-r--r-- | rust/kernel/block/mq/request.rs | 253 | ||||
-rw-r--r-- | rust/kernel/block/mq/tag_set.rs | 86 | ||||
-rw-r--r-- | rust/kernel/device.rs | 105 | ||||
-rw-r--r-- | rust/kernel/error.rs | 6 | ||||
-rw-r--r-- | rust/kernel/firmware.rs | 117 | ||||
-rw-r--r-- | rust/kernel/init.rs | 13 | ||||
-rw-r--r-- | rust/kernel/lib.rs | 7 | ||||
-rw-r--r-- | rust/kernel/page.rs | 250 | ||||
-rw-r--r-- | rust/kernel/types.rs | 64 | ||||
-rw-r--r-- | rust/kernel/uaccess.rs | 388 | ||||
-rw-r--r-- | rust/kernel/workqueue.rs | 16 | ||||
-rw-r--r-- | rust/macros/lib.rs | 45 | ||||
-rw-r--r-- | rust/macros/module.rs | 18 | ||||
-rw-r--r-- | rust/uapi/lib.rs | 1 |
25 files changed, 2038 insertions, 103 deletions
diff --git a/rust/Makefile b/rust/Makefile index f70d5e244fee..1f10f92737f2 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -44,17 +44,10 @@ rustc_sysroot := $(shell MAKEFLAGS= $(RUSTC) $(rust_flags) --print sysroot) rustc_host_target := $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library -ifeq ($(quiet),silent_) -cargo_quiet=-q +ifneq ($(quiet),) rust_test_quiet=-q rustdoc_test_quiet=--test-args -q rustdoc_test_kernel_quiet=>/dev/null -else ifeq ($(quiet),quiet_) -rust_test_quiet=-q -rustdoc_test_quiet=--test-args -q -rustdoc_test_kernel_quiet=>/dev/null -else -cargo_quiet=--verbose endif core-cfgs = \ @@ -135,22 +128,21 @@ quiet_cmd_rustc_test_library = RUSTC TL $< @$(objtree)/include/generated/rustc_cfg $(rustc_target_flags) \ --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ --out-dir $(objtree)/$(obj)/test --cfg testlib \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< -rusttestlib-build_error: $(src)/build_error.rs rusttest-prepare FORCE +rusttestlib-build_error: $(src)/build_error.rs FORCE +$(call if_changed,rustc_test_library) rusttestlib-macros: private rustc_target_flags = --extern proc_macro rusttestlib-macros: private rustc_test_library_proc = yes -rusttestlib-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttestlib-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-bindings: $(src)/bindings/lib.rs rusttest-prepare FORCE +rusttestlib-bindings: $(src)/bindings/lib.rs FORCE +$(call if_changed,rustc_test_library) -rusttestlib-uapi: $(src)/uapi/lib.rs rusttest-prepare FORCE +rusttestlib-uapi: $(src)/uapi/lib.rs FORCE +$(call if_changed,rustc_test_library) quiet_cmd_rustdoc_test = RUSTDOC T $< @@ -159,7 +151,7 @@ quiet_cmd_rustdoc_test = RUSTDOC T $< $(RUSTDOC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) $(rustdoc_test_target_flags) \ - --sysroot $(objtree)/$(obj)/test/sysroot $(rustdoc_test_quiet) \ + $(rustdoc_test_quiet) \ -L$(objtree)/$(obj)/test --output $(rustdoc_output) \ --crate-name $(subst rusttest-,,$@) $< @@ -192,7 +184,6 @@ quiet_cmd_rustc_test = RUSTC T $< $(RUSTC) --test $(rust_common_flags) \ @$(objtree)/include/generated/rustc_cfg \ $(rustc_target_flags) --out-dir $(objtree)/$(obj)/test \ - --sysroot $(objtree)/$(obj)/test/sysroot \ -L$(objtree)/$(obj)/test \ --crate-name $(subst rusttest-,,$@) $<; \ $(objtree)/$(obj)/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ @@ -200,60 +191,15 @@ quiet_cmd_rustc_test = RUSTC T $< rusttest: rusttest-macros rusttest-kernel -# This prepares a custom sysroot with our custom `alloc` instead of -# the standard one. -# -# This requires several hacks: -# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, -# including third-party crates that need to be downloaded, plus custom -# `build.rs` steps. Thus hardcoding things here is not maintainable. -# - `cargo` knows how to build the standard library, but it is an unstable -# feature so far (`-Zbuild-std`). -# - `cargo` only considers the use case of building the standard library -# to use it in a given package. Thus we need to create a dummy package -# and pick the generated libraries from there. -# - The usual ways of modifying the dependency graph in `cargo` do not seem -# to apply for the `-Zbuild-std` steps, thus we have to mislead it -# by modifying the sources in the sysroot. -# - To avoid messing with the user's Rust installation, we create a clone -# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` -# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. -# -# In the future, we hope to avoid the whole ordeal by either: -# - Making the `test` crate not depend on `std` (either improving upstream -# or having our own custom crate). -# - Making the tests run in kernel space (requires the previous point). -# - Making `std` and friends be more like a "normal" crate, so that -# `-Zbuild-std` and related hacks are not needed. -quiet_cmd_rustsysroot = RUSTSYSROOT - cmd_rustsysroot = \ - rm -rf $(objtree)/$(obj)/test; \ - mkdir -p $(objtree)/$(obj)/test; \ - cp -a $(rustc_sysroot) $(objtree)/$(obj)/test/sysroot; \ - echo '\#!/bin/sh' > $(objtree)/$(obj)/test/rustc_sysroot; \ - echo "$(RUSTC) --sysroot=$(abspath $(objtree)/$(obj)/test/sysroot) \"\$$@\"" \ - >> $(objtree)/$(obj)/test/rustc_sysroot; \ - chmod u+x $(objtree)/$(obj)/test/rustc_sysroot; \ - $(CARGO) -q new $(objtree)/$(obj)/test/dummy; \ - RUSTC=$(objtree)/$(obj)/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ - test -Zbuild-std --target $(rustc_host_target) \ - --manifest-path $(objtree)/$(obj)/test/dummy/Cargo.toml; \ - rm $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ - cp $(objtree)/$(obj)/test/dummy/target/$(rustc_host_target)/debug/deps/* \ - $(objtree)/$(obj)/test/sysroot/lib/rustlib/$(rustc_host_target)/lib - -rusttest-prepare: FORCE - +$(call if_changed,rustsysroot) - rusttest-macros: private rustc_target_flags = --extern proc_macro rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro -rusttest-macros: $(src)/macros/lib.rs rusttest-prepare FORCE +rusttest-macros: $(src)/macros/lib.rs FORCE +$(call if_changed,rustc_test) +$(call if_changed,rustdoc_test) rusttest-kernel: private rustc_target_flags = --extern alloc \ --extern build_error --extern macros --extern bindings --extern uapi -rusttest-kernel: $(src)/kernel/lib.rs rusttest-prepare \ +rusttest-kernel: $(src)/kernel/lib.rs \ rusttestlib-build_error rusttestlib-macros rusttestlib-bindings \ rusttestlib-uapi FORCE +$(call if_changed,rustc_test) @@ -421,12 +367,12 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) endif $(obj)/core.o: private skip_clippy = 1 -$(obj)/core.o: private skip_flags = -Dunreachable_pub +$(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) $(obj)/core.o: private rustc_target_flags = $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +$(call if_changed_dep,rustc_library) -ifdef CONFIG_X86_64 +ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) $(obj)/core.o: scripts/target.json endif @@ -435,7 +381,7 @@ $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_dep,rustc_library) $(obj)/alloc.o: private skip_clippy = 1 -$(obj)/alloc.o: private skip_flags = -Dunreachable_pub +$(obj)/alloc.o: private skip_flags = -Wunreachable_pub $(obj)/alloc.o: private rustc_target_flags = $(alloc-cfgs) $(obj)/alloc.o: $(RUST_LIB_SRC)/alloc/src/lib.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_dep,rustc_library) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index ddb5644d4fd9..b940a5777330 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -7,8 +7,12 @@ */ #include <kunit/test.h> +#include <linux/blk_types.h> +#include <linux/blk-mq.h> +#include <linux/blkdev.h> #include <linux/errname.h> #include <linux/ethtool.h> +#include <linux/firmware.h> #include <linux/jiffies.h> #include <linux/mdio.h> #include <linux/phy.h> @@ -20,8 +24,11 @@ /* `bindgen` gets confused at certain things. */ const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; +const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE; const gfp_t RUST_CONST_HELPER_GFP_ATOMIC = GFP_ATOMIC; const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL; const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT; const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT; const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO; +const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM; +const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 40ddaee50d8b..93a1a3fc97bc 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -24,6 +24,7 @@ unsafe_op_in_unsafe_fn )] +#[allow(dead_code)] mod bindings_raw { // Use glob import here to expose all helpers. // Symbols defined within the module will take precedence to the glob import. diff --git a/rust/helpers.c b/rust/helpers.c index 2c37a0f5d7a8..92d3c03ae1bd 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -23,8 +23,11 @@ #include <kunit/test-bug.h> #include <linux/bug.h> #include <linux/build_bug.h> +#include <linux/device.h> #include <linux/err.h> #include <linux/errname.h> +#include <linux/gfp.h> +#include <linux/highmem.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/sched/signal.h> @@ -39,6 +42,20 @@ __noreturn void rust_helper_BUG(void) } EXPORT_SYMBOL_GPL(rust_helper_BUG); +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, + unsigned long n) +{ + return copy_from_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_user); + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, + unsigned long n) +{ + return copy_to_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_user); + void rust_helper_mutex_lock(struct mutex *lock) { mutex_lock(lock); @@ -80,6 +97,24 @@ int rust_helper_signal_pending(struct task_struct *t) } EXPORT_SYMBOL_GPL(rust_helper_signal_pending); +struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages(gfp_mask, order); +} +EXPORT_SYMBOL_GPL(rust_helper_alloc_pages); + +void *rust_helper_kmap_local_page(struct page *page) +{ + return kmap_local_page(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kmap_local_page); + +void rust_helper_kunmap_local(const void *addr) +{ + kunmap_local(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_kunmap_local); + refcount_t rust_helper_REFCOUNT_INIT(int n) { return (refcount_t)REFCOUNT_INIT(n); @@ -186,3 +221,19 @@ static_assert( __alignof__(size_t) == __alignof__(uintptr_t), "Rust code expects C `size_t` to match Rust `usize`" ); + +// This will soon be moved to a separate file, so no need to merge with above. +#include <linux/blk-mq.h> +#include <linux/blkdev.h> + +void *rust_helper_blk_mq_rq_to_pdu(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} +EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_to_pdu); + +struct request *rust_helper_blk_mq_rq_from_pdu(void *pdu) +{ + return blk_mq_rq_from_pdu(pdu); +} +EXPORT_SYMBOL_GPL(rust_helper_blk_mq_rq_from_pdu); diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs index 531b5e471cb1..1966bd407017 100644 --- a/rust/kernel/alloc.rs +++ b/rust/kernel/alloc.rs @@ -20,6 +20,13 @@ pub struct AllocError; #[derive(Clone, Copy)] pub struct Flags(u32); +impl Flags { + /// Get the raw representation of this flag. + pub(crate) fn as_raw(self) -> u32 { + self.0 + } +} + impl core::ops::BitOr for Flags { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -52,6 +59,14 @@ pub mod flags { /// This is normally or'd with other flags. pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO); + /// Allow the allocation to be in high memory. + /// + /// Allocations in high memory may not be mapped into the kernel's address space, so this can't + /// be used with `kmalloc` and other similar methods. + /// + /// This is normally or'd with other flags. + pub const __GFP_HIGHMEM: Flags = Flags(bindings::__GFP_HIGHMEM); + /// Users can not sleep and need the allocation to succeed. /// /// A lower watermark is applied to allow access to "atomic reserves". The current @@ -66,7 +81,7 @@ pub mod flags { /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg. pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT); - /// Ror kernel allocations that should not stall for direct reclaim, start physical IO or + /// For kernel allocations that should not stall for direct reclaim, start physical IO or /// use any filesystem callback. It is very likely to fail to allocate memory, even for very /// small allocations. pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT); diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index 229642960cd1..e6ea601f38c6 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -18,23 +18,16 @@ pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: F // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first. let layout = new_layout.pad_to_align(); - let mut size = layout.size(); - - if layout.align() > bindings::ARCH_SLAB_MINALIGN { - // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size - // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for - // more information). - // - // Note that `layout.size()` (after padding) is guaranteed to be a multiple of - // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee. - size = size.next_power_of_two(); - } + // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()` + // which together with the slab guarantees means the `krealloc` will return a properly aligned + // object (see comments in `kmalloc()` for more information). + let size = layout.size(); // SAFETY: // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the // function safety requirement. - // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero - // according to the function safety requirement) or a result from `next_power_of_two()`. + // - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according + // to the function safety requirement) unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 } } diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs new file mode 100644 index 000000000000..150f710efe5b --- /dev/null +++ b/rust/kernel/block.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Types for working with the block layer. + +pub mod mq; diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs new file mode 100644 index 000000000000..fb0f393c1cea --- /dev/null +++ b/rust/kernel/block/mq.rs @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides types for implementing block drivers that interface the +//! blk-mq subsystem. +//! +//! To implement a block device driver, a Rust module must do the following: +//! +//! - Implement [`Operations`] for a type `T`. +//! - Create a [`TagSet<T>`]. +//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`]. +//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in +//! the `TagSet` reference. +//! +//! The types available in this module that have direct C counterparts are: +//! +//! - The [`TagSet`] type that abstracts the C type `struct tag_set`. +//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`. +//! - The [`Request`] type that abstracts the C type `struct request`. +//! +//! The kernel will interface with the block device driver by calling the method +//! implementations of the `Operations` trait. +//! +//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`] +//! instances. The `Request` type is a wrapper around the C `struct request`. +//! The driver must mark end of processing by calling one of the +//! `Request::end`, methods. Failure to do so can lead to deadlock or timeout +//! errors. Please note that the C function `blk_mq_start_request` is implicitly +//! called when the request is queued with the driver. +//! +//! The `TagSet` is responsible for creating and maintaining a mapping between +//! `Request`s and integer ids as well as carrying a pointer to the vtable +//! generated by `Operations`. This mapping is useful for associating +//! completions from hardware with the correct `Request` instance. The `TagSet` +//! determines the maximum queue depth by setting the number of `Request` +//! instances available to the driver, and it determines the number of queues to +//! instantiate for the driver. If possible, a driver should allocate one queue +//! per core, to keep queue data local to a core. +//! +//! One `TagSet` instance can be shared between multiple `GenDisk` instances. +//! This can be useful when implementing drivers where one piece of hardware +//! with one set of IO resources are represented to the user as multiple disks. +//! +//! One significant difference between block device drivers implemented with +//! these Rust abstractions and drivers implemented in C, is that the Rust +//! drivers have to own a reference count on the `Request` type when the IO is +//! in flight. This is to ensure that the C `struct request` instances backing +//! the Rust `Request` instances are live while the Rust driver holds a +//! reference to the `Request`. In addition, the conversion of an integer tag to +//! a `Request` via the `TagSet` would not be sound without this bookkeeping. +//! +//! [`GenDisk`]: gen_disk::GenDisk +//! [`GenDisk<T>`]: gen_disk::GenDisk +//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder +//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build +//! +//! # Example +//! +//! ```rust +//! use kernel::{ +//! alloc::flags, +//! block::mq::*, +//! new_mutex, +//! prelude::*, +//! sync::{Arc, Mutex}, +//! types::{ARef, ForeignOwnable}, +//! }; +//! +//! struct MyBlkDevice; +//! +//! #[vtable] +//! impl Operations for MyBlkDevice { +//! +//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result { +//! Request::end_ok(rq); +//! Ok(()) +//! } +//! +//! fn commit_rqs() {} +//! } +//! +//! let tagset: Arc<TagSet<MyBlkDevice>> = +//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; +//! let mut disk = gen_disk::GenDiskBuilder::new() +//! .capacity_sectors(4096) +//! .build(format_args!("myblk"), tagset)?; +//! +//! # Ok::<(), kernel::error::Error>(()) +//! ``` + +pub mod gen_disk; +mod operations; +mod raw_writer; +mod request; +mod tag_set; + +pub use operations::Operations; +pub use request::Request; +pub use tag_set::TagSet; diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs new file mode 100644 index 000000000000..f548a6199847 --- /dev/null +++ b/rust/kernel/block/mq/gen_disk.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic disk abstraction. +//! +//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h) +//! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h) + +use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet}; +use crate::error; +use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc}; +use core::fmt::{self, Write}; + +/// A builder for [`GenDisk`]. +/// +/// Use this struct to configure and add new [`GenDisk`] to the VFS. +pub struct GenDiskBuilder { + rotational: bool, + logical_block_size: u32, + physical_block_size: u32, + capacity_sectors: u64, +} + +impl Default for GenDiskBuilder { + fn default() -> Self { + Self { + rotational: false, + logical_block_size: bindings::PAGE_SIZE as u32, + physical_block_size: bindings::PAGE_SIZE as u32, + capacity_sectors: 0, + } + } +} + +impl GenDiskBuilder { + /// Create a new instance. + pub fn new() -> Self { + Self::default() + } + + /// Set the rotational media attribute for the device to be built. + pub fn rotational(mut self, rotational: bool) -> Self { + self.rotational = rotational; + self + } + + /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`, + /// and that it is a power of two. + fn validate_block_size(size: u32) -> Result<()> { + if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() { + Err(error::code::EINVAL) + } else { + Ok(()) + } + } + + /// Set the logical block size of the device to be built. + /// + /// This method will check that block size is a power of two and between 512 + /// and 4096. If not, an error is returned and the block size is not set. + /// + /// This is the smallest unit the storage device can address. It is + /// typically 4096 bytes. + pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> { + Self::validate_block_size(block_size)?; + self.logical_block_size = block_size; + Ok(self) + } + + /// Set the physical block size of the device to be built. + /// + /// This method will check that block size is a power of two and between 512 + /// and 4096. If not, an error is returned and the block size is not set. + /// + /// This is the smallest unit a physical storage device can write + /// atomically. It is usually the same as the logical block size but may be + /// bigger. One example is SATA drives with 4096 byte physical block size + /// that expose a 512 byte logical block size to the operating system. + pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> { + Self::validate_block_size(block_size)?; + self.physical_block_size = block_size; + Ok(self) + } + + /// Set the capacity of the device to be built, in sectors (512 bytes). + pub fn capacity_sectors(mut self, capacity: u64) -> Self { + self.capacity_sectors = capacity; + self + } + + /// Build a new `GenDisk` and add it to the VFS. + pub fn build<T: Operations>( + self, + name: fmt::Arguments<'_>, + tagset: Arc<TagSet<T>>, + ) -> Result<GenDisk<T>> { + let lock_class_key = crate::sync::LockClassKey::new(); + + // SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed. + let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() }; + + lim.logical_block_size = self.logical_block_size; + lim.physical_block_size = self.physical_block_size; + if self.rotational { + lim.features = bindings::BLK_FEAT_ROTATIONAL; + } + + // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set + let gendisk = from_err_ptr(unsafe { + bindings::__blk_mq_alloc_disk( + tagset.raw_tag_set(), + &mut lim, + core::ptr::null_mut(), + lock_class_key.as_ptr(), + ) + })?; + + const TABLE: bindings::block_device_operations = bindings::block_device_operations { + submit_bio: None, + open: None, + release: None, + ioctl: None, + compat_ioctl: None, + check_events: None, + unlock_native_capacity: None, + getgeo: None, + set_read_only: None, + swap_slot_free_notify: None, + report_zones: None, + devnode: None, + alternative_gpt_sector: None, + get_unique_id: None, + // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to + // be merged (unstable in rustc 1.78 which is staged for linux 6.10) + // https://github.com/rust-lang/rust/issues/119618 + owner: core::ptr::null_mut(), + pr_ops: core::ptr::null_mut(), + free_disk: None, + poll_bio: None, + }; + + // SAFETY: `gendisk` is a valid pointer as we initialized it above + unsafe { (*gendisk).fops = &TABLE }; + + let mut raw_writer = RawWriter::from_array( + // SAFETY: `gendisk` points to a valid and initialized instance. We + // have exclusive access, since the disk is not added to the VFS + // yet. + unsafe { &mut (*gendisk).disk_name }, + )?; + raw_writer.write_fmt(name)?; + raw_writer.write_char('\0')?; + + // SAFETY: `gendisk` points to a valid and initialized instance of + // `struct gendisk`. `set_capacity` takes a lock to synchronize this + // operation, so we will not race. + unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) }; + + crate::error::to_result( + // SAFETY: `gendisk` points to a valid and initialized instance of + // `struct gendisk`. + unsafe { + bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut()) + }, + )?; + + // INVARIANT: `gendisk` was initialized above. + // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above. + Ok(GenDisk { + _tagset: tagset, + gendisk, + }) + } +} + +/// A generic block device. +/// +/// # Invariants +/// +/// - `gendisk` must always point to an initialized and valid `struct gendisk`. +/// - `gendisk` was added to the VFS through a call to +/// `bindings::device_add_disk`. +pub struct GenDisk<T: Operations> { + _tagset: Arc<TagSet<T>>, + gendisk: *mut bindings::gendisk, +} + +// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a +// `TagSet` It is safe to send this to other threads as long as T is Send. +unsafe impl<T: Operations + Send> Send for GenDisk<T> {} + +impl<T: Operations> Drop for GenDisk<T> { + fn drop(&mut self) { + // SAFETY: By type invariant, `self.gendisk` points to a valid and + // initialized instance of `struct gendisk`, and it was previously added + // to the VFS. + unsafe { bindings::del_gendisk(self.gendisk) }; + } +} diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs new file mode 100644 index 000000000000..9ba7fdfeb4b2 --- /dev/null +++ b/rust/kernel/block/mq/operations.rs @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides an interface for blk-mq drivers to implement. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::{ + bindings, + block::mq::request::RequestDataWrapper, + block::mq::Request, + error::{from_result, Result}, + types::ARef, +}; +use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering}; + +/// Implement this trait to interface blk-mq as block devices. +/// +/// To implement a block device driver, implement this trait as described in the +/// [module level documentation]. The kernel will use the implementation of the +/// functions defined in this trait to interface a block device driver. Note: +/// There is no need for an exit_request() implementation, because the `drop` +/// implementation of the [`Request`] type will be invoked by automatically by +/// the C/Rust glue logic. +/// +/// [module level documentation]: kernel::block::mq +#[macros::vtable] +pub trait Operations: Sized { + /// Called by the kernel to queue a request with the driver. If `is_last` is + /// `false`, the driver is allowed to defer committing the request. + fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result; + + /// Called by the kernel to indicate that queued requests should be submitted. + fn commit_rqs(); + + /// Called by the kernel to poll the device for completed requests. Only + /// used for poll queues. + fn poll() -> bool { + crate::build_error(crate::error::VTABLE_DEFAULT_ERROR) + } +} + +/// A vtable for blk-mq to interact with a block device driver. +/// +/// A `bindings::blk_mq_ops` vtable is constructed from pointers to the `extern +/// "C"` functions of this struct, exposed through the `OperationsVTable::VTABLE`. +/// +/// For general documentation of these methods, see the kernel source +/// documentation related to `struct blk_mq_operations` in +/// [`include/linux/blk-mq.h`]. +/// +/// [`include/linux/blk-mq.h`]: srctree/include/linux/blk-mq.h +pub(crate) struct OperationsVTable<T: Operations>(PhantomData<T>); + +impl<T: Operations> OperationsVTable<T> { + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - The caller of this function must ensure that the pointee of `bd` is + /// valid for reads for the duration of this function. + /// - This function must be called for an initialized and live `hctx`. That + /// is, `Self::init_hctx_callback` was called and + /// `Self::exit_hctx_callback()` was not yet called. + /// - `(*bd).rq` must point to an initialized and live `bindings:request`. + /// That is, `Self::init_request_callback` was called but + /// `Self::exit_request_callback` was not yet called for the request. + /// - `(*bd).rq` must be owned by the driver. That is, the block layer must + /// promise to not access the request until the driver calls + /// `bindings::blk_mq_end_request` for the request. + unsafe extern "C" fn queue_rq_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + bd: *const bindings::blk_mq_queue_data, + ) -> bindings::blk_status_t { + // SAFETY: `bd.rq` is valid as required by the safety requirement for + // this function. + let request = unsafe { &*(*bd).rq.cast::<Request<T>>() }; + + // One refcount for the ARef, one for being in flight + request.wrapper_ref().refcount().store(2, Ordering::Relaxed); + + // SAFETY: + // - We own a refcount that we took above. We pass that to `ARef`. + // - By the safety requirements of this function, `request` is a valid + // `struct request` and the private data is properly initialized. + // - `rq` will be alive until `blk_mq_end_request` is called and is + // reference counted by `ARef` until then. + let rq = unsafe { Request::aref_from_raw((*bd).rq) }; + + // SAFETY: We have exclusive access and we just set the refcount above. + unsafe { Request::start_unchecked(&rq) }; + + let ret = T::queue_rq( + rq, + // SAFETY: `bd` is valid as required by the safety requirement for + // this function. + unsafe { (*bd).last }, + ); + + if let Err(e) = ret { + e.to_blk_status() + } else { + bindings::BLK_STS_OK as _ + } + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) { + T::commit_rqs() + } + + /// This function is called by the C kernel. It is not currently + /// implemented, and there is no way to exercise this code path. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {} + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn poll_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _iob: *mut bindings::io_comp_batch, + ) -> core::ffi::c_int { + T::poll().into() + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. This + /// function may only be called once before `exit_hctx_callback` is called + /// for the same context. + unsafe extern "C" fn init_hctx_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _tagset_data: *mut core::ffi::c_void, + _hctx_idx: core::ffi::c_uint, + ) -> core::ffi::c_int { + from_result(|| Ok(0)) + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// This function may only be called by blk-mq C infrastructure. + unsafe extern "C" fn exit_hctx_callback( + _hctx: *mut bindings::blk_mq_hw_ctx, + _hctx_idx: core::ffi::c_uint, + ) { + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - This function may only be called by blk-mq C infrastructure. + /// - `_set` must point to an initialized `TagSet<T>`. + /// - `rq` must point to an initialized `bindings::request`. + /// - The allocation pointed to by `rq` must be at the size of `Request` + /// plus the size of `RequestDataWrapper`. + unsafe extern "C" fn init_request_callback( + _set: *mut bindings::blk_mq_tag_set, + rq: *mut bindings::request, + _hctx_idx: core::ffi::c_uint, + _numa_node: core::ffi::c_uint, + ) -> core::ffi::c_int { + from_result(|| { + // SAFETY: By the safety requirements of this function, `rq` points + // to a valid allocation. + let pdu = unsafe { Request::wrapper_ptr(rq.cast::<Request<T>>()) }; + + // SAFETY: The refcount field is allocated but not initialized, so + // it is valid for writes. + unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) }; + + Ok(0) + }) + } + + /// This function is called by the C kernel. A pointer to this function is + /// installed in the `blk_mq_ops` vtable for the driver. + /// + /// # Safety + /// + /// - This function may only be called by blk-mq C infrastructure. + /// - `_set` must point to an initialized `TagSet<T>`. + /// - `rq` must point to an initialized and valid `Request`. + unsafe extern "C" fn exit_request_callback( + _set: *mut bindings::blk_mq_tag_set, + rq: *mut bindings::request, + _hctx_idx: core::ffi::c_uint, + ) { + // SAFETY: The tagset invariants guarantee that all requests are allocated with extra memory + // for the request data. + let pdu = unsafe { bindings::blk_mq_rq_to_pdu(rq) }.cast::<RequestDataWrapper>(); + + // SAFETY: `pdu` is valid for read and write and is properly initialised. + unsafe { core::ptr::drop_in_place(pdu) }; + } + + const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops { + queue_rq: Some(Self::queue_rq_callback), + queue_rqs: None, + commit_rqs: Some(Self::commit_rqs_callback), + get_budget: None, + put_budget: None, + set_rq_budget_token: None, + get_rq_budget_token: None, + timeout: None, + poll: if T::HAS_POLL { + Some(Self::poll_callback) + } else { + None + }, + complete: Some(Self::complete_callback), + init_hctx: Some(Self::init_hctx_callback), + exit_hctx: Some(Self::exit_hctx_callback), + init_request: Some(Self::init_request_callback), + exit_request: Some(Self::exit_request_callback), + cleanup_rq: None, + busy: None, + map_queues: None, + #[cfg(CONFIG_BLK_DEBUG_FS)] + show_rq: None, + }; + + pub(crate) const fn build() -> &'static bindings::blk_mq_ops { + &Self::VTABLE + } +} diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs new file mode 100644 index 000000000000..9222465d670b --- /dev/null +++ b/rust/kernel/block/mq/raw_writer.rs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::fmt::{self, Write}; + +use crate::error::Result; +use crate::prelude::EINVAL; + +/// A mutable reference to a byte buffer where a string can be written into. +/// +/// # Invariants +/// +/// `buffer` is always null terminated. +pub(crate) struct RawWriter<'a> { + buffer: &'a mut [u8], + pos: usize, +} + +impl<'a> RawWriter<'a> { + /// Create a new `RawWriter` instance. + fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> { + *(buffer.last_mut().ok_or(EINVAL)?) = 0; + + // INVARIANT: We null terminated the buffer above. + Ok(Self { buffer, pos: 0 }) + } + + pub(crate) fn from_array<const N: usize>( + a: &'a mut [core::ffi::c_char; N], + ) -> Result<RawWriter<'a>> { + Self::new( + // SAFETY: the buffer of `a` is valid for read and write as `u8` for + // at least `N` bytes. + unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) }, + ) + } +} + +impl Write for RawWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + let bytes = s.as_bytes(); + let len = bytes.len(); + + // We do not want to overwrite our null terminator + if self.pos + len > self.buffer.len() - 1 { + return Err(fmt::Error); + } + + // INVARIANT: We are not overwriting the last byte + self.buffer[self.pos..self.pos + len].copy_from_slice(bytes); + + self.pos += len; + + Ok(()) + } +} diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs new file mode 100644 index 000000000000..a0e22827f3f4 --- /dev/null +++ b/rust/kernel/block/mq/request.rs @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides a wrapper for the C `struct request` type. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use crate::{ + bindings, + block::mq::Operations, + error::Result, + types::{ARef, AlwaysRefCounted, Opaque}, +}; +use core::{ + marker::PhantomData, + ptr::{addr_of_mut, NonNull}, + sync::atomic::{AtomicU64, Ordering}, +}; + +/// A wrapper around a blk-mq `struct request`. This represents an IO request. +/// +/// # Implementation details +/// +/// There are four states for a request that the Rust bindings care about: +/// +/// A) Request is owned by block layer (refcount 0) +/// B) Request is owned by driver but with zero `ARef`s in existence +/// (refcount 1) +/// C) Request is owned by driver with exactly one `ARef` in existence +/// (refcount 2) +/// D) Request is owned by driver with more than one `ARef` in existence +/// (refcount > 2) +/// +/// +/// We need to track A and B to ensure we fail tag to request conversions for +/// requests that are not owned by the driver. +/// +/// We need to track C and D to ensure that it is safe to end the request and hand +/// back ownership to the block layer. +/// +/// The states are tracked through the private `refcount` field of +/// `RequestDataWrapper`. This structure lives in the private data area of the C +/// `struct request`. +/// +/// # Invariants +/// +/// * `self.0` is a valid `struct request` created by the C portion of the kernel. +/// * The private data area associated with this request must be an initialized +/// and valid `RequestDataWrapper<T>`. +/// * `self` is reference counted by atomic modification of +/// self.wrapper_ref().refcount(). +/// +#[repr(transparent)] +pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>); + +impl<T: Operations> Request<T> { + /// Create an `ARef<Request>` from a `struct request` pointer. + /// + /// # Safety + /// + /// * The caller must own a refcount on `ptr` that is transferred to the + /// returned `ARef`. + /// * The type invariants for `Request` must hold for the pointee of `ptr`. + pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> { + // INVARIANT: By the safety requirements of this function, invariants are upheld. + // SAFETY: By the safety requirement of this function, we own a + // reference count that we can pass to `ARef`. + unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) } + } + + /// Notify the block layer that a request is going to be processed now. + /// + /// The block layer uses this hook to do proper initializations such as + /// starting the timeout timer. It is a requirement that block device + /// drivers call this function when starting to process a request. + /// + /// # Safety + /// + /// The caller must have exclusive ownership of `self`, that is + /// `self.wrapper_ref().refcount() == 2`. + pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) { + // SAFETY: By type invariant, `self.0` is a valid `struct request` and + // we have exclusive access. + unsafe { bindings::blk_mq_start_request(this.0.get()) }; + } + + /// Try to take exclusive ownership of `this` by dropping the refcount to 0. + /// This fails if `this` is not the only `ARef` pointing to the underlying + /// `Request`. + /// + /// If the operation is successful, `Ok` is returned with a pointer to the + /// C `struct request`. If the operation fails, `this` is returned in the + /// `Err` variant. + fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> { + // We can race with `TagSet::tag_to_rq` + if let Err(_old) = this.wrapper_ref().refcount().compare_exchange( + 2, + 0, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + return Err(this); + } + + let request_ptr = this.0.get(); + core::mem::forget(this); + + Ok(request_ptr) + } + + /// Notify the block layer that the request has been completed without errors. + /// + /// This function will return `Err` if `this` is not the only `ARef` + /// referencing the request. + pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> { + let request_ptr = Self::try_set_end(this)?; + + // SAFETY: By type invariant, `this.0` was a valid `struct request`. The + // success of the call to `try_set_end` guarantees that there are no + // `ARef`s pointing to this request. Therefore it is safe to hand it + // back to the block layer. + unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) }; + + Ok(()) + } + + /// Return a pointer to the `RequestDataWrapper` stored in the private area + /// of the request structure. + /// + /// # Safety + /// + /// - `this` must point to a valid allocation of size at least size of + /// `Self` plus size of `RequestDataWrapper`. + pub(crate) unsafe fn wrapper_ptr(this: *mut Self) -> NonNull<RequestDataWrapper> { + let request_ptr = this.cast::<bindings::request>(); + // SAFETY: By safety requirements for this function, `this` is a + // valid allocation. + let wrapper_ptr = + unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() }; + // SAFETY: By C API contract, wrapper_ptr points to a valid allocation + // and is not null. + unsafe { NonNull::new_unchecked(wrapper_ptr) } + } + + /// Return a reference to the `RequestDataWrapper` stored in the private + /// area of the request structure. + pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper { + // SAFETY: By type invariant, `self.0` is a valid allocation. Further, + // the private data associated with this request is initialized and + // valid. The existence of `&self` guarantees that the private data is + // valid as a shared reference. + unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() } + } +} + +/// A wrapper around data stored in the private area of the C `struct request`. +pub(crate) struct RequestDataWrapper { + /// The Rust request refcount has the following states: + /// + /// - 0: The request is owned by C block layer. + /// - 1: The request is owned by Rust abstractions but there are no ARef references to it. + /// - 2+: There are `ARef` references to the request. + refcount: AtomicU64, +} + +impl RequestDataWrapper { + /// Return a reference to the refcount of the request that is embedding + /// `self`. + pub(crate) fn refcount(&self) -> &AtomicU64 { + &self.refcount + } + + /// Return a pointer to the refcount of the request that is embedding the + /// pointee of `this`. + /// + /// # Safety + /// + /// - `this` must point to a live allocation of at least the size of `Self`. + pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { + // SAFETY: Because of the safety requirements of this function, the + // field projection is safe. + unsafe { addr_of_mut!((*this).refcount) } + } +} + +// SAFETY: Exclusive access is thread-safe for `Request`. `Request` has no `&mut +// self` methods and `&self` methods that mutate `self` are internally +// synchronized. +unsafe impl<T: Operations> Send for Request<T> {} + +// SAFETY: Shared access is thread-safe for `Request`. `&self` methods that +// mutate `self` are internally synchronized` +unsafe impl<T: Operations> Sync for Request<T> {} + +/// Store the result of `op(target.load())` in target, returning new value of +/// target. +fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 { + let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x))); + + // SAFETY: Because the operation passed to `fetch_update` above always + // return `Some`, `old` will always be `Ok`. + let old = unsafe { old.unwrap_unchecked() }; + + op(old) +} + +/// Store the result of `op(target.load)` in `target` if `target.load() != +/// pred`, returning true if the target was updated. +fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool { + target + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { + if x == pred { + None + } else { + Some(op(x)) + } + }) + .is_ok() +} + +// SAFETY: All instances of `Request<T>` are reference counted. This +// implementation of `AlwaysRefCounted` ensure that increments to the ref count +// keeps the object alive in memory at least until a matching reference count +// decrement is executed. +unsafe impl<T: Operations> AlwaysRefCounted for Request<T> { + fn inc_ref(&self) { + let refcount = &self.wrapper_ref().refcount(); + + #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] + let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0); + + #[cfg(CONFIG_DEBUG_MISC)] + if !updated { + panic!("Request refcount zero on clone") + } + } + + unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) { + // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid + // for read. + let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() }; + // SAFETY: The type invariant of `Request` guarantees that the private + // data area is initialized and valid. + let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) }; + + #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))] + let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1); + + #[cfg(CONFIG_DEBUG_MISC)] + if new_refcount == 0 { + panic!("Request reached refcount zero in Rust abstractions"); + } + } +} diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs new file mode 100644 index 000000000000..f9a1ca655a35 --- /dev/null +++ b/rust/kernel/block/mq/tag_set.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This module provides the `TagSet` struct to wrap the C `struct blk_mq_tag_set`. +//! +//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h) + +use core::pin::Pin; + +use crate::{ + bindings, + block::mq::{operations::OperationsVTable, request::RequestDataWrapper, Operations}, + error, + prelude::PinInit, + try_pin_init, + types::Opaque, +}; +use core::{convert::TryInto, marker::PhantomData}; +use macros::{pin_data, pinned_drop}; + +/// A wrapper for the C `struct blk_mq_tag_set`. +/// +/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned. +/// +/// # Invariants +/// +/// - `inner` is initialized and valid. +#[pin_data(PinnedDrop)] +#[repr(transparent)] +pub struct TagSet<T: Operations> { + #[pin] + inner: Opaque<bindings::blk_mq_tag_set>, + _p: PhantomData<T>, +} + +impl<T: Operations> TagSet<T> { + /// Try to create a new tag set + pub fn new( + nr_hw_queues: u32, + num_tags: u32, + num_maps: u32, + ) -> impl PinInit<Self, error::Error> { + // SAFETY: `blk_mq_tag_set` only contains integers and pointers, which + // all are allowed to be 0. + let tag_set: bindings::blk_mq_tag_set = unsafe { core::mem::zeroed() }; + let tag_set = core::mem::size_of::<RequestDataWrapper>() + .try_into() + .map(|cmd_size| { + bindings::blk_mq_tag_set { + ops: OperationsVTable::<T>::build(), + nr_hw_queues, + timeout: 0, // 0 means default which is 30Hz in C + numa_node: bindings::NUMA_NO_NODE, + queue_depth: num_tags, + cmd_size, + flags: bindings::BLK_MQ_F_SHOULD_MERGE, + driver_data: core::ptr::null_mut::<core::ffi::c_void>(), + nr_maps: num_maps, + ..tag_set + } + }); + + try_pin_init!(TagSet { + inner <- PinInit::<_, error::Error>::pin_chain(Opaque::new(tag_set?), |tag_set| { + // SAFETY: we do not move out of `tag_set`. + let tag_set = unsafe { Pin::get_unchecked_mut(tag_set) }; + // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`. + error::to_result( unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get())}) + }), + _p: PhantomData, + }) + } + + /// Return the pointer to the wrapped `struct blk_mq_tag_set` + pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set { + self.inner.get() + } +} + +#[pinned_drop] +impl<T: Operations> PinnedDrop for TagSet<T> { + fn drop(self: Pin<&mut Self>) { + // SAFETY: By type invariant `inner` is valid and has been properly + // initialized during construction. + unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) }; + } +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs new file mode 100644 index 000000000000..851018eef885 --- /dev/null +++ b/rust/kernel/device.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic devices that are part of the kernel's driver model. +//! +//! C header: [`include/linux/device.h`](srctree/include/linux/device.h) + +use crate::{ + bindings, + types::{ARef, Opaque}, +}; +use core::ptr; + +/// A reference-counted device. +/// +/// This structure represents the Rust abstraction for a C `struct device`. This implementation +/// abstracts the usage of an already existing C `struct device` within Rust code that we get +/// passed from the C side. +/// +/// An instance of this abstraction can be obtained temporarily or permanent. +/// +/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation. +/// A permanent instance is always reference-counted and hence not restricted by any lifetime +/// boundaries. +/// +/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem +/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in +/// `T::probe()`, such that a driver can store the `ARef<Device>` (equivalent to storing a +/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent +/// memory. +/// +/// # Invariants +/// +/// A `Device` instance represents a valid `struct device` created by the C portion of the kernel. +/// +/// Instances of this type are always reference-counted, that is, a call to `get_device` ensures +/// that the allocation remains valid at least until the matching call to `put_device`. +/// +/// `bindings::device::release` is valid to be called from any thread, hence `ARef<Device>` can be +/// dropped from any thread. +#[repr(transparent)] +pub struct Device(Opaque<bindings::device>); + +impl Device { + /// Creates a new reference-counted abstraction instance of an existing `struct device` pointer. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count, + /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to + /// can't drop to zero, for the duration of this function call. + /// + /// It must also be ensured that `bindings::device::release` can be called from any thread. + /// While not officially documented, this should be the case for any `struct device`. + pub unsafe fn from_raw(ptr: *mut bindings::device) -> ARef<Self> { + // SAFETY: By the safety requirements, ptr is valid. + // Initially increase the reference count by one to compensate for the final decrement once + // this newly created `ARef<Device>` instance is dropped. + unsafe { bindings::get_device(ptr) }; + + // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::device`. + let ptr = ptr.cast::<Self>(); + + // SAFETY: `ptr` is valid by the safety requirements of this function. By the above call to + // `bindings::get_device` we also own a reference to the underlying `struct device`. + unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr)) } + } + + /// Obtain the raw `struct device *`. + pub(crate) fn as_raw(&self) -> *mut bindings::device { + self.0.get() + } + + /// Convert a raw C `struct device` pointer to a `&'a Device`. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count, + /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to + /// can't drop to zero, for the duration of this function call and the entire duration when the + /// returned reference exists. + pub unsafe fn as_ref<'a>(ptr: *mut bindings::device) -> &'a Self { + // SAFETY: Guaranteed by the safety requirements of the function. + unsafe { &*ptr.cast() } + } +} + +// SAFETY: Instances of `Device` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Device { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::get_device(self.as_raw()) }; + } + + unsafe fn dec_ref(obj: ptr::NonNull<Self>) { + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::put_device(obj.cast().as_ptr()) } + } +} + +// SAFETY: As by the type invariant `Device` can be sent to any thread. +unsafe impl Send for Device {} + +// SAFETY: `Device` can be shared among threads because all immutable methods are protected by the +// synchronization in `struct device`. +unsafe impl Sync for Device {} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 55280ae9fe40..145f5c397009 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -126,6 +126,12 @@ impl Error { self.0 } + #[cfg(CONFIG_BLOCK)] + pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { + // SAFETY: `self.0` is a valid error due to its invariant. + unsafe { bindings::errno_to_blk_status(self.0) } + } + /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr<T>(self) -> *mut T { diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs new file mode 100644 index 000000000000..2ba03af9f036 --- /dev/null +++ b/rust/kernel/firmware.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware abstraction +//! +//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") + +use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use core::ptr::NonNull; + +/// # Invariants +/// +/// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, +/// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. +struct FwFunc( + unsafe extern "C" fn(*mut *const bindings::firmware, *const i8, *mut bindings::device) -> i32, +); + +impl FwFunc { + fn request() -> Self { + Self(bindings::request_firmware) + } + + fn request_nowarn() -> Self { + Self(bindings::firmware_request_nowarn) + } +} + +/// Abstraction around a C `struct firmware`. +/// +/// This is a simple abstraction around the C firmware API. Just like with the C API, firmware can +/// be requested. Once requested the abstraction provides direct access to the firmware buffer as +/// `&[u8]`. The firmware is released once [`Firmware`] is dropped. +/// +/// # Invariants +/// +/// The pointer is valid, and has ownership over the instance of `struct firmware`. +/// +/// The `Firmware`'s backing buffer is not modified. +/// +/// # Examples +/// +/// ```no_run +/// # use kernel::{c_str, device::Device, firmware::Firmware}; +/// +/// # fn no_run() -> Result<(), Error> { +/// # // SAFETY: *NOT* safe, just for the example to get an `ARef<Device>` instance +/// # let dev = unsafe { Device::from_raw(core::ptr::null_mut()) }; +/// +/// let fw = Firmware::request(c_str!("path/to/firmware.bin"), &dev)?; +/// let blob = fw.data(); +/// +/// # Ok(()) +/// # } +/// ``` +pub struct Firmware(NonNull<bindings::firmware>); + +impl Firmware { + fn request_internal(name: &CStr, dev: &Device, func: FwFunc) -> Result<Self> { + let mut fw: *mut bindings::firmware = core::ptr::null_mut(); + let pfw: *mut *mut bindings::firmware = &mut fw; + + // SAFETY: `pfw` is a valid pointer to a NULL initialized `bindings::firmware` pointer. + // `name` and `dev` are valid as by their type invariants. + let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) }; + if ret != 0 { + return Err(Error::from_errno(ret)); + } + + // SAFETY: `func` not bailing out with a non-zero error code, guarantees that `fw` is a + // valid pointer to `bindings::firmware`. + Ok(Firmware(unsafe { NonNull::new_unchecked(fw) })) + } + + /// Send a firmware request and wait for it. See also `bindings::request_firmware`. + pub fn request(name: &CStr, dev: &Device) -> Result<Self> { + Self::request_internal(name, dev, FwFunc::request()) + } + + /// Send a request for an optional firmware module. See also + /// `bindings::firmware_request_nowarn`. + pub fn request_nowarn(name: &CStr, dev: &Device) -> Result<Self> { + Self::request_internal(name, dev, FwFunc::request_nowarn()) + } + + fn as_raw(&self) -> *mut bindings::firmware { + self.0.as_ptr() + } + + /// Returns the size of the requested firmware in bytes. + pub fn size(&self) -> usize { + // SAFETY: `self.as_raw()` is valid by the type invariant. + unsafe { (*self.as_raw()).size } + } + + /// Returns the requested firmware as `&[u8]`. + pub fn data(&self) -> &[u8] { + // SAFETY: `self.as_raw()` is valid by the type invariant. Additionally, + // `bindings::firmware` guarantees, if successfully requested, that + // `bindings::firmware::data` has a size of `bindings::firmware::size` bytes. + unsafe { core::slice::from_raw_parts((*self.as_raw()).data, self.size()) } + } +} + +impl Drop for Firmware { + fn drop(&mut self) { + // SAFETY: `self.as_raw()` is valid by the type invariant. + unsafe { bindings::release_firmware(self.as_raw()) }; + } +} + +// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, which is safe to be used from +// any thread. +unsafe impl Send for Firmware {} + +// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, references to which are safe to +// be used from any thread. +unsafe impl Sync for Firmware {} diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 68605b633e73..495c09ebe3a3 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -843,11 +843,8 @@ where let val = unsafe { &mut *slot }; // SAFETY: `slot` is considered pinned. let val = unsafe { Pin::new_unchecked(val) }; - (self.1)(val).map_err(|e| { - // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + // SAFETY: `slot` was initialized above. + (self.1)(val).inspect_err(|_| unsafe { core::ptr::drop_in_place(slot) }) } } @@ -941,11 +938,9 @@ where // SAFETY: All requirements fulfilled since this function is `__init`. unsafe { self.0.__pinned_init(slot)? }; // SAFETY: The above call initialized `slot` and we still have unique access. - (self.1)(unsafe { &mut *slot }).map_err(|e| { + (self.1)(unsafe { &mut *slot }).inspect_err(|_| // SAFETY: `slot` was initialized above. - unsafe { core::ptr::drop_in_place(slot) }; - e - }) + unsafe { core::ptr::drop_in_place(slot) }) } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index fbd91a48ff8b..274bdc1b0a82 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -27,14 +27,20 @@ compile_error!("Missing kernel configuration for conditional compilation"); extern crate self as kernel; pub mod alloc; +#[cfg(CONFIG_BLOCK)] +pub mod block; mod build_assert; +pub mod device; pub mod error; +#[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] +pub mod firmware; pub mod init; pub mod ioctl; #[cfg(CONFIG_KUNIT)] pub mod kunit; #[cfg(CONFIG_NET)] pub mod net; +pub mod page; pub mod prelude; pub mod print; mod static_assert; @@ -45,6 +51,7 @@ pub mod sync; pub mod task; pub mod time; pub mod types; +pub mod uaccess; pub mod workqueue; #[doc(hidden)] diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs new file mode 100644 index 000000000000..208a006d587c --- /dev/null +++ b/rust/kernel/page.rs @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel page allocation and management. + +use crate::{ + alloc::{AllocError, Flags}, + bindings, + error::code::*, + error::Result, + uaccess::UserSliceReader, +}; +use core::ptr::{self, NonNull}; + +/// A bitwise shift for the page size. +pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize; + +/// The number of bytes in a page. +pub const PAGE_SIZE: usize = bindings::PAGE_SIZE; + +/// A bitmask that gives the page containing a given address. +pub const PAGE_MASK: usize = !(PAGE_SIZE - 1); + +/// A pointer to a page that owns the page allocation. +/// +/// # Invariants +/// +/// The pointer is valid, and has ownership over the page. +pub struct Page { + page: NonNull<bindings::page>, +} + +// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across +// threads is safe. +unsafe impl Send for Page {} + +// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing +// them concurrently is safe. +unsafe impl Sync for Page {} + +impl Page { + /// Allocates a new page. + /// + /// # Examples + /// + /// Allocate memory for a page. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL)?; + /// # Ok(()) } + /// ``` + /// + /// Allocate memory for a page and zero its contents. + /// + /// ``` + /// use kernel::page::Page; + /// + /// # fn dox() -> Result<(), kernel::alloc::AllocError> { + /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?; + /// # Ok(()) } + /// ``` + pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> { + // SAFETY: Depending on the value of `gfp_flags`, this call may sleep. Other than that, it + // is always safe to call this method. + let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) }; + let page = NonNull::new(page).ok_or(AllocError)?; + // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly + // allocated page. We transfer that ownership to the new `Page` object. + Ok(Self { page }) + } + + /// Returns a raw pointer to the page. + pub fn as_ptr(&self) -> *mut bindings::page { + self.page.as_ptr() + } + + /// Runs a piece of code with this page mapped to an address. + /// + /// The page is unmapped when this call returns. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might + /// only be mapped on the current thread, and when that is the case, dereferencing it on other + /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't + /// cause data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T { + // SAFETY: `page` is valid due to the type invariants on `Page`. + let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) }; + + let res = f(mapped_addr.cast()); + + // This unmaps the page mapped above. + // + // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner + // where the pages are unmapped in reverse order. This is as required by `kunmap_local`. + // + // In other words, if this call to `kunmap_local` happens when a different page should be + // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the + // call just above in `with_page_mapped` that made that possible. In this case, it is the + // unsafe block that wraps that other call that is incorrect. + unsafe { bindings::kunmap_local(mapped_addr) }; + + res + } + + /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking. + /// + /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the + /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on + /// this task, as this method uses a local mapping. + /// + /// If `off` and `len` refers to a region outside of this page, then this method returns + /// [`EINVAL`] and does not call `f`. + /// + /// # Using the raw pointer + /// + /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for + /// `len` bytes and for the duration in which the closure is called. The pointer might only be + /// mapped on the current thread, and when that is the case, dereferencing it on other threads + /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause + /// data races, the memory may be uninitialized, and so on. + /// + /// If multiple threads map the same page at the same time, then they may reference with + /// different addresses. However, even if the addresses are different, the underlying memory is + /// still the same for these purposes (e.g., it's still a data race if they both write to the + /// same underlying byte at the same time). + fn with_pointer_into_page<T>( + &self, + off: usize, + len: usize, + f: impl FnOnce(*mut u8) -> Result<T>, + ) -> Result<T> { + let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE; + + if bounds_ok { + self.with_page_mapped(move |page_addr| { + // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will + // result in a pointer that is in bounds or one off the end of the page. + f(unsafe { page_addr.add(off) }) + }) + } else { + Err(EINVAL) + } + } + + /// Maps the page and reads from it into the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `dst` is valid for writing `len` bytes. + /// * Callers must ensure that this call does not race with a write to the same page that + /// overlaps with this read. + pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |src| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then + // it has performed a bounds check and guarantees that `src` is + // valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and writes into it from the given buffer. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// * Callers must ensure that `src` is valid for reading `len` bytes. + /// * Callers must ensure that this call does not race with a read or write to the same page + /// that overlaps with this write. + pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::copy_nonoverlapping(src, dst, len) }; + Ok(()) + }) + } + + /// Maps the page and zeroes the given slice. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. + // + // There caller guarantees that there is no data race. + unsafe { ptr::write_bytes(dst, 0u8, len) }; + Ok(()) + }) + } + + /// Copies data from userspace into this page. + /// + /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes + /// outside of the page, then this call returns [`EINVAL`]. + /// + /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address. + /// However, they are not allowed on the page you are copying into. + /// + /// # Safety + /// + /// Callers must ensure that this call does not race with a read or write to the same page that + /// overlaps with this write. + pub unsafe fn copy_from_user_slice_raw( + &self, + reader: &mut UserSliceReader, + offset: usize, + len: usize, + ) -> Result { + self.with_pointer_into_page(offset, len, move |dst| { + // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a + // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have + // exclusive access to the slice since the caller guarantees that there are no races. + reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) }) + }) + } +} + +impl Drop for Page { + fn drop(&mut self) { + // SAFETY: By the type invariants, we have ownership of the page and can free it. + unsafe { bindings::__free_pages(self.page.as_ptr(), 0) }; + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 2e7c9008621f..bd189d646adb 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -409,3 +409,67 @@ pub enum Either<L, R> { /// Constructs an instance of [`Either`] containing a value of type `R`. Right(R), } + +/// Types for which any bit pattern is valid. +/// +/// Not all types are valid for all values. For example, a `bool` must be either zero or one, so +/// reading arbitrary bytes into something that contains a `bool` is not okay. +/// +/// It's okay for the type to have padding, as initializing those bytes has no effect. +/// +/// # Safety +/// +/// All bit-patterns must be valid for this type. This type must not have interior mutability. +pub unsafe trait FromBytes {} + +// SAFETY: All bit patterns are acceptable values of the types below. +unsafe impl FromBytes for u8 {} +unsafe impl FromBytes for u16 {} +unsafe impl FromBytes for u32 {} +unsafe impl FromBytes for u64 {} +unsafe impl FromBytes for usize {} +unsafe impl FromBytes for i8 {} +unsafe impl FromBytes for i16 {} +unsafe impl FromBytes for i32 {} +unsafe impl FromBytes for i64 {} +unsafe impl FromBytes for isize {} +// SAFETY: If all bit patterns are acceptable for individual values in an array, then all bit +// patterns are also acceptable for arrays of that type. +unsafe impl<T: FromBytes> FromBytes for [T] {} +unsafe impl<T: FromBytes, const N: usize> FromBytes for [T; N] {} + +/// Types that can be viewed as an immutable slice of initialized bytes. +/// +/// If a struct implements this trait, then it is okay to copy it byte-for-byte to userspace. This +/// means that it should not have any padding, as padding bytes are uninitialized. Reading +/// uninitialized memory is not just undefined behavior, it may even lead to leaking sensitive +/// information on the stack to userspace. +/// +/// The struct should also not hold kernel pointers, as kernel pointer addresses are also considered +/// sensitive. However, leaking kernel pointers is not considered undefined behavior by Rust, so +/// this is a correctness requirement, but not a safety requirement. +/// +/// # Safety +/// +/// Values of this type may not contain any uninitialized bytes. This type must not have interior +/// mutability. +pub unsafe trait AsBytes {} + +// SAFETY: Instances of the following types have no uninitialized portions. +unsafe impl AsBytes for u8 {} +unsafe impl AsBytes for u16 {} +unsafe impl AsBytes for u32 {} +unsafe impl AsBytes for u64 {} +unsafe impl AsBytes for usize {} +unsafe impl AsBytes for i8 {} +unsafe impl AsBytes for i16 {} +unsafe impl AsBytes for i32 {} +unsafe impl AsBytes for i64 {} +unsafe impl AsBytes for isize {} +unsafe impl AsBytes for bool {} +unsafe impl AsBytes for char {} +unsafe impl AsBytes for str {} +// SAFETY: If individual values in an array have no uninitialized portions, then the array itself +// does not have any uninitialized portions either. +unsafe impl<T: AsBytes> AsBytes for [T] {} +unsafe impl<T: AsBytes, const N: usize> AsBytes for [T; N] {} diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs new file mode 100644 index 000000000000..e9347cff99ab --- /dev/null +++ b/rust/kernel/uaccess.rs @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Slices to user space memory regions. +//! +//! C header: [`include/linux/uaccess.h`](srctree/include/linux/uaccess.h) + +use crate::{ + alloc::Flags, + bindings, + error::Result, + prelude::*, + types::{AsBytes, FromBytes}, +}; +use alloc::vec::Vec; +use core::ffi::{c_ulong, c_void}; +use core::mem::{size_of, MaybeUninit}; + +/// The type used for userspace addresses. +pub type UserPtr = usize; + +/// A pointer to an area in userspace memory, which can be either read-only or read-write. +/// +/// All methods on this struct are safe: attempting to read or write on bad addresses (either out of +/// the bound of the slice or unmapped addresses) will return [`EFAULT`]. Concurrent access, +/// *including data races to/from userspace memory*, is permitted, because fundamentally another +/// userspace thread/process could always be modifying memory at the same time (in the same way that +/// userspace Rust's [`std::io`] permits data races with the contents of files on disk). In the +/// presence of a race, the exact byte values read/written are unspecified but the operation is +/// well-defined. Kernelspace code should validate its copy of data after completing a read, and not +/// expect that multiple reads of the same address will return the same value. +/// +/// These APIs are designed to make it difficult to accidentally write TOCTOU (time-of-check to +/// time-of-use) bugs. Every time a memory location is read, the reader's position is advanced by +/// the read length and the next read will start from there. This helps prevent accidentally reading +/// the same location twice and causing a TOCTOU bug. +/// +/// Creating a [`UserSliceReader`] and/or [`UserSliceWriter`] consumes the `UserSlice`, helping +/// ensure that there aren't multiple readers or writers to the same location. +/// +/// If double-fetching a memory location is necessary for some reason, then that is done by creating +/// multiple readers to the same memory location, e.g. using [`clone_reader`]. +/// +/// # Examples +/// +/// Takes a region of userspace memory from the current process, and modify it by adding one to +/// every byte in the region. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::Result; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result<()> { +/// let (read, mut write) = UserSlice::new(uptr, len).reader_writer(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// for b in &mut buf { +/// *b = b.wrapping_add(1); +/// } +/// +/// write.write_slice(&buf)?; +/// Ok(()) +/// } +/// ``` +/// +/// Example illustrating a TOCTOU (time-of-check to time-of-use) bug. +/// +/// ```no_run +/// use alloc::vec::Vec; +/// use core::ffi::c_void; +/// use kernel::error::{code::EINVAL, Result}; +/// use kernel::uaccess::{UserPtr, UserSlice}; +/// +/// /// Returns whether the data in this region is valid. +/// fn is_valid(uptr: UserPtr, len: usize) -> Result<bool> { +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// todo!() +/// } +/// +/// /// Returns the bytes behind this user pointer if they are valid. +/// fn get_bytes_if_valid(uptr: UserPtr, len: usize) -> Result<Vec<u8>> { +/// if !is_valid(uptr, len)? { +/// return Err(EINVAL); +/// } +/// +/// let read = UserSlice::new(uptr, len).reader(); +/// +/// let mut buf = Vec::new(); +/// read.read_all(&mut buf, GFP_KERNEL)?; +/// +/// // THIS IS A BUG! The bytes could have changed since we checked them. +/// // +/// // To avoid this kind of bug, don't call `UserSlice::new` multiple +/// // times with the same address. +/// Ok(buf) +/// } +/// ``` +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +/// [`clone_reader`]: UserSliceReader::clone_reader +pub struct UserSlice { + ptr: UserPtr, + length: usize, +} + +impl UserSlice { + /// Constructs a user slice from a raw pointer and a length in bytes. + /// + /// Constructing a [`UserSlice`] performs no checks on the provided address and length, it can + /// safely be constructed inside a kernel thread with no current userspace process. Reads and + /// writes wrap the kernel APIs `copy_from_user` and `copy_to_user`, which check the memory map + /// of the current process and enforce that the address range is within the user range (no + /// additional calls to `access_ok` are needed). Validity of the pointer is checked when you + /// attempt to read or write, not in the call to `UserSlice::new`. + /// + /// Callers must be careful to avoid time-of-check-time-of-use (TOCTOU) issues. The simplest way + /// is to create a single instance of [`UserSlice`] per user memory block as it reads each byte + /// at most once. + pub fn new(ptr: UserPtr, length: usize) -> Self { + UserSlice { ptr, length } + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(self, buf: &mut Vec<u8>, flags: Flags) -> Result { + self.reader().read_all(buf, flags) + } + + /// Constructs a [`UserSliceReader`]. + pub fn reader(self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs a [`UserSliceWriter`]. + pub fn writer(self) -> UserSliceWriter { + UserSliceWriter { + ptr: self.ptr, + length: self.length, + } + } + + /// Constructs both a [`UserSliceReader`] and a [`UserSliceWriter`]. + /// + /// Usually when this is used, you will first read the data, and then overwrite it afterwards. + pub fn reader_writer(self) -> (UserSliceReader, UserSliceWriter) { + ( + UserSliceReader { + ptr: self.ptr, + length: self.length, + }, + UserSliceWriter { + ptr: self.ptr, + length: self.length, + }, + ) + } +} + +/// A reader for [`UserSlice`]. +/// +/// Used to incrementally read from the user slice. +pub struct UserSliceReader { + ptr: UserPtr, + length: usize, +} + +impl UserSliceReader { + /// Skip the provided number of bytes. + /// + /// Returns an error if skipping more than the length of the buffer. + pub fn skip(&mut self, num_skip: usize) -> Result { + // Update `self.length` first since that's the fallible part of this operation. + self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?; + self.ptr = self.ptr.wrapping_add(num_skip); + Ok(()) + } + + /// Create a reader that can access the same range of data. + /// + /// Reading from the clone does not advance the current reader. + /// + /// The caller should take care to not introduce TOCTOU issues, as described in the + /// documentation for [`UserSlice`]. + pub fn clone_reader(&self) -> UserSliceReader { + UserSliceReader { + ptr: self.ptr, + length: self.length, + } + } + + /// Returns the number of bytes left to be read from this reader. + /// + /// Note that even reading less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no data is available in the io buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// For a version that uses `&mut [u8]`, please see [`UserSliceReader::read_slice`]. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + /// + /// # Guarantees + /// + /// After a successful call to this method, all bytes in `out` are initialized. + pub fn read_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> Result { + let len = out.len(); + let out_ptr = out.as_mut_ptr().cast::<c_void>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `out_ptr` points into a mutable slice of length `len_ulong`, so we may write + // that many bytes to it. + let res = + unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Reads raw data from the user slice into a kernel buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error. + pub fn read_slice(&mut self, out: &mut [u8]) -> Result { + // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to + // `out`. + let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit<u8>]) }; + self.read_raw(out) + } + + /// Reads a value of the specified type. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of + /// bounds of this [`UserSliceReader`]. + pub fn read<T: FromBytes>(&mut self) -> Result<T> { + let len = size_of::<T>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); + // SAFETY: The local variable `out` is valid for writing `size_of::<T>()` bytes. + // + // By using the _copy_from_user variant, we skip the check_object_size check that verifies + // the kernel pointer. This mirrors the logic on the C side that skips the check when the + // length is a compile-time constant. + let res = unsafe { + bindings::_copy_from_user( + out.as_mut_ptr().cast::<c_void>(), + self.ptr as *const c_void, + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements + // `FromBytes`, any bit-pattern is a valid value for this type. + Ok(unsafe { out.assume_init() }) + } + + /// Reads the entirety of the user slice, appending it to the end of the provided buffer. + /// + /// Fails with [`EFAULT`] if the read happens on a bad address. + pub fn read_all(mut self, buf: &mut Vec<u8>, flags: Flags) -> Result { + let len = self.length; + VecExt::<u8>::reserve(buf, len, flags)?; + + // The call to `try_reserve` was successful, so the spare capacity is at least `len` bytes + // long. + self.read_raw(&mut buf.spare_capacity_mut()[..len])?; + + // SAFETY: Since the call to `read_raw` was successful, so the next `len` bytes of the + // vector have been initialized. + unsafe { buf.set_len(buf.len() + len) }; + Ok(()) + } +} + +/// A writer for [`UserSlice`]. +/// +/// Used to incrementally write into the user slice. +pub struct UserSliceWriter { + ptr: UserPtr, + length: usize, +} + +impl UserSliceWriter { + /// Returns the amount of space remaining in this buffer. + /// + /// Note that even writing less than this number of bytes may fail. + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if no more data can be written to this buffer. + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Writes raw data to this user pointer from a kernel buffer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write_slice(&mut self, data: &[u8]) -> Result { + let len = data.len(); + let data_ptr = data.as_ptr().cast::<c_void>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: `data_ptr` points into an immutable slice of length `len_ulong`, so we may read + // that many bytes from it. + let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len_ulong) }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } + + /// Writes the provided Rust value to this userspace pointer. + /// + /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of + /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even + /// if it returns an error. + pub fn write<T: AsBytes>(&mut self, value: &T) -> Result { + let len = size_of::<T>(); + if len > self.length { + return Err(EFAULT); + } + let Ok(len_ulong) = c_ulong::try_from(len) else { + return Err(EFAULT); + }; + // SAFETY: The reference points to a value of type `T`, so it is valid for reading + // `size_of::<T>()` bytes. + // + // By using the _copy_to_user variant, we skip the check_object_size check that verifies the + // kernel pointer. This mirrors the logic on the C side that skips the check when the length + // is a compile-time constant. + let res = unsafe { + bindings::_copy_to_user( + self.ptr as *mut c_void, + (value as *const T).cast::<c_void>(), + len_ulong, + ) + }; + if res != 0 { + return Err(EFAULT); + } + self.ptr = self.ptr.wrapping_add(len); + self.length -= len; + Ok(()) + } +} diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 1cec63a2aea8..553a5cba2adc 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -482,24 +482,26 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> { /// use kernel::sync::Arc; /// use kernel::workqueue::{self, impl_has_work, Work}; /// -/// struct MyStruct { -/// work_field: Work<MyStruct, 17>, +/// struct MyStruct<'a, T, const N: usize> { +/// work_field: Work<MyStruct<'a, T, N>, 17>, +/// f: fn(&'a [T; N]), /// } /// /// impl_has_work! { -/// impl HasWork<MyStruct, 17> for MyStruct { self.work_field } +/// impl{'a, T, const N: usize} HasWork<MyStruct<'a, T, N>, 17> +/// for MyStruct<'a, T, N> { self.work_field } /// } /// ``` #[macro_export] macro_rules! impl_has_work { - ($(impl$(<$($implarg:ident),*>)? + ($(impl$({$($generics:tt)*})? HasWork<$work_type:ty $(, $id:tt)?> - for $self:ident $(<$($selfarg:ident),*>)? + for $self:ty { self.$field:ident } )*) => {$( // SAFETY: The implementation of `raw_get_work` only compiles if the field has the right // type. - unsafe impl$(<$($implarg),*>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self $(<$($selfarg),*>)? { + unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self { const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; #[inline] @@ -515,7 +517,7 @@ macro_rules! impl_has_work { pub use impl_has_work; impl_has_work! { - impl<T> HasWork<Self> for ClosureWork<T> { self.work } + impl{T} HasWork<Self> for ClosureWork<T> { self.work } } unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Arc<T> diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 520eae5fd792..159e75292970 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -35,6 +35,7 @@ use proc_macro::TokenStream; /// author: "Rust for Linux Contributors", /// description: "My very own kernel module!", /// license: "GPL", +/// alias: ["alternate_module_name"], /// } /// /// struct MyModule; @@ -55,13 +56,45 @@ use proc_macro::TokenStream; /// } /// ``` /// +/// ## Firmware +/// +/// The following example shows how to declare a kernel module that needs +/// to load binary firmware files. You need to specify the file names of +/// the firmware in the `firmware` field. The information is embedded +/// in the `modinfo` section of the kernel module. For example, a tool to +/// build an initramfs uses this information to put the firmware files into +/// the initramfs image. +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyDeviceDriverModule, +/// name: "my_device_driver_module", +/// author: "Rust for Linux Contributors", +/// description: "My device driver requires firmware", +/// license: "GPL", +/// firmware: ["my_device_firmware1.bin", "my_device_firmware2.bin"], +/// } +/// +/// struct MyDeviceDriverModule; +/// +/// impl kernel::Module for MyDeviceDriverModule { +/// fn init() -> Result<Self> { +/// Ok(Self) +/// } +/// } +/// ``` +/// /// # Supported argument types /// - `type`: type which implements the [`Module`] trait (required). -/// - `name`: byte array of the name of the kernel module (required). -/// - `author`: byte array of the author of the kernel module. -/// - `description`: byte array of the description of the kernel module. -/// - `license`: byte array of the license of the kernel module (required). -/// - `alias`: byte array of alias name of the kernel module. +/// - `name`: ASCII string literal of the name of the kernel module (required). +/// - `author`: string literal of the author of the kernel module. +/// - `description`: string literal of the description of the kernel module. +/// - `license`: ASCII string literal of the license of the kernel module (required). +/// - `alias`: array of ASCII string literals of the alias names of the kernel module. +/// - `firmware`: array of ASCII string literals of the firmware files of +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) @@ -312,7 +345,7 @@ pub fn pinned_drop(args: TokenStream, input: TokenStream) -> TokenStream { /// /// Currently supported modifiers are: /// * `span`: change the span of concatenated identifier to the span of the specified token. By -/// default the span of the `[< >]` group is used. +/// default the span of the `[< >]` group is used. /// * `lower`: change the identifier to lower case. /// * `upper`: change the identifier to upper case. /// diff --git a/rust/macros/module.rs b/rust/macros/module.rs index acd0393b5095..411dc103d82e 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -97,14 +97,22 @@ struct ModuleInfo { author: Option<String>, description: Option<String>, alias: Option<Vec<String>>, + firmware: Option<Vec<String>>, } impl ModuleInfo { fn parse(it: &mut token_stream::IntoIter) -> Self { let mut info = ModuleInfo::default(); - const EXPECTED_KEYS: &[&str] = - &["type", "name", "author", "description", "license", "alias"]; + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "firmware", + ]; const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; let mut seen_keys = Vec::new(); @@ -131,6 +139,7 @@ impl ModuleInfo { "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), + "firmware" => info.firmware = Some(expect_string_array(it)), _ => panic!( "Unknown key \"{}\". Valid keys are: {:?}.", key, EXPECTED_KEYS @@ -186,6 +195,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { modinfo.emit("alias", &alias); } } + if let Some(firmware) = info.firmware { + for fw in firmware { + modinfo.emit("firmware", &fw); + } + } // Built-in modules also export the `file` modinfo string. let file = diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 0caad902ba40..80a00260e3e7 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -14,6 +14,7 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( clippy::all, + dead_code, missing_docs, non_camel_case_types, non_upper_case_globals, |