Diffstat (limited to 'arch/s390/kernel/uv.c')
-rw-r--r-- | arch/s390/kernel/uv.c | 207
1 file changed, 139 insertions, 68 deletions
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 265fea37e030..fa62fa0e369f 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(uv_pin_shared);
  *
  * @paddr: Absolute host address of page to be destroyed
  */
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
 {
 	struct uv_cb_cfs uvcb = {
 		.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -131,28 +131,40 @@ static int uv_destroy_page(unsigned long paddr)
 }
 
 /*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
  */
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
 {
-	struct page *page = phys_to_page(paddr);
 	int rc;
 
-	get_page(page);
-	rc = uv_destroy_page(paddr);
+	/* See gmap_make_secure(): large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
+		return 0;
+
+	folio_get(folio);
+	rc = uv_destroy(folio_to_phys(folio));
 	if (!rc)
-		clear_bit(PG_arch_1, &page->flags);
-	put_page(page);
+		clear_bit(PG_arch_1, &folio->flags);
+	folio_put(folio);
 	return rc;
 }
 
 /*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+	VM_WARN_ON(!pte_present(pte));
+	return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/*
  * Requests the Ultravisor to encrypt a guest page and make it
  * accessible to the host for paging (export).
  *
  * @paddr: Absolute host address of page to be exported
  */
-int uv_convert_from_secure(unsigned long paddr)
+static int uv_convert_from_secure(unsigned long paddr)
 {
 	struct uv_cb_cfs uvcb = {
 		.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -166,22 +178,34 @@ int uv_convert_from_secure(unsigned long paddr)
 }
 
 /*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
  */
-int uv_convert_owned_from_secure(unsigned long paddr)
+static int uv_convert_from_secure_folio(struct folio *folio)
 {
-	struct page *page = phys_to_page(paddr);
 	int rc;
 
-	get_page(page);
-	rc = uv_convert_from_secure(paddr);
+	/* See gmap_make_secure(): large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
+		return 0;
+
+	folio_get(folio);
+	rc = uv_convert_from_secure(folio_to_phys(folio));
 	if (!rc)
-		clear_bit(PG_arch_1, &page->flags);
-	put_page(page);
+		clear_bit(PG_arch_1, &folio->flags);
+	folio_put(folio);
 	return rc;
 }
 
 /*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+	VM_WARN_ON(!pte_present(pte));
+	return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/*
  * Calculate the expected ref_count for a folio that would otherwise have no
  * further pins. This was cribbed from similar functions in other places in
  * the kernel, but with some slight modifications. We know that a secure
@@ -267,6 +291,36 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
 }
 
 /*
+ * Drain LRU caches: the local one on first invocation and the ones of all
+ * CPUs on successive invocations. Returns "true" on the first invocation.
+ */
+static bool drain_lru(bool *drain_lru_called)
+{
+	/*
+	 * If we have tried a local drain and the folio refcount
+	 * still does not match our expected safe value, try with a
+	 * system wide drain. This is needed if the pagevecs holding
+	 * the page are on a different CPU.
+	 */
+	if (*drain_lru_called) {
+		lru_add_drain_all();
+		/* We give up here, don't retry immediately. */
+		return false;
+	}
+	/*
+	 * We are here if the folio refcount does not match the
+	 * expected safe value. The main culprits are usually
+	 * pagevecs. With lru_add_drain() we drain the pagevecs
+	 * on the local CPU so that hopefully the refcount will
+	 * reach the expected safe value.
+	 */
+	lru_add_drain();
+	*drain_lru_called = true;
+	/* The caller should try again immediately */
+	return true;
+}
+
+/*
  * Requests the Ultravisor to make a page accessible to a guest.
  * If it's brought in the first time, it will be cleared. If
  * it has been exported before, it will be decrypted and integrity
@@ -275,7 +329,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
 int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
 {
 	struct vm_area_struct *vma;
-	bool local_drain = false;
+	bool drain_lru_called = false;
 	spinlock_t *ptelock;
 	unsigned long uaddr;
 	struct folio *folio;
@@ -308,52 +362,63 @@ again:
 		goto out;
 	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
 		folio = page_folio(pte_page(*ptep));
-		rc = -EINVAL;
-		if (folio_test_large(folio))
-			goto unlock;
 		rc = -EAGAIN;
-		if (folio_trylock(folio)) {
+		if (folio_test_large(folio)) {
+			rc = -E2BIG;
+		} else if (folio_trylock(folio)) {
 			if (should_export_before_import(uvcb, gmap->mm))
 				uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
 			rc = make_folio_secure(folio, uvcb);
 			folio_unlock(folio);
 		}
+
+		/*
+		 * Once we drop the PTL, the folio may get unmapped and
+		 * freed immediately. We need a temporary reference.
+		 */
+		if (rc == -EAGAIN || rc == -E2BIG)
+			folio_get(folio);
 	}
-unlock:
 	pte_unmap_unlock(ptep, ptelock);
 out:
 	mmap_read_unlock(gmap->mm);
 
-	if (rc == -EAGAIN) {
+	switch (rc) {
+	case -E2BIG:
+		folio_lock(folio);
+		rc = split_folio(folio);
+		folio_unlock(folio);
+		folio_put(folio);
+
+		switch (rc) {
+		case 0:
+			/* Splitting succeeded, try again immediately. */
+			goto again;
+		case -EAGAIN:
+			/* Additional folio references. */
+			if (drain_lru(&drain_lru_called))
+				goto again;
+			return -EAGAIN;
+		case -EBUSY:
+			/* Unexpected race. */
+			return -EAGAIN;
		}
+		WARN_ON_ONCE(1);
+		return -ENXIO;
+	case -EAGAIN:
 		/*
 		 * If we are here because the UVC returned busy or partial
 		 * completion, this is just a useless check, but it is safe.
 		 */
 		folio_wait_writeback(folio);
-	} else if (rc == -EBUSY) {
-		/*
-		 * If we have tried a local drain and the folio refcount
-		 * still does not match our expected safe value, try with a
-		 * system wide drain. This is needed if the pagevecs holding
-		 * the page are on a different CPU.
-		 */
-		if (local_drain) {
-			lru_add_drain_all();
-			/* We give up here, and let the caller try again */
-			return -EAGAIN;
-		}
-		/*
-		 * We are here if the folio refcount does not match the
-		 * expected safe value. The main culprits are usually
-		 * pagevecs. With lru_add_drain() we drain the pagevecs
-		 * on the local CPU so that hopefully the refcount will
-		 * reach the expected safe value.
-		 */
-		lru_add_drain();
-		local_drain = true;
-		/* And now we try again immediately after draining */
-		goto again;
-	} else if (rc == -ENXIO) {
+		folio_put(folio);
+		return -EAGAIN;
+	case -EBUSY:
+		/* Additional folio references. */
+		if (drain_lru(&drain_lru_called))
+			goto again;
+		return -EAGAIN;
+	case -ENXIO:
 		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
 			return -EFAULT;
 		return -EAGAIN;
@@ -388,6 +453,7 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 {
 	struct vm_area_struct *vma;
 	unsigned long uaddr;
+	struct folio *folio;
 	struct page *page;
 	int rc;
 
@@ -411,7 +477,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 	page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
 	if (IS_ERR_OR_NULL(page))
 		goto out;
-	rc = uv_destroy_owned_page(page_to_phys(page));
+	folio = page_folio(page);
+	rc = uv_destroy_folio(folio);
 	/*
 	 * Fault handlers can race; it is possible that two CPUs will fault
 	 * on the same secure page. One CPU can destroy the page, reboot,
@@ -422,8 +489,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
 	 * we instead try to export the page.
 	 */
 	if (rc)
-		rc = uv_convert_owned_from_secure(page_to_phys(page));
-	put_page(page);
+		rc = uv_convert_from_secure_folio(folio);
+	folio_put(folio);
 out:
 	mmap_read_unlock(gmap->mm);
 	return rc;
@@ -431,47 +498,51 @@ out:
 EXPORT_SYMBOL_GPL(gmap_destroy_page);
 
 /*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the folio concurrently. Having 2
+ * parallel arch_make_folio_accessible is fine, as the UV calls will become a
+ * no-op if the folio is already exported.
  */
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
 {
 	int rc = 0;
 
-	/* Hugepage cannot be protected, so nothing to do */
-	if (PageHuge(page))
+	/* See gmap_make_secure(): large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
 		return 0;
 
 	/*
-	 * PG_arch_1 is used in 3 places:
-	 * 1. for kernel page tables during early boot
-	 * 2. for storage keys of huge pages and KVM
-	 * 3. As an indication that this page might be secure. This can
+	 * PG_arch_1 is used in 2 places:
+	 * 1. for storage keys of hugetlb folios and KVM
+	 * 2. As an indication that this small folio might be secure. This can
 	 *    overindicate, e.g. we set the bit before calling
 	 *    convert_to_secure.
-	 * As secure pages are never huge, all 3 variants can co-exists.
+	 * As secure pages are never large folios, both variants can co-exists.
 	 */
-	if (!test_bit(PG_arch_1, &page->flags))
+	if (!test_bit(PG_arch_1, &folio->flags))
 		return 0;
 
-	rc = uv_pin_shared(page_to_phys(page));
+	rc = uv_pin_shared(folio_to_phys(folio));
 	if (!rc) {
-		clear_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_arch_1, &folio->flags);
 		return 0;
 	}
 
-	rc = uv_convert_from_secure(page_to_phys(page));
+	rc = uv_convert_from_secure(folio_to_phys(folio));
 	if (!rc) {
-		clear_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_arch_1, &folio->flags);
 		return 0;
 	}
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
 
+int arch_make_page_accessible(struct page *page)
+{
+	return arch_make_folio_accessible(page_folio(page));
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
 #endif
 
 #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
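A minimal caller-side sketch, not part of this diff, of the return-code contract gmap_make_secure() keeps after the change: -EAGAIN means the import should simply be retried (the folio was busy, under writeback, or has just been split and the LRU caches drained), while -EFAULT and other errors are final. The helper name import_one_page() and the cond_resched()/signal handling are illustrative assumptions, not code from arch/s390/kernel/uv.c.

/*
 * Illustrative only: retry gmap_make_secure() for as long as it asks for a
 * retry, giving writeback, LRU draining and other CPUs a chance to progress.
 */
static int import_one_page(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	int rc;

	do {
		rc = gmap_make_secure(gmap, gaddr, uvcb);
		if (rc != -EAGAIN)
			break;
		cond_resched();
	} while (!fatal_signal_pending(current));

	return rc;
}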