diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile         |   1
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_pseries.c    |  18
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-memory.c |  16
-rw-r--r-- | arch/powerpc/platforms/pseries/papr-sysparm.c   | 205
-rw-r--r-- | arch/powerpc/platforms/pseries/papr-vpd.c       | 541
-rw-r--r-- | arch/powerpc/platforms/pseries/pseries.h        |   1
-rw-r--r-- | arch/powerpc/platforms/pseries/suspend.c        |   1
-rw-r--r-- | arch/powerpc/platforms/pseries/vas.c            |  51
-rw-r--r-- | arch/powerpc/platforms/pseries/vas.h            |   2
9 files changed, 813 insertions, 23 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 1476c5e4433c..f936962a2946 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o rtas-work-area.o papr-sysparm.o \ + papr-vpd.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index def184da51cf..b1ae0c0d1187 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -252,7 +252,7 @@ static int pseries_eeh_cap_start(struct pci_dn *pdn) if (!pdn) return 0; - rtas_read_config(pdn, PCI_STATUS, 2, &status); + rtas_pci_dn_read_config(pdn, PCI_STATUS, 2, &status); if (!(status & PCI_STATUS_CAP_LIST)) return 0; @@ -270,11 +270,11 @@ static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap) return 0; while (cnt--) { - rtas_read_config(pdn, pos, 1, &pos); + rtas_pci_dn_read_config(pdn, pos, 1, &pos); if (pos < 0x40) break; pos &= ~3; - rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + rtas_pci_dn_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); if (id == 0xff) break; if (id == cap) @@ -294,7 +294,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) if (!edev || !edev->pcie_cap) return 0; - if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) return 0; else if (!header) return 0; @@ -307,7 +307,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) if (pos < 256) break; - if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) break; } @@ -412,8 +412,8 @@ static void 
pseries_eeh_init_edev(struct pci_dn *pdn) if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { - rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, - 2, &pcie_flags); + rtas_pci_dn_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) edev->mode |= EEH_DEV_ROOT_PORT; @@ -676,7 +676,7 @@ static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u3 { struct pci_dn *pdn = eeh_dev_to_pdn(edev); - return rtas_read_config(pdn, where, size, val); + return rtas_pci_dn_read_config(pdn, where, size, val); } /** @@ -692,7 +692,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u { struct pci_dn *pdn = eeh_dev_to_pdn(edev); - return rtas_write_config(pdn, where, size, val); + return rtas_pci_dn_write_config(pdn, where, size, val); } #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a43bfb01720a..3fe3ddb30c04 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -208,8 +208,10 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) int rc; mem_block = lmb_to_memblock(lmb); - if (!mem_block) + if (!mem_block) { + pr_err("Failed memory block lookup for LMB 0x%x\n", lmb->drc_index); return -EINVAL; + } if (online && mem_block->dev.offline) rc = device_online(&mem_block->dev); @@ -436,14 +438,15 @@ static int dlpar_memory_remove_by_index(u32 drc_index) } } - if (!lmb_found) + if (!lmb_found) { + pr_debug("Failed to look up LMB for drc index %x\n", drc_index); rc = -EINVAL; - - if (rc) + } else if (rc) { pr_debug("Failed to hot-remove memory at %llx\n", lmb->base_addr); - else + } else { pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); + } return rc; } @@ -575,6 +578,7 @@ static int 
dlpar_add_lmb(struct drmem_lmb *lmb) rc = update_lmb_associativity_index(lmb); if (rc) { dlpar_release_drc(lmb->drc_index); + pr_err("Failed to configure LMB 0x%x\n", lmb->drc_index); return rc; } @@ -588,12 +592,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) /* Add the memory */ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY); if (rc) { + pr_err("Failed to add LMB 0x%x to node %u", lmb->drc_index, nid); invalidate_lmb_associativity_index(lmb); return rc; } rc = dlpar_online_lmb(lmb); if (rc) { + pr_err("Failed to online LMB 0x%x on node %u\n", lmb->drc_index, nid); __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c index fedc61599e6c..7063ce8884e4 100644 --- a/arch/powerpc/platforms/pseries/papr-sysparm.c +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -2,14 +2,20 @@ #define pr_fmt(fmt) "papr-sysparm: " fmt +#include <linux/anon_inodes.h> #include <linux/bug.h> +#include <linux/file.h> +#include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/miscdevice.h> #include <linux/printk.h> #include <linux/slab.h> -#include <asm/rtas.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> #include <asm/papr-sysparm.h> #include <asm/rtas-work-area.h> +#include <asm/rtas.h> struct papr_sysparm_buf *papr_sysparm_buf_alloc(void) { @@ -23,6 +29,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) kfree(buf); } +static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf) +{ + return be16_to_cpu(buf->len); +} + +static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t length) +{ + WARN_ONCE(length > sizeof(buf->val), + "bogus length %zu, clamping to safe value", length); + length = min(sizeof(buf->val), length); + buf->len = cpu_to_be16(length); +} + +/* + * For use on buffers returned from ibm,get-system-parameter 
before + * returning them to callers. Ensures the encoded length of valid data + * cannot overrun buf->val[]. + */ +static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf) +{ + papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf)); +} + +/* + * Perform some basic diligence on the system parameter buffer before + * submitting it to RTAS. + */ +static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf) +{ + /* + * Firmware ought to reject buffer lengths that exceed the + * maximum specified in PAPR, but there's no reason for the + * kernel to allow them either. + */ + if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val)) + return false; + + return true; +} + /** * papr_sysparm_get() - Retrieve the value of a PAPR system parameter. * @param: PAPR system parameter token as described in @@ -47,7 +93,6 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) * * Return: 0 on success, -errno otherwise. @buf is unmodified on error. */ - int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) { const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER); @@ -63,6 +108,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); @@ -77,6 +125,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) case 0: ret = 0; memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf)); + papr_sysparm_buf_clamp_length(buf); break; case -3: /* parameter not implemented */ ret = -EOPNOTSUPP; @@ -115,6 +164,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); @@ -149,3 +201,152 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) return ret; } + +static struct papr_sysparm_buf * +papr_sysparm_buf_from_user(const struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + long err; + u16 len; + + /* + * The length of valid data that userspace claims to be in + * user_iob->data[]. + */ + if (get_user(len, &user_iob->length)) + return ERR_PTR(-EFAULT); + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_INPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_INPUT); + + if (len > PAPR_SYSPARM_MAX_INPUT) + return ERR_PTR(-EINVAL); + + kern_spbuf = papr_sysparm_buf_alloc(); + if (!kern_spbuf) + return ERR_PTR(-ENOMEM); + + papr_sysparm_buf_set_length(kern_spbuf, len); + + if (len > 0 && copy_from_user(kern_spbuf->val, user_iob->data, len)) { + err = -EFAULT; + goto free_sysparm_buf; + } + + return kern_spbuf; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ERR_PTR(err); +} + +static int papr_sysparm_buf_to_user(const struct papr_sysparm_buf *kern_spbuf, + struct papr_sysparm_io_block __user *user_iob) +{ + u16 len_out = papr_sysparm_buf_get_length(kern_spbuf); + + if (put_user(len_out, &user_iob->length)) + return -EFAULT; + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_OUTPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_OUTPUT); + + if (copy_to_user(user_iob->data, kern_spbuf->val, PAPR_SYSPARM_MAX_OUTPUT)) + return -EFAULT; + + return 0; +} + +static long papr_sysparm_ioctl_get(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_get(param, kern_spbuf); + if 
(ret) + goto free_sysparm_buf; + + ret = papr_sysparm_buf_to_user(kern_spbuf, user_iob); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + + +static long papr_sysparm_ioctl_set(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_set(param, kern_spbuf); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + +static long papr_sysparm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_SYSPARM_IOC_GET: + ret = papr_sysparm_ioctl_get(argp); + break; + case PAPR_SYSPARM_IOC_SET: + if (filp->f_mode & FMODE_WRITE) + ret = papr_sysparm_ioctl_set(argp); + else + ret = -EBADF; + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_sysparm_ops = { + .unlocked_ioctl = papr_sysparm_ioctl, +}; + +static struct miscdevice papr_sysparm_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-sysparm", + .fops = &papr_sysparm_ops, +}; + +static __init int papr_sysparm_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER)) + return -ENODEV; + + return misc_register(&papr_sysparm_dev); +} +machine_device_initcall(pseries, papr_sysparm_init); diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c new file mode 100644 index 000000000000..c29e85db5f35 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-vpd.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-vpd: " fmt + +#include 
<linux/anon_inodes.h> +#include <linux/build_bug.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/lockdep.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/string_helpers.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> +#include <asm/papr-vpd.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> +#include <uapi/asm/papr-vpd.h> + +/* + * Function-specific return values for ibm,get-vpd, derived from PAPR+ + * v2.13 7.3.20 "ibm,get-vpd RTAS Call". + */ +#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */ +#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */ +#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */ + +/** + * struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd. + * @loc_code: In: Caller-provided location code buffer. Must be RTAS-addressable. + * @work_area: In: Caller-provided work area buffer for results. + * @sequence: In: Sequence number. Out: Next sequence number. + * @written: Out: Bytes written by ibm,get-vpd to @work_area. + * @status: Out: RTAS call status. + */ +struct rtas_ibm_get_vpd_params { + const struct papr_location_code *loc_code; + struct rtas_work_area *work_area; + u32 sequence; + u32 written; + s32 status; +}; + +/** + * rtas_ibm_get_vpd() - Call ibm,get-vpd to fill a work area buffer. + * @params: See &struct rtas_ibm_get_vpd_params. + * + * Calls ibm,get-vpd until it errors or successfully deposits data + * into the supplied work area. Handles RTAS retry statuses. Maps RTAS + * error statuses to reasonable errno values. + * + * The caller is expected to invoke rtas_ibm_get_vpd() multiple times + * to retrieve all the VPD for the provided location code. Only one + * sequence should be in progress at any time; starting a new sequence + * will disrupt any sequence already in progress. 
Serialization of VPD + * retrieval sequences is the responsibility of the caller. + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 otherwise. + */ +static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params) +{ + const struct papr_location_code *loc_code = params->loc_code; + struct rtas_work_area *work_area = params->work_area; + u32 rets[2]; + s32 fwrc; + int ret; + + lockdep_assert_held(&rtas_ibm_get_vpd_lock); + + do { + fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_GET_VPD), 4, 3, + rets, + __pa(loc_code), + rtas_work_area_phys(work_area), + rtas_work_area_size(work_area), + params->sequence); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case RTAS_HARDWARE_ERROR: + ret = -EIO; + break; + case RTAS_INVALID_PARAMETER: + ret = -EINVAL; + break; + case RTAS_IBM_GET_VPD_START_OVER: + ret = -EAGAIN; + break; + case RTAS_IBM_GET_VPD_MORE_DATA: + params->sequence = rets[0]; + fallthrough; + case RTAS_IBM_GET_VPD_COMPLETE: + params->written = rets[1]; + /* + * Kernel or firmware bug, do not continue. + */ + if (WARN(params->written > rtas_work_area_size(work_area), + "possible write beyond end of work area")) + ret = -EFAULT; + else + ret = 0; + break; + default: + ret = -EIO; + pr_err_ratelimited("unexpected ibm,get-vpd status %d\n", fwrc); + break; + } + + params->status = fwrc; + return ret; +} + +/* + * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into + * an immutable buffer to be attached to a file descriptor. + */ +struct vpd_blob { + const char *data; + size_t len; +}; + +static bool vpd_blob_has_data(const struct vpd_blob *blob) +{ + return blob->data && blob->len; +} + +static void vpd_blob_free(const struct vpd_blob *blob) +{ + if (blob) { + kvfree(blob->data); + kfree(blob); + } +} + +/** + * vpd_blob_extend() - Append data to a &struct vpd_blob. + * @blob: The blob to extend. 
+ * @data: The new data to append to @blob. + * @len: The length of @data. + * + * Context: May sleep. + * Return: -ENOMEM on allocation failure, 0 otherwise. + */ +static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len) +{ + const size_t new_len = blob->len + len; + const size_t old_len = blob->len; + const char *old_ptr = blob->data; + char *new_ptr; + + new_ptr = old_ptr ? + kvrealloc(old_ptr, old_len, new_len, GFP_KERNEL_ACCOUNT) : + kvmalloc(len, GFP_KERNEL_ACCOUNT); + + if (!new_ptr) + return -ENOMEM; + + memcpy(&new_ptr[old_len], data, len); + blob->data = new_ptr; + blob->len = new_len; + return 0; +} + +/** + * vpd_blob_generate() - Construct a new &struct vpd_blob. + * @generator: Function that supplies the blob data. + * @arg: Context pointer supplied by caller, passed to @generator. + * + * The @generator callback is invoked until it returns NULL. @arg is + * passed to @generator in its first argument on each call. When + * @generator returns data, it should store the data length in its + * second argument. + * + * Context: May sleep. + * Return: A completely populated &struct vpd_blob, or NULL on error. + */ +static const struct vpd_blob * +vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg) +{ + struct vpd_blob *blob; + const char *buf; + size_t len; + int err = 0; + + blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT); + if (!blob) + return NULL; + + while (err == 0 && (buf = generator(arg, &len))) + err = vpd_blob_extend(blob, buf, len); + + if (err != 0 || !vpd_blob_has_data(blob)) + goto free_blob; + + return blob; +free_blob: + vpd_blob_free(blob); + return NULL; +} + +/* + * Internal VPD sequence APIs. A VPD sequence is a series of calls to + * ibm,get-vpd for a given location code. The sequence ends when an + * error is encountered or all VPD for the location code has been + * returned. + */ + +/** + * struct vpd_sequence - State for managing a VPD sequence. 
+ * @error: Shall be zero as long as the sequence has not encountered an error, + * -ve errno otherwise. Use vpd_sequence_set_err() to update this. + * @params: Parameter block to pass to rtas_ibm_get_vpd(). + */ +struct vpd_sequence { + int error; + struct rtas_ibm_get_vpd_params params; +}; + +/** + * vpd_sequence_begin() - Begin a VPD retrieval sequence. + * @seq: Uninitialized sequence state. + * @loc_code: Location code that defines the scope of the VPD to return. + * + * Initializes @seq with the resources necessary to carry out a VPD + * sequence. Callers must pass @seq to vpd_sequence_end() regardless + * of whether the sequence succeeds. + * + * Context: May sleep. + */ +static void vpd_sequence_begin(struct vpd_sequence *seq, + const struct papr_location_code *loc_code) +{ + /* + * Use a static data structure for the location code passed to + * RTAS to ensure it's in the RMA and avoid a separate work + * area allocation. Guarded by the function lock. + */ + static struct papr_location_code static_loc_code; + + /* + * We could allocate the work area before acquiring the + * function lock, but that would allow concurrent requests to + * exhaust the limited work area pool for no benefit. So + * allocate the work area under the lock. + */ + mutex_lock(&rtas_ibm_get_vpd_lock); + static_loc_code = *loc_code; + *seq = (struct vpd_sequence) { + .params = { + .work_area = rtas_work_area_alloc(SZ_4K), + .loc_code = &static_loc_code, + .sequence = 1, + }, + }; +} + +/** + * vpd_sequence_end() - Finalize a VPD retrieval sequence. + * @seq: Sequence state. + * + * Releases resources obtained by vpd_sequence_begin(). + */ +static void vpd_sequence_end(struct vpd_sequence *seq) +{ + rtas_work_area_free(seq->params.work_area); + mutex_unlock(&rtas_ibm_get_vpd_lock); +} + +/** + * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence + * should continue. + * @seq: VPD sequence state. 
+ * + * Examines the sequence error state and outputs of the last call to + * ibm,get-vpd to determine whether the sequence in progress should + * continue or stop. + * + * Return: True if the sequence has encountered an error or if all VPD for + * this sequence has been retrieved. False otherwise. + */ +static bool vpd_sequence_should_stop(const struct vpd_sequence *seq) +{ + bool done; + + if (seq->error) + return true; + + switch (seq->params.status) { + case 0: + if (seq->params.written == 0) + done = false; /* Initial state. */ + else + done = true; /* All data consumed. */ + break; + case 1: + done = false; /* More data available. */ + break; + default: + done = true; /* Error encountered. */ + break; + } + + return done; +} + +static int vpd_sequence_set_err(struct vpd_sequence *seq, int err) +{ + /* Preserve the first error recorded. */ + if (seq->error == 0) + seq->error = err; + + return seq->error; +} + +/* + * Generator function to be passed to vpd_blob_generate(). + */ +static const char *vpd_sequence_fill_work_area(void *arg, size_t *len) +{ + struct vpd_sequence *seq = arg; + struct rtas_ibm_get_vpd_params *p = &seq->params; + + if (vpd_sequence_should_stop(seq)) + return NULL; + if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p))) + return NULL; + *len = p->written; + return rtas_work_area_raw_buf(p->work_area); +} + +/* + * Higher-level VPD retrieval code below. These functions use the + * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based + * VPD handles for consumption by user space. + */ + +/** + * papr_vpd_run_sequence() - Run a single VPD retrieval sequence. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Context: May sleep. Holds a mutex and an RTAS work area for its + * duration. Typically performs multiple sleepable slab + * allocations. + * + * Return: A populated &struct vpd_blob on success. Encoded error + * pointer otherwise. 
+ */ +static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + struct vpd_sequence seq; + + vpd_sequence_begin(&seq, loc_code); + blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq); + if (!blob) + vpd_sequence_set_err(&seq, -ENOMEM); + vpd_sequence_end(&seq); + + if (seq.error) { + vpd_blob_free(blob); + return ERR_PTR(seq.error); + } + + return blob; +} + +/** + * papr_vpd_retrieve() - Return the VPD for a location code. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Run VPD sequences against @loc_code until a blob is successfully + * instantiated, or a hard error is encountered, or a fatal signal is + * pending. + * + * Context: May sleep. + * Return: A fully populated VPD blob when successful. Encoded error + * pointer otherwise. + */ +static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + + /* + * EAGAIN means the sequence errored with a -4 (VPD changed) + * status from ibm,get-vpd, and we should attempt a new + * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this + * should be a transient condition, not something that happens + * continuously. But we'll stop trying on a fatal signal. + */ + do { + blob = papr_vpd_run_sequence(loc_code); + if (!IS_ERR(blob)) /* Success. */ + break; + if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */ + break; + pr_info_ratelimited("VPD changed during retrieval, retrying\n"); + cond_resched(); + } while (!fatal_signal_pending(current)); + + return blob; +} + +static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off) +{ + const struct vpd_blob *blob = file->private_data; + + /* bug: we should not instantiate a handle without any data attached. 
*/ + if (!vpd_blob_has_data(blob)) { + pr_err_once("handle without data\n"); + return -EIO; + } + + return simple_read_from_buffer(buf, size, off, blob->data, blob->len); +} + +static int papr_vpd_handle_release(struct inode *inode, struct file *file) +{ + const struct vpd_blob *blob = file->private_data; + + vpd_blob_free(blob); + + return 0; +} + +static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence) +{ + const struct vpd_blob *blob = file->private_data; + + return fixed_size_llseek(file, off, whence, blob->len); +} + + +static const struct file_operations papr_vpd_handle_ops = { + .read = papr_vpd_handle_read, + .llseek = papr_vpd_handle_seek, + .release = papr_vpd_handle_release, +}; + +/** + * papr_vpd_create_handle() - Create a fd-based handle for reading VPD. + * @ulc: Location code in user memory; defines the scope of the VPD to + * retrieve. + * + * Handler for PAPR_VPD_IOC_CREATE_HANDLE ioctl command. Validates + * @ulc and instantiates an immutable VPD "blob" for it. The blob is + * attached to a file descriptor for reading by user space. The memory + * backing the blob is freed when the file is released. + * + * The entire requested VPD is retrieved by this call and all + * necessary RTAS interactions are performed before returning the fd + * to user space. This keeps the read handler simple and ensures that + * the kernel can prevent interleaving of ibm,get-vpd call sequences. + * + * Return: The installed fd number if successful, -ve errno otherwise. 
+ */ +static long papr_vpd_create_handle(struct papr_location_code __user *ulc) +{ + struct papr_location_code klc; + const struct vpd_blob *blob; + struct file *file; + long err; + int fd; + + if (copy_from_user(&klc, ulc, sizeof(klc))) + return -EFAULT; + + if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str))) + return -EINVAL; + + blob = papr_vpd_retrieve(&klc); + if (IS_ERR(blob)) + return PTR_ERR(blob); + + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = fd; + goto free_blob; + } + + file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops, + (void *)blob, O_RDONLY); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_fd; + } + + file->f_mode |= FMODE_LSEEK | FMODE_PREAD; + fd_install(fd, file); + return fd; +put_fd: + put_unused_fd(fd); +free_blob: + vpd_blob_free(blob); + return err; +} + +/* + * Top-level ioctl handler for /dev/papr-vpd. + */ +static long papr_vpd_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_VPD_IOC_CREATE_HANDLE: + ret = papr_vpd_create_handle(argp); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_vpd_ops = { + .unlocked_ioctl = papr_vpd_dev_ioctl, +}; + +static struct miscdevice papr_vpd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-vpd", + .fops = &papr_vpd_ops, +}; + +static __init int papr_vpd_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_VPD)) + return -ENODEV; + + return misc_register(&papr_vpd_dev); +} +machine_device_initcall(pseries, papr_vpd_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 8376f03f932a..bba4ad192b0f 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 
drc_index); extern int dlpar_release_drc(u32 drc_index); extern int dlpar_unisolate_drc(u32 drc_index); +extern void post_mobility_fixup(void); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 5c43435472cc..382003dfdb9a 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,6 +13,7 @@ #include <asm/mmu.h> #include <asm/rtas.h> #include <asm/topology.h> +#include "pseries.h" static struct device suspend_dev; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So it is + * possible that migration_in_progress is set before the modify + * HCALL, which may leave some windows still open when the + * hypervisor initiates the migration. + * So check the migration_in_progress flag again and close all + * open windows. 
+ * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. 
+ */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ |