diff options
Diffstat (limited to 'include')
150 files changed, 3974 insertions, 786 deletions
diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 940d5ec122c9..b1bc954eccf3 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -6,6 +6,7 @@ #include <linux/scatterlist.h> #include <linux/dma-debug.h> #include <linux/dma-attrs.h> +#include <asm-generic/dma-coherent.h> static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, @@ -237,4 +238,121 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL) +#ifndef arch_dma_alloc_attrs +#define arch_dma_alloc_attrs(dev, flag) (true) +#endif + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; + + BUG_ON(!ops); + + if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + return cpu_addr; + + if (!arch_dma_alloc_attrs(&dev, &flag)) + return NULL; + if (!ops->alloc) + return NULL; + + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!ops); + WARN_ON(irqs_disabled()); + + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + + if (!ops->free) + return; + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free(dev, size, cpu_addr, dma_handle, attrs); +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + return dma_alloc_attrs(dev, size, dma_handle, flag, NULL); +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_attrs(dev, size, cpu_addr, dma_handle, NULL); +} + +static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); + return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs); +} + +static inline void dma_free_noncoherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); + dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + debug_dma_mapping_error(dev, dma_addr); + + if (get_dma_ops(dev)->mapping_error) + return get_dma_ops(dev)->mapping_error(dev, dma_addr); + +#ifdef DMA_ERROR_CODE + return dma_addr == DMA_ERROR_CODE; +#else + return 0; +#endif +} + +#ifndef HAVE_ARCH_DMA_SUPPORTED +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops) + return 0; + if (!ops->dma_supported) + return 1; + return ops->dma_supported(dev, mask); +} +#endif + +#ifndef HAVE_ARCH_DMA_SET_MASK +static inline int dma_set_mask(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->set_dma_mask) + return ops->set_dma_mask(dev, mask); + + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + *dev->dma_mask = mask; + return 0; +} +#endif + #endif diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index a5de55c04fb2..734ad4db388c 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -11,6 +11,8 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr, unsigned long size); extern void *early_memremap(resource_size_t phys_addr, unsigned long size); +extern void *early_memremap_ro(resource_size_t phys_addr, + unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void early_memunmap(void *addr, unsigned long size); @@ -33,6 +35,12 @@ extern void early_ioremap_setup(void); */ extern void early_ioremap_reset(void); +/* + * Early copy from unmapped memory to kernel mapped memory. + */ +extern void copy_from_early_mem(void *dest, phys_addr_t src, + unsigned long size); + #else static inline void early_ioremap_init(void) { } static inline void early_ioremap_setup(void) { } diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index f23174fb9ec4..1cbb8338edf3 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -46,6 +46,9 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #ifndef FIXMAP_PAGE_NORMAL #define FIXMAP_PAGE_NORMAL PAGE_KERNEL #endif +#if !defined(FIXMAP_PAGE_RO) && defined(PAGE_KERNEL_RO) +#define FIXMAP_PAGE_RO PAGE_KERNEL_RO +#endif #ifndef FIXMAP_PAGE_NOCACHE #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NOCACHE #endif diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 14909b0b9cae..f20f407ce45d 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -69,6 +69,12 @@ }) #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */ +/* + * Convert a physical address to a Page Frame Number and back + */ +#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) +#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) + #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index fa86f240c874..4e3b6558331e 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -16,6 +16,9 @@ #include <linux/rtc.h> #include <linux/bcd.h> #include <linux/delay.h> +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#endif #define RTC_PIE 0x40 /* periodic interrupt enable */ #define RTC_AIE 0x20 /* alarm interrupt enable */ @@ -46,6 +49,7 @@ static inline unsigned int __get_rtc_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; + unsigned char century = 0; #ifdef CONFIG_MACH_DECSTATION unsigned int real_year; @@ -79,6 +83,11 @@ static inline unsigned int __get_rtc_time(struct rtc_time *time) #ifdef CONFIG_MACH_DECSTATION real_year = CMOS_READ(RTC_DEC_YEAR); #endif +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + century = CMOS_READ(acpi_gbl_FADT.century); +#endif ctrl = CMOS_READ(RTC_CONTROL); spin_unlock_irqrestore(&rtc_lock, flags); @@ -90,12 +99,16 @@ static inline unsigned int __get_rtc_time(struct rtc_time *time) time->tm_mday = bcd2bin(time->tm_mday); time->tm_mon = bcd2bin(time->tm_mon); time->tm_year = bcd2bin(time->tm_year); + century = bcd2bin(century); } #ifdef CONFIG_MACH_DECSTATION time->tm_year += real_year - 72; #endif + if (century) + time->tm_year += (century - 19) * 100; + /* * Account for differences between how the RTC uses the values * and how they are defined in a struct rtc_time; @@ -122,6 +135,7 @@ static inline int __set_rtc_time(struct rtc_time *time) #ifdef CONFIG_MACH_DECSTATION unsigned int real_yrs, leap_yr; #endif + unsigned char century = 0; yrs = time->tm_year; mon = time->tm_mon + 1; /* tm_mon starts at zero */ @@ -150,6 +164,15 @@ static inline int __set_rtc_time(struct rtc_time *time) yrs = 73; } #endif + +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) { + century = (yrs + 1900) / 100; + yrs %= 100; + } +#endif + /* These limits and adjustments are independent of * whether the chip is in binary mode or not. */ @@ -169,6 +192,7 @@ static inline int __set_rtc_time(struct rtc_time *time) day = bin2bcd(day); mon = bin2bcd(mon); yrs = bin2bcd(yrs); + century = bin2bcd(century); } save_control = CMOS_READ(RTC_CONTROL); @@ -185,6 +209,11 @@ static inline int __set_rtc_time(struct rtc_time *time) CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); +#ifdef CONFIG_ACPI + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && + acpi_gbl_FADT.century) + CMOS_WRITE(century, acpi_gbl_FADT.century); +#endif CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8bd374d3cf21..1781e54ea6d3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -412,12 +412,10 @@ * during second ld run in second ld pass when generating System.map */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot) \ - *(.text .text.fixup) \ + *(.text.hot .text .text.fixup .text.unlikely) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ - *(.text.unlikely) /* sched.text is aling to function alignment to secure we have same diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h index 691c79172a26..441aff9b5aa7 100644 --- a/include/crypto/pkcs7.h +++ b/include/crypto/pkcs7.h @@ -9,6 +9,11 @@ * 2 of the Licence, or (at your option) any later version. */ +#ifndef _CRYPTO_PKCS7_H +#define _CRYPTO_PKCS7_H + +#include <crypto/public_key.h> + struct key; struct pkcs7_message; @@ -33,4 +38,10 @@ extern int pkcs7_validate_trust(struct pkcs7_message *pkcs7, /* * pkcs7_verify.c */ -extern int pkcs7_verify(struct pkcs7_message *pkcs7); +extern int pkcs7_verify(struct pkcs7_message *pkcs7, + enum key_being_used_for usage); + +extern int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7, + const void *data, size_t datalen); + +#endif /* _CRYPTO_PKCS7_H */ diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index 54add2069901..067c242b1e15 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -33,12 +33,27 @@ extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST]; enum pkey_id_type { PKEY_ID_PGP, /* OpenPGP generated key ID */ PKEY_ID_X509, /* X.509 arbitrary subjectKeyIdentifier */ + PKEY_ID_PKCS7, /* Signature in PKCS#7 message */ PKEY_ID_TYPE__LAST }; extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST]; /* + * The use to which an asymmetric key is being put. + */ +enum key_being_used_for { + VERIFYING_MODULE_SIGNATURE, + VERIFYING_FIRMWARE_SIGNATURE, + VERIFYING_KEXEC_PE_SIGNATURE, + VERIFYING_KEY_SIGNATURE, + VERIFYING_KEY_SELF_SIGNATURE, + VERIFYING_UNSPECIFIED_SIGNATURE, + NR__KEY_BEING_USED_FOR +}; +extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; + +/* * Cryptographic data for the public-key subtype of the asymmetric key type. * * Note that this may include private part of the key as well as the public @@ -101,7 +116,8 @@ extern int verify_signature(const struct key *key, struct asymmetric_key_id; extern struct key *x509_request_asymmetric_key(struct key *keyring, - const struct asymmetric_key_id *kid, + const struct asymmetric_key_id *id, + const struct asymmetric_key_id *skid, bool partial); #endif /* _LINUX_PUBLIC_KEY_H */ diff --git a/include/dt-bindings/i2c/i2c.h b/include/dt-bindings/i2c/i2c.h new file mode 100644 index 000000000000..1d5da81d90f1 --- /dev/null +++ b/include/dt-bindings/i2c/i2c.h @@ -0,0 +1,18 @@ +/* + * This header provides constants for I2C bindings + * + * Copyright (C) 2015 by Sang Engineering + * Copyright (C) 2015 by Renesas Electronics Corporation + * + * Wolfram Sang <wsa@sang-engineering.com> + * + * GPLv2 only + */ + +#ifndef _DT_BINDINGS_I2C_I2C_H +#define _DT_BINDINGS_I2C_I2C_H + +#define I2C_TEN_BIT_ADDRESS (1 << 31) +#define I2C_OWN_SLAVE_ADDRESS (1 << 30) + +#endif diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 72665eb80692..b20cd885c1fd 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -15,6 +15,7 @@ #ifdef CONFIG_SYSTEM_TRUSTED_KEYRING #include <linux/key.h> +#include <crypto/public_key.h> extern struct key *system_trusted_keyring; static inline struct key *get_system_trusted_keyring(void) @@ -28,4 +29,10 @@ static inline struct key *get_system_trusted_keyring(void) } #endif +#ifdef CONFIG_SYSTEM_DATA_VERIFICATION +extern int system_verify_data(const void *data, unsigned long len, + const void *raw_pkcs7, size_t pkcs7_len, + enum key_being_used_for usage); +#endif + #endif /* _KEYS_SYSTEM_KEYRING_H */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e5966758c093..e1e4d7c38dda 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -52,13 +52,16 @@ struct arch_timer_cpu { /* Timer IRQ */ const struct kvm_irq_level *irq; + + /* VGIC mapping */ + struct irq_phys_map *map; }; int kvm_timer_hyp_init(void); void kvm_timer_enable(struct kvm *kvm); void kvm_timer_init(struct kvm *kvm); -void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq); +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 133ea00aa83b..d901f1a47be6 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -95,11 +95,15 @@ enum vgic_type { #define LR_STATE_ACTIVE (1 << 1) #define LR_STATE_MASK (3 << 0) #define LR_EOI_INT (1 << 2) +#define LR_HW (1 << 3) struct vgic_lr { - u16 irq; - u8 source; - u8 state; + unsigned irq:10; + union { + unsigned hwirq:10; + unsigned source:3; + }; + unsigned state:4; }; struct vgic_vmcr { @@ -155,6 +159,19 @@ struct vgic_io_device { struct kvm_io_device dev; }; +struct irq_phys_map { + u32 virt_irq; + u32 phys_irq; + u32 irq; + bool active; +}; + +struct irq_phys_map_entry { + struct list_head entry; + struct rcu_head rcu; + struct irq_phys_map map; +}; + struct vgic_dist { spinlock_t lock; bool in_kernel; @@ -252,6 +269,10 @@ struct vgic_dist { struct vgic_vm_ops vm_ops; struct vgic_io_device dist_iodev; struct vgic_io_device *redist_iodevs; + + /* Virtual irq to hwirq mapping */ + spinlock_t irq_phys_map_lock; + struct list_head irq_phys_map_list; }; struct vgic_v2_cpu_if { @@ -303,6 +324,9 @@ struct vgic_cpu { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; + + /* Protected by the distributor's irq_phys_map_lock */ + struct list_head irq_phys_map_list; }; #define LR_EMPTY 0xff @@ -317,16 +341,25 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_get_max_vcpus(void); +void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, + struct irq_phys_map *map, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); +struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, + int virt_irq, int irq); +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); +bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); +void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 6a0a89ed7f81..0ddb5c02ad8b 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -33,14 +33,12 @@ #define UART01x_DR 0x00 /* Data read or written from the interface. */ #define UART01x_RSR 0x04 /* Receive status register (Read). */ #define UART01x_ECR 0x04 /* Error clear register (Write). */ -#define ZX_UART01x_DR 0x04 /* Data read or written from the interface. */ #define UART010_LCRH 0x08 /* Line control register, high byte. */ #define ST_UART011_DMAWM 0x08 /* DMA watermark configure register. */ #define UART010_LCRM 0x0C /* Line control register, middle byte. */ #define ST_UART011_TIMEOUT 0x0C /* Timeout period register. */ #define UART010_LCRL 0x10 /* Line control register, low byte. */ #define UART010_CR 0x14 /* Control register. */ -#define ZX_UART01x_FR 0x14 /* Flag register (Read only). */ #define UART01x_FR 0x18 /* Flag register (Read only). */ #define UART010_IIR 0x1C /* Interrupt identification register (Read). */ #define UART010_ICR 0x1C /* Interrupt clear register (Write). */ @@ -51,21 +49,13 @@ #define UART011_LCRH 0x2c /* Line control register. */ #define ST_UART011_LCRH_TX 0x2c /* Tx Line control register. */ #define UART011_CR 0x30 /* Control register. */ -#define ZX_UART011_LCRH_TX 0x30 /* Tx Line control register. */ #define UART011_IFLS 0x34 /* Interrupt fifo level select. */ -#define ZX_UART011_CR 0x34 /* Control register. */ -#define ZX_UART011_IFLS 0x38 /* Interrupt fifo level select. */ #define UART011_IMSC 0x38 /* Interrupt mask. */ #define UART011_RIS 0x3c /* Raw interrupt status. */ #define UART011_MIS 0x40 /* Masked interrupt status. */ -#define ZX_UART011_IMSC 0x40 /* Interrupt mask. */ #define UART011_ICR 0x44 /* Interrupt clear register. */ -#define ZX_UART011_RIS 0x44 /* Raw interrupt status. */ #define UART011_DMACR 0x48 /* DMA control register. */ -#define ZX_UART011_MIS 0x48 /* Masked interrupt status. */ -#define ZX_UART011_ICR 0x4c /* Interrupt clear register. */ #define ST_UART011_XFCR 0x50 /* XON/XOFF control register. */ -#define ZX_UART011_DMACR 0x50 /* DMA control register. */ #define ST_UART011_XON1 0x54 /* XON1 register. */ #define ST_UART011_XON2 0x58 /* XON2 register. */ #define ST_UART011_XOFF1 0x5C /* XON1 register. */ @@ -85,19 +75,15 @@ #define UART01x_RSR_PE 0x02 #define UART01x_RSR_FE 0x01 -#define ZX_UART01x_FR_BUSY 0x300 #define UART011_FR_RI 0x100 #define UART011_FR_TXFE 0x080 #define UART011_FR_RXFF 0x040 #define UART01x_FR_TXFF 0x020 #define UART01x_FR_RXFE 0x010 #define UART01x_FR_BUSY 0x008 -#define ZX_UART01x_FR_DSR 0x008 #define UART01x_FR_DCD 0x004 #define UART01x_FR_DSR 0x002 -#define ZX_UART01x_FR_CTS 0x002 #define UART01x_FR_CTS 0x001 -#define ZX_UART011_FR_RI 0x001 #define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) #define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ diff --git a/include/linux/asn1_ber_bytecode.h b/include/linux/asn1_ber_bytecode.h index 945d44ae529c..ab3a6c002f7b 100644 --- a/include/linux/asn1_ber_bytecode.h +++ b/include/linux/asn1_ber_bytecode.h @@ -45,23 +45,27 @@ enum asn1_opcode { ASN1_OP_MATCH_JUMP = 0x04, ASN1_OP_MATCH_JUMP_OR_SKIP = 0x05, ASN1_OP_MATCH_ANY = 0x08, + ASN1_OP_MATCH_ANY_OR_SKIP = 0x09, ASN1_OP_MATCH_ANY_ACT = 0x0a, + ASN1_OP_MATCH_ANY_ACT_OR_SKIP = 0x0b, /* Everything before here matches unconditionally */ ASN1_OP_COND_MATCH_OR_SKIP = 0x11, ASN1_OP_COND_MATCH_ACT_OR_SKIP = 0x13, ASN1_OP_COND_MATCH_JUMP_OR_SKIP = 0x15, ASN1_OP_COND_MATCH_ANY = 0x18, + ASN1_OP_COND_MATCH_ANY_OR_SKIP = 0x19, ASN1_OP_COND_MATCH_ANY_ACT = 0x1a, + ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP = 0x1b, /* Everything before here will want a tag from the data */ -#define ASN1_OP__MATCHES_TAG ASN1_OP_COND_MATCH_ANY_ACT +#define ASN1_OP__MATCHES_TAG ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP /* These are here to help fill up space */ - ASN1_OP_COND_FAIL = 0x1b, - ASN1_OP_COMPLETE = 0x1c, - ASN1_OP_ACT = 0x1d, - ASN1_OP_RETURN = 0x1e, + ASN1_OP_COND_FAIL = 0x1c, + ASN1_OP_COMPLETE = 0x1d, + ASN1_OP_ACT = 0x1e, + ASN1_OP_MAYBE_ACT = 0x1f, /* The following eight have bit 0 -> SET, 1 -> OF, 2 -> ACT */ ASN1_OP_END_SEQ = 0x20, @@ -76,6 +80,8 @@ enum asn1_opcode { #define ASN1_OP_END__OF 0x02 #define ASN1_OP_END__ACT 0x04 + ASN1_OP_RETURN = 0x28, + ASN1_OP__NR }; diff --git a/include/linux/audit.h b/include/linux/audit.h index c2e7e3a83965..b2abc996c25d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -27,6 +27,9 @@ #include <linux/ptrace.h> #include <uapi/linux/audit.h> +#define AUDIT_INO_UNSET ((unsigned long)-1) +#define AUDIT_DEV_UNSET ((dev_t)-1) + struct audit_sig_info { uid_t uid; pid_t pid; @@ -59,6 +62,7 @@ struct audit_krule { struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ + struct audit_fsnotify_mark *exe; struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0fe9df983ab7..5a5d79ee256f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -286,7 +286,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi * %current's blkcg equals the effective blkcg of its memcg. No * need to use the relatively expensive cgroup_get_e_css(). */ - if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id))) + if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id))) return wb; return NULL; } @@ -402,7 +402,7 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) } struct wb_iter { - int start_blkcg_id; + int start_memcg_id; struct radix_tree_iter tree_iter; void **slot; }; @@ -414,9 +414,9 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, WARN_ON_ONCE(!rcu_read_lock_held()); - if (iter->start_blkcg_id >= 0) { - iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); - iter->start_blkcg_id = -1; + if (iter->start_memcg_id >= 0) { + iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id); + iter->start_memcg_id = -1; } else { iter->slot = radix_tree_next_slot(iter->slot, titer, 0); } @@ -430,30 +430,30 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, struct backing_dev_info *bdi, - int start_blkcg_id) + int start_memcg_id) { - iter->start_blkcg_id = start_blkcg_id; + iter->start_memcg_id = start_memcg_id; - if (start_blkcg_id) + if (start_memcg_id) return __wb_iter_next(iter, bdi); else return &bdi->wb; } /** - * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order + * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order * @wb_cur: cursor struct bdi_writeback pointer * @bdi: bdi to walk wb's of * @iter: pointer to struct wb_iter to be used as iteration buffer - * @start_blkcg_id: blkcg ID to start iteration from + * @start_memcg_id: memcg ID to start iteration from * * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending - * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter + * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter * to be used as temp storage during iteration. rcu_read_lock() must be * held throughout iteration. */ -#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ - for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ +#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \ + for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \ (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a4cd1641e9e2..0a5cc7a1109b 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -14,12 +14,15 @@ */ #include <linux/cgroup.h> -#include <linux/u64_stats_sync.h> +#include <linux/percpu_counter.h> #include <linux/seq_file.h> #include <linux/radix-tree.h> #include <linux/blkdev.h> #include <linux/atomic.h> +/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ +#define BLKG_STAT_CPU_BATCH (INT_MAX / 2) + /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX @@ -45,7 +48,7 @@ struct blkcg { struct blkcg_gq *blkg_hint; struct hlist_head blkg_list; - struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK @@ -53,14 +56,19 @@ struct blkcg { #endif }; +/* + * blkg_[rw]stat->aux_cnt is excluded for local stats but included for + * recursive. Used to carry stats of dead children, and, for blkg_rwstat, + * to carry result values from read and sum operations. + */ struct blkg_stat { - struct u64_stats_sync syncp; - uint64_t cnt; + struct percpu_counter cpu_cnt; + atomic64_t aux_cnt; }; struct blkg_rwstat { - struct u64_stats_sync syncp; - uint64_t cnt[BLKG_RWSTAT_NR]; + struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; + atomic64_t aux_cnt[BLKG_RWSTAT_NR]; }; /* @@ -68,32 +76,28 @@ struct blkg_rwstat { * request_queue (q). This is used by blkcg policies which need to track * information per blkcg - q pair. * - * There can be multiple active blkcg policies and each has its private - * data on each blkg, the size of which is determined by - * blkcg_policy->pd_size. blkcg core allocates and frees such areas - * together with blkg and invokes pd_init/exit_fn() methods. - * - * Such private data must embed struct blkg_policy_data (pd) at the - * beginning and pd_size can't be smaller than pd. + * There can be multiple active blkcg policies and each blkg:policy pair is + * represented by a blkg_policy_data which is allocated and freed by each + * policy's pd_alloc/free_fn() methods. A policy can allocate private data + * area by allocating larger data structure which embeds blkg_policy_data + * at the beginning. */ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; - - /* used during policy activation */ - struct list_head alloc_node; }; /* - * Policies that need to keep per-blkcg data which is independent - * from any request_queue associated to it must specify its size - * with the cpd_size field of the blkcg_policy structure and - * embed a blkcg_policy_data in it. cpd_init() is invoked to let - * each policy handle per-blkcg data. + * Policies that need to keep per-blkcg data which is independent from any + * request_queue associated to it should implement cpd_alloc/free_fn() + * methods. A policy can allocate private data area by allocating larger + * data structure which embeds blkcg_policy_data at the beginning. + * cpd_init() is invoked to let each policy handle per-blkcg data. */ struct blkcg_policy_data { - /* the policy id this per-policy data belongs to */ + /* the blkcg and policy id this per-policy data belongs to */ + struct blkcg *blkcg; int plid; }; @@ -123,40 +127,50 @@ struct blkcg_gq { /* is this blkg online? protected by both blkcg and q locks */ bool online; + struct blkg_rwstat stat_bytes; + struct blkg_rwstat stat_ios; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; }; -typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); -typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); -typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); -typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); -typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); -typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); +typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); +typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); +typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); +typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); +typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node); +typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); +typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); struct blkcg_policy { int plid; - /* policy specific private data size */ - size_t pd_size; - /* policy specific per-blkcg data size */ - size_t cpd_size; /* cgroup files for the policy */ - struct cftype *cftypes; + struct cftype *dfl_cftypes; + struct cftype *legacy_cftypes; /* operations */ + blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; blkcg_pol_init_cpd_fn *cpd_init_fn; + blkcg_pol_free_cpd_fn *cpd_free_fn; + blkcg_pol_bind_cpd_fn *cpd_bind_fn; + + blkcg_pol_alloc_pd_fn *pd_alloc_fn; blkcg_pol_init_pd_fn *pd_init_fn; blkcg_pol_online_pd_fn *pd_online_fn; blkcg_pol_offline_pd_fn *pd_offline_fn; - blkcg_pol_exit_pd_fn *pd_exit_fn; + blkcg_pol_free_pd_fn *pd_free_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; -struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); +struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, + struct request_queue *q, bool update_hint); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -171,6 +185,7 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); +const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int), @@ -182,19 +197,24 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +int blkg_print_stat_bytes(struct seq_file *sf, void *v); +int blkg_print_stat_ios(struct seq_file *sf, void *v); +int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); +int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); -u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, - int off); +u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, + struct blkcg_policy *pol, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, + struct blkcg_policy *pol, int off); struct blkg_conf_ctx { struct gendisk *disk; struct blkcg_gq *blkg; - u64 v; + char *body; }; int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - const char *input, struct blkg_conf_ctx *ctx); + char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); @@ -205,7 +225,7 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) static inline struct blkcg *task_blkcg(struct task_struct *tsk) { - return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); + return css_to_blkcg(task_css(tsk, io_cgrp_id)); } static inline struct blkcg *bio_blkcg(struct bio *bio) @@ -218,7 +238,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) static inline struct cgroup_subsys_state * task_get_blkcg_css(struct task_struct *task) { - return task_get_css(task, blkio_cgrp_id); + return task_get_css(task, io_cgrp_id); } /** @@ -233,6 +253,52 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) } /** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. + */ +static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q, + bool update_hint) +{ + struct blkcg_gq *blkg; + + if (blkcg == &blkcg_root) + return q->root_blkg; + + blkg = rcu_dereference(blkcg->blkg_hint); + if (blkg && blkg->q == q) + return blkg; + + return blkg_lookup_slowpath(blkcg, q, update_hint); +} + +/** + * blkg_lookup - lookup blkg for the specified blkcg - q pair + * @blkcg: blkcg of interest + * @q: request_queue of interest + * + * Lookup blkg for the @blkcg - @q pair. This function should be called + * under RCU read lock and is guaranteed to return %NULL if @q is bypassing + * - see blk_queue_bypass_start() for details. + */ +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, + struct request_queue *q) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (unlikely(blk_queue_bypass(q))) + return NULL; + return __blkg_lookup(blkcg, q, false); +} + +/** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest @@ -248,7 +314,7 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, struct blkcg_policy *pol) { - return blkcg ? blkcg->pd[pol->plid] : NULL; + return blkcg ? blkcg->cpd[pol->plid] : NULL; } /** @@ -262,6 +328,11 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) return pd ? pd->blkg : NULL; } +static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) +{ + return cpd ? cpd->blkcg : NULL; +} + /** * blkg_path - format cgroup path of blkg * @blkg: blkg of interest @@ -309,9 +380,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) call_rcu(&blkg->rcu_head, __blkg_release_rcu); } -struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, - bool update_hint); - /** * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants * @d_blkg: loop cursor pointing to the current descendant @@ -373,8 +441,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, * or if either the blkcg or queue is going away. Fall back to * root_rl in such cases. */ - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) goto root_rl; blkg_get(blkg); @@ -394,8 +462,7 @@ root_rl: */ static inline void blk_put_rl(struct request_list *rl) { - /* root_rl may not have blkg set */ - if (rl->blkg && rl->blkg->blkcg != &blkcg_root) + if (rl->blkg->blkcg != &blkcg_root) blkg_put(rl->blkg); } @@ -433,9 +500,21 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl, #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) -static inline void blkg_stat_init(struct blkg_stat *stat) +static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) { - u64_stats_init(&stat->syncp); + int ret; + + ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); + if (ret) + return ret; + + atomic64_set(&stat->aux_cnt, 0); + return 0; +} + +static inline void blkg_stat_exit(struct blkg_stat *stat) +{ + percpu_counter_destroy(&stat->cpu_cnt); } /** @@ -443,34 +522,21 @@ static inline void blkg_stat_init(struct blkg_stat *stat) * @stat: target blkg_stat * @val: value to add * - * Add @val to @stat. The caller is responsible for synchronizing calls to - * this function. + * Add @val to @stat. The caller must ensure that IRQ on the same CPU + * don't re-enter this function for the same counter. */ static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) { - u64_stats_update_begin(&stat->syncp); - stat->cnt += val; - u64_stats_update_end(&stat->syncp); + __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** * blkg_stat_read - read the current value of a blkg_stat * @stat: blkg_stat to read - * - * Read the current value of @stat. This function can be called without - * synchroniztion and takes care of u64 atomicity. */ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) { - unsigned int start; - uint64_t v; - - do { - start = u64_stats_fetch_begin_irq(&stat->syncp); - v = stat->cnt; - } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); - - return v; + return percpu_counter_sum_positive(&stat->cpu_cnt); } /** @@ -479,24 +545,46 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) */ static inline void blkg_stat_reset(struct blkg_stat *stat) { - stat->cnt = 0; + percpu_counter_set(&stat->cpu_cnt, 0); + atomic64_set(&stat->aux_cnt, 0); } /** - * blkg_stat_merge - merge a blkg_stat into another + * blkg_stat_add_aux - add a blkg_stat into another's aux count * @to: the destination blkg_stat * @from: the source * - * Add @from's count to @to. + * Add @from's count including the aux one to @to's aux count. */ -static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +static inline void blkg_stat_add_aux(struct blkg_stat *to, + struct blkg_stat *from) { - blkg_stat_add(to, blkg_stat_read(from)); + atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt), + &to->aux_cnt); } -static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) +static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) { - u64_stats_init(&rwstat->syncp); + int i, ret; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) { + ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp); + if (ret) { + while (--i >= 0) + percpu_counter_destroy(&rwstat->cpu_cnt[i]); + return ret; + } + atomic64_set(&rwstat->aux_cnt[i], 0); + } + return 0; +} + +static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) +{ + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + percpu_counter_destroy(&rwstat->cpu_cnt[i]); } /** @@ -511,39 +599,38 @@ static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, int rw, uint64_t val) { - u64_stats_update_begin(&rwstat->syncp); + struct percpu_counter *cnt; if (rw & REQ_WRITE) - rwstat->cnt[BLKG_RWSTAT_WRITE] += val; + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else - rwstat->cnt[BLKG_RWSTAT_READ] += val; + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; + + __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); + if (rw & REQ_SYNC) - rwstat->cnt[BLKG_RWSTAT_SYNC] += val; + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else - rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; + cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; - u64_stats_update_end(&rwstat->syncp); + __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); } /** * blkg_rwstat_read - read the current values of a blkg_rwstat * @rwstat: blkg_rwstat to read * - * Read the current snapshot of @rwstat and return it as the return value. - * This function can be called without synchronization and takes care of - * u64 atomicity. + * Read the current snapshot of @rwstat and return it in the aux counts. */ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) { - unsigned int start; - struct blkg_rwstat tmp; - - do { - start = u64_stats_fetch_begin_irq(&rwstat->syncp); - tmp = *rwstat; - } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); + struct blkg_rwstat result; + int i; - return tmp; + for (i = 0; i < BLKG_RWSTAT_NR; i++) + atomic64_set(&result.aux_cnt[i], + percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); + return result; } /** @@ -558,7 +645,8 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); - return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; + return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); } /** @@ -567,26 +655,71 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) */ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) { - memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); + int i; + + for (i = 0; i < BLKG_RWSTAT_NR; i++) { + percpu_counter_set(&rwstat->cpu_cnt[i], 0); + atomic64_set(&rwstat->aux_cnt[i], 0); + } } /** - * blkg_rwstat_merge - merge a blkg_rwstat into another + * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count * @to: the destination blkg_rwstat * @from: the source * - * Add @from's counts to @to. + * Add @from's count including the aux one to @to's aux count. */ -static inline void blkg_rwstat_merge(struct blkg_rwstat *to, - struct blkg_rwstat *from) +static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, + struct blkg_rwstat *from) { struct blkg_rwstat v = blkg_rwstat_read(from); int i; - u64_stats_update_begin(&to->syncp); for (i = 0; i < BLKG_RWSTAT_NR; i++) - to->cnt[i] += v.cnt[i]; - u64_stats_update_end(&to->syncp); + atomic64_add(atomic64_read(&v.aux_cnt[i]) + + atomic64_read(&from->aux_cnt[i]), + &to->aux_cnt[i]); +} + +#ifdef CONFIG_BLK_DEV_THROTTLING +extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, + struct bio *bio); +#else +static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, + struct bio *bio) { return false; } +#endif + +static inline bool blkcg_bio_issue_check(struct request_queue *q, + struct bio *bio) +{ + struct blkcg *blkcg; + struct blkcg_gq *blkg; + bool throtl = false; + + rcu_read_lock(); + blkcg = bio_blkcg(bio); + + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) { + spin_lock_irq(q->queue_lock); + blkg = blkg_lookup_create(blkcg, q); + if (IS_ERR(blkg)) + blkg = NULL; + spin_unlock_irq(q->queue_lock); + } + + throtl = blk_throtl_bio(q, blkg, bio); + + if (!throtl) { + blkg = blkg ?: q->root_blkg; + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags, + bio->bi_iter.bi_size); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1); + } + + rcu_read_unlock(); + return !throtl; } #else /* CONFIG_BLK_CGROUP */ @@ -642,6 +775,9 @@ static inline void blk_put_rl(struct request_list *rl) { } static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } +static inline bool blkcg_bio_issue_check(struct request_queue *q, + struct bio *bio) { return true; } + #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a622f270f09e..38a5ff772a37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -584,7 +584,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) +#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1)) /* * Driver can handle struct request, if it either has an old style @@ -1569,8 +1569,8 @@ struct block_device_operations { int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, - void **, unsigned long *pfn, long size); + long (*direct_access)(struct block_device *, sector_t, void __pmem **, + unsigned long *pfn); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1588,8 +1588,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, sector_t, void **addr, - unsigned long *pfn, long size); +extern long bdev_direct_access(struct block_device *, sector_t, + void __pmem **addr, unsigned long *pfn, long size); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -46,6 +46,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long monc_ping_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -66,6 +67,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -248,6 +248,8 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ + struct timespec last_keepalive_ack; + struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ }; @@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); extern void ceph_con_keepalive(struct ceph_connection *con); +extern bool ceph_con_keepalive_expired(struct ceph_connection *con, + unsigned long interval); extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, size_t length, size_t alignment); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -84,10 +84,12 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_MSG 7 /* message */ #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ -#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ +#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ +#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ /* diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 1f36945fd23d..1a96fdaa33d5 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -27,7 +27,7 @@ SUBSYS(cpuacct) #endif #if IS_ENABLED(CONFIG_BLK_CGROUP) -SUBSYS(blkio) +SUBSYS(io) #endif #if IS_ENABLED(CONFIG_MEMCG) diff --git a/include/linux/dax.h b/include/linux/dax.h new file mode 100644 index 000000000000..b415e521528d --- /dev/null +++ b/include/linux/dax.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_DAX_H +#define _LINUX_DAX_H + +#include <linux/fs.h> +#include <linux/mm.h> +#include <asm/pgtable.h> + +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); +int dax_clear_blocks(struct inode *, sector_t block, long size); +int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); +int dax_truncate_page(struct inode *, loff_t from, get_block_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned int flags, get_block_t, dax_iodone_t); +int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned int flags, get_block_t, dax_iodone_t); +#else +static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags, get_block_t gb, + dax_iodone_t di) +{ + return VM_FAULT_FALLBACK; +} +#define __dax_pmd_fault dax_pmd_fault +#endif +int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) + +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); +} +#endif diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 420311bcee38..9beb636b97eb 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -116,6 +116,12 @@ struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, bool debugfs_initialized(void); +ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos); + +ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos); + #else #include <linux/err.h> @@ -282,6 +288,20 @@ static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, return ERR_PTR(-ENODEV); } +static inline ssize_t debugfs_read_file_bool(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif #endif diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index e1043f79122f..53ba737505df 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -24,6 +24,12 @@ void dma_pool_destroy(struct dma_pool *pool); void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle); +static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, + dma_addr_t *handle) +{ + return dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle); +} + void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr); /* diff --git a/include/linux/fb.h b/include/linux/fb.h index 043f3283b71c..bc9afa74ee11 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -788,7 +788,7 @@ struct dmt_videomode { extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; -extern const struct fb_videomode cea_modes[64]; +extern const struct fb_videomode cea_modes[65]; extern const struct dmt_videomode dmt_modes[]; struct fb_modelist { diff --git a/include/linux/fs.h b/include/linux/fs.h index b2f9b9c25e41..72d8a844c692 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -52,7 +52,6 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct vm_fault; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2678,19 +2677,6 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, - get_block_t, dio_iodone_t, int flags); -int dax_clear_blocks(struct inode *, sector_t block, long size); -int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); -int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, - dax_iodone_t); -int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) -#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) - #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ad35f300b9a4..f92cbd2f4450 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -63,7 +63,10 @@ struct vm_area_struct; * but it is definitely preferable to use the flag rather than opencode endless * loop around allocator. * - * __GFP_NORETRY: The VM implementation must not retry indefinitely. + * __GFP_NORETRY: The VM implementation must not retry indefinitely and will + * return NULL when direct reclaim and memory compaction have failed to allow + * the allocation to succeed. The OOM killer is not called with the current + * implementation. * * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed @@ -300,22 +303,31 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); } -static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order) +/* + * Allocate pages, preferring the node given as nid. The node must be valid and + * online. For more general interface, see alloc_pages_node(). + */ +static inline struct page * +__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - /* Unknown node is current node */ - if (nid < 0) - nid = numa_node_id(); + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON(!node_online(nid)); return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } -static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask, +/* + * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, + * prefer the current CPU's closest node. Otherwise node must be valid and + * online. + */ +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)); + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); + return __alloc_pages_node(nid, gfp_mask, order); } #ifdef CONFIG_NUMA @@ -354,7 +366,6 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); -/* This is different from alloc_pages_exact_node !!! */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f10b20f05159..ecb080d6ff42 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -33,6 +33,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); +int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, + unsigned long pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, @@ -122,7 +124,7 @@ extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, #endif extern int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice); -extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, +extern void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); @@ -138,15 +140,6 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, else return 0; } -static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - long adjust_next) -{ - if (!vma->anon_vma || vma->vm_ops) - return; - __vma_adjust_trans_huge(vma, start, end, adjust_next); -} static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -164,6 +157,13 @@ static inline bool is_huge_zero_page(struct page *page) return ACCESS_ONCE(huge_zero_page) == page; } +static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return is_huge_zero_page(pmd_page(pmd)); +} + +struct page *get_huge_zero_page(void); + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d891f949466a..5e35379f58a5 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -35,6 +35,9 @@ struct resv_map { struct kref refs; spinlock_t lock; struct list_head regions; + long adds_in_progress; + struct list_head region_cache; + long region_cache_count; }; extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); @@ -80,11 +83,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); -void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); +long hugetlb_unreserve_pages(struct inode *inode, long start, long end, + long freed); int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); +void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve); +extern struct mutex *hugetlb_fault_mutex_table; +u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, + struct vm_area_struct *vma, + struct address_space *mapping, + pgoff_t idx, unsigned long address); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -320,9 +330,13 @@ struct huge_bootmem_page { #endif }; +struct page *alloc_huge_page(struct vm_area_struct *vma, + unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); +int huge_add_to_page_cache(struct page *page, struct address_space *mapping, + pgoff_t idx); /* arch callback */ int __init alloc_bootmem_huge_page(struct hstate *h); @@ -471,6 +485,7 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +#define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e83a738a3b87..768063baafbf 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -121,6 +121,9 @@ extern s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command, u8 length, const u8 *values); +extern s32 +i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, + u8 command, u8 length, u8 *values); #endif /* I2C */ /** @@ -550,11 +553,12 @@ void i2c_lock_adapter(struct i2c_adapter *); void i2c_unlock_adapter(struct i2c_adapter *); /*flags for the client struct: */ -#define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ -#define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ +#define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ +#define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ -#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ -#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ +#define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ +#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ +#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ /* i2c adapter classes (bitmask) */ @@ -638,6 +642,8 @@ extern struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); /* must call put_device() when done with returned i2c_adapter device */ extern struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node); +/* must call i2c_put_adapter() when done with returned i2c_adapter device */ +struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node); #else static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) @@ -649,6 +655,11 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node { return NULL; } + +static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) +{ + return NULL; +} #endif /* CONFIG_OF */ #endif /* _LINUX_I2C_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d9a366d24e3b..6240063bdcac 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -344,7 +344,7 @@ struct intel_iommu { #ifdef CONFIG_INTEL_IOMMU unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ + struct dmar_domain ***domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index c27dde7215b5..e399029b68c5 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -21,7 +21,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/bug.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/page.h> /* diff --git a/include/linux/io.h b/include/linux/io.h index fb5a99800e77..de64c1e53612 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -20,10 +20,13 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/bug.h> +#include <linux/err.h> #include <asm/io.h> #include <asm/page.h> struct device; +struct resource; __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); @@ -80,6 +83,27 @@ int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); void devm_ioremap_release(struct device *dev, void *res); +void *devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); +void devm_memunmap(struct device *dev, void *addr); + +void *__devm_memremap_pages(struct device *dev, struct resource *res); + +#ifdef CONFIG_ZONE_DEVICE +void *devm_memremap_pages(struct device *dev, struct resource *res); +#else +static inline void *devm_memremap_pages(struct device *dev, struct resource *res) +{ + /* + * Fail attempts to call devm_memremap_pages() without + * ZONE_DEVICE support enabled, this requires callers to fall + * back to plain devm_memremap() based on config + */ + WARN_ON_ONCE(1); + return ERR_PTR(-ENXIO); +} +#endif + /* * Some systems do not have legacy ISA devices. * /dev/port is not a valid interface on these systems. @@ -121,4 +145,13 @@ static inline int arch_phys_wc_index(int handle) #endif #endif +enum { + /* See memremap() kernel-doc for usage description... */ + MEMREMAP_WB = 1 << 0, + MEMREMAP_WT = 1 << 1, +}; + +void *memremap(resource_size_t offset, size_t size, unsigned long flags); +void memunmap(void *addr); + #endif /* _LINUX_IO_H */ diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 0b1e569f5ff5..f8cea14485dd 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -115,6 +115,11 @@ struct ipmi_smi_handlers { implement it. */ void (*set_need_watch)(void *send_info, bool enable); + /* + * Called when flushing all pending messages. + */ + void (*flush_messages)(void *send_info); + /* Called when the interface should go into "run to completion" mode. If this call sets the value to true, the interface should make sure that all messages are flushed @@ -207,7 +212,7 @@ static inline int ipmi_demangle_device_id(const unsigned char *data, upper layer until the start_processing() function in the handlers is called, and the lower layer must get the interface from that call. */ -int ipmi_register_smi(struct ipmi_smi_handlers *handlers, +int ipmi_register_smi(const struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *dev, diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index bf982e021fbd..9eeeb9589acf 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -104,6 +104,8 @@ #define GICR_SYNCR 0x00C0 #define GICR_MOVLPIR 0x0100 #define GICR_MOVALLR 0x0110 +#define GICR_ISACTIVER GICD_ISACTIVER +#define GICR_ICACTIVER GICD_ICACTIVER #define GICR_IDREGS GICD_IDREGS #define GICR_PIDR2 GICD_PIDR2 @@ -268,9 +270,12 @@ #define ICH_LR_EOI (1UL << 41) #define ICH_LR_GROUP (1UL << 60) +#define ICH_LR_HW (1UL << 61) #define ICH_LR_STATE (3UL << 62) #define ICH_LR_PENDING_BIT (1UL << 62) #define ICH_LR_ACTIVE_BIT (1UL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT) #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) @@ -288,6 +293,7 @@ #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) #define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) #define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) @@ -385,6 +391,12 @@ static inline void gic_write_eoir(u64 irq) isb(); } +static inline void gic_write_dir(u64 irq) +{ + asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq)); + isb(); +} + struct irq_domain; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 65da435d01c1..b8901dfd9e95 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -20,9 +20,13 @@ #define GIC_CPU_ALIAS_BINPOINT 0x1c #define GIC_CPU_ACTIVEPRIO 0xd0 #define GIC_CPU_IDENT 0xfc +#define GIC_CPU_DEACTIVATE 0x1000 #define GICC_ENABLE 0x1 #define GICC_INT_PRI_THRESHOLD 0xf0 + +#define GIC_CPU_CTRL_EOImodeNS (1 << 9) + #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 #define GICC_DIS_BYPASS_MASK 0x1e0 @@ -71,11 +75,12 @@ #define GICH_LR_VIRTUALID (0x3ff << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) -#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) #define GICH_LR_STATE (3 << 28) #define GICH_LR_PENDING_BIT (1 << 28) #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_LR_HW (1 << 31) #define GICH_VMCR_CTRL_SHIFT 0 #define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 123be25ea15a..5d4e9c4b821d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -266,6 +266,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); +size_t kernfs_path_len(struct kernfs_node *kn); char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); @@ -332,6 +333,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } +static inline size_t kernfs_path_len(struct kernfs_node *kn) +{ return 0; } + static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return NULL; } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index b63218f68c4b..d140b1e9faa7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -16,7 +16,7 @@ #include <uapi/linux/kexec.h> -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #include <linux/list.h> #include <linux/linkage.h> #include <linux/compat.h> @@ -318,13 +318,24 @@ int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); -#else /* !CONFIG_KEXEC */ +int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len); +void * __weak arch_kexec_kernel_image_load(struct kimage *image); +int __weak arch_kimage_file_post_load_cleanup(struct kimage *image); +int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len); +int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, + Elf_Shdr *sechdrs, unsigned int relsec); +int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + unsigned int relsec); + +#else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } #define kexec_in_progress false -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* !defined(__ASSEBMLY__) */ diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0555cc66a15b..fcfd2bf14d3f 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -85,8 +85,6 @@ enum umh_disable_depth { UMH_DISABLED, }; -extern void usermodehelper_init(void); - extern int __usermodehelper_disable(enum umh_disable_depth depth); extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 81089cf1f0c1..1bef9e21e725 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -242,6 +242,7 @@ struct kvm_vcpu { int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; + unsigned int halt_poll_ns; #ifdef CONFIG_HAS_IOMEM int mmio_needed; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 75e3af01ee32..3f021dc5da8c 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -31,6 +31,9 @@ enum { ND_CMD_ARS_STATUS_MAX = SZ_4K, ND_MAX_MAPPINGS = 32, + /* region flag indicating to direct-map persistent memory by default */ + ND_REGION_PAGEMAP = 0, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -91,6 +94,7 @@ struct nd_region_desc { void *provider_data; int num_lanes; int numa_node; + unsigned long flags; }; struct nvdimm_bus; diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 1cc89e9df480..ffb9c9da4f39 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -40,6 +40,11 @@ struct lsm_network_audit { } fam; }; +struct lsm_ioctlop_audit { + struct path path; + u16 cmd; +}; + /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { char type; @@ -53,6 +58,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_KMOD 8 #define LSM_AUDIT_DATA_INODE 9 #define LSM_AUDIT_DATA_DENTRY 10 +#define LSM_AUDIT_DATA_IOCTL_OP 11 union { struct path path; struct dentry *dentry; @@ -68,6 +74,7 @@ struct common_audit_data { } key_struct; #endif char *kmod_name; + struct lsm_ioctlop_audit *op; } u; /* this union contains LSM specific data */ union { diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9429f054c323..ec3a6bab29de 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1881,8 +1881,10 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); -#ifdef CONFIG_SECURITY_YAMA_STACKED -void __init yama_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA +extern void __init yama_add_hooks(void); +#else +static inline void __init yama_add_hooks(void) { } #endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cc4b01972060..c518eb589260 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -77,6 +77,8 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); +bool memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); @@ -323,7 +325,7 @@ void memblock_enforce_memory_limit(phys_addr_t memory_limit); int memblock_is_memory(phys_addr_t addr); int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); int memblock_is_reserved(phys_addr_t addr); -int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); +bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void __memblock_dump_all(void); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 73b02b0a8f60..ad800e62cb7a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -23,6 +23,11 @@ #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> +#include <linux/page_counter.h> +#include <linux/vmpressure.h> +#include <linux/eventfd.h> +#include <linux/mmzone.h> +#include <linux/writeback.h> struct mem_cgroup; struct page; @@ -67,12 +72,221 @@ enum mem_cgroup_events_index { MEMCG_NR_EVENTS, }; +/* + * Per memcg event counter is incremented at every pagein/pageout. With THP, + * it will be incremated by the number of pages. This counter is used for + * for trigger some periodic events. This is straightforward and better + * than using jiffies etc. to handle periodic memcg event. + */ +enum mem_cgroup_events_target { + MEM_CGROUP_TARGET_THRESH, + MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_TARGET_NUMAINFO, + MEM_CGROUP_NTARGETS, +}; + +/* + * Bits in struct cg_proto.flags + */ +enum cg_proto_flags { + /* Currently active and new sockets should be assigned to cgroups */ + MEMCG_SOCK_ACTIVE, + /* It was ever activated; we must disarm static keys on destruction */ + MEMCG_SOCK_ACTIVATED, +}; + +struct cg_proto { + struct page_counter memory_allocated; /* Current allocated memory. */ + struct percpu_counter sockets_allocated; /* Current number of sockets. */ + int memory_pressure; + long sysctl_mem[3]; + unsigned long flags; + /* + * memcg field is used to find which memcg we belong directly + * Each memcg struct can hold more than one cg_proto, so container_of + * won't really cut. + * + * The elegant solution would be having an inverse function to + * proto_cgroup in struct proto, but that means polluting the structure + * for everybody, instead of just for memcg users. + */ + struct mem_cgroup *memcg; +}; + #ifdef CONFIG_MEMCG +struct mem_cgroup_stat_cpu { + long count[MEM_CGROUP_STAT_NSTATS]; + unsigned long events[MEMCG_NR_EVENTS]; + unsigned long nr_page_events; + unsigned long targets[MEM_CGROUP_NTARGETS]; +}; + +struct mem_cgroup_reclaim_iter { + struct mem_cgroup *position; + /* scan generation, increased every round-trip */ + unsigned int generation; +}; + +/* + * per-zone information in memory controller. + */ +struct mem_cgroup_per_zone { + struct lruvec lruvec; + unsigned long lru_size[NR_LRU_LISTS]; + + struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; + + struct rb_node tree_node; /* RB tree node */ + unsigned long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + struct mem_cgroup *memcg; /* Back pointer, we cannot */ + /* use container_of */ +}; + +struct mem_cgroup_per_node { + struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; +}; + +struct mem_cgroup_threshold { + struct eventfd_ctx *eventfd; + unsigned long threshold; +}; + +/* For threshold */ +struct mem_cgroup_threshold_ary { + /* An array index points to threshold just below or equal to usage. */ + int current_threshold; + /* Size of entries[] */ + unsigned int size; + /* Array of thresholds */ + struct mem_cgroup_threshold entries[0]; +}; + +struct mem_cgroup_thresholds { + /* Primary thresholds array */ + struct mem_cgroup_threshold_ary *primary; + /* + * Spare threshold array. + * This is needed to make mem_cgroup_unregister_event() "never fail". + * It must be able to store at least primary->size - 1 entries. + */ + struct mem_cgroup_threshold_ary *spare; +}; + +/* + * The memory controller data structure. The memory controller controls both + * page cache and RSS per cgroup. We would eventually like to provide + * statistics based on the statistics developed by Rik Van Riel for clock-pro, + * to help the administrator determine what knobs to tune. + */ +struct mem_cgroup { + struct cgroup_subsys_state css; + + /* Accounted resources */ + struct page_counter memory; + struct page_counter memsw; + struct page_counter kmem; + + /* Normal memory consumption range */ + unsigned long low; + unsigned long high; + + unsigned long soft_limit; + + /* vmpressure notifications */ + struct vmpressure vmpressure; + + /* css_online() has been completed */ + int initialized; + + /* + * Should the accounting and control be hierarchical, per subtree? + */ + bool use_hierarchy; + + /* protected by memcg_oom_lock */ + bool oom_lock; + int under_oom; + + int swappiness; + /* OOM-Killer disable */ + int oom_kill_disable; + + /* protect arrays of thresholds */ + struct mutex thresholds_lock; + + /* thresholds for memory usage. RCU-protected */ + struct mem_cgroup_thresholds thresholds; + + /* thresholds for mem+swap usage. RCU-protected */ + struct mem_cgroup_thresholds memsw_thresholds; + + /* For oom notifier event fd */ + struct list_head oom_notify; + + /* + * Should we move charges of a task when a task is moved into this + * mem_cgroup ? And what type of charges should we move ? + */ + unsigned long move_charge_at_immigrate; + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; + struct task_struct *move_lock_task; + unsigned long move_lock_flags; + /* + * percpu counter. + */ + struct mem_cgroup_stat_cpu __percpu *stat; + spinlock_t pcp_counter_lock; + +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) + struct cg_proto tcp_mem; +#endif +#if defined(CONFIG_MEMCG_KMEM) + /* Index in the kmem_cache->memcg_params.memcg_caches array */ + int kmemcg_id; + bool kmem_acct_activated; + bool kmem_acct_active; +#endif + + int last_scanned_node; +#if MAX_NUMNODES > 1 + nodemask_t scan_nodes; + atomic_t numainfo_events; + atomic_t numainfo_updating; +#endif + +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; + struct wb_domain cgwb_domain; +#endif + + /* List of events which userspace want to receive */ + struct list_head event_list; + spinlock_t event_list_lock; + + struct mem_cgroup_per_node *nodeinfo[0]; + /* WARNING: nodeinfo must be the last member here */ +}; extern struct cgroup_subsys_state *mem_cgroup_root_css; -void mem_cgroup_events(struct mem_cgroup *memcg, +/** + * mem_cgroup_events - count memory events against a cgroup + * @memcg: the memory cgroup + * @idx: the event index + * @nr: the number of events to account for + */ +static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, - unsigned int nr); + unsigned int nr) +{ + this_cpu_add(memcg->stat->events[idx], nr); +} bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); @@ -90,15 +304,29 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); -bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, - struct mem_cgroup *root); bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); -extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); -extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); +static inline +struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ + return css ? container_of(css, struct mem_cgroup, css) : NULL; +} -extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); -extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); +struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, + struct mem_cgroup *, + struct mem_cgroup_reclaim_cookie *); +void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); + +static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, + struct mem_cgroup *root) +{ + if (root == memcg) + return true; + if (!root->use_hierarchy) + return false; + return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); +} static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) @@ -114,24 +342,68 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return match; } -extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); -extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); +struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); +ino_t page_cgroup_ino(struct page *page); -struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, - struct mem_cgroup *, - struct mem_cgroup_reclaim_cookie *); -void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); +static inline bool mem_cgroup_disabled(void) +{ + if (memory_cgrp_subsys.disabled) + return true; + return false; +} /* * For memory reclaim. */ -int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); -bool mem_cgroup_lruvec_online(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); -void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); -extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, - struct task_struct *p); + +void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int nr_pages); + +static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + struct mem_cgroup_per_zone *mz; + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return true; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + memcg = mz->memcg; + + return !!(memcg->css.flags & CSS_ONLINE); +} + +static inline +unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) +{ + struct mem_cgroup_per_zone *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + return mz->lru_size[lru]; +} + +static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) +{ + unsigned long inactive_ratio; + unsigned long inactive; + unsigned long active; + unsigned long gb; + + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); + + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + + return inactive * inactive_ratio < active; +} + +void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, + struct task_struct *p); static inline void mem_cgroup_oom_enable(void) { @@ -156,18 +428,26 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -static inline bool mem_cgroup_disabled(void) -{ - if (memory_cgrp_subsys.disabled) - return true; - return false; -} - struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); -void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx, int val); void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); +/** + * mem_cgroup_update_page_stat - update page state statistics + * @memcg: memcg to account against + * @idx: page state item to account + * @val: number of pages (positive or negative) + * + * See mem_cgroup_begin_page_stat() for locking requirements. + */ +static inline void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ + VM_BUG_ON(!rcu_read_lock_held()); + + if (memcg) + this_cpu_add(memcg->stat->count[idx], val); +} + static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { @@ -184,13 +464,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, unsigned long *total_scanned); -void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { + struct mem_cgroup *memcg; + if (mem_cgroup_disabled()) return; - __mem_cgroup_count_vm_event(mm, idx); + + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (unlikely(!memcg)) + goto out; + + switch (idx) { + case PGFAULT: + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); + break; + case PGMAJFAULT: + this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); + break; + default: + BUG(); + } +out: + rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head); @@ -199,8 +497,6 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; -#define mem_cgroup_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) - static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) @@ -258,11 +554,6 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, return &zone->lruvec; } -static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) -{ - return NULL; -} - static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { @@ -275,12 +566,6 @@ static inline bool task_in_mem_cgroup(struct task_struct *task, return true; } -static inline struct cgroup_subsys_state - *mem_cgroup_css(struct mem_cgroup *memcg) -{ - return NULL; -} - static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -428,8 +713,8 @@ static inline void sock_release_memcg(struct sock *sk) extern struct static_key memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; -extern void memcg_get_cache_ids(void); -extern void memcg_put_cache_ids(void); +void memcg_get_cache_ids(void); +void memcg_put_cache_ids(void); /* * Helper macro to loop through all memcg-specific caches. Callers must still @@ -444,7 +729,10 @@ static inline bool memcg_kmem_enabled(void) return static_key_false(&memcg_kmem_enabled_key); } -bool memcg_kmem_is_active(struct mem_cgroup *memcg); +static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) +{ + return memcg->kmem_acct_active; +} /* * In general, we'll do everything in our power to not incur in any overhead @@ -463,7 +751,15 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order); void __memcg_kmem_uncharge_pages(struct page *page, int order); -int memcg_cache_id(struct mem_cgroup *memcg); +/* + * helper for acessing a memcg's index. It will be used as an index in the + * child cache array in kmem_cache, and also to derive its name. This function + * will return -1 when this is not a kmem-limited memcg. + */ +static inline int memcg_cache_id(struct mem_cgroup *memcg) +{ + return memcg ? memcg->kmemcg_id : -1; +} struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 6ffa0ac7f7d6..8f60e899b33c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -266,8 +266,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); -extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default); -extern int arch_add_memory(int nid, u64 start, u64 size); +extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, + bool for_device); +extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h new file mode 100644 index 000000000000..eb492d47f717 --- /dev/null +++ b/include/linux/microchipphy.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2015 Microchip Technology + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _MICROCHIPPHY_H +#define _MICROCHIPPHY_H + +#define LAN88XX_INT_MASK (0x19) +#define LAN88XX_INT_MASK_MDINTPIN_EN_ (0x8000) +#define LAN88XX_INT_MASK_SPEED_CHANGE_ (0x4000) +#define LAN88XX_INT_MASK_LINK_CHANGE_ (0x2000) +#define LAN88XX_INT_MASK_FDX_CHANGE_ (0x1000) +#define LAN88XX_INT_MASK_AUTONEG_ERR_ (0x0800) +#define LAN88XX_INT_MASK_AUTONEG_DONE_ (0x0400) +#define LAN88XX_INT_MASK_POE_DETECT_ (0x0200) +#define LAN88XX_INT_MASK_SYMBOL_ERR_ (0x0100) +#define LAN88XX_INT_MASK_FAST_LINK_FAIL_ (0x0080) +#define LAN88XX_INT_MASK_WOL_EVENT_ (0x0040) +#define LAN88XX_INT_MASK_EXTENDED_INT_ (0x0020) +#define LAN88XX_INT_MASK_RESERVED_ (0x0010) +#define LAN88XX_INT_MASK_FALSE_CARRIER_ (0x0008) +#define LAN88XX_INT_MASK_LINK_SPEED_DS_ (0x0004) +#define LAN88XX_INT_MASK_MASTER_SLAVE_DONE_ (0x0002) +#define LAN88XX_INT_MASK_RX__ER_ (0x0001) + +#define LAN88XX_INT_STS (0x1A) +#define LAN88XX_INT_STS_INT_ACTIVE_ (0x8000) +#define LAN88XX_INT_STS_SPEED_CHANGE_ (0x4000) +#define LAN88XX_INT_STS_LINK_CHANGE_ (0x2000) +#define LAN88XX_INT_STS_FDX_CHANGE_ (0x1000) +#define LAN88XX_INT_STS_AUTONEG_ERR_ (0x0800) +#define LAN88XX_INT_STS_AUTONEG_DONE_ (0x0400) +#define LAN88XX_INT_STS_POE_DETECT_ (0x0200) +#define LAN88XX_INT_STS_SYMBOL_ERR_ (0x0100) +#define LAN88XX_INT_STS_FAST_LINK_FAIL_ (0x0080) +#define LAN88XX_INT_STS_WOL_EVENT_ (0x0040) +#define LAN88XX_INT_STS_EXTENDED_INT_ (0x0020) +#define LAN88XX_INT_STS_RESERVED_ (0x0010) +#define LAN88XX_INT_STS_FALSE_CARRIER_ (0x0008) +#define LAN88XX_INT_STS_LINK_SPEED_DS_ (0x0004) +#define LAN88XX_INT_STS_MASTER_SLAVE_DONE_ (0x0002) +#define LAN88XX_INT_STS_RX_ER_ (0x0001) + +#define LAN88XX_EXT_PAGE_ACCESS (0x1F) +#define LAN88XX_EXT_PAGE_SPACE_0 (0x0000) +#define LAN88XX_EXT_PAGE_SPACE_1 (0x0001) +#define LAN88XX_EXT_PAGE_SPACE_2 (0x0002) + +/* Extended Register Page 1 space */ +#define LAN88XX_EXT_MODE_CTRL (0x13) +#define LAN88XX_EXT_MODE_CTRL_MDIX_MASK_ (0x000C) +#define LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_ (0x0000) +#define LAN88XX_EXT_MODE_CTRL_MDI_ (0x0008) +#define LAN88XX_EXT_MODE_CTRL_MDI_X_ (0x000C) + +/* MMD 3 Registers */ +#define LAN88XX_MMD3_CHIP_ID (32877) +#define LAN88XX_MMD3_CHIP_REV (32878) + +#endif /* _MICROCHIPPHY_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index bcbf8c72a77b..baad4cb8e9b0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -79,7 +79,8 @@ enum { enum { MLX4_MAX_PORTS = 2, - MLX4_MAX_PORT_PKEYS = 128 + MLX4_MAX_PORT_PKEYS = 128, + MLX4_MAX_PORT_GIDS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 9553a73d2049..5a06d969338e 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -59,6 +59,7 @@ struct mlx4_interface { void (*event) (struct mlx4_dev *dev, void *context, enum mlx4_dev_event event, unsigned long param); void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); + void (*activate)(struct mlx4_dev *dev, void *context); struct list_head list; enum mlx4_protocol protocol; int flags; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 250b1ff8b48d..8eb3b19af2a4 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,6 +402,17 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; +struct mlx5_cmd_query_special_contexts_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_query_special_contexts_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 dump_fill_mkey; + __be32 resd_lkey; +}; + struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8b6d6f2154a4..27b53f9a24ad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -845,6 +845,7 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b257c43855b..91c08f6f0dc9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -20,6 +20,7 @@ #include <linux/shrinker.h> #include <linux/resource.h> #include <linux/page_ext.h> +#include <linux/err.h> struct mempolicy; struct anon_vma; @@ -249,6 +250,8 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pmd_fault)(struct vm_area_struct *, unsigned long address, + pmd_t *, unsigned int flags); void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf); /* notification that a previously read-only page is about to become @@ -307,18 +310,6 @@ struct inode; #define page_private(page) ((page)->private) #define set_page_private(page, v) ((page)->private = (v)) -/* It's valid only if the page is free path or free_list */ -static inline void set_freepage_migratetype(struct page *page, int migratetype) -{ - page->index = migratetype; -} - -/* It's valid only if the page is free path or free_list */ -static inline int get_freepage_migratetype(struct page *page) -{ - return page->index; -} - /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) @@ -359,20 +350,15 @@ static inline int get_page_unless_zero(struct page *page) return atomic_inc_not_zero(&page->_count); } -/* - * Try to drop a ref unless the page has a refcount of one, return false if - * that is the case. - * This is to make sure that the refcount won't become zero after this drop. - * This can be called when MMU is off so it must not access - * any of the virtual mappings. - */ -static inline int put_page_unless_one(struct page *page) -{ - return atomic_add_unless(&page->_count, -1, 1); -} - extern int page_is_ram(unsigned long pfn); -extern int region_is_ram(resource_size_t phys_addr, unsigned long size); + +enum { + REGION_INTERSECTS, + REGION_DISJOINT, + REGION_MIXED, +}; + +int region_intersects(resource_size_t offset, size_t size, const char *type); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); @@ -1229,6 +1215,49 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); + +/* Container for pinned pfns / pages */ +struct frame_vector { + unsigned int nr_allocated; /* Number of frames we have space for */ + unsigned int nr_frames; /* Number of frames stored in ptrs array */ + bool got_ref; /* Did we pin pages by getting page ref? */ + bool is_pfns; /* Does array contain pages or pfns? */ + void *ptrs[0]; /* Array of pinned pfns / pages. Use + * pfns_vector_pages() or pfns_vector_pfns() + * for access */ +}; + +struct frame_vector *frame_vector_create(unsigned int nr_frames); +void frame_vector_destroy(struct frame_vector *vec); +int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, + bool write, bool force, struct frame_vector *vec); +void put_vaddr_frames(struct frame_vector *vec); +int frame_vector_to_pages(struct frame_vector *vec); +void frame_vector_to_pfns(struct frame_vector *vec); + +static inline unsigned int frame_vector_count(struct frame_vector *vec) +{ + return vec->nr_frames; +} + +static inline struct page **frame_vector_pages(struct frame_vector *vec) +{ + if (vec->is_pfns) { + int err = frame_vector_to_pages(vec); + + if (err) + return ERR_PTR(err); + } + return (struct page **)(vec->ptrs); +} + +static inline unsigned long *frame_vector_pfns(struct frame_vector *vec) +{ + if (!vec->is_pfns) + frame_vector_to_pfns(vec); + return (unsigned long *)(vec->ptrs); +} + struct kvec; int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); @@ -1260,6 +1289,11 @@ static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline bool vma_is_anonymous(struct vm_area_struct *vma) +{ + return !vma->vm_ops; +} + static inline int stack_guard_page_start(struct vm_area_struct *vma, unsigned long addr) { @@ -1883,11 +1917,19 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate); + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate); extern int do_munmap(struct mm_struct *, unsigned long, size_t); +static inline unsigned long +do_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, unsigned long flags, + unsigned long pgoff, unsigned long *populate) +{ + return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); +} + #ifdef CONFIG_MMU extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); @@ -2186,6 +2228,7 @@ extern int memory_failure(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int get_hwpoison_page(struct page *page); +extern void put_hwpoison_page(struct page *page); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c8d0a73d64c4..3d6baa7d4534 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -235,7 +235,7 @@ struct page_frag_cache { bool pfmemalloc; }; -typedef unsigned long __nocast vm_flags_t; +typedef unsigned long vm_flags_t; /* * A region containing a mapping of a non-memory backed file under NOMMU diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 4d3776d25925..fdd0779ccdfa 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -279,10 +279,13 @@ struct mmc_card { #define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */ #define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1<<10) /* Skip secure for erase/trim */ #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ +#define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ + unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ unsigned int pref_erase; /* in sectors */ + unsigned int eg_boundary; /* don't cross erase-group boundaries */ u8 erased_byte; /* value of erased bytes */ u32 raw_cid[4]; /* raw card CID */ diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 5be97676f1fa..134c57422740 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -98,6 +98,7 @@ struct mmc_data; * @irq_flags: The flags to be passed to request_irq. * @irq: The irq value to be passed to request_irq. * @sdio_id0: Number of slot0 in the SDIO interrupt registers. + * @dto_timer: Timer for broken data transfer over scheme. * * Locking * ======= @@ -153,11 +154,7 @@ struct dw_mci { dma_addr_t sg_dma; void *sg_cpu; const struct dw_mci_dma_ops *dma_ops; -#ifdef CONFIG_MMC_DW_IDMAC unsigned int ring_size; -#else - struct dw_mci_dma_data *dma_data; -#endif u32 cmd_status; u32 data_status; u32 stop_cmdr; @@ -204,6 +201,7 @@ struct dw_mci { int sdio_id0; struct timer_list cmd11_timer; + struct timer_list dto_timer; }; /* DMA ops for Internal/External DMAC interface */ @@ -226,6 +224,8 @@ struct dw_mci_dma_ops { #define DW_MCI_QUIRK_HIGHSPEED BIT(2) /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) +/* Timer for broken data transfer over scheme */ +#define DW_MCI_QUIRK_BROKEN_DTO BIT(4) struct dma_pdata; @@ -259,7 +259,6 @@ struct dw_mci_board { struct dw_mci_dma_ops *dma_ops; struct dma_pdata *data; - struct block_settings *blk_settings; }; #endif /* LINUX_MMC_DW_MMC_H */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 1369e54faeb7..83b81fd865f3 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -412,7 +412,8 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host) { host->ops->enable_sdio_irq(host, 0); host->sdio_irq_pending = true; - wake_up_process(host->sdio_irq_thread); + if (host->sdio_irq_thread) + wake_up_process(host->sdio_irq_thread); } void sdio_run_irqs(struct mmc_host *host); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 61cd67f4d788..a1a210d59961 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -66,6 +66,16 @@ struct mmu_notifier_ops { unsigned long end); /* + * clear_young is a lightweight version of clear_flush_young. Like the + * latter, it is supposed to test-and-clear the young/accessed bitflag + * in the secondary pte, but it may omit flushing the secondary tlb. + */ + int (*clear_young)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end); + + /* * test_young is called to check the young/accessed bitflag in * the secondary pte. This is used to know if the page is * frequently used without actually clearing the flag or tearing @@ -203,6 +213,9 @@ extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end); +extern int __mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, + unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, @@ -231,6 +244,15 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline int mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_clear_young(mm, start, end); + return 0; +} + static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { @@ -311,6 +333,28 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __young; \ }) +#define ptep_clear_young_notify(__vma, __address, __ptep) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ + __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ + ___address + PAGE_SIZE); \ + __young; \ +}) + +#define pmdp_clear_young_notify(__vma, __address, __pmdp) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ + __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ + ___address + PMD_SIZE); \ + __young; \ +}) + #define ptep_clear_flush_notify(__vma, __address, __ptep) \ ({ \ unsigned long ___addr = __address & PAGE_MASK; \ @@ -427,6 +471,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young +#define ptep_clear_young_notify ptep_test_and_clear_young +#define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ac00e2050943..d94347737292 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -319,7 +319,11 @@ enum zone_type { ZONE_HIGHMEM, #endif ZONE_MOVABLE, +#ifdef CONFIG_ZONE_DEVICE + ZONE_DEVICE, +#endif __MAX_NR_ZONES + }; #ifndef __GENERATING_BOUNDS_H @@ -786,6 +790,25 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat) return !pgdat->node_start_pfn && !pgdat->node_spanned_pages; } +static inline int zone_id(const struct zone *zone) +{ + struct pglist_data *pgdat = zone->zone_pgdat; + + return zone - pgdat->node_zones; +} + +#ifdef CONFIG_ZONE_DEVICE +static inline bool is_dev_zone(const struct zone *zone) +{ + return zone_id(zone) == ZONE_DEVICE; +} +#else +static inline bool is_dev_zone(const struct zone *zone) +{ + return false; +} +#endif + #include <linux/memory_hotplug.h> extern struct mutex zonelists_mutex; diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 29975c73a953..366cf77953b5 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -27,9 +27,9 @@ #include <linux/string.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/io.h> #include <asm/unaligned.h> -#include <asm/io.h> #include <asm/barrier.h> #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9120edb650a0..639e9b8b0e4d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -68,8 +68,17 @@ extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, - u32 dst_portid, gfp_t gfp_mask); + +extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, + unsigned int ldiff, u32 dst_portid, + gfp_t gfp_mask); +static inline struct sk_buff * +netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid, + gfp_t gfp_mask) +{ + return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask); +} + extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a91adf6e02f2..78488e099ce7 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,6 +47,12 @@ static inline bool trigger_allbutself_cpu_backtrace(void) arch_trigger_all_cpu_backtrace(false); return true; } + +/* generic implementation */ +void nmi_trigger_all_cpu_backtrace(bool include_self, + void (*raise)(cpumask_t *mask)); +bool nmi_cpu_backtrace(struct pt_regs *regs); + #else static inline bool trigger_all_cpu_backtrace(void) { diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b02f72bb8e32..f798e2afba88 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) * @speed: OUT - The link speed expressed as PCIe generation number. * @width: OUT - The link width expressed as the number of PCIe lanes. * - * Set the translation of a memory window. The peer may access local memory - * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. * * Return: One if the link is up, zero if the link is down, otherwise a * negative value indicating the error number. @@ -795,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) } /** - * ntb_peer_db_clear() - clear bits in the local doorbell register + * ntb_peer_db_clear() - clear bits in the peer doorbell register * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index 2862861366a5..7243eb98a722 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); void ntb_transport_link_up(struct ntb_transport_qp *qp); void ntb_transport_link_down(struct ntb_transport_qp *qp); bool ntb_transport_link_query(struct ntb_transport_qp *qp); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp); diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index c2bbf672b84e..d2fa9ca42e9a 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -41,7 +41,7 @@ enum OID { OID_signed_data, /* 1.2.840.113549.1.7.2 */ /* PKCS#9 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-9(9)} */ OID_email_address, /* 1.2.840.113549.1.9.1 */ - OID_content_type, /* 1.2.840.113549.1.9.3 */ + OID_contentType, /* 1.2.840.113549.1.9.3 */ OID_messageDigest, /* 1.2.840.113549.1.9.4 */ OID_signingTime, /* 1.2.840.113549.1.9.5 */ OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ @@ -54,6 +54,8 @@ enum OID { /* Microsoft Authenticode & Software Publishing */ OID_msIndirectData, /* 1.3.6.1.4.1.311.2.1.4 */ + OID_msStatementType, /* 1.3.6.1.4.1.311.2.1.11 */ + OID_msSpOpusInfo, /* 1.3.6.1.4.1.311.2.1.12 */ OID_msPeImageDataObjId, /* 1.3.6.1.4.1.311.2.1.15 */ OID_msIndividualSPKeyPurpose, /* 1.3.6.1.4.1.311.2.1.21 */ OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ @@ -61,6 +63,9 @@ enum OID { OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_sha1, /* 1.3.14.3.2.26 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ + OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ + OID_sha512, /* 2.16.840.1.101.3.4.2.3 */ + OID_sha224, /* 2.16.840.1.101.3.4.2.4 */ /* Distinguished Name attribute IDs [RFC 2256] */ OID_commonName, /* 2.5.4.3 */ diff --git a/include/linux/oom.h b/include/linux/oom.h index 7deecb7bca5e..03e6257321f0 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -13,6 +13,27 @@ struct mem_cgroup; struct task_struct; /* + * Details of the page allocation that triggered the oom killer that are used to + * determine what should be killed. + */ +struct oom_control { + /* Used to determine cpuset */ + struct zonelist *zonelist; + + /* Used to determine mempolicy */ + nodemask_t *nodemask; + + /* Used to determine cpuset and node locality requirement */ + const gfp_t gfp_mask; + + /* + * order == -1 means the oom kill is required by sysrq, otherwise only + * for display purposes. + */ + const int order; +}; + +/* * Types of limitations to the nodes from which allocations may occur */ enum oom_constraint { @@ -57,21 +78,18 @@ extern unsigned long oom_badness(struct task_struct *p, extern int oom_kills_count(void); extern void note_oom_kill(void); -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, nodemask_t *nodemask, - const char *message); + struct mem_cgroup *memcg, const char *message); -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask, +extern void check_panic_on_oom(struct oom_control *oc, + enum oom_constraint constraint, struct mem_cgroup *memcg); -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill); +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, + struct task_struct *task, unsigned long totalpages); -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *mask, bool force_kill); +extern bool out_of_memory(struct oom_control *oc); extern void exit_oom_victim(void); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 41c93844fb1d..416509e26d6d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -109,6 +109,10 @@ enum pageflags { #ifdef CONFIG_TRANSPARENT_HUGEPAGE PG_compound_lock, #endif +#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) + PG_young, + PG_idle, +#endif __NR_PAGEFLAGS, /* Filesystems */ @@ -289,6 +293,13 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +TESTPAGEFLAG(Young, young) +SETPAGEFLAG(Young, young) +TESTCLEARFLAG(Young, young) +PAGEFLAG(Idle, idle) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 2dc1e1697b45..047d64706f2a 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -65,11 +65,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, bool skip_hwpoisoned_pages); -/* - * Internal functions. Changes pageblock's migrate type. - */ -int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages); -void unset_migratetype_isolate(struct page *page, unsigned migratetype); struct page *alloc_migrate_target(struct page *page, unsigned long private, int **resultp); diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index c42981cd99aa..17f118a82854 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -26,6 +26,10 @@ enum page_ext_flags { PAGE_EXT_DEBUG_POISON, /* Page is poisoned */ PAGE_EXT_DEBUG_GUARD, PAGE_EXT_OWNER, +#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) + PAGE_EXT_YOUNG, + PAGE_EXT_IDLE, +#endif }; /* diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h new file mode 100644 index 000000000000..bf268fa92c5b --- /dev/null +++ b/include/linux/page_idle.h @@ -0,0 +1,110 @@ +#ifndef _LINUX_MM_PAGE_IDLE_H +#define _LINUX_MM_PAGE_IDLE_H + +#include <linux/bitops.h> +#include <linux/page-flags.h> +#include <linux/page_ext.h> + +#ifdef CONFIG_IDLE_PAGE_TRACKING + +#ifdef CONFIG_64BIT +static inline bool page_is_young(struct page *page) +{ + return PageYoung(page); +} + +static inline void set_page_young(struct page *page) +{ + SetPageYoung(page); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return TestClearPageYoung(page); +} + +static inline bool page_is_idle(struct page *page) +{ + return PageIdle(page); +} + +static inline void set_page_idle(struct page *page) +{ + SetPageIdle(page); +} + +static inline void clear_page_idle(struct page *page) +{ + ClearPageIdle(page); +} +#else /* !CONFIG_64BIT */ +/* + * If there is not enough space to store Idle and Young bits in page flags, use + * page ext flags instead. + */ +extern struct page_ext_operations page_idle_ops; + +static inline bool page_is_young(struct page *page) +{ + return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); +} + +static inline void set_page_young(struct page *page) +{ + set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return test_and_clear_bit(PAGE_EXT_YOUNG, + &lookup_page_ext(page)->flags); +} + +static inline bool page_is_idle(struct page *page) +{ + return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); +} + +static inline void set_page_idle(struct page *page) +{ + set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); +} + +static inline void clear_page_idle(struct page *page) +{ + clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); +} +#endif /* CONFIG_64BIT */ + +#else /* !CONFIG_IDLE_PAGE_TRACKING */ + +static inline bool page_is_young(struct page *page) +{ + return false; +} + +static inline void set_page_young(struct page *page) +{ +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return false; +} + +static inline bool page_is_idle(struct page *page) +{ + return false; +} + +static inline void set_page_idle(struct page *page) +{ +} + +static inline void clear_page_idle(struct page *page) +{ +} + +#endif /* CONFIG_IDLE_PAGE_TRACKING */ + +#endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 1a64733c48c7..e90eb22de628 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1227,6 +1227,8 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, dma_pool_create(name, &pdev->dev, size, align, allocation) #define pci_pool_destroy(pool) dma_pool_destroy(pool) #define pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle) +#define pci_pool_zalloc(pool, flags, handle) \ + dma_pool_zalloc(pool, flags, handle) #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr) struct msix_entry { diff --git a/include/linux/platform_data/i2c-mux-reg.h b/include/linux/platform_data/i2c-mux-reg.h new file mode 100644 index 000000000000..c68712aadf43 --- /dev/null +++ b/include/linux/platform_data/i2c-mux-reg.h @@ -0,0 +1,44 @@ +/* + * I2C multiplexer using a single register + * + * Copyright 2015 Freescale Semiconductor + * York Sun <yorksun@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_PLATFORM_DATA_I2C_MUX_REG_H +#define __LINUX_PLATFORM_DATA_I2C_MUX_REG_H + +/** + * struct i2c_mux_reg_platform_data - Platform-dependent data for i2c-mux-reg + * @parent: Parent I2C bus adapter number + * @base_nr: Base I2C bus number to number adapters from or zero for dynamic + * @values: Array of value for each channel + * @n_values: Number of multiplexer channels + * @little_endian: Indicating if the register is in little endian + * @write_only: Reading the register is not allowed by hardware + * @classes: Optional I2C auto-detection classes + * @idle: Value to write to mux when idle + * @idle_in_use: indicate if idle value is in use + * @reg: Virtual address of the register to switch channel + * @reg_size: register size in bytes + */ +struct i2c_mux_reg_platform_data { + int parent; + int base_nr; + const unsigned int *values; + int n_values; + bool little_endian; + bool write_only; + const unsigned int *classes; + u32 idle; + bool idle_in_use; + void __iomem *reg; + resource_size_t reg_size; +}; + +#endif /* __LINUX_PLATFORM_DATA_I2C_MUX_REG_H */ diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index e1571efa3f2b..95ccab3f454a 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -45,5 +45,6 @@ struct esdhc_platform_data { int max_bus_width; bool support_vsel; unsigned int delay_line; + unsigned int tuning_step; /* The delay cell steps in tuning procedure */ }; #endif /* __ASM_ARCH_IMX_ESDHC_H */ diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cab7ba55bedb..e817722ee3f0 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, @@ -80,6 +81,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) +{ + return NULL; +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { diff --git a/include/linux/pmem.h b/include/linux/pmem.h index d2114045a6c4..85f810b33917 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -14,28 +14,42 @@ #define __PMEM_H__ #include <linux/io.h> +#include <linux/uio.h> #ifdef CONFIG_ARCH_HAS_PMEM_API -#include <asm/cacheflush.h> +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB +#include <asm/pmem.h> #else +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT +/* + * These are simply here to enable compilation, all call sites gate + * calling these symbols with arch_has_pmem_api() and redirect to the + * implementation in asm/pmem.h. + */ +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} + static inline void arch_wmb_pmem(void) { BUG(); } -static inline bool __arch_has_wmb_pmem(void) +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) { - return false; + BUG(); } -static inline void __pmem *arch_memremap_pmem(resource_size_t offset, - unsigned long size) +static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, + struct iov_iter *i) { - return NULL; + BUG(); + return 0; } -static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t n) +static inline void arch_clear_pmem(void __pmem *addr, size_t size) { BUG(); } @@ -43,18 +57,22 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, /* * Architectures that define ARCH_HAS_PMEM_API must provide - * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(), - * arch_wmb_pmem(), and __arch_has_wmb_pmem(). + * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), + * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem(). */ - static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(void __pmem *addr) +static inline void memunmap_pmem(struct device *dev, void __pmem *addr) +{ + devm_memunmap(dev, (void __force *) addr); +} + +static inline bool arch_has_pmem_api(void) { - iounmap((void __force __iomem *) addr); + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); } /** @@ -68,14 +86,7 @@ static inline void memunmap_pmem(void __pmem *addr) */ static inline bool arch_has_wmb_pmem(void) { - if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) - return __arch_has_wmb_pmem(); - return false; -} - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); + return arch_has_pmem_api() && __arch_has_wmb_pmem(); } /* @@ -85,16 +96,24 @@ static inline bool arch_has_pmem_api(void) * default_memremap_pmem + default_memcpy_to_pmem is sufficient for * making data durable relative to i/o completion. */ -static void default_memcpy_to_pmem(void __pmem *dst, const void *src, +static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, size_t size) { memcpy((void __force *) dst, src, size); } -static void __pmem *default_memremap_pmem(resource_size_t offset, - unsigned long size) +static inline size_t default_copy_from_iter_pmem(void __pmem *addr, + size_t bytes, struct iov_iter *i) +{ + return copy_from_iter_nocache((void __force *)addr, bytes, i); +} + +static inline void default_clear_pmem(void __pmem *addr, size_t size) { - return (void __pmem __force *)ioremap_wt(offset, size); + if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0) + clear_page((void __force *)addr); + else + memset((void __force *)addr, 0, size); } /** @@ -109,12 +128,11 @@ static void __pmem *default_memremap_pmem(resource_size_t offset, * wmb_pmem() arrange for the data to be written through the * cache to persistent media. */ -static inline void __pmem *memremap_pmem(resource_size_t offset, - unsigned long size) +static inline void __pmem *memremap_pmem(struct device *dev, + resource_size_t offset, unsigned long size) { - if (arch_has_pmem_api()) - return arch_memremap_pmem(offset, size); - return default_memremap_pmem(offset, size); + return (void __pmem *) devm_memremap(dev, offset, size, + ARCH_MEMREMAP_PMEM); } /** @@ -146,7 +164,42 @@ static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) */ static inline void wmb_pmem(void) { - if (arch_has_pmem_api()) + if (arch_has_wmb_pmem()) arch_wmb_pmem(); + else + wmb(); +} + +/** + * copy_from_iter_pmem - copy data from an iterator to PMEM + * @addr: PMEM destination address + * @bytes: number of bytes to copy + * @i: iterator with source data + * + * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. + * This function requires explicit ordering with a wmb_pmem() call. + */ +static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, + struct iov_iter *i) +{ + if (arch_has_pmem_api()) + return arch_copy_from_iter_pmem(addr, bytes, i); + return default_copy_from_iter_pmem(addr, bytes, i); +} + +/** + * clear_pmem - zero a PMEM memory range + * @addr: virtual start address + * @size: number of bytes to zero + * + * Write zeros into the memory range starting at 'addr' for 'size' bytes. + * This function requires explicit ordering with a wmb_pmem() call. + */ +static inline void clear_pmem(void __pmem *addr, size_t size) +{ + if (arch_has_pmem_api()) + arch_clear_pmem(addr, size); + else + default_clear_pmem(addr, size); } #endif /* __PMEM_H__ */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 2110a81c5e2a..317e16de09e5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -19,8 +19,8 @@ * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) -#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) +#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* @@ -69,10 +69,6 @@ #define ATM_POISON_FREE 0x12 #define ATM_POISON 0xdeadbeef -/********** net/ **********/ -#define NEIGHBOR_DEAD 0xdeadbeef -#define NETFILTER_LINK_POISON 0xdead57ac - /********** kernel/mutexes **********/ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 @@ -83,7 +79,4 @@ /********** security/ **********/ #define KEY_DESTROY 0xbd -/********** sound/oss/ **********/ -#define OSS_POISON_FREE 0xAB - #endif diff --git a/include/linux/printk.h b/include/linux/printk.h index a6298b27ac99..9729565c25ff 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -404,10 +404,10 @@ do { \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ - DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ __ratelimit(&_rs)) \ - __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define pr_debug_ratelimited(fmt, ...) \ @@ -456,11 +456,17 @@ static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, groupsize, buf, len, ascii) \ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) -#else +#elif defined(DEBUG) #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) -#endif /* defined(CONFIG_DYNAMIC_DEBUG) */ +#else +static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ +} +#endif #endif diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 987a73a40ef8..061265f92876 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -34,6 +34,7 @@ #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) +#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 36262d08a9da..d681f6875aef 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -79,26 +79,43 @@ enum { PWMF_EXPORTED = 1 << 2, }; +/** + * struct pwm_device - PWM channel object + * @label: name of the PWM device + * @flags: flags associated with the PWM device + * @hwpwm: per-chip relative index of the PWM device + * @pwm: global index of the PWM device + * @chip: PWM chip providing this PWM device + * @chip_data: chip-private data associated with the PWM device + * @period: period of the PWM signal (in nanoseconds) + * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) + * @polarity: polarity of the PWM signal + */ struct pwm_device { - const char *label; - unsigned long flags; - unsigned int hwpwm; - unsigned int pwm; - struct pwm_chip *chip; - void *chip_data; - - unsigned int period; /* in nanoseconds */ - unsigned int duty_cycle; /* in nanoseconds */ - enum pwm_polarity polarity; + const char *label; + unsigned long flags; + unsigned int hwpwm; + unsigned int pwm; + struct pwm_chip *chip; + void *chip_data; + + unsigned int period; + unsigned int duty_cycle; + enum pwm_polarity polarity; }; +static inline bool pwm_is_enabled(const struct pwm_device *pwm) +{ + return test_bit(PWMF_ENABLED, &pwm->flags); +} + static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) { if (pwm) pwm->period = period; } -static inline unsigned int pwm_get_period(struct pwm_device *pwm) +static inline unsigned int pwm_get_period(const struct pwm_device *pwm) { return pwm ? pwm->period : 0; } @@ -109,7 +126,7 @@ static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) pwm->duty_cycle = duty; } -static inline unsigned int pwm_get_duty_cycle(struct pwm_device *pwm) +static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) { return pwm ? pwm->duty_cycle : 0; } @@ -119,6 +136,11 @@ static inline unsigned int pwm_get_duty_cycle(struct pwm_device *pwm) */ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity); +static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm) +{ + return pwm ? pwm->polarity : PWM_POLARITY_NORMAL; +} + /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM @@ -131,25 +153,18 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity); * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { - int (*request)(struct pwm_chip *chip, - struct pwm_device *pwm); - void (*free)(struct pwm_chip *chip, - struct pwm_device *pwm); - int (*config)(struct pwm_chip *chip, - struct pwm_device *pwm, - int duty_ns, int period_ns); - int (*set_polarity)(struct pwm_chip *chip, - struct pwm_device *pwm, - enum pwm_polarity polarity); - int (*enable)(struct pwm_chip *chip, - struct pwm_device *pwm); - void (*disable)(struct pwm_chip *chip, - struct pwm_device *pwm); + int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); + void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); + int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, + int duty_ns, int period_ns); + int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity); + int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm); + void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm); #ifdef CONFIG_DEBUG_FS - void (*dbg_show)(struct pwm_chip *chip, - struct seq_file *s); + void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s); #endif - struct module *owner; + struct module *owner; }; /** @@ -160,22 +175,24 @@ struct pwm_ops { * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @pwms: array of PWM devices allocated by the framework + * @of_xlate: request a PWM device given a device tree PWM specifier + * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier * @can_sleep: must be true if the .config(), .enable() or .disable() * operations may sleep */ struct pwm_chip { - struct device *dev; - struct list_head list; - const struct pwm_ops *ops; - int base; - unsigned int npwm; - - struct pwm_device *pwms; - - struct pwm_device * (*of_xlate)(struct pwm_chip *pc, - const struct of_phandle_args *args); - unsigned int of_pwm_n_cells; - bool can_sleep; + struct device *dev; + struct list_head list; + const struct pwm_ops *ops; + int base; + unsigned int npwm; + + struct pwm_device *pwms; + + struct pwm_device * (*of_xlate)(struct pwm_chip *pc, + const struct of_phandle_args *args); + unsigned int of_pwm_n_cells; + bool can_sleep; }; #if IS_ENABLED(CONFIG_PWM) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4a6759098769..8fc0bfd8edc4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -17,6 +17,7 @@ #include <linux/rbtree.h> #include <linux/err.h> #include <linux/bug.h> +#include <linux/lockdep.h> struct module; struct device; @@ -51,14 +52,17 @@ struct reg_default { }; /** - * Register/value pairs for sequences of writes + * Register/value pairs for sequences of writes with an optional delay in + * microseconds to be applied after each write. * * @reg: Register address. * @def: Register value. + * @delay_us: Delay to be applied after the register write in microseconds */ struct reg_sequence { unsigned int reg; unsigned int def; + unsigned int delay_us; }; #ifdef CONFIG_REGMAP @@ -307,8 +311,12 @@ typedef void (*regmap_hw_free_context)(void *context); * if not implemented on a given device. * @async_write: Write operation which completes asynchronously, optional and * must serialise with respect to non-async I/O. + * @reg_write: Write a single register value to the given register address. This + * write operation has to complete when returning from the function. * @read: Read operation. Data is returned in the buffer used to transmit * data. + * @reg_read: Read a single register value from a given register address. + * @free_context: Free context. * @async_alloc: Allocate a regmap_async() structure. * @read_flag_mask: Mask to be set in the top byte of the register when doing * a read. @@ -318,7 +326,8 @@ typedef void (*regmap_hw_free_context)(void *context); * @val_format_endian_default: Default endianness for formatted register * values. Used when the regmap_config specifies DEFAULT. If this is * DEFAULT, BIG is assumed. - * @async_size: Size of struct used for async work. + * @max_raw_read: Max raw read size that can be used on the bus. + * @max_raw_write: Max raw write size that can be used on the bus. */ struct regmap_bus { bool fast_io; @@ -333,47 +342,186 @@ struct regmap_bus { u8 read_flag_mask; enum regmap_endian reg_format_endian_default; enum regmap_endian val_format_endian_default; + size_t max_raw_read; + size_t max_raw_write; }; -struct regmap *regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config); +/* + * __regmap_init functions. + * + * These functions take a lock key and name parameter, and should not be called + * directly. Instead, use the regmap_init macros that generate a key and name + * for each call. + */ +struct regmap *__regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spi(struct spi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spmi_base(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spmi_ext(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +struct regmap *__devm_regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spi(struct spi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spmi_base(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spmi_ext(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_mmio_clk(struct device *dev, + const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +/* + * Wrapper for regmap_init macros to include a unique lockdep key and name + * for each call. No-op if CONFIG_LOCKDEP is not set. + * + * @fn: Real function to call (in the form __[*_]regmap_init[_*]) + * @name: Config variable name (#config in the calling macro) + **/ +#ifdef CONFIG_LOCKDEP +#define __regmap_lockdep_wrapper(fn, name, ...) \ +( \ + ({ \ + static struct lock_class_key _key; \ + fn(__VA_ARGS__, &_key, \ + KBUILD_BASENAME ":" \ + __stringify(__LINE__) ":" \ + "(" name ")->lock"); \ + }) \ +) +#else +#define __regmap_lockdep_wrapper(fn, name, ...) fn(__VA_ARGS__, NULL, NULL) +#endif + +/** + * regmap_init(): Initialise register map + * + * @dev: Device that will be interacted with + * @bus: Bus-specific callbacks to use with device + * @bus_context: Data passed to bus-specific callbacks + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. This function should generally not be called + * directly, it should be called by bus-specific init functions. + */ +#define regmap_init(dev, bus, bus_context, config) \ + __regmap_lockdep_wrapper(__regmap_init, #config, \ + dev, bus, bus_context, config) int regmap_attach_dev(struct device *dev, struct regmap *map, - const struct regmap_config *config); -struct regmap *regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config); -struct regmap *regmap_init_spi(struct spi_device *dev, - const struct regmap_config *config); -struct regmap *regmap_init_spmi_base(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *regmap_init_spmi_ext(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config); -struct regmap *regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config); - -struct regmap *devm_regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config); -struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spi(struct spi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spmi_base(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spmi_ext(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config); -struct regmap *devm_regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config); + const struct regmap_config *config); -bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); +/** + * regmap_init_i2c(): Initialise register map + * + * @i2c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_i2c(i2c, config) \ + __regmap_lockdep_wrapper(__regmap_init_i2c, #config, \ + i2c, config) + +/** + * regmap_init_spi(): Initialise register map + * + * @spi: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_spi(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spi, #config, \ + dev, config) + +/** + * regmap_init_spmi_base(): Create regmap for the Base register space + * @sdev: SPMI device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_spmi_base(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spmi_base, #config, \ + dev, config) + +/** + * regmap_init_spmi_ext(): Create regmap for Ext register space + * @sdev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_spmi_ext(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spmi_ext, #config, \ + dev, config) + +/** + * regmap_init_mmio_clk(): Initialise register map with register clock + * + * @dev: Device that will be interacted with + * @clk_id: register clock consumer ID + * @regs: Pointer to memory-mapped IO region + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_mmio_clk(dev, clk_id, regs, config) \ + __regmap_lockdep_wrapper(__regmap_init_mmio_clk, #config, \ + dev, clk_id, regs, config) /** * regmap_init_mmio(): Initialise register map @@ -385,12 +533,109 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -static inline struct regmap *regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) -{ - return regmap_init_mmio_clk(dev, NULL, regs, config); -} +#define regmap_init_mmio(dev, regs, config) \ + regmap_init_mmio_clk(dev, NULL, regs, config) + +/** + * regmap_init_ac97(): Initialise AC'97 register map + * + * @ac97: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. + */ +#define regmap_init_ac97(ac97, config) \ + __regmap_lockdep_wrapper(__regmap_init_ac97, #config, \ + ac97, config) +bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); + +/** + * devm_regmap_init(): Initialise managed register map + * + * @dev: Device that will be interacted with + * @bus: Bus-specific callbacks to use with device + * @bus_context: Data passed to bus-specific callbacks + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. This function should generally not be called + * directly, it should be called by bus-specific init functions. The + * map will be automatically freed by the device management code. + */ +#define devm_regmap_init(dev, bus, bus_context, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init, #config, \ + dev, bus, bus_context, config) + +/** + * devm_regmap_init_i2c(): Initialise managed register map + * + * @i2c: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_i2c(i2c, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config, \ + i2c, config) + +/** + * devm_regmap_init_spi(): Initialise register map + * + * @spi: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The map will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_spi(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spi, #config, \ + dev, config) + +/** + * devm_regmap_init_spmi_base(): Create managed regmap for Base register space + * @sdev: SPMI device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_spmi_base(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spmi_base, #config, \ + dev, config) + +/** + * devm_regmap_init_spmi_ext(): Create managed regmap for Ext register space + * @sdev: SPMI device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_spmi_ext(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spmi_ext, #config, \ + dev, config) + +/** + * devm_regmap_init_mmio_clk(): Initialise managed register map with clock + * + * @dev: Device that will be interacted with + * @clk_id: register clock consumer ID + * @regs: Pointer to memory-mapped IO region + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_mmio_clk(dev, clk_id, regs, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_mmio_clk, #config, \ + dev, clk_id, regs, config) /** * devm_regmap_init_mmio(): Initialise managed register map @@ -403,12 +648,22 @@ static inline struct regmap *regmap_init_mmio(struct device *dev, * to a struct regmap. The regmap will be automatically freed by the * device management code. */ -static inline struct regmap *devm_regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) -{ - return devm_regmap_init_mmio_clk(dev, NULL, regs, config); -} +#define devm_regmap_init_mmio(dev, regs, config) \ + devm_regmap_init_mmio_clk(dev, NULL, regs, config) + +/** + * devm_regmap_init_ac97(): Initialise AC'97 register map + * + * @ac97: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_ac97(ac97, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_ac97, #config, \ + ac97, config) void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, @@ -450,6 +705,8 @@ int regmap_get_max_register(struct regmap *map); int regmap_get_reg_stride(struct regmap *map); int regmap_async_complete(struct regmap *map); bool regmap_can_raw_write(struct regmap *map); +size_t regmap_get_raw_read_max(struct regmap *map); +size_t regmap_get_raw_write_max(struct regmap *map); int regcache_sync(struct regmap *map); int regcache_sync_region(struct regmap *map, unsigned int min, diff --git a/include/linux/reset.h b/include/linux/reset.h index da5602bd77d7..7f65f9cff951 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -74,6 +74,20 @@ static inline int device_reset_optional(struct device *dev) return -ENOSYS; } +static inline struct reset_control *__must_check reset_control_get( + struct device *dev, const char *id) +{ + WARN_ON(1); + return ERR_PTR(-EINVAL); +} + +static inline struct reset_control *__must_check devm_reset_control_get( + struct device *dev, const char *id) +{ + WARN_ON(1); + return ERR_PTR(-EINVAL); +} + static inline struct reset_control *reset_control_get_optional( struct device *dev, const char *id) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index a19ddacdac30..f4265039a94c 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -78,7 +78,7 @@ static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) static inline int seccomp_mode(struct seccomp *s) { - return 0; + return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index d4c7271382cb..dde00defbaa5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -114,13 +114,22 @@ int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); -int seq_escape(struct seq_file *, const char *, const char *); -int seq_putc(struct seq_file *m, char c); -int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); -__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); -__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); +__printf(2, 0) +void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); +__printf(2, 3) +void seq_printf(struct seq_file *m, const char *fmt, ...); +void seq_putc(struct seq_file *m, char c); +void seq_puts(struct seq_file *m, const char *s); +void seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num); +void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +void seq_escape(struct seq_file *m, const char *s, const char *esc); + +void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, const void *buf, size_t len, + bool ascii); int seq_path(struct seq_file *, const struct path *, const char *); int seq_file_path(struct seq_file *, struct file *, const char *); @@ -134,10 +143,6 @@ int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); -int seq_put_decimal_ull(struct seq_file *m, char delimiter, - unsigned long long num); -int seq_put_decimal_ll(struct seq_file *m, char delimiter, - long long num); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 71f711db4500..dabe643eb5fa 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -48,24 +48,24 @@ static inline int string_unescape_any_inplace(char *buf) #define ESCAPE_HEX 0x20 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, - unsigned int flags, const char *esc); + unsigned int flags, const char *only); static inline int string_escape_mem_any_np(const char *src, size_t isz, - char *dst, size_t osz, const char *esc) + char *dst, size_t osz, const char *only) { - return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); + return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, only); } static inline int string_escape_str(const char *src, char *dst, size_t sz, - unsigned int flags, const char *esc) + unsigned int flags, const char *only) { - return string_escape_mem(src, strlen(src), dst, sz, flags, esc); + return string_escape_mem(src, strlen(src), dst, sz, flags, only); } static inline int string_escape_str_any_np(const char *src, char *dst, - size_t sz, const char *esc) + size_t sz, const char *only) { - return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); + return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); } #endif diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d5ee6d8b7c58..7ccc961f33e9 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -132,6 +132,7 @@ struct svcxprt_rdma { struct list_head sc_accept_q; /* Conn. waiting accept */ int sc_ord; /* RDMA read limit */ int sc_max_sge; + int sc_max_sge_rd; /* max sge for read target */ int sc_sq_depth; /* Depth of SQ */ atomic_t sc_sq_count; /* Number of SQ WR on queue */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 31496d201fdc..7ba7dccaf0e7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -351,7 +351,15 @@ extern void check_move_unevictable_pages(struct page **, int nr_pages); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_MEMCG -extern int mem_cgroup_swappiness(struct mem_cgroup *mem); +static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) +{ + /* root ? */ + if (mem_cgroup_disabled() || !memcg->css.parent) + return vm_swappiness; + + return memcg->swappiness; +} + #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { @@ -398,6 +406,9 @@ extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); +extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, + struct vm_area_struct *vma, unsigned long addr, + bool *new_page_allocated); extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); @@ -431,6 +442,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); @@ -522,6 +534,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int swp_swapcount(swp_entry_t entry) +{ + return 0; +} + #define reuse_swap_page(page) (page_mapcount(page) == 1) static inline int try_to_free_swap(struct page *page) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cedf3d3c373f..5c3a5f3e7eec 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -164,6 +164,9 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif #ifdef CONFIG_MEMORY_FAILURE + +extern atomic_long_t num_poisoned_pages __read_mostly; + /* * Support for hardware poisoned pages */ @@ -177,6 +180,31 @@ static inline int is_hwpoison_entry(swp_entry_t entry) { return swp_type(entry) == SWP_HWPOISON; } + +static inline bool test_set_page_hwpoison(struct page *page) +{ + return TestSetPageHWPoison(page); +} + +static inline void num_poisoned_pages_inc(void) +{ + atomic_long_inc(&num_poisoned_pages); +} + +static inline void num_poisoned_pages_dec(void) +{ + atomic_long_dec(&num_poisoned_pages); +} + +static inline void num_poisoned_pages_add(long num) +{ + atomic_long_add(num, &num_poisoned_pages); +} + +static inline void num_poisoned_pages_sub(long num) +{ + atomic_long_sub(num, &num_poisoned_pages); +} #else static inline swp_entry_t make_hwpoison_entry(struct page *page) @@ -188,6 +216,15 @@ static inline int is_hwpoison_entry(swp_entry_t swp) { return 0; } + +static inline bool test_set_page_hwpoison(struct page *page) +{ + return false; +} + +static inline void num_poisoned_pages_inc(void) +{ +} #endif #if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 08001317aee7..a460e2ef2843 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -885,4 +885,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); +asmlinkage long sys_membarrier(int cmd, int flags); + #endif diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 037e9df2f610..17292fee8686 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,23 +92,19 @@ struct thermal_zone_device_ops { struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); - int (*get_temp) (struct thermal_zone_device *, unsigned long *); + int (*get_temp) (struct thermal_zone_device *, int *); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*get_trip_type) (struct thermal_zone_device *, int, enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_temp) (struct thermal_zone_device *, int, - unsigned long); - int (*get_trip_hyst) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_hyst) (struct thermal_zone_device *, int, - unsigned long); - int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); - int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); + int (*get_trip_temp) (struct thermal_zone_device *, int, int *); + int (*set_trip_temp) (struct thermal_zone_device *, int, int); + int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); + int (*set_trip_hyst) (struct thermal_zone_device *, int, int); + int (*get_crit_temp) (struct thermal_zone_device *, int *); + int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -332,9 +328,9 @@ struct thermal_genl_event { * temperature. */ struct thermal_zone_of_device_ops { - int (*get_temp)(void *, long *); + int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); - int (*set_emul_temp)(void *, unsigned long); + int (*set_emul_temp)(void *, int); }; /** @@ -406,7 +402,7 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, const struct thermal_cooling_device_ops *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp); +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -457,7 +453,7 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( const char *name) { return ERR_PTR(-ENODEV); } static inline int thermal_zone_get_temp( - struct thermal_zone_device *tz, unsigned long *temp) + struct thermal_zone_device *tz, int *temp) { return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } diff --git a/include/linux/verify_pefile.h b/include/linux/verify_pefile.h index ac34819214f9..da2049b5161c 100644 --- a/include/linux/verify_pefile.h +++ b/include/linux/verify_pefile.h @@ -12,7 +12,11 @@ #ifndef _LINUX_VERIFY_PEFILE_H #define _LINUX_VERIFY_PEFILE_H +#include <crypto/public_key.h> + extern int verify_pefile_signature(const void *pebuf, unsigned pelen, - struct key *trusted_keyring, bool *_trusted); + struct key *trusted_keyring, + enum key_being_used_for usage, + bool *_trusted); #endif /* _LINUX_VERIFY_PEFILE_H */ diff --git a/include/linux/zbud.h b/include/linux/zbud.h index f9d41a6e361f..e183a0a65ac1 100644 --- a/include/linux/zbud.h +++ b/include/linux/zbud.h @@ -9,7 +9,7 @@ struct zbud_ops { int (*evict)(struct zbud_pool *pool, unsigned long handle); }; -struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); +struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops); void zbud_destroy_pool(struct zbud_pool *pool); int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, unsigned long *handle); diff --git a/include/linux/zpool.h b/include/linux/zpool.h index d30eff3d84d5..42f8ec992452 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -36,8 +36,10 @@ enum zpool_mapmode { ZPOOL_MM_DEFAULT = ZPOOL_MM_RW }; +bool zpool_has_pool(char *type); + struct zpool *zpool_create_pool(char *type, char *name, - gfp_t gfp, struct zpool_ops *ops); + gfp_t gfp, const struct zpool_ops *ops); char *zpool_get_type(struct zpool *pool); @@ -81,7 +83,7 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops, + void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, struct zpool *zpool); void (*destroy)(void *pool); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 1338190b5478..6398dfae53f1 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -34,6 +34,11 @@ enum zs_mapmode { */ }; +struct zs_pool_stats { + /* How many pages were migrated (freed) */ + unsigned long pages_compacted; +}; + struct zs_pool; struct zs_pool *zs_create_pool(char *name, gfp_t flags); @@ -49,4 +54,5 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle); unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); +void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); #endif diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h index 9f36641a6781..6513c7ec3116 100644 --- a/include/media/videobuf2-memops.h +++ b/include/media/videobuf2-memops.h @@ -15,6 +15,7 @@ #define _MEDIA_VIDEOBUF2_MEMOPS_H #include <media/videobuf2-core.h> +#include <linux/mm.h> /** * struct vb2_vmarea_handler - common vma refcount tracking handler @@ -31,11 +32,9 @@ struct vb2_vmarea_handler { extern const struct vm_operations_struct vb2_common_vm_ops; -int vb2_get_contig_userptr(unsigned long vaddr, unsigned long size, - struct vm_area_struct **res_vma, dma_addr_t *res_pa); - -struct vm_area_struct *vb2_get_vma(struct vm_area_struct *vma); -void vb2_put_vma(struct vm_area_struct *vma); - +struct frame_vector *vb2_create_framevec(unsigned long start, + unsigned long length, + bool write); +void vb2_destroy_framevec(struct frame_vector *vec); #endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 0c3ac5acb85f..b5474b1fcd83 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -91,6 +91,37 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); +static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +{ + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + + /* + * The zSeries OSA network cards can be shared among various + * OS instances, but the OSA cards have only one MAC address. + * This leads to duplicate address conflicts in conjunction + * with IPv6 if more than one instance uses the same card. + * + * The driver for these cards can deliver a unique 16-bit + * identifier for each instance sharing the same card. It is + * placed instead of 0xFFFE in the interface identifier. The + * "u" bit of the interface identifier is not inverted in this + * case. Hence the resulting interface identifier has local + * scope according to RFC2373. + */ + if (dev->dev_id) { + eui[3] = (dev->dev_id >> 8) & 0xFF; + eui[4] = dev->dev_id & 0xFF; + } else { + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + } + return 0; +} + static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { diff --git a/include/net/bonding.h b/include/net/bonding.h index 20defc0353d1..c1740a2794a3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -310,6 +310,13 @@ static inline bool bond_uses_primary(struct bonding *bond) return bond_mode_uses_primary(BOND_MODE(bond)); } +static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ + struct slave *slave = rcu_dereference(bond->curr_active_slave); + + return bond_uses_primary(bond) && slave ? slave->dev : NULL; +} + static inline bool bond_slave_is_up(struct slave *slave) { return netif_running(slave->dev) && netif_carrier_ok(slave->dev); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4e8f804f4589..59160de702b6 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -66,7 +66,6 @@ struct fib_rules_ops { struct nlattr **); int (*fill)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *); - u32 (*default_pref)(struct fib_rules_ops *ops); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush @@ -118,5 +117,4 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); -u32 fib_default_rule_pref(struct fib_rules_ops *ops); #endif diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e3314e516681..bfc569498bfa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -477,7 +477,9 @@ struct ieee80211_event { * @chandef: Channel definition for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. - * This field is only valid when the channel type is one of the HT types. + * This field is only valid when the channel is a wide HT/VHT channel. + * Note that with TDLS this can be the case (channel is HT, protection must + * be used from this field) even when the BSS association isn't using HT. * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value * implies disabled * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index bab824bde92c..d4c6b5f30acd 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -59,7 +59,7 @@ static inline unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - return NF_DROP; + return NF_ACCEPT; } #endif diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f5e23c6dee8b..e8ad46834df8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -298,6 +298,7 @@ void init_nf_conntrack_hash_rnd(void); struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); +void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2a246680a6c3..aa8bee72c9d3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -125,7 +125,7 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) { - return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } unsigned int nft_parse_register(const struct nlattr *attr); diff --git a/include/net/sock.h b/include/net/sock.h index 43c6abcf06ab..7aa78440559a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1042,42 +1042,9 @@ struct proto { #endif }; -/* - * Bits in struct cg_proto.flags - */ -enum cg_proto_flags { - /* Currently active and new sockets should be assigned to cgroups */ - MEMCG_SOCK_ACTIVE, - /* It was ever activated; we must disarm static keys on destruction */ - MEMCG_SOCK_ACTIVATED, -}; - -struct cg_proto { - struct page_counter memory_allocated; /* Current allocated memory. */ - struct percpu_counter sockets_allocated; /* Current number of sockets. */ - int memory_pressure; - long sysctl_mem[3]; - unsigned long flags; - /* - * memcg field is used to find which memcg we belong directly - * Each memcg struct can hold more than one cg_proto, so container_of - * won't really cut. - * - * The elegant solution would be having an inverse function to - * proto_cgroup in struct proto, but that means polluting the structure - * for everybody, instead of just for memcg users. - */ - struct mem_cgroup *memcg; -}; - int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); -static inline bool memcg_proto_active(struct cg_proto *cg_proto) -{ - return test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); -} - #ifdef SOCK_REFCNT_DEBUG static inline void sk_refcnt_debug_inc(struct sock *sk) { diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 39ed2d2fbd51..92a7d85917b4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -105,14 +105,16 @@ enum ib_cm_data_size { IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, - /* compare done u32 at a time */ - IB_CM_COMPARE_SIZE = (64 / sizeof(u32)) }; struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; + + /* P_Key that was used by the GMP's BTH header */ + u16 bth_pkey; + u8 port; struct ib_sa_path_rec *primary_path; @@ -223,6 +225,9 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; + __be64 service_id; + /* P_Key that was used by the GMP's BTH header */ + u16 bth_pkey; u8 port; u16 pkey; }; @@ -337,11 +342,6 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL) #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) -struct ib_cm_compare_data { - u32 data[IB_CM_COMPARE_SIZE]; - u32 mask[IB_CM_COMPARE_SIZE]; -}; - /** * ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. @@ -354,12 +354,13 @@ struct ib_cm_compare_data { * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. - * @compare_data: This parameter is optional. It specifies data that must - * appear in the private data of a connection request for the specified - * listen request. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, - struct ib_cm_compare_data *compare_data); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, + __be64 service_mask); + +struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, + ib_cm_handler cm_handler, + __be64 service_id); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index c8422d5a5a91..188df91d5851 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -127,6 +127,23 @@ #define IB_DEFAULT_PKEY_PARTIAL 0x7FFF #define IB_DEFAULT_PKEY_FULL 0xFFFF +/* + * Generic trap/notice types + */ +#define IB_NOTICE_TYPE_FATAL 0x80 +#define IB_NOTICE_TYPE_URGENT 0x81 +#define IB_NOTICE_TYPE_SECURITY 0x82 +#define IB_NOTICE_TYPE_SM 0x83 +#define IB_NOTICE_TYPE_INFO 0x84 + +/* + * Generic trap/notice producers + */ +#define IB_NOTICE_PROD_CA cpu_to_be16(1) +#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) +#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) +#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) + enum { IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, @@ -240,6 +257,70 @@ struct ib_class_port_info { __be32 trap_qkey; }; +struct ib_mad_notice_attr { + u8 generic_type; + u8 prod_type_msb; + __be16 prod_type_lsb; + __be16 trap_num; + __be16 issuer_lid; + __be16 toggle_count; + + union { + struct { + u8 details[54]; + } raw_data; + + struct { + __be16 reserved; + __be16 lid; /* where violation happened */ + u8 port_num; /* where violation happened */ + } __packed ntc_129_131; + + struct { + __be16 reserved; + __be16 lid; /* LID where change occurred */ + u8 reserved2; + u8 local_changes; /* low bit - local changes */ + __be32 new_cap_mask; /* new capability mask */ + u8 reserved3; + u8 change_flags; /* low 3 bits only */ + } __packed ntc_144; + + struct { + __be16 reserved; + __be16 lid; /* lid where sys guid changed */ + __be16 reserved2; + __be64 new_sys_guid; + } __packed ntc_145; + + struct { + __be16 reserved; + __be16 lid; + __be16 dr_slid; + u8 method; + u8 reserved2; + __be16 attr_id; + __be32 attr_mod; + __be64 mkey; + u8 reserved3; + u8 dr_trunc_hop; + u8 dr_rtn_path[30]; + } __packed ntc_256; + + struct { + __be16 reserved; + __be16 lid1; + __be16 lid2; + __be32 key; + __be32 sl_qp1; /* SL: high 4 bits */ + __be32 qp2; /* high 8 bits reserved */ + union ib_gid gid1; + union ib_gid gid2; + } __packed ntc_257_258; + + } details; +}; + /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. @@ -388,7 +469,6 @@ enum { struct ib_mad_agent { struct ib_device *device; struct ib_qp *qp; - struct ib_mr *mr; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; ib_mad_snoop_handler snoop_handler; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b1f7592e02e4..709a5331e6b9 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -76,6 +76,8 @@ enum { IB_OPCODE_UC = 0x20, IB_OPCODE_RD = 0x40, IB_OPCODE_UD = 0x60, + /* per IBTA 3.1 Table 38, A10.3.2 */ + IB_OPCODE_CNP = 0x80, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index 98b9086d769a..b439e988408e 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -119,10 +119,57 @@ struct ib_port_info { u8 link_roundtrip_latency[3]; }; +struct ib_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be64 sys_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; +} __packed; + +struct ib_vl_weight_elem { + u8 vl; /* IB: VL is low 4 bits, upper 4 bits reserved */ + /* OPA: VL is low 5 bits, upper 3 bits reserved */ + u8 weight; +}; + static inline u8 ib_get_smp_direction(struct ib_smp *smp) { return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); } +/* + * SM Trap/Notice numbers + */ +#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) +#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) +#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) +#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) +#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) +#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) +#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) +#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) + +/* + * Other local changes flags (trap 144). + */ +#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ +#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ +#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 + +/* + * M_Key volation flags in dr_trunc_hop (trap 256). + */ +#define IB_NOTICE_TRAP_DR_NOTICE 0x80 +#define IB_NOTICE_TRAP_DR_TRUNC 0x40 + + #endif /* IB_SMI_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 43c1cf01c84b..7845fae6f2df 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -48,6 +48,7 @@ #include <linux/rwsem.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> +#include <linux/socket.h> #include <uapi/linux/if_ether.h> #include <linux/atomic.h> @@ -64,6 +65,12 @@ union ib_gid { } global; }; +extern union ib_gid zgid; + +struct ib_gid_attr { + struct net_device *ndev; +}; + enum rdma_node_type { /* IB values map to NodeInfo:NodeType. */ RDMA_NODE_IB_CA = 1, @@ -284,7 +291,7 @@ enum ib_port_cap_flags { IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, IB_PORT_CLIENT_REG_SUP = 1 << 25, - IB_PORT_IP_BASED_GIDS = 1 << 26 + IB_PORT_IP_BASED_GIDS = 1 << 26, }; enum ib_port_width { @@ -556,20 +563,18 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate); */ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); -enum ib_mr_create_flags { - IB_MR_SIGNATURE_EN = 1, -}; /** - * ib_mr_init_attr - Memory region init attributes passed to routine - * ib_create_mr. - * @max_reg_descriptors: max number of registration descriptors that - * may be used with registration work requests. - * @flags: MR creation flags bit mask. + * enum ib_mr_type - memory region type + * @IB_MR_TYPE_MEM_REG: memory region that is used for + * normal registration + * @IB_MR_TYPE_SIGNATURE: memory region that is used for + * signature operations (data-integrity + * capable regions) */ -struct ib_mr_init_attr { - int max_reg_descriptors; - u32 flags; +enum ib_mr_type { + IB_MR_TYPE_MEM_REG, + IB_MR_TYPE_SIGNATURE, }; /** @@ -1252,9 +1257,11 @@ struct ib_udata { }; struct ib_pd { + u32 local_dma_lkey; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ + struct ib_mr *local_mr; }; struct ib_xrcd { @@ -1488,7 +1495,7 @@ struct ib_cache { rwlock_t lock; struct ib_event_handler event_handler; struct ib_pkey_cache **pkey_cache; - struct ib_gid_cache **gid_cache; + struct ib_gid_table **gid_cache; u8 *lmc_cache; }; @@ -1550,6 +1557,8 @@ struct ib_device { spinlock_t client_data_lock; struct list_head core_list; + /* Access to the client_data_list is protected by the client_data_lock + * spinlock and the lists_rwsem read-write semaphore */ struct list_head client_data_list; struct ib_cache cache; @@ -1572,9 +1581,47 @@ struct ib_device { struct ib_port_attr *port_attr); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, u8 port_num); + /* When calling get_netdev, the HW vendor's driver should return the + * net device of device @device at port @port_num or NULL if such + * a net device doesn't exist. The vendor driver should call dev_hold + * on this net device. The HW vendor's device driver must guarantee + * that this function returns NULL before the net device reaches + * NETDEV_UNREGISTER_FINAL state. + */ + struct net_device *(*get_netdev)(struct ib_device *device, + u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); + /* When calling add_gid, the HW vendor's driver should + * add the gid of device @device at gid index @index of + * port @port_num to be @gid. Meta-info of that gid (for example, + * the network device related to this gid is available + * at @attr. @context allows the HW vendor driver to store extra + * information together with a GID entry. The HW vendor may allocate + * memory to contain this information and store it in @context when a + * new GID entry is written to. Params are consistent until the next + * call of add_gid or delete_gid. The function should return 0 on + * success or error otherwise. The function could be called + * concurrently for different ports. This function is only called + * when roce_gid_table is used. + */ + int (*add_gid)(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); + /* When calling del_gid, the HW vendor's driver should delete the + * gid of device @device at gid index @index of port @port_num. + * Upon the deletion of a GID entry, the HW vendor must free any + * allocated memory. The caller will clear @context afterwards. + * This function is only called when roce_gid_table is used. + */ + int (*del_gid)(struct ib_device *device, + u8 port_num, + unsigned int index, + void **context); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int (*modify_device)(struct ib_device *device, @@ -1668,11 +1715,9 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); - int (*destroy_mr)(struct ib_mr *mr); - struct ib_mr * (*create_mr)(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr); - struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, - int max_page_list_len); + struct ib_mr * (*alloc_mr)(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len); void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); @@ -1724,6 +1769,7 @@ struct ib_device { int (*destroy_flow)(struct ib_flow *flow_id); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); struct ib_dma_mapping_ops *dma_ops; @@ -1761,8 +1807,30 @@ struct ib_device { struct ib_client { char *name; void (*add) (struct ib_device *); - void (*remove)(struct ib_device *); - + void (*remove)(struct ib_device *, void *client_data); + + /* Returns the net_dev belonging to this ib_client and matching the + * given parameters. + * @dev: An RDMA device that the net_dev use for communication. + * @port: A physical port number on the RDMA device. + * @pkey: P_Key that the net_dev uses if applicable. + * @gid: A GID that the net_dev uses to communicate. + * @addr: An IP address the net_dev is configured with. + * @client_data: The device's client data set by ib_set_client_data(). + * + * An ib_client that implements a net_dev on top of RDMA devices + * (such as IP over IB) should implement this callback, allowing the + * rdma_cm module to find the right net_dev for a given request. + * + * The caller is responsible for calling dev_put on the returned + * netdev. */ + struct net_device *(*get_net_dev_by_params)( + struct ib_device *dev, + u8 port, + u16 pkey, + const union ib_gid *gid, + const struct sockaddr *addr, + void *client_data); struct list_head list; }; @@ -2071,34 +2139,6 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) } /** - * rdma_cap_read_multi_sge - Check if the port of device has the capability - * RDMA Read Multiple Scatter-Gather Entries. - * @device: Device to check - * @port_num: Port number to check - * - * iWARP has a restriction that RDMA READ requests may only have a single - * Scatter/Gather Entry (SGE) in the work request. - * - * NOTE: although the linux kernel currently assumes all devices are either - * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and - * WRITEs, according to Tom Talpey, this is not accurate. There are some - * devices out there that support more than a single SGE on RDMA READ - * requests, but do not support the same number of SGEs as they do on - * RDMA WRITE requests. The linux kernel would need rearchitecting to - * support these imbalanced READ/WRITE SGEs allowed devices. So, for now, - * suffice with either the device supports the same READ/WRITE SGEs, or - * it only gets one READ sge. - * - * Return: true for any device that allows more than one SGE in RDMA READ - * requests. - */ -static inline bool rdma_cap_read_multi_sge(struct ib_device *device, - u8 port_num) -{ - return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP); -} - -/** * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. * * @device: Device @@ -2115,6 +2155,26 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n return device->port_immutable[port_num].max_mad_size; } +/** + * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table + * @device: Device to check + * @port_num: Port number to check + * + * RoCE GID table mechanism manages the various GIDs for a device. + * + * NOTE: if allocating the port's GID table has failed, this call will still + * return true, but any RoCE GID table API will fail. + * + * Return: true if the port uses RoCE GID table mechanism in order to manage + * its GIDs. + */ +static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, + u8 port_num) +{ + return rdma_protocol_roce(device, port_num) && + device->add_gid && device->del_gid; +} + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -2135,20 +2195,9 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -/** - * ib_alloc_pd - Allocates an unused protection domain. - * @device: The device on which to allocate the protection domain. - * - * A protection domain object provides an association between QPs, shared - * receive queues, address handles, memory regions, and memory windows. - */ struct ib_pd *ib_alloc_pd(struct ib_device *device); -/** - * ib_dealloc_pd - Deallocates a protection domain. - * @pd: The protection domain to deallocate. - */ -int ib_dealloc_pd(struct ib_pd *pd); +void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. @@ -2775,33 +2824,9 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); */ int ib_dereg_mr(struct ib_mr *mr); - -/** - * ib_create_mr - Allocates a memory region that may be used for - * signature handover operations. - * @pd: The protection domain associated with the region. - * @mr_init_attr: memory region init attributes. - */ -struct ib_mr *ib_create_mr(struct ib_pd *pd, - struct ib_mr_init_attr *mr_init_attr); - -/** - * ib_destroy_mr - Destroys a memory region that was created using - * ib_create_mr and removes it from HW translation tables. - * @mr: The memory region to destroy. - * - * This function can fail, if the memory region has memory windows bound to it. - */ -int ib_destroy_mr(struct ib_mr *mr); - -/** - * ib_alloc_fast_reg_mr - Allocates memory region usable with the - * IB_WR_FAST_REG_MR send work request. - * @pd: The protection domain associated with the region. - * @max_page_list_len: requested max physical buffer list length to be - * used with fast register work requests for this MR. - */ -struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg); /** * ib_alloc_fast_reg_page_list - Allocates a page list array @@ -2994,4 +3019,8 @@ static inline int ib_check_mr_access(int flags) int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, + u16 pkey, const union ib_gid *gid, + const struct sockaddr *addr); + #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h new file mode 100644 index 000000000000..391dae1931c0 --- /dev/null +++ b/include/rdma/opa_port_info.h @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(OPA_PORT_INFO_H) +#define OPA_PORT_INFO_H + +/* Temporary until HFI driver is updated */ +#ifndef USE_PI_LED_ENABLE +#define USE_PI_LED_ENABLE 0 +#endif + +#define OPA_PORT_LINK_MODE_NOP 0 /* No change */ +#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ + +#define OPA_PORT_PACKET_FORMAT_NOP 0 /* No change */ +#define OPA_PORT_PACKET_FORMAT_8B 1 /* Format 8B */ +#define OPA_PORT_PACKET_FORMAT_9B 2 /* Format 9B */ +#define OPA_PORT_PACKET_FORMAT_10B 4 /* Format 10B */ +#define OPA_PORT_PACKET_FORMAT_16B 8 /* Format 16B */ + +#define OPA_PORT_LTP_CRC_MODE_NONE 0 /* No change */ +#define OPA_PORT_LTP_CRC_MODE_14 1 /* 14-bit LTP CRC mode (optional) */ +#define OPA_PORT_LTP_CRC_MODE_16 2 /* 16-bit LTP CRC mode */ +#define OPA_PORT_LTP_CRC_MODE_48 4 /* 48-bit LTP CRC mode (optional) */ +#define OPA_PORT_LTP_CRC_MODE_PER_LANE 8 /* 12/16-bit per lane LTP CRC mode */ + +/* Link Down / Neighbor Link Down Reason; indicated as follows: */ +#define OPA_LINKDOWN_REASON_NONE 0 /* No specified reason */ +#define OPA_LINKDOWN_REASON_RCV_ERROR_0 1 +#define OPA_LINKDOWN_REASON_BAD_PKT_LEN 2 +#define OPA_LINKDOWN_REASON_PKT_TOO_LONG 3 +#define OPA_LINKDOWN_REASON_PKT_TOO_SHORT 4 +#define OPA_LINKDOWN_REASON_BAD_SLID 5 +#define OPA_LINKDOWN_REASON_BAD_DLID 6 +#define OPA_LINKDOWN_REASON_BAD_L2 7 +#define OPA_LINKDOWN_REASON_BAD_SC 8 +#define OPA_LINKDOWN_REASON_RCV_ERROR_8 9 +#define OPA_LINKDOWN_REASON_BAD_MID_TAIL 10 +#define OPA_LINKDOWN_REASON_RCV_ERROR_10 11 +#define OPA_LINKDOWN_REASON_PREEMPT_ERROR 12 +#define OPA_LINKDOWN_REASON_PREEMPT_VL15 13 +#define OPA_LINKDOWN_REASON_BAD_VL_MARKER 14 +#define OPA_LINKDOWN_REASON_RCV_ERROR_14 15 +#define OPA_LINKDOWN_REASON_RCV_ERROR_15 16 +#define OPA_LINKDOWN_REASON_BAD_HEAD_DIST 17 +#define OPA_LINKDOWN_REASON_BAD_TAIL_DIST 18 +#define OPA_LINKDOWN_REASON_BAD_CTRL_DIST 19 +#define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK 20 +#define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER 21 +#define OPA_LINKDOWN_REASON_BAD_PREEMPT 22 +#define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT 23 +#define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT 24 +#define OPA_LINKDOWN_REASON_RCV_ERROR_24 25 +#define OPA_LINKDOWN_REASON_RCV_ERROR_25 26 +#define OPA_LINKDOWN_REASON_RCV_ERROR_26 27 +#define OPA_LINKDOWN_REASON_RCV_ERROR_27 28 +#define OPA_LINKDOWN_REASON_RCV_ERROR_28 29 +#define OPA_LINKDOWN_REASON_RCV_ERROR_29 30 +#define OPA_LINKDOWN_REASON_RCV_ERROR_30 31 +#define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN 32 +#define OPA_LINKDOWN_REASON_UNKNOWN 33 +/* 34 -reserved */ +#define OPA_LINKDOWN_REASON_REBOOT 35 +#define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN 36 +/* 37-38 reserved */ +#define OPA_LINKDOWN_REASON_FM_BOUNCE 39 +#define OPA_LINKDOWN_REASON_SPEED_POLICY 40 +#define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 +/* 42-48 reserved */ +#define OPA_LINKDOWN_REASON_DISCONNECTED 49 +#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 +#define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 +/* 53 reserved */ +#define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED 54 +/* 55 reserved */ +#define OPA_LINKDOWN_REASON_POWER_POLICY 56 +#define OPA_LINKDOWN_REASON_LINKSPEED_POLICY 57 +#define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY 58 +/* 59 reserved */ +#define OPA_LINKDOWN_REASON_SWITCH_MGMT 60 +#define OPA_LINKDOWN_REASON_SMA_DISABLED 61 +/* 62 reserved */ +#define OPA_LINKDOWN_REASON_TRANSIENT 63 +/* 64-255 reserved */ + +/* OPA Link Init reason; indicated as follows: */ +/* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */ +#define OPA_LINKINIT_REASON_NOP 0 +#define OPA_LINKINIT_REASON_LINKUP (1 << 4) +#define OPA_LINKINIT_REASON_FLAPPING (2 << 4) +#define OPA_LINKINIT_REASON_CLEAR (8 << 4) +#define OPA_LINKINIT_OUTSIDE_POLICY (8 << 4) +#define OPA_LINKINIT_QUARANTINED (9 << 4) +#define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4) + +#define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */ +#define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */ +#define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? Gbps (EDR) */ + +#define OPA_LINK_WIDTH_1X 0x0001 +#define OPA_LINK_WIDTH_2X 0x0002 +#define OPA_LINK_WIDTH_3X 0x0004 +#define OPA_LINK_WIDTH_4X 0x0008 + +#define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) +#define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) +#define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) +#define OPA_CAP_MASK3_IsPassThroughSupported (1 << 4) +#define OPA_CAP_MASK3_IsSharedSpaceSupported (1 << 3) +/* reserved (1 << 2) */ +#define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) +#define OPA_CAP_MASK3_IsVLrSupported (1 << 0) + +/** + * new MTU values + */ +enum { + OPA_MTU_8192 = 6, + OPA_MTU_10240 = 7, +}; + +enum { + OPA_PORT_PHYS_CONF_DISCONNECTED = 0, + OPA_PORT_PHYS_CONF_STANDARD = 1, + OPA_PORT_PHYS_CONF_FIXED = 2, + OPA_PORT_PHYS_CONF_VARIABLE = 3, + OPA_PORT_PHYS_CONF_SI_PHOTO = 4 +}; + +enum port_info_field_masks { + /* vl.cap */ + OPA_PI_MASK_VL_CAP = 0x1F, + /* port_states.ledenable_offlinereason */ + OPA_PI_MASK_OFFLINE_REASON = 0x0F, + OPA_PI_MASK_LED_ENABLE = 0x40, + /* port_states.unsleepstate_downdefstate */ + OPA_PI_MASK_UNSLEEP_STATE = 0xF0, + OPA_PI_MASK_DOWNDEF_STATE = 0x0F, + /* port_states.portphysstate_portstate */ + OPA_PI_MASK_PORT_PHYSICAL_STATE = 0xF0, + OPA_PI_MASK_PORT_STATE = 0x0F, + /* port_phys_conf */ + OPA_PI_MASK_PORT_PHYSICAL_CONF = 0x0F, + /* collectivemask_multicastmask */ + OPA_PI_MASK_COLLECT_MASK = 0x38, + OPA_PI_MASK_MULTICAST_MASK = 0x07, + /* mkeyprotect_lmc */ + OPA_PI_MASK_MKEY_PROT_BIT = 0xC0, + OPA_PI_MASK_LMC = 0x0F, + /* smsl */ + OPA_PI_MASK_SMSL = 0x1F, + /* partenforce_filterraw */ + /* Filter Raw In/Out bits 1 and 2 were removed */ + OPA_PI_MASK_LINKINIT_REASON = 0xF0, + OPA_PI_MASK_PARTITION_ENFORCE_IN = 0x08, + OPA_PI_MASK_PARTITION_ENFORCE_OUT = 0x04, + /* operational_vls */ + OPA_PI_MASK_OPERATIONAL_VL = 0x1F, + /* sa_qp */ + OPA_PI_MASK_SA_QP = 0x00FFFFFF, + /* sm_trap_qp */ + OPA_PI_MASK_SM_TRAP_QP = 0x00FFFFFF, + /* localphy_overrun_errors */ + OPA_PI_MASK_LOCAL_PHY_ERRORS = 0xF0, + OPA_PI_MASK_OVERRUN_ERRORS = 0x0F, + /* clientrereg_subnettimeout */ + OPA_PI_MASK_CLIENT_REREGISTER = 0x80, + OPA_PI_MASK_SUBNET_TIMEOUT = 0x1F, + /* port_link_mode */ + OPA_PI_MASK_PORT_LINK_SUPPORTED = (0x001F << 10), + OPA_PI_MASK_PORT_LINK_ENABLED = (0x001F << 5), + OPA_PI_MASK_PORT_LINK_ACTIVE = (0x001F << 0), + /* port_link_crc_mode */ + OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED = 0x0F00, + OPA_PI_MASK_PORT_LINK_CRC_ENABLED = 0x00F0, + OPA_PI_MASK_PORT_LINK_CRC_ACTIVE = 0x000F, + /* port_mode */ + OPA_PI_MASK_PORT_MODE_SECURITY_CHECK = 0x0001, + OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY = 0x0002, + OPA_PI_MASK_PORT_MODE_PKEY_CONVERT = 0x0004, + OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING = 0x0008, + OPA_PI_MASK_PORT_MODE_VL_MARKER = 0x0010, + OPA_PI_MASK_PORT_PASS_THROUGH = 0x0020, + OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE = 0x0040, + /* flit_control.interleave */ + OPA_PI_MASK_INTERLEAVE_DIST_SUP = (0x0003 << 12), + OPA_PI_MASK_INTERLEAVE_DIST_ENABLE = (0x0003 << 10), + OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX = (0x001F << 5), + OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX = (0x001F << 0), + + /* port_error_action */ + OPA_PI_MASK_EX_BUFFER_OVERRUN = 0x80000000, + /* 7 bits reserved */ + OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT = 0x00800000, + OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT = 0x00400000, + OPA_PI_MASK_FM_CFG_BAD_PREEMPT = 0x00200000, + OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER = 0x00100000, + OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK = 0x00080000, + OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST = 0x00040000, + OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST = 0x00020000, + OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST = 0x00010000, + /* 2 bits reserved */ + OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER = 0x00002000, + OPA_PI_MASK_PORT_RCV_PREEMPT_VL15 = 0x00001000, + OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR = 0x00000800, + /* 1 bit reserved */ + OPA_PI_MASK_PORT_RCV_BAD_MidTail = 0x00000200, + /* 1 bit reserved */ + OPA_PI_MASK_PORT_RCV_BAD_SC = 0x00000080, + OPA_PI_MASK_PORT_RCV_BAD_L2 = 0x00000040, + OPA_PI_MASK_PORT_RCV_BAD_DLID = 0x00000020, + OPA_PI_MASK_PORT_RCV_BAD_SLID = 0x00000010, + OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT = 0x00000008, + OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG = 0x00000004, + OPA_PI_MASK_PORT_RCV_BAD_PKTLEN = 0x00000002, + OPA_PI_MASK_PORT_RCV_BAD_LT = 0x00000001, + + /* pass_through.res_drctl */ + OPA_PI_MASK_PASS_THROUGH_DR_CONTROL = 0x01, + + /* buffer_units */ + OPA_PI_MASK_BUF_UNIT_VL15_INIT = (0x00000FFF << 11), + OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE = (0x0000001F << 6), + OPA_PI_MASK_BUF_UNIT_CREDIT_ACK = (0x00000003 << 3), + OPA_PI_MASK_BUF_UNIT_BUF_ALLOC = (0x00000003 << 0), + + /* neigh_mtu.pvlx_to_mtu */ + OPA_PI_MASK_NEIGH_MTU_PVL0 = 0xF0, + OPA_PI_MASK_NEIGH_MTU_PVL1 = 0x0F, + + /* neigh_mtu.vlstall_hoq_life */ + OPA_PI_MASK_VL_STALL = (0x03 << 5), + OPA_PI_MASK_HOQ_LIFE = (0x1F << 0), + + /* port_neigh_mode */ + OPA_PI_MASK_NEIGH_MGMT_ALLOWED = (0x01 << 3), + OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS = (0x01 << 2), + OPA_PI_MASK_NEIGH_NODE_TYPE = (0x03 << 0), + + /* resptime_value */ + OPA_PI_MASK_RESPONSE_TIME_VALUE = 0x1F, + + /* mtucap */ + OPA_PI_MASK_MTU_CAP = 0x0F, +}; + +#if USE_PI_LED_ENABLE +struct opa_port_states { + u8 reserved; + u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ + u8 reserved2; + u8 portphysstate_portstate; /* 4 bits, 4 bits */ +}; +#define PI_LED_ENABLE_SUP 1 +#else +struct opa_port_states { + u8 reserved; + u8 offline_reason; /* 2 res, 6 bits */ + u8 reserved2; + u8 portphysstate_portstate; /* 4 bits, 4 bits */ +}; +#define PI_LED_ENABLE_SUP 0 +#endif + +struct opa_port_state_info { + struct opa_port_states port_states; + u16 link_width_downgrade_tx_active; + u16 link_width_downgrade_rx_active; +}; + +struct opa_port_info { + __be32 lid; + __be32 flow_control_mask; + + struct { + u8 res; /* was inittype */ + u8 cap; /* 3 res, 5 bits */ + __be16 high_limit; + __be16 preempt_limit; + u8 arb_high_cap; + u8 arb_low_cap; + } vl; + + struct opa_port_states port_states; + u8 port_phys_conf; /* 4 res, 4 bits */ + u8 collectivemask_multicastmask; /* 2 res, 3, 3 */ + u8 mkeyprotect_lmc; /* 2 bits, 2 res, 4 bits */ + u8 smsl; /* 3 res, 5 bits */ + + u8 partenforce_filterraw; /* bit fields */ + u8 operational_vls; /* 3 res, 5 bits */ + __be16 pkey_8b; + __be16 pkey_10b; + __be16 mkey_violations; + + __be16 pkey_violations; + __be16 qkey_violations; + __be32 sm_trap_qp; /* 8 bits, 24 bits */ + + __be32 sa_qp; /* 8 bits, 24 bits */ + u8 neigh_port_num; + u8 link_down_reason; + u8 neigh_link_down_reason; + u8 clientrereg_subnettimeout; /* 1 bit, 2 bits, 5 */ + + struct { + __be16 supported; + __be16 enabled; + __be16 active; + } link_speed; + struct { + __be16 supported; + __be16 enabled; + __be16 active; + } link_width; + struct { + __be16 supported; + __be16 enabled; + __be16 tx_active; + __be16 rx_active; + } link_width_downgrade; + __be16 port_link_mode; /* 1 res, 5 bits, 5 bits, 5 bits */ + __be16 port_ltp_crc_mode; /* 4 res, 4 bits, 4 bits, 4 bits */ + + __be16 port_mode; /* 9 res, bit fields */ + struct { + __be16 supported; + __be16 enabled; + } port_packet_format; + struct { + __be16 interleave; /* 2 res, 2,2,5,5 */ + struct { + __be16 min_initial; + __be16 min_tail; + u8 large_pkt_limit; + u8 small_pkt_limit; + u8 max_small_pkt_limit; + u8 preemption_limit; + } preemption; + } flit_control; + + __be32 reserved4; + __be32 port_error_action; /* bit field */ + + struct { + u8 egress_port; + u8 res_drctl; /* 7 res, 1 */ + } pass_through; + __be16 mkey_lease_period; + __be32 buffer_units; /* 9 res, 12, 5, 3, 3 */ + + __be32 reserved5; + __be32 sm_lid; + + __be64 mkey; + + __be64 subnet_prefix; + + struct { + u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */ + } neigh_mtu; + + struct { + u8 vlstall_hoqlife; /* 3 bits, 5 bits */ + } xmit_q[OPA_MAX_VLS]; + + struct { + u8 addr[16]; + } ipaddr_ipv6; + + struct { + u8 addr[4]; + } ipaddr_ipv4; + + u32 reserved6; + u32 reserved7; + u32 reserved8; + + __be64 neigh_node_guid; + + __be32 ib_cap_mask; + __be16 reserved9; /* was ib_cap_mask2 */ + __be16 opa_cap_mask; + + __be32 reserved10; /* was link_roundtrip_latency */ + __be16 overall_buffer_space; + __be16 reserved11; /* was max_credit_hint */ + + __be16 diag_code; + struct { + u8 buffer; + u8 wire; + } replay_depth; + u8 port_neigh_mode; + u8 mtucap; /* 4 res, 4 bits */ + + u8 resptimevalue; /* 3 res, 5 bits */ + u8 local_port_num; + u8 reserved12; + u8 reserved13; /* was guid_cap */ +} __attribute__ ((packed)); + +#endif /* OPA_PORT_INFO_H */ diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index 29063e84c253..4a529ef47995 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -40,6 +40,10 @@ #define OPA_SMP_DR_DATA_SIZE 1872 #define OPA_SMP_MAX_PATH_HOPS 64 +#define OPA_MAX_VLS 32 +#define OPA_MAX_SLS 32 +#define OPA_MAX_SCS 32 + #define OPA_SMI_CLASS_VERSION 0x80 #define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF) @@ -73,6 +77,49 @@ struct opa_smp { } __packed; +/* Subnet management attributes */ +/* ... */ +#define OPA_ATTRIB_ID_NODE_DESCRIPTION cpu_to_be16(0x0010) +#define OPA_ATTRIB_ID_NODE_INFO cpu_to_be16(0x0011) +#define OPA_ATTRIB_ID_PORT_INFO cpu_to_be16(0x0015) +#define OPA_ATTRIB_ID_PARTITION_TABLE cpu_to_be16(0x0016) +#define OPA_ATTRIB_ID_SL_TO_SC_MAP cpu_to_be16(0x0017) +#define OPA_ATTRIB_ID_VL_ARBITRATION cpu_to_be16(0x0018) +#define OPA_ATTRIB_ID_SM_INFO cpu_to_be16(0x0020) +#define OPA_ATTRIB_ID_CABLE_INFO cpu_to_be16(0x0032) +#define OPA_ATTRIB_ID_AGGREGATE cpu_to_be16(0x0080) +#define OPA_ATTRIB_ID_SC_TO_SL_MAP cpu_to_be16(0x0082) +#define OPA_ATTRIB_ID_SC_TO_VLR_MAP cpu_to_be16(0x0083) +#define OPA_ATTRIB_ID_SC_TO_VLT_MAP cpu_to_be16(0x0084) +#define OPA_ATTRIB_ID_SC_TO_VLNT_MAP cpu_to_be16(0x0085) +/* ... */ +#define OPA_ATTRIB_ID_PORT_STATE_INFO cpu_to_be16(0x0087) +/* ... */ +#define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE cpu_to_be16(0x008A) +/* ... */ + +struct opa_node_description { + u8 data[64]; +} __attribute__ ((packed)); + +struct opa_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be32 reserved; + __be64 system_image_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; /* network byte order */ +} __attribute__ ((packed)); + +#define OPA_PARTITION_TABLE_BLK_SIZE 32 + static inline u8 opa_get_smp_direction(struct opa_smp *smp) { diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 0790882e0c9b..585266144329 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -77,4 +77,11 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags); +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int ibnl_chk_listeners(unsigned int group); + #endif /* _RDMA_NETLINK_H */ diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 676b03b78e57..11571b2a831e 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -61,4 +61,9 @@ static inline bool scsi_sense_valid(const struct scsi_sense_hdr *sshdr) extern bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr); +extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); +int scsi_set_sense_information(u8 *buf, int buf_len, u64 info); +extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, + int desc_type); + #endif /* _SCSI_COMMON_H_ */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 50c2a363bc8f..fe89d7cd67b9 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -196,34 +196,13 @@ struct scsi_device { struct execute_work ew; /* used to get process context on put */ struct work_struct requeue_work; - struct scsi_dh_data *scsi_dh_data; + struct scsi_device_handler *handler; + void *handler_data; + enum scsi_device_state sdev_state; unsigned long sdev_data[0]; } __attribute__((aligned(sizeof(unsigned long)))); -typedef void (*activate_complete)(void *, int); -struct scsi_device_handler { - /* Used by the infrastructure */ - struct list_head list; /* list of scsi_device_handlers */ - - /* Filled by the hardware handler */ - struct module *module; - const char *name; - int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *); - struct scsi_dh_data *(*attach)(struct scsi_device *); - void (*detach)(struct scsi_device *); - int (*activate)(struct scsi_device *, activate_complete, void *); - int (*prep_fn)(struct scsi_device *, struct request *); - int (*set_params)(struct scsi_device *, const char *); - bool (*match)(struct scsi_device *); -}; - -struct scsi_dh_data { - struct scsi_device_handler *scsi_dh; - struct scsi_device *sdev; - struct kref kref; -}; - #define to_scsi_device(d) \ container_of(d, struct scsi_device, sdev_gendev) #define class_to_sdev(d) \ diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 620c723ee8ed..85d731746834 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -55,11 +55,26 @@ enum { SCSI_DH_NOSYS, SCSI_DH_DRIVER_MAX, }; -#if defined(CONFIG_SCSI_DH) || defined(CONFIG_SCSI_DH_MODULE) + +typedef void (*activate_complete)(void *, int); +struct scsi_device_handler { + /* Used by the infrastructure */ + struct list_head list; /* list of scsi_device_handlers */ + + /* Filled by the hardware handler */ + struct module *module; + const char *name; + int (*check_sense)(struct scsi_device *, struct scsi_sense_hdr *); + int (*attach)(struct scsi_device *); + void (*detach)(struct scsi_device *); + int (*activate)(struct scsi_device *, activate_complete, void *); + int (*prep_fn)(struct scsi_device *, struct request *); + int (*set_params)(struct scsi_device *, const char *); +}; + +#ifdef CONFIG_SCSI_DH extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); -extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); -extern void scsi_dh_detach(struct request_queue *); extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); extern int scsi_dh_set_params(struct request_queue *, const char *); #else @@ -69,18 +84,10 @@ static inline int scsi_dh_activate(struct request_queue *req, fn(data, 0); return 0; } -static inline int scsi_dh_handler_exist(const char *name) -{ - return 0; -} static inline int scsi_dh_attach(struct request_queue *req, const char *name) { return SCSI_DH_NOSYS; } -static inline void scsi_dh_detach(struct request_queue *q) -{ - return; -} static inline const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) { diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 8d1d7fa67ec4..dbb8c640e26f 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -4,6 +4,7 @@ #include <linux/scatterlist.h> #include <scsi/scsi_cmnd.h> +#include <scsi/scsi_common.h> struct scsi_device; struct Scsi_Host; @@ -21,14 +22,9 @@ static inline bool scsi_sense_is_deferred(const struct scsi_sense_hdr *sshdr) return ((sshdr->response_code >= 0x70) && (sshdr->response_code & 1)); } -extern const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len, - int desc_type); - extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, u64 * info_out); -extern void scsi_build_sense_buffer(int desc, u8 *buf, u8 key, u8 asc, u8 ascq); - extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 370f2909ec19..44202ff897fd 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -51,11 +51,6 @@ struct tegra_smmu_swgroup { unsigned int reg; }; -struct tegra_smmu_ops { - void (*flush_dcache)(struct page *page, unsigned long offset, - size_t size); -}; - struct tegra_smmu_soc { const struct tegra_mc_client *clients; unsigned int num_clients; @@ -66,9 +61,8 @@ struct tegra_smmu_soc { bool supports_round_robin_arbitration; bool supports_request_limit; + unsigned int num_tlb_lines; unsigned int num_asids; - - const struct tegra_smmu_ops *ops; }; struct tegra_mc; diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 0aedbb2c10e0..373d3342002b 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -62,6 +62,8 @@ /* T10 protection information disabled by default */ #define TA_DEFAULT_T10_PI 0 #define TA_DEFAULT_FABRIC_PROT_TYPE 0 +/* TPG status needs to be enabled to return sendtargets discovery endpoint info */ +#define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1 #define ISCSI_IOV_DATA_BUFFER 5 @@ -517,7 +519,6 @@ struct iscsi_conn { u16 cid; /* Remote TCP Port */ u16 login_port; - u16 local_port; int net_size; int login_family; u32 auth_id; @@ -527,9 +528,8 @@ struct iscsi_conn { u32 exp_statsn; /* Per connection status sequence number */ u32 stat_sn; -#define IPV6_ADDRESS_SPACE 48 - unsigned char login_ip[IPV6_ADDRESS_SPACE]; - unsigned char local_ip[IPV6_ADDRESS_SPACE]; + struct sockaddr_storage login_sockaddr; + struct sockaddr_storage local_sockaddr; int conn_usage_count; int conn_waiting_on_uc; atomic_t check_immediate_queue; @@ -636,7 +636,7 @@ struct iscsi_session { /* session wide counter: expected command sequence number */ u32 exp_cmd_sn; /* session wide counter: maximum allowed command sequence number */ - u32 max_cmd_sn; + atomic_t max_cmd_sn; struct list_head sess_ooo_cmdsn_list; /* LIO specific session ID */ @@ -764,6 +764,7 @@ struct iscsi_tpg_attrib { u32 default_erl; u8 t10_pi; u32 fabric_prot_type; + u32 tpg_enabled_sendtargets; struct iscsi_portal_group *tpg; }; @@ -776,12 +777,10 @@ struct iscsi_np { enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; - unsigned char np_ip[IPV6_ADDRESS_SPACE]; - u16 np_port; spinlock_t np_thread_lock; struct completion np_restart_comp; struct socket *np_socket; - struct __kernel_sockaddr_storage np_sockaddr; + struct sockaddr_storage np_sockaddr; struct task_struct *np_thread; struct timer_list np_login_timer; void *np_context; diff --git a/include/target/iscsi/iscsi_target_stat.h b/include/target/iscsi/iscsi_target_stat.h index 3ff76b4faad3..e615bb485d0b 100644 --- a/include/target/iscsi/iscsi_target_stat.h +++ b/include/target/iscsi/iscsi_target_stat.h @@ -50,7 +50,7 @@ struct iscsi_login_stats { u64 last_fail_time; /* time stamp (jiffies) */ u32 last_fail_type; int last_intr_fail_ip_family; - unsigned char last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE]; + struct sockaddr_storage last_intr_fail_sockaddr; char last_intr_fail_name[224]; } ____cacheline_aligned; diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index e6bb166f12c2..90e37faa2ede 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -9,7 +9,7 @@ struct iscsit_transport { int priv_size; struct module *owner; struct list_head t_node; - int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); + int (*iscsit_setup_np)(struct iscsi_np *, struct sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); void (*iscsit_wait_conn)(struct iscsi_conn *); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1e5c8f949bae..56cf8e485ef2 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -93,4 +93,6 @@ bool target_lun_is_rdonly(struct se_cmd *); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); +bool target_sense_desc_format(struct se_device *dev); + #endif /* TARGET_CORE_BACKEND_H */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 17ae2d6a4891..ac9bf1c0e42d 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -6,6 +6,7 @@ #include <linux/dma-mapping.h> #include <linux/blkdev.h> #include <linux/percpu_ida.h> +#include <linux/t10-pi.h> #include <net/sock.h> #include <net/tcp.h> @@ -426,12 +427,6 @@ enum target_core_dif_check { TARGET_DIF_CHECK_REFTAG = 0x1 << 2, }; -struct se_dif_v1_tuple { - __be16 guard_tag; - __be16 app_tag; - __be32 ref_tag; -}; - /* for sam_task_attr */ #define TCM_SIMPLE_TAG 0x20 #define TCM_HEAD_TAG 0x21 @@ -444,6 +439,9 @@ struct se_cmd { u8 scsi_asc; u8 scsi_ascq; u16 scsi_sense_length; + unsigned cmd_wait_set:1; + unsigned unknown_data_length:1; + bool state_active:1; u64 tag; /* SAM command identifier aka task tag */ /* Delay for ALUA Active/NonOptimized state access in milliseconds */ int alua_nonop_delay; @@ -455,11 +453,8 @@ struct se_cmd { unsigned int map_tag; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; - unsigned cmd_wait_set:1; - unsigned unknown_data_length:1; /* See se_cmd_flags_table */ u32 se_cmd_flags; - u32 se_ordered_id; /* Total size in bytes associated with command */ u32 data_length; u32 residual_count; @@ -477,7 +472,6 @@ struct se_cmd { struct se_tmr_req *se_tmr_req; struct list_head se_cmd_list; struct completion cmd_wait_comp; - struct kref cmd_kref; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); @@ -497,6 +491,7 @@ struct se_cmd { #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) spinlock_t t_state_lock; + struct kref cmd_kref; struct completion t_transport_stop_comp; struct work_struct work; @@ -509,8 +504,10 @@ struct se_cmd { struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; + /* Used for lun->lun_ref counting */ + int lun_ref_active; + struct list_head state_list; - bool state_active; /* old task stop completion, consider merging with some of the above */ struct completion task_stop_comp; @@ -518,20 +515,17 @@ struct se_cmd { /* backend private data */ void *priv; - /* Used for lun->lun_ref counting */ - int lun_ref_active; - /* DIF related members */ enum target_prot_op prot_op; enum target_prot_type prot_type; u8 prot_checks; + bool prot_pto; u32 prot_length; u32 reftag_seed; struct scatterlist *t_prot_sg; unsigned int t_prot_nents; sense_reason_t pi_err; sector_t bad_sector; - bool prot_pto; }; struct se_ua { @@ -598,7 +592,6 @@ struct se_ml_stat_grps { }; struct se_lun_acl { - char initiatorname[TRANSPORT_IQN_LEN]; u64 mapped_lun; struct se_node_acl *se_lun_nacl; struct se_lun *se_lun; @@ -685,7 +678,6 @@ struct se_lun { #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; u32 lun_access; - u32 lun_flags; u32 lun_index; /* RELATIVE TARGET PORT IDENTIFER */ @@ -751,7 +743,6 @@ struct se_device { atomic_long_t write_bytes; /* Active commands on this virtual SE device */ atomic_t simple_cmds; - atomic_t dev_ordered_id; atomic_t dev_ordered_sync; atomic_t dev_qf_count; u32 export_count; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 18afef91b447..7fb2557a760e 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -5,6 +5,19 @@ struct target_core_fabric_ops { struct module *module; const char *name; size_t node_acl_size; + /* + * Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload. + * Setting this value tells target-core to enforce this limit, and + * report as INQUIRY EVPD=b0 MAXIMUM TRANSFER LENGTH. + * + * target-core will currently reset se_cmd->data_length to this + * maximum size, and set UNDERFLOW residual count if length exceeds + * this limit. + * + * XXX: Not all initiator hosts honor this block-limit EVPD + * XXX: Currently assumes single PAGE_SIZE per scatterlist entry + */ + u32 max_data_sg_nents; char *(*get_fabric_name)(void); char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); @@ -152,6 +165,7 @@ int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u64); +void core_allocate_nexus_loss_ua(struct se_node_acl *acl); struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, unsigned char *); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index a44062da684b..d6f83222a6a1 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -358,6 +358,36 @@ TRACE_EVENT( #endif +TRACE_EVENT(kvm_halt_poll_ns, + TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), + TP_ARGS(grow, vcpu_id, new, old), + + TP_STRUCT__entry( + __field(bool, grow) + __field(unsigned int, vcpu_id) + __field(int, new) + __field(int, old) + ), + + TP_fast_assign( + __entry->grow = grow; + __entry->vcpu_id = vcpu_id; + __entry->new = new; + __entry->old = old; + ), + + TP_printk("vcpu %u: halt_poll_ns %d (%s %d)", + __entry->vcpu_id, + __entry->new, + __entry->grow ? "grow" : "shrink", + __entry->old) +); + +#define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \ + trace_kvm_halt_poll_ns(true, vcpu_id, new, old) +#define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ + trace_kvm_halt_poll_ns(false, vcpu_id, new, old) + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ diff --git a/include/trace/events/task.h b/include/trace/events/task.h index dee3bb1d5a6b..2cca6cd342d8 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -46,7 +46,7 @@ TRACE_EVENT(task_rename, TP_fast_assign( __entry->pid = task->pid; memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN); - memcpy(entry->newcomm, comm, TASK_COMM_LEN); + strlcpy(entry->newcomm, comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h index 12e1321c4e0c..5afae8fe3795 100644 --- a/include/trace/events/thermal_power_allocator.h +++ b/include/trace/events/thermal_power_allocator.h @@ -11,7 +11,7 @@ TRACE_EVENT(thermal_power_allocator, u32 total_req_power, u32 *granted_power, u32 total_granted_power, size_t num_actors, u32 power_range, u32 max_allocatable_power, - unsigned long current_temp, s32 delta_temp), + int current_temp, s32 delta_temp), TP_ARGS(tz, req_power, total_req_power, granted_power, total_granted_power, num_actors, power_range, max_allocatable_power, current_temp, delta_temp), @@ -24,7 +24,7 @@ TRACE_EVENT(thermal_power_allocator, __field(size_t, num_actors ) __field(u32, power_range ) __field(u32, max_allocatable_power ) - __field(unsigned long, current_temp ) + __field(int, current_temp ) __field(s32, delta_temp ) ), TP_fast_assign( @@ -42,7 +42,7 @@ TRACE_EVENT(thermal_power_allocator, __entry->delta_temp = delta_temp; ), - TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d", + TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%d delta_temperature=%d", __entry->tz_id, __print_array(__get_dynamic_array(req_power), __entry->num_actors, 4), diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index a7aa607a4c55..fff846b512e6 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -131,6 +131,66 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, TP_ARGS(inode, flags) ); +#ifdef CREATE_TRACE_POINTS +#ifdef CONFIG_CGROUP_WRITEBACK + +static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) +{ + return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1; +} + +static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) +{ + struct cgroup *cgrp = wb->memcg_css->cgroup; + char *path; + + path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1); + WARN_ON_ONCE(path != buf); +} + +static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) +{ + if (wbc->wb) + return __trace_wb_cgroup_size(wbc->wb); + else + return 2; +} + +static inline void __trace_wbc_assign_cgroup(char *buf, + struct writeback_control *wbc) +{ + if (wbc->wb) + __trace_wb_assign_cgroup(buf, wbc->wb); + else + strcpy(buf, "/"); +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) +{ + return 2; +} + +static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) +{ + strcpy(buf, "/"); +} + +static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) +{ + return 2; +} + +static inline void __trace_wbc_assign_cgroup(char *buf, + struct writeback_control *wbc) +{ + strcpy(buf, "/"); +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ +#endif /* CREATE_TRACE_POINTS */ + DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc), @@ -141,6 +201,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, __array(char, name, 32) __field(unsigned long, ino) __field(int, sync_mode) + __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) ), TP_fast_assign( @@ -148,12 +209,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; + __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); ), - TP_printk("bdi %s: ino=%lu sync_mode=%d", + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s", __entry->name, __entry->ino, - __entry->sync_mode + __entry->sync_mode, + __get_str(cgroup) ) ); @@ -172,8 +235,8 @@ DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, ); DECLARE_EVENT_CLASS(writeback_work_class, - TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), - TP_ARGS(bdi, work), + TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), + TP_ARGS(wb, work), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_pages) @@ -183,10 +246,11 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(int, range_cyclic) __field(int, for_background) __field(int, reason) + __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( strncpy(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -194,9 +258,10 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; __entry->reason = work->reason; + __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " - "kupdate=%d range_cyclic=%d background=%d reason=%s", + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, @@ -204,13 +269,14 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->for_kupdate, __entry->range_cyclic, __entry->for_background, - __print_symbolic(__entry->reason, WB_WORK_REASON) + __print_symbolic(__entry->reason, WB_WORK_REASON), + __get_str(cgroup) ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ DEFINE_EVENT(writeback_work_class, name, \ - TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ - TP_ARGS(bdi, work)) + TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \ + TP_ARGS(wb, work)) DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); @@ -230,26 +296,42 @@ TRACE_EVENT(writeback_pages_written, ); DECLARE_EVENT_CLASS(writeback_class, - TP_PROTO(struct backing_dev_info *bdi), - TP_ARGS(bdi), + TP_PROTO(struct bdi_writeback *wb), + TP_ARGS(wb), TP_STRUCT__entry( __array(char, name, 32) + __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), - TP_printk("bdi %s", - __entry->name + TP_printk("bdi %s: cgroup=%s", + __entry->name, + __get_str(cgroup) ) ); #define DEFINE_WRITEBACK_EVENT(name) \ DEFINE_EVENT(writeback_class, name, \ - TP_PROTO(struct backing_dev_info *bdi), \ - TP_ARGS(bdi)) + TP_PROTO(struct bdi_writeback *wb), \ + TP_ARGS(wb)) DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); -DEFINE_WRITEBACK_EVENT(writeback_bdi_register); + +TRACE_EVENT(writeback_bdi_register, + TP_PROTO(struct backing_dev_info *bdi), + TP_ARGS(bdi), + TP_STRUCT__entry( + __array(char, name, 32) + ), + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + ), + TP_printk("bdi %s", + __entry->name + ) +); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), @@ -265,6 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class, __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) + __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) ), TP_fast_assign( @@ -278,11 +361,12 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; + __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " "bgrd=%d reclm=%d cyclic=%d " - "start=0x%lx end=0x%lx", + "start=0x%lx end=0x%lx cgroup=%s", __entry->name, __entry->nr_to_write, __entry->pages_skipped, @@ -292,7 +376,9 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->for_reclaim, __entry->range_cyclic, __entry->range_start, - __entry->range_end) + __entry->range_end, + __get_str(cgroup) + ) ) #define DEFINE_WBC_EVENT(name) \ @@ -312,6 +398,7 @@ TRACE_EVENT(writeback_queue_io, __field(long, age) __field(int, moved) __field(int, reason) + __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( unsigned long *older_than_this = work->older_than_this; @@ -321,13 +408,15 @@ TRACE_EVENT(writeback_queue_io, (jiffies - *older_than_this) * 1000 / HZ : -1; __entry->moved = moved; __entry->reason = work->reason; + __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s", + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s", __entry->name, __entry->older, /* older_than_this in jiffies */ __entry->age, /* older_than_this in relative milliseconds */ __entry->moved, - __print_symbolic(__entry->reason, WB_WORK_REASON) + __print_symbolic(__entry->reason, WB_WORK_REASON), + __get_str(cgroup) ) ); @@ -381,11 +470,11 @@ TRACE_EVENT(global_dirty_state, TRACE_EVENT(bdi_dirty_ratelimit, - TP_PROTO(struct backing_dev_info *bdi, + TP_PROTO(struct bdi_writeback *wb, unsigned long dirty_rate, unsigned long task_ratelimit), - TP_ARGS(bdi, dirty_rate, task_ratelimit), + TP_ARGS(wb, dirty_rate, task_ratelimit), TP_STRUCT__entry( __array(char, bdi, 32) @@ -395,36 +484,39 @@ TRACE_EVENT(bdi_dirty_ratelimit, __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned long, balanced_dirty_ratelimit) + __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strlcpy(__entry->bdi, dev_name(bdi->dev), 32); - __entry->write_bw = KBps(bdi->wb.write_bandwidth); - __entry->avg_write_bw = KBps(bdi->wb.avg_write_bandwidth); + strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + __entry->write_bw = KBps(wb->write_bandwidth); + __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); - __entry->dirty_ratelimit = KBps(bdi->wb.dirty_ratelimit); + __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->balanced_dirty_ratelimit = - KBps(bdi->wb.balanced_dirty_ratelimit); + KBps(wb->balanced_dirty_ratelimit); + __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: " "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " - "balanced_dirty_ratelimit=%lu", + "balanced_dirty_ratelimit=%lu cgroup=%s", __entry->bdi, __entry->write_bw, /* write bandwidth */ __entry->avg_write_bw, /* avg write bandwidth */ __entry->dirty_rate, /* bdi dirty rate */ __entry->dirty_ratelimit, /* base ratelimit */ __entry->task_ratelimit, /* ratelimit with position control */ - __entry->balanced_dirty_ratelimit /* the balanced ratelimit */ + __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ + __get_str(cgroup) ) ); TRACE_EVENT(balance_dirty_pages, - TP_PROTO(struct backing_dev_info *bdi, + TP_PROTO(struct bdi_writeback *wb, unsigned long thresh, unsigned long bg_thresh, unsigned long dirty, @@ -437,7 +529,7 @@ TRACE_EVENT(balance_dirty_pages, long pause, unsigned long start_time), - TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, + TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, dirty_ratelimit, task_ratelimit, dirtied, period, pause, start_time), @@ -456,11 +548,12 @@ TRACE_EVENT(balance_dirty_pages, __field( long, pause) __field(unsigned long, period) __field( long, think) + __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strlcpy(__entry->bdi, dev_name(bdi->dev), 32); + strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -478,6 +571,7 @@ TRACE_EVENT(balance_dirty_pages, __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; + __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), @@ -486,7 +580,7 @@ TRACE_EVENT(balance_dirty_pages, "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " - "paused=%lu pause=%ld period=%lu think=%ld", + "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s", __entry->bdi, __entry->limit, __entry->setpoint, @@ -500,7 +594,8 @@ TRACE_EVENT(balance_dirty_pages, __entry->paused, /* ms */ __entry->pause, /* ms */ __entry->period, /* ms */ - __entry->think /* ms */ + __entry->think, /* ms */ + __get_str(cgroup) ) ); @@ -514,6 +609,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, __field(unsigned long, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) + __dynamic_array(char, cgroup, + __trace_wb_cgroup_size(inode_to_wb(inode))) ), TP_fast_assign( @@ -522,14 +619,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue, __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; + __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode)); ), - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu", + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s", __entry->name, __entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, - (jiffies - __entry->dirtied_when) / HZ + (jiffies - __entry->dirtied_when) / HZ, + __get_str(cgroup) ) ); @@ -585,6 +684,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) + __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) ), TP_fast_assign( @@ -596,10 +696,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; + __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " - "index=%lu to_write=%ld wrote=%lu", + "index=%lu to_write=%ld wrote=%lu cgroup=%s", __entry->name, __entry->ino, show_inode_state(__entry->state), @@ -607,7 +708,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, - __entry->wrote + __entry->wrote, + __get_str(cgroup) ) ); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index e016bd9b1a04..8da542a2874d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 281 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) +#define __NR_membarrier 282 +__SYSCALL(__NR_membarrier, sys_membarrier) #undef __NR_syscalls -#define __NR_syscalls 282 +#define __NR_syscalls 283 /* * All syscalls below here should go away really, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dbd16a2d37db..fd5aa47bd689 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -358,7 +358,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { - s32 param; + __s32 param; /* * WARNING: Using pointers instead of fixed-size u64 means we need to write * compat32 code. Don't repeat this mistake. diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 70ff1d9abf0d..f7b2db44eb4b 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -252,6 +252,7 @@ header-y += mdio.h header-y += media.h header-y += media-bus-format.h header-y += mei.h +header-y += membarrier.h header-y += memfd.h header-y += mempolicy.h header-y += meye.h diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 1f977dd4c370..843540c398eb 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -266,6 +266,7 @@ #define AUDIT_OBJ_UID 109 #define AUDIT_OBJ_GID 110 #define AUDIT_FIELD_COMPARE 111 +#define AUDIT_EXE 112 #define AUDIT_ARG0 200 #define AUDIT_ARG1 (AUDIT_ARG0+1) @@ -324,8 +325,10 @@ enum { #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 +#define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH 0x00000004 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \ - AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME) + AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \ + AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH) /* deprecated: AUDIT_VERSION_* */ #define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 3429a3ba382b..b56dfcfe922a 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -39,6 +39,7 @@ #define EM_TI_C6000 140 /* TI C6X DSPs */ #define EM_AARCH64 183 /* ARM 64 bit */ #define EM_TILEPRO 188 /* Tilera TILEPro */ +#define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index aa63ed023c2b..ea9221b0331a 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -42,6 +42,7 @@ #define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ +#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ #define ETH_P_IP 0x0800 /* Internet Protocol packet */ #define ETH_P_X25 0x0805 /* CCITT X.25 */ #define ETH_P_ARP 0x0806 /* Address Resolution packet */ diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index a6c4962e5d46..5da5f8751ce7 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -33,6 +33,7 @@ #define KPF_THP 22 #define KPF_BALLOON 23 #define KPF_ZERO_PAGE 24 +#define KPF_IDLE 25 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d831f94f8a8..a9256f0331ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -237,6 +237,7 @@ struct kvm_run { __u32 count; __u64 data_offset; /* relative to kvm_run start */ } io; + /* KVM_EXIT_DEBUG */ struct { struct kvm_debug_exit_arch arch; } debug; @@ -285,6 +286,7 @@ struct kvm_run { __u32 data; __u8 is_write; } dcr; + /* KVM_EXIT_INTERNAL_ERROR */ struct { __u32 suberror; /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ @@ -295,6 +297,7 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + /* KVM_EXIT_PAPR_HCALL */ struct { __u64 nr; __u64 ret; @@ -819,6 +822,8 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 #define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_GUEST_DEBUG_HW_BPS 119 +#define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h new file mode 100644 index 000000000000..e0b108bd2624 --- /dev/null +++ b/include/uapi/linux/membarrier.h @@ -0,0 +1,53 @@ +#ifndef _UAPI_LINUX_MEMBARRIER_H +#define _UAPI_LINUX_MEMBARRIER_H + +/* + * linux/membarrier.h + * + * membarrier system call API + * + * Copyright (c) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * enum membarrier_cmd - membarrier system call command + * @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns + * a bitmask of valid commands. + * @MEMBARRIER_CMD_SHARED: Execute a memory barrier on all running threads. + * Upon return from system call, the caller thread + * is ensured that all running threads have passed + * through a state where all memory accesses to + * user-space addresses match program order between + * entry to and return from the system call + * (non-running threads are de facto in such a + * state). This covers threads from all processes + * running on the system. This command returns 0. + * + * Command to be passed to the membarrier system call. The commands need to + * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to + * the value 0. + */ +enum membarrier_cmd { + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), +}; + +#endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 2b94ea2287bb..5b4a4be06e2b 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -87,7 +87,7 @@ struct nd_cmd_ars_status { __u32 handle; __u32 flags; __u64 err_address; - __u64 mask; + __u64 length; } __packed records[0]; } __packed; @@ -111,6 +111,11 @@ enum { ND_CMD_VENDOR = 9, }; +enum { + ND_ARS_VOLATILE = 1, + ND_ARS_PERSISTENT = 2, +}; + static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { @@ -194,4 +199,9 @@ enum nd_driver_flags { enum { ND_MIN_NAMESPACE_SIZE = 0x00400000, }; + +enum ars_masks { + ARS_STATUS_MASK = 0x0000FFFF, + ARS_EXT_STATUS_SHIFT = 16, +}; #endif /* __NDCTL_H__ */ diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index cf1019e15f5b..a7a697986614 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -89,9 +89,11 @@ struct ptrace_peeksiginfo_args { #define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP) /* eventless options */ -#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_SUSPEND_SECCOMP (1 << 21) -#define PTRACE_O_MASK (0x000000ff | PTRACE_O_EXITKILL) +#define PTRACE_O_MASK (\ + 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP) #include <asm/ptrace.h> diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index b67f99d3c520..95c6521d8a95 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -42,10 +42,6 @@ #define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ -/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */ -#define xstr(s) str(s) -#define str(s) #s - struct tcmu_mailbox { __u16 version; __u16 flags; diff --git a/include/uapi/linux/toshiba.h b/include/uapi/linux/toshiba.h index e9bef5b2f91e..c58bf4b5bb26 100644 --- a/include/uapi/linux/toshiba.h +++ b/include/uapi/linux/toshiba.h @@ -1,6 +1,7 @@ /* toshiba.h -- Linux driver for accessing the SMM on Toshiba laptops * * Copyright (c) 1996-2000 Jonathan A. Buzzard (jonathan@buzzard.org.uk) + * Copyright (c) 2015 Azael Avalos <coproscefalo@gmail.com> * * Thanks to Juergen Heinzl <juergen@monocerus.demon.co.uk> for the pointers * on making sure the structure is aligned and packed. @@ -20,9 +21,18 @@ #ifndef _UAPI_LINUX_TOSHIBA_H #define _UAPI_LINUX_TOSHIBA_H -#define TOSH_PROC "/proc/toshiba" -#define TOSH_DEVICE "/dev/toshiba" -#define TOSH_SMM _IOWR('t', 0x90, int) /* broken: meant 24 bytes */ +/* + * Toshiba modules paths + */ + +#define TOSH_PROC "/proc/toshiba" +#define TOSH_DEVICE "/dev/toshiba" +#define TOSHIBA_ACPI_PROC "/proc/acpi/toshiba" +#define TOSHIBA_ACPI_DEVICE "/dev/toshiba_acpi" + +/* + * Toshiba SMM structure + */ typedef struct { unsigned int eax; @@ -33,5 +43,21 @@ typedef struct { unsigned int edi __attribute__ ((packed)); } SMMRegisters; +/* + * IOCTLs (0x90 - 0x91) + */ + +#define TOSH_SMM _IOWR('t', 0x90, SMMRegisters) +/* + * Convenience toshiba_acpi command. + * + * The System Configuration Interface (SCI) is opened/closed internally + * to avoid userspace of buggy BIOSes. + * + * The toshiba_acpi module checks whether the eax register is set with + * SCI_GET (0xf300) or SCI_SET (0xf400), returning -EINVAL if not. + */ +#define TOSHIBA_ACPI_SCI _IOWR('t', 0x91, SMMRegisters) + #endif /* _UAPI_LINUX_TOSHIBA_H */ diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 687ae332200f..231901b08f6c 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -5,3 +5,4 @@ header-y += ib_user_sa.h header-y += ib_user_verbs.h header-y += rdma_netlink.h header-y += rdma_user_cm.h +header-y += hfi/ diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild new file mode 100644 index 000000000000..ef23c294fc71 --- /dev/null +++ b/include/uapi/rdma/hfi/Kbuild @@ -0,0 +1,2 @@ +# UAPI Header export list +header-y += hfi1_user.h diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h new file mode 100644 index 000000000000..78c442fbf263 --- /dev/null +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -0,0 +1,427 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains defines, structures, etc. that are used + * to communicate between kernel and user code. + */ + +#ifndef _LINUX__HFI1_USER_H +#define _LINUX__HFI1_USER_H + +#include <linux/types.h> + +/* + * This version number is given to the driver by the user code during + * initialization in the spu_userversion field of hfi1_user_info, so + * the driver can check for compatibility with user code. + * + * The major version changes when data structures change in an incompatible + * way. The driver must be the same for initialization to succeed. + */ +#define HFI1_USER_SWMAJOR 4 + +/* + * Minor version differences are always compatible + * a within a major version, however if user software is larger + * than driver software, some new features and/or structure fields + * may not be implemented; the user code must deal with this if it + * cares, or it must abort after initialization reports the difference. + */ +#define HFI1_USER_SWMINOR 0 + +/* + * Set of HW and driver capability/feature bits. + * These bit values are used to configure enabled/disabled HW and + * driver features. The same set of bits are communicated to user + * space. + */ +#define HFI1_CAP_DMA_RTAIL (1UL << 0) /* Use DMA'ed RTail value */ +#define HFI1_CAP_SDMA (1UL << 1) /* Enable SDMA support */ +#define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */ +#define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */ +#define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */ +/* 1UL << 5 reserved */ +#define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. use CSR */ +#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/ +#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */ +#define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */ +#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Enable Expected TID caching */ +#define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */ +#define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */ +#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */ +#define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */ +#define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */ +#define HFI1_CAP_QSFP_ENABLED (1UL << 16) /* Enable QSFP check during LNI */ +#define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */ +#define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */ + +#define HFI1_RCVHDR_ENTSIZE_2 (1UL << 0) +#define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) +#define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) + +/* + * If the unit is specified via open, HFI choice is fixed. If port is + * specified, it's also fixed. Otherwise we try to spread contexts + * across ports and HFIs, using different algorithms. WITHIN is + * the old default, prior to this mechanism. + */ +#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then + * ports; this is the default */ +#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin + * active ports within), then next HFI */ +#define HFI1_ALG_COUNT 2 /* number of algorithm choices */ + + +/* User commands. */ +#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ +#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ +#define HFI1_CMD_USER_INFO 3 /* set up userspace */ +#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ +#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ +#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ +#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */ + +#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ +#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ +#define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */ +#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ +#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ +/* separate EPROM commands from normal PSM commands */ +#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ +#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ +#define HFI1_CMD_EP_ERASE_P0 66 /* erase EPROM partition 0 */ +#define HFI1_CMD_EP_ERASE_P1 67 /* erase EPROM partition 1 */ +#define HFI1_CMD_EP_READ_P0 68 /* read EPROM partition 0 */ +#define HFI1_CMD_EP_READ_P1 69 /* read EPROM partition 1 */ +#define HFI1_CMD_EP_WRITE_P0 70 /* write EPROM partition 0 */ +#define HFI1_CMD_EP_WRITE_P1 71 /* write EPROM partition 1 */ + +#define _HFI1_EVENT_FROZEN_BIT 0 +#define _HFI1_EVENT_LINKDOWN_BIT 1 +#define _HFI1_EVENT_LID_CHANGE_BIT 2 +#define _HFI1_EVENT_LMC_CHANGE_BIT 3 +#define _HFI1_EVENT_SL2VL_CHANGE_BIT 4 +#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT + +#define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT) +#define HFI1_EVENT_LINKDOWN_BIT (1UL << _HFI1_EVENT_LINKDOWN_BIT) +#define HFI1_EVENT_LID_CHANGE_BIT (1UL << _HFI1_EVENT_LID_CHANGE_BIT) +#define HFI1_EVENT_LMC_CHANGE_BIT (1UL << _HFI1_EVENT_LMC_CHANGE_BIT) +#define HFI1_EVENT_SL2VL_CHANGE_BIT (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT) + +/* + * These are the status bits readable (in ASCII form, 64bit value) + * from the "status" sysfs file. For binary compatibility, values + * must remain as is; removed states can be reused for different + * purposes. + */ +#define HFI1_STATUS_INITTED 0x1 /* basic initialization done */ +/* Chip has been found and initialized */ +#define HFI1_STATUS_CHIP_PRESENT 0x20 +/* IB link is at ACTIVE, usable for data traffic */ +#define HFI1_STATUS_IB_READY 0x40 +/* link is configured, LID, MTU, etc. have been set */ +#define HFI1_STATUS_IB_CONF 0x80 +/* A Fatal hardware error has occurred. */ +#define HFI1_STATUS_HWERROR 0x200 + +/* + * Number of supported shared contexts. + * This is the maximum number of software contexts that can share + * a hardware send/receive context. + */ +#define HFI1_MAX_SHARED_CTXTS 8 + +/* + * Poll types + */ +#define HFI1_POLL_TYPE_ANYRCV 0x0 +#define HFI1_POLL_TYPE_URGENT 0x1 + +/* + * This structure is passed to the driver to tell it where + * user code buffers are, sizes, etc. The offsets and sizes of the + * fields must remain unchanged, for binary compatibility. It can + * be extended, if userversion is changed so user code can tell, if needed + */ +struct hfi1_user_info { + /* + * version of user software, to detect compatibility issues. + * Should be set to HFI1_USER_SWVERSION. + */ + __u32 userversion; + __u16 pad; + /* HFI selection algorithm, if unit has not selected */ + __u16 hfi1_alg; + /* + * If two or more processes wish to share a context, each process + * must set the subcontext_cnt and subcontext_id to the same + * values. The only restriction on the subcontext_id is that + * it be unique for a given node. + */ + __u16 subctxt_cnt; + __u16 subctxt_id; + /* 128bit UUID passed in by PSM. */ + __u8 uuid[16]; +}; + +struct hfi1_ctxt_info { + __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */ + __u32 rcvegr_size; /* size of each eager buffer */ + __u16 num_active; /* number of active units */ + __u16 unit; /* unit (chip) assigned to caller */ + __u16 ctxt; /* ctxt on unit assigned to caller */ + __u16 subctxt; /* subctxt on unit assigned to caller */ + __u16 rcvtids; /* number of Rcv TIDs for this context */ + __u16 credits; /* number of PIO credits for this context */ + __u16 numa_node; /* NUMA node of the assigned device */ + __u16 rec_cpu; /* cpu # for affinity (0xffff if none) */ + __u16 send_ctxt; /* send context in use by this user context */ + __u16 egrtids; /* number of RcvArray entries for Eager Rcvs */ + __u16 rcvhdrq_cnt; /* number of RcvHdrQ entries */ + __u16 rcvhdrq_entsize; /* size (in bytes) for each RcvHdrQ entry */ + __u16 sdma_ring_size; /* number of entries in SDMA request ring */ +}; + +struct hfi1_tid_info { + /* virtual address of first page in transfer */ + __u64 vaddr; + /* pointer to tid array. this array is big enough */ + __u64 tidlist; + /* number of tids programmed by this request */ + __u32 tidcnt; + /* length of transfer buffer programmed by this request */ + __u32 length; + /* + * pointer to bitmap of TIDs used for this call; + * checked for being large enough at open + */ + __u64 tidmap; +}; + +struct hfi1_cmd { + __u32 type; /* command type */ + __u32 len; /* length of struct pointed to by add */ + __u64 addr; /* pointer to user structure */ +}; + +enum hfi1_sdma_comp_state { + FREE = 0, + QUEUED, + COMPLETE, + ERROR +}; + +/* + * SDMA completion ring entry + */ +struct hfi1_sdma_comp_entry { + __u32 status; + __u32 errcode; +}; + +/* + * Device status and notifications from driver to user-space. + */ +struct hfi1_status { + __u64 dev; /* device/hw status bits */ + __u64 port; /* port state and status bits */ + char freezemsg[0]; +}; + +/* + * This structure is returned by the driver immediately after + * open to get implementation-specific info, and info specific to this + * instance. + * + * This struct must have explicit pad fields where type sizes + * may result in different alignments between 32 and 64 bit + * programs, since the 64 bit * bit kernel requires the user code + * to have matching offsets + */ +struct hfi1_base_info { + /* version of hardware, for feature checking. */ + __u32 hw_version; + /* version of software, for feature checking. */ + __u32 sw_version; + /* Job key */ + __u16 jkey; + __u16 padding1; + /* + * The special QP (queue pair) value that identifies PSM + * protocol packet from standard IB packets. + */ + __u32 bthqp; + /* PIO credit return address, */ + __u64 sc_credits_addr; + /* + * Base address of write-only pio buffers for this process. + * Each buffer has sendpio_credits*64 bytes. + */ + __u64 pio_bufbase_sop; + /* + * Base address of write-only pio buffers for this process. + * Each buffer has sendpio_credits*64 bytes. + */ + __u64 pio_bufbase; + /* address where receive buffer queue is mapped into */ + __u64 rcvhdr_bufbase; + /* base address of Eager receive buffers. */ + __u64 rcvegr_bufbase; + /* base address of SDMA completion ring */ + __u64 sdma_comp_bufbase; + /* + * User register base for init code, not to be used directly by + * protocol or applications. Always maps real chip register space. + * the register addresses are: + * ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail, + * ur_rcvtidflow + */ + __u64 user_regbase; + /* notification events */ + __u64 events_bufbase; + /* status page */ + __u64 status_bufbase; + /* rcvhdrtail update */ + __u64 rcvhdrtail_base; + /* + * shared memory pages for subctxts if ctxt is shared; these cover + * all the processes in the group sharing a single context. + * all have enough space for the num_subcontexts value on this job. + */ + __u64 subctxt_uregbase; + __u64 subctxt_rcvegrbuf; + __u64 subctxt_rcvhdrbuf; +}; + +enum sdma_req_opcode { + EXPECTED = 0, + EAGER +}; + +#define HFI1_SDMA_REQ_VERSION_MASK 0xF +#define HFI1_SDMA_REQ_VERSION_SHIFT 0x0 +#define HFI1_SDMA_REQ_OPCODE_MASK 0xF +#define HFI1_SDMA_REQ_OPCODE_SHIFT 0x4 +#define HFI1_SDMA_REQ_IOVCNT_MASK 0xFF +#define HFI1_SDMA_REQ_IOVCNT_SHIFT 0x8 + +struct sdma_req_info { + /* + * bits 0-3 - version (currently unused) + * bits 4-7 - opcode (enum sdma_req_opcode) + * bits 8-15 - io vector count + */ + __u16 ctrl; + /* + * Number of fragments contained in this request. + * User-space has already computed how many + * fragment-sized packet the user buffer will be + * split into. + */ + __u16 npkts; + /* + * Size of each fragment the user buffer will be + * split into. + */ + __u16 fragsize; + /* + * Index of the slot in the SDMA completion ring + * this request should be using. User-space is + * in charge of managing its own ring. + */ + __u16 comp_idx; +} __packed; + +/* + * SW KDETH header. + * swdata is SW defined portion. + */ +struct hfi1_kdeth_header { + __le32 ver_tid_offset; + __le16 jkey; + __le16 hcrc; + __le32 swdata[7]; +} __packed; + +/* + * Structure describing the headers that User space uses. The + * structure above is a subset of this one. + */ +struct hfi1_pkt_header { + __le16 pbc[4]; + __be16 lrh[4]; + __be32 bth[3]; + struct hfi1_kdeth_header kdeth; +} __packed; + + +/* + * The list of usermode accessible registers. + */ +enum hfi1_ureg { + /* (RO) DMA RcvHdr to be used next. */ + ur_rcvhdrtail = 0, + /* (RW) RcvHdr entry to be processed next by host. */ + ur_rcvhdrhead = 1, + /* (RO) Index of next Eager index to use. */ + ur_rcvegrindextail = 2, + /* (RW) Eager TID to be processed next */ + ur_rcvegrindexhead = 3, + /* (RO) Receive Eager Offset Tail */ + ur_rcvegroffsettail = 4, + /* For internal use only; max register number. */ + ur_maxreg, + /* (RW) Receive TID flow table */ + ur_rcvtidflowtable = 256 +}; + +#endif /* _LINIUX__HFI1_USER_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 6e4bb4270ca2..c19a5dc1531a 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -7,12 +7,14 @@ enum { RDMA_NL_RDMA_CM = 1, RDMA_NL_NES, RDMA_NL_C4IW, + RDMA_NL_LS, /* RDMA Local Services */ RDMA_NL_NUM_CLIENTS }; enum { RDMA_NL_GROUP_CM = 1, RDMA_NL_GROUP_IWPM, + RDMA_NL_GROUP_LS, RDMA_NL_NUM_GROUPS }; @@ -128,5 +130,85 @@ enum { IWPM_NLA_ERR_MAX }; +/* + * Local service operations: + * RESOLVE - The client requests the local service to resolve a path. + * SET_TIMEOUT - The local service requests the client to set the timeout. + */ +enum { + RDMA_NL_LS_OP_RESOLVE = 0, + RDMA_NL_LS_OP_SET_TIMEOUT, + RDMA_NL_LS_NUM_OPS +}; + +/* Local service netlink message flags */ +#define RDMA_NL_LS_F_ERR 0x0100 /* Failed response */ + +/* + * Local service resolve operation family header. + * The layout for the resolve operation: + * nlmsg header + * family header + * attributes + */ + +/* + * Local service path use: + * Specify how the path(s) will be used. + * ALL - For connected CM operation (6 pathrecords) + * UNIDIRECTIONAL - For unidirectional UD (1 pathrecord) + * GMP - For miscellaneous GMP like operation (at least 1 reversible + * pathrecord) + */ +enum { + LS_RESOLVE_PATH_USE_ALL = 0, + LS_RESOLVE_PATH_USE_UNIDIRECTIONAL, + LS_RESOLVE_PATH_USE_GMP, + LS_RESOLVE_PATH_USE_MAX +}; + +#define LS_DEVICE_NAME_MAX 64 + +struct rdma_ls_resolve_header { + __u8 device_name[LS_DEVICE_NAME_MAX]; + __u8 port_num; + __u8 path_use; +}; + +/* Local service attribute type */ +#define RDMA_NLA_F_MANDATORY (1 << 13) +#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \ + RDMA_NLA_F_MANDATORY)) + +/* + * Local service attributes: + * Attr Name Size Byte order + * ----------------------------------------------------- + * PATH_RECORD struct ib_path_rec_data + * TIMEOUT u32 cpu + * SERVICE_ID u64 cpu + * DGID u8[16] BE + * SGID u8[16] BE + * TCLASS u8 + * PKEY u16 cpu + * QOS_CLASS u16 cpu + */ +enum { + LS_NLA_TYPE_UNSPEC = 0, + LS_NLA_TYPE_PATH_RECORD, + LS_NLA_TYPE_TIMEOUT, + LS_NLA_TYPE_SERVICE_ID, + LS_NLA_TYPE_DGID, + LS_NLA_TYPE_SGID, + LS_NLA_TYPE_TCLASS, + LS_NLA_TYPE_PKEY, + LS_NLA_TYPE_QOS_CLASS, + LS_NLA_TYPE_MAX +}; + +/* Local service DGID/SGID attribute: big endian */ +struct rdma_nla_ls_gid { + __u8 gid[16]; +}; #endif /* _UAPI_RDMA_NETLINK_H */ diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index a85316811d79..7ddeeda93809 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -44,6 +44,10 @@ struct privcmd_hypercall { struct privcmd_mmap_entry { __u64 va; + /* + * This should be a GFN. It's not possible to change the name because + * it's exposed to the user-space. + */ __u64 mfn; __u64 npages; }; diff --git a/include/video/vga.h b/include/video/vga.h index cac567f22e62..d334e64c1c19 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -18,7 +18,7 @@ #define __linux_video_vga_h__ #include <linux/types.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/vga.h> #include <asm/byteorder.h> diff --git a/include/xen/events.h b/include/xen/events.h index 7d95fdf9cf3e..88da2abaf535 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -92,7 +92,6 @@ void xen_hvm_callback_vector(void); #ifdef CONFIG_TRACING #define trace_xen_hvm_callback_vector xen_hvm_callback_vector #endif -extern int xen_have_vector_callback; int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 5cc49ea8d840..8e035871360e 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -474,6 +474,23 @@ struct xenpf_core_parking { }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking); +#define XENPF_get_symbol 63 +struct xenpf_symdata { + /* IN/OUT variables */ + uint32_t namelen; /* size of 'name' buffer */ + + /* IN/OUT variables */ + uint32_t symnum; /* IN: Symbol to read */ + /* OUT: Next available symbol. If same as IN */ + /* then we reached the end */ + + /* OUT variables */ + GUEST_HANDLE(char) name; + uint64_t address; + char type; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -495,6 +512,7 @@ struct xen_platform_op { struct xenpf_cpu_hotadd cpu_add; struct xenpf_mem_hotadd mem_add; struct xenpf_core_parking core_parking; + struct xenpf_symdata symdata; uint8_t pad[128]; } u; }; diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index a48378958062..167071c290b3 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -80,6 +80,7 @@ #define __HYPERVISOR_kexec_op 37 #define __HYPERVISOR_tmem_op 38 #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_xenpmu_op 40 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -112,6 +113,7 @@ #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ +#define VIRQ_XENPMU 13 /* PMC interrupt */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -585,26 +587,29 @@ struct shared_info { }; /* - * Start-of-day memory layout for the initial domain (DOM0): + * Start-of-day memory layout + * * 1. The domain is started within contiguous virtual-memory region. * 2. The contiguous region begins and ends on an aligned 4MB boundary. - * 3. The region start corresponds to the load address of the OS image. - * If the load address is not 4MB aligned then the address is rounded down. - * 4. This the order of bootstrap elements in the initial virtual region: + * 3. This the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] + * (may be omitted) * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] - * e. bootstrap page tables [pt_base, CR3 (x86)] - * f. bootstrap stack [register ESP (x86)] - * 5. Bootstrap elements are packed together, but each is 4kB-aligned. - * 6. The initial ram disk may be omitted. - * 7. The list of page frames forms a contiguous 'pseudo-physical' memory + * in case of dom0 this page contains the console info, too + * e. unless dom0: xenstore ring page + * f. unless dom0: console ring page + * g. bootstrap page tables [pt_base, CR3 (x86)] + * h. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 8. All bootstrap elements are mapped read-writable for the guest OS. The + * 6. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. - * 9. There is guaranteed to be at least 512kB padding after the final + * 7. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. */ @@ -641,10 +646,12 @@ struct start_info { }; /* These flags are passed in the 'flags' field of start_info_t. */ -#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ -#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ -#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ -#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */ + /* P->M making the 3 level tree obsolete? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ /* diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h new file mode 100644 index 000000000000..139efc91bceb --- /dev/null +++ b/include/xen/interface/xenpmu.h @@ -0,0 +1,94 @@ +#ifndef __XEN_PUBLIC_XENPMU_H__ +#define __XEN_PUBLIC_XENPMU_H__ + +#include "xen.h" + +#define XENPMU_VER_MAJ 0 +#define XENPMU_VER_MIN 1 + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_xenpmu_op(enum xenpmu_op cmd, struct xenpmu_params *args); + * + * @cmd == XENPMU_* (PMU operation) + * @args == struct xenpmu_params + */ +/* ` enum xenpmu_op { */ +#define XENPMU_mode_get 0 /* Also used for getting PMU version */ +#define XENPMU_mode_set 1 +#define XENPMU_feature_get 2 +#define XENPMU_feature_set 3 +#define XENPMU_init 4 +#define XENPMU_finish 5 +#define XENPMU_lvtpc_set 6 +#define XENPMU_flush 7 + +/* ` } */ + +/* Parameters structure for HYPERVISOR_xenpmu_op call */ +struct xen_pmu_params { + /* IN/OUT parameters */ + struct { + uint32_t maj; + uint32_t min; + } version; + uint64_t val; + + /* IN parameters */ + uint32_t vcpu; + uint32_t pad; +}; + +/* PMU modes: + * - XENPMU_MODE_OFF: No PMU virtualization + * - XENPMU_MODE_SELF: Guests can profile themselves + * - XENPMU_MODE_HV: Guests can profile themselves, dom0 profiles + * itself and Xen + * - XENPMU_MODE_ALL: Only dom0 has access to VPMU and it profiles + * everyone: itself, the hypervisor and the guests. + */ +#define XENPMU_MODE_OFF 0 +#define XENPMU_MODE_SELF (1<<0) +#define XENPMU_MODE_HV (1<<1) +#define XENPMU_MODE_ALL (1<<2) + +/* + * PMU features: + * - XENPMU_FEATURE_INTEL_BTS: Intel BTS support (ignored on AMD) + */ +#define XENPMU_FEATURE_INTEL_BTS 1 + +/* + * Shared PMU data between hypervisor and PV(H) domains. + * + * The hypervisor fills out this structure during PMU interrupt and sends an + * interrupt to appropriate VCPU. + * Architecture-independent fields of xen_pmu_data are WO for the hypervisor + * and RO for the guest but some fields in xen_pmu_arch can be writable + * by both the hypervisor and the guest (see arch-$arch/pmu.h). + */ +struct xen_pmu_data { + /* Interrupted VCPU */ + uint32_t vcpu_id; + + /* + * Physical processor on which the interrupt occurred. On non-privileged + * guests set to vcpu_id; + */ + uint32_t pcpu_id; + + /* + * Domain that was interrupted. On non-privileged guests set to + * DOMID_SELF. + * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in + * XENPMU_MODE_ALL mode, domain ID of another domain. + */ + domid_t domain_id; + + uint8_t pad[6]; + + /* Architecture-specific information */ + struct xen_pmu_arch pmu; +}; + +#endif /* __XEN_PUBLIC_XENPMU_H__ */ diff --git a/include/xen/page.h b/include/xen/page.h index c5ed20bb3fe9..1daae485e336 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -3,14 +3,14 @@ #include <asm/xen/page.h> -static inline unsigned long page_to_mfn(struct page *page) +static inline unsigned long xen_page_to_gfn(struct page *page) { - return pfn_to_mfn(page_to_pfn(page)); + return pfn_to_gfn(page_to_pfn(page)); } struct xen_memory_region { - phys_addr_t start; - phys_addr_t size; + unsigned long start_pfn; + unsigned long n_pfns; }; #define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 0ce4f32017ea..e4e214a5abd5 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -30,7 +30,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); struct vm_area_struct; /* - * xen_remap_domain_mfn_array() - map an array of foreign frames + * xen_remap_domain_gfn_array() - map an array of foreign frames * @vma: VMA to map the pages into * @addr: Address at which to map the pages * @gfn: Array of GFNs to map @@ -46,14 +46,14 @@ struct vm_area_struct; * Returns the number of successfully mapped frames, or a -ve error * code. */ -int xen_remap_domain_mfn_array(struct vm_area_struct *vma, +int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, int *err_ptr, pgprot_t prot, unsigned domid, struct page **pages); -/* xen_remap_domain_mfn_range() - map a range of foreign frames +/* xen_remap_domain_gfn_range() - map a range of foreign frames * @vma: VMA to map the pages into * @addr: Address at which to map the pages * @gfn: First GFN to map. @@ -65,12 +65,12 @@ int xen_remap_domain_mfn_array(struct vm_area_struct *vma, * Returns the number of successfully mapped frames, or a -ve error * code. */ -int xen_remap_domain_mfn_range(struct vm_area_struct *vma, +int xen_remap_domain_gfn_range(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t gfn, int nr, pgprot_t prot, unsigned domid, struct page **pages); -int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, +int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages); int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, unsigned long addr, |