Diffstat (limited to 'include/linux')
39 files changed, 462 insertions, 195 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 1d7edad9914f..33207004cfde 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,7 @@ struct bdi_writeback {
 struct list_head b_dirty_time; /* time stamps are dirty */
 spinlock_t list_lock; /* protects the b_* lists */
+ atomic_t writeback_inodes; /* number of inodes under writeback */
 struct percpu_counter stat[NR_WB_STAT_ITEMS];
 unsigned long congested; /* WB_[a]sync_congested flags */
@@ -142,6 +143,7 @@ struct bdi_writeback {
 spinlock_t work_lock; /* protects work_list & dwork scheduling */
 struct list_head work_list;
 struct delayed_work dwork; /* work item used for writeback */
+ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
 unsigned long dirty_sleep; /* last wait */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 29530859d9ff..ac7f231b8825 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -288,6 +288,17 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 return inode->i_wb;
 }
+static inline struct bdi_writeback *inode_to_wb_wbc(
+				struct inode *inode,
+				struct writeback_control *wbc)
+{
+	/*
+	 * If wbc does not have inode attached, it means cgroup writeback was
+	 * disabled when wbc started. Just use the default wb in that case.
+	 */
+	return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb;
+}
+
 /**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
@@ -366,6 +377,14 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
 return &inode_to_bdi(inode)->wb;
 }
+static inline struct bdi_writeback *inode_to_wb_wbc(
+				struct inode *inode,
+				struct writeback_control *wbc)
+{
+	return inode_to_wb(inode);
+}
+
+
 static inline struct bdi_writeback *
 unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c9cb12483e12..12b9dbcc980e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -17,7 +17,6 @@
 #include <linux/bio.h>
 #include <linux/stringify.h>
 #include <linux/gfp.h>
-#include <linux/bsg.h>
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
@@ -27,14 +26,11 @@
 #include <linux/sbitmap.h>
 struct module;
-struct scsi_ioctl_command;
-
 struct request_queue;
 struct elevator_queue;
 struct blk_trace;
 struct request;
 struct sg_io_hdr;
-struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
@@ -274,9 +270,6 @@ enum blk_queue_state {
 #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
 #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
-#define BLK_SCSI_MAX_CMDS (256)
-#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
 /*
 * Zoned block device models (zoned limit).
* @@ -505,11 +498,6 @@ struct request_queue { unsigned int max_active_zones; #endif /* CONFIG_BLK_DEV_ZONED */ - /* - * sg stuff - */ - unsigned int sg_timeout; - unsigned int sg_reserved_size; int node; struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE @@ -536,10 +524,6 @@ struct request_queue { int mq_freeze_depth; -#if defined(CONFIG_BLK_DEV_BSG) - struct bsg_class_device bsg_dev; -#endif - #ifdef CONFIG_BLK_DEV_THROTTLING /* Throttle data */ struct throtl_data *td; @@ -885,16 +869,6 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_queue_split(struct bio **); -extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); -extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, - unsigned int, void __user *); -extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, - unsigned int, void __user *); -extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, - struct scsi_ioctl_command __user *); -extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); -extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); - extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); @@ -1347,8 +1321,6 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, gfp_mask, 0); } -extern int blk_verify_command(unsigned char *cmd, fmode_t mode); - static inline bool bdev_is_partition(struct block_device *bdev) { return bdev->bd_partno; @@ -1377,6 +1349,11 @@ static inline unsigned int queue_max_sectors(const struct request_queue *q) return q->limits.max_sectors; } +static inline unsigned int queue_max_bytes(struct request_queue *q) +{ + return min_t(unsigned int, queue_max_sectors(q), INT_MAX >> 9) << 9; +} + static inline unsigned int queue_max_hw_sectors(const struct request_queue *q) { return q->limits.max_hw_sectors; diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 960988d42f77..6b211323a489 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -12,6 +12,7 @@ #include <linux/blkdev.h> #include <scsi/scsi_request.h> +struct bsg_job; struct request; struct device; struct scatterlist; diff --git a/include/linux/bsg.h b/include/linux/bsg.h index dac37b6e00ec..1ac81c809da9 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -4,36 +4,16 @@ #include <uapi/linux/bsg.h> -struct request; +struct bsg_device; +struct device; +struct request_queue; -#ifdef CONFIG_BLK_DEV_BSG -struct bsg_ops { - int (*check_proto)(struct sg_io_v4 *hdr); - int (*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr, - fmode_t mode); - int (*complete_rq)(struct request *rq, struct sg_io_v4 *hdr); - void (*free_rq)(struct request *rq); -}; +typedef int (bsg_sg_io_fn)(struct request_queue *, struct sg_io_v4 *hdr, + fmode_t mode, unsigned int timeout); -struct bsg_class_device { - struct device *class_dev; - int minor; - struct request_queue *queue; - const struct bsg_ops *ops; -}; +struct bsg_device *bsg_register_queue(struct request_queue *q, + struct device *parent, const char *name, + bsg_sg_io_fn *sg_io_fn); +void bsg_unregister_queue(struct bsg_device *bcd); -int bsg_register_queue(struct request_queue *q, struct device *parent, - const char *name, const struct bsg_ops *ops); -int bsg_scsi_register_queue(struct request_queue *q, struct 
device *parent); -void bsg_unregister_queue(struct request_queue *q); -#else -static inline int bsg_scsi_register_queue(struct request_queue *q, - struct device *parent) -{ - return 0; -} -static inline void bsg_unregister_queue(struct request_queue *q) -{ -} -#endif /* CONFIG_BLK_DEV_BSG */ #endif /* _LINUX_BSG_H */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e7e99da31349..6486d3c19463 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -409,7 +409,7 @@ static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } static inline void invalidate_bh_lrus_cpu(int cpu) {} -static inline bool has_bh_in_lru(int cpu, void *dummy) { return 0; } +static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; } #define buffer_heads_over_limit 0 #endif /* CONFIG_BLOCK */ diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index f48d0a31deae..c4fef00abdf3 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -86,11 +86,13 @@ struct cdrom_device_ops { /* play stuff */ int (*audio_ioctl) (struct cdrom_device_info *,unsigned int, void *); -/* driver specifications */ - const int capability; /* capability flags */ /* handle uniform packets for scsi type devices (scsi,atapi) */ int (*generic_packet) (struct cdrom_device_info *, struct packet_command *); + int (*read_cdda_bpc)(struct cdrom_device_info *cdi, void __user *ubuf, + u32 lba, u32 nframes, u8 *last_sense); +/* driver specifications */ + const int capability; /* capability flags */ }; int cdrom_multisession(struct cdrom_device_info *cdi, diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index d83b829305c0..f59c875271a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -342,7 +342,7 @@ struct clk_fixed_rate { unsigned long flags; }; -#define CLK_FIXED_RATE_PARENT_ACCURACY BIT(0) +#define CLK_FIXED_RATE_PARENT_ACCURACY BIT(0) extern const struct clk_ops clk_fixed_rate_ops; struct clk_hw *__clk_hw_register_fixed_rate(struct device *dev, @@ -1001,6 +1001,12 @@ struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, * CLK_FRAC_DIVIDER_BIG_ENDIAN - By default little endian register accesses are * used for the divider register. Setting this flag makes the register * accesses big endian. + * CLK_FRAC_DIVIDER_POWER_OF_TWO_PS - By default the resulting fraction might + * be saturated and the caller will get quite far from the good enough + * approximation. Instead the caller may require, by setting this flag, + * to shift left by a few bits in case, when the asked one is quite small + * to satisfy the desired range of denominator. It assumes that on the + * caller's side the power-of-two capable prescaler exists. 
*/ struct clk_fractional_divider { struct clk_hw hw; @@ -1022,8 +1028,8 @@ struct clk_fractional_divider { #define CLK_FRAC_DIVIDER_ZERO_BASED BIT(0) #define CLK_FRAC_DIVIDER_BIG_ENDIAN BIT(1) +#define CLK_FRAC_DIVIDER_POWER_OF_TWO_PS BIT(2) -extern const struct clk_ops clk_fractional_divider_ops; struct clk *clk_register_fractional_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, @@ -1069,9 +1075,9 @@ struct clk_multiplier { #define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw) -#define CLK_MULTIPLIER_ZERO_BYPASS BIT(0) +#define CLK_MULTIPLIER_ZERO_BYPASS BIT(0) #define CLK_MULTIPLIER_ROUND_CLOSEST BIT(1) -#define CLK_MULTIPLIER_BIG_ENDIAN BIT(2) +#define CLK_MULTIPLIER_BIG_ENDIAN BIT(2) extern const struct clk_ops clk_multiplier_ops; diff --git a/include/linux/compaction.h b/include/linux/compaction.h index c24098c7acca..34bce35c808d 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -84,6 +84,8 @@ static inline unsigned long compact_gap(unsigned int order) extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); +extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table, + int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/device.h b/include/linux/device.h index 65d84b67b024..e270cb740b9e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -424,6 +424,7 @@ struct dev_links_info { * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations + * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -534,6 +535,9 @@ struct device { struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif +#ifdef CONFIG_SWIOTLB + struct io_tlb_mem *dma_io_tlb_mem; +#endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 758ca4694257..24607dc3c2ac 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); +int iommu_dma_init_fq(struct iommu_domain *domain); /* The DMA API isn't _quite_ the whole story, though... 
*/ /* @@ -54,6 +55,11 @@ static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, { } +static inline int iommu_dma_init_fq(struct iommu_domain *domain) +{ + return -EINVAL; +} + static inline int iommu_get_dma_cookie(struct iommu_domain *domain) { return -ENODEV; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d9a606a9fc64..b4c49f9cc379 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -130,10 +130,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page } #endif -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ -} +#ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 0b8d1fdda3a1..c137396129db 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -121,6 +121,13 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) css_put(&h_cg->css); } +static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ + if (resv_map->css) + css_get(resv_map->css); +} + extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, @@ -199,6 +206,11 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) { } +static inline void resv_map_dup_hugetlb_cgroup_uncharge_info( + struct resv_map *resv_map) +{ +} + static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d0fa0b31994d..05a65eb155f7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -124,9 +124,9 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 -#define DMAR_VCCAP_REG 0xe00 /* Virtual command capability register */ -#define DMAR_VCMD_REG 0xe10 /* Virtual command register */ -#define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */ +#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ +#define DMAR_VCMD_REG 0xe00 /* Virtual command register */ +#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ #define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg) #define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 10fa80eef13a..57cceecbe37f 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,6 +14,11 @@ #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) +/* Page Request Queue depth */ +#define PRQ_ORDER 2 +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) +#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) + /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. 
No IOTLB flushes are automatically done diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 4d40dfa75b55..86af6f0a00a2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,6 +16,7 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + APPLE_DART, IO_PGTABLE_NUM_FMTS, }; @@ -73,10 +74,6 @@ struct io_pgtable_cfg { * to support up to 35 bits PA where the bit32, bit33 and bit34 are * encoded in the bit9, bit4 and bit5 of the PTE respectively. * - * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs - * on unmap, for DMA domains using the flush queue mechanism for - * delayed invalidation. - * * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table * for use in the upper half of a split address space. * @@ -86,7 +83,6 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) - #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) unsigned long quirks; @@ -136,6 +132,11 @@ struct io_pgtable_cfg { u64 transtab; u64 memattr; } arm_mali_lpae_cfg; + + struct { + u64 ttbr[4]; + u32 n_ttbrs; + } apple_dart_cfg; }; }; @@ -143,7 +144,9 @@ struct io_pgtable_cfg { * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map: Map a physically contiguous memory region. + * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap: Unmap a physically contiguous memory region. + * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. * * These functions map directly onto the iommu_ops member functions with @@ -152,8 +155,14 @@ struct io_pgtable_cfg { struct io_pgtable_ops { int (*map)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, struct iommu_iotlb_gather *gather); + size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); }; @@ -246,5 +255,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9369458ba1bd..d2f3435e7d17 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -40,6 +40,7 @@ struct iommu_domain; struct notifier_block; struct iommu_sva; struct iommu_fault_event; +struct iommu_dma_cookie; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -60,6 +61,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API implementation */ #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ +#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ /* * This are the possible domain-types @@ -72,12 +74,17 @@ struct iommu_domain_geometry 
{ * IOMMU_DOMAIN_DMA - Internally used for DMA-API implementations. * This flag allows IOMMU drivers to implement * certain optimizations for these domains + * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB + * invalidation. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING) #define IOMMU_DOMAIN_DMA (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API) +#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ + __IOMMU_DOMAIN_DMA_API | \ + __IOMMU_DOMAIN_DMA_FQ) struct iommu_domain { unsigned type; @@ -86,9 +93,14 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; - void *iova_cookie; + struct iommu_dma_cookie *iova_cookie; }; +static inline bool iommu_is_dma_domain(struct iommu_domain *domain) +{ + return domain->type & __IOMMU_DOMAIN_DMA_API; +} + enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA transactions */ @@ -160,16 +172,22 @@ enum iommu_dev_features { * @start: IOVA representing the start of the range to be flushed * @end: IOVA representing the end of the range to be flushed (inclusive) * @pgsize: The interval at which to perform the flush + * @freelist: Removed pages to free after sync + * @queued: Indicates that the flush will be queued * * This structure is intended to be updated by multiple calls to the * ->unmap() function in struct iommu_ops before eventually being passed - * into ->iotlb_sync(). + * into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after + * ->iotlb_sync() or ->iotlb_flush_all() have cleared all cached references to + * them. @queued is set to indicate when ->iotlb_flush_all() will be called + * later instead of ->iotlb_sync(), so drivers may optimise accordingly. */ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; struct page *freelist; + bool queued; }; /** @@ -180,7 +198,10 @@ struct iommu_iotlb_gather { * @attach_dev: attach device to an iommu domain * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain + * @map_pages: map a physically contiguous set of pages of the same size to + * an iommu domain. 
* @unmap: unmap a physically contiguous memory region from an iommu domain + * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush @@ -229,8 +250,14 @@ struct iommu_ops { void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + int (*map_pages)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); + size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, size_t size); @@ -476,8 +503,7 @@ int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); -void iommu_set_dma_strict(bool val); -bool iommu_get_dma_strict(struct iommu_domain *domain); +void iommu_set_dma_strict(void); extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); @@ -497,29 +523,80 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain, iommu_iotlb_gather_init(iotlb_gather); } +/** + * iommu_iotlb_gather_is_disjoint - Checks whether a new range is disjoint + * + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to check whether a new range and the gathered range + * are disjoint. For many IOMMUs, flushing the IOMMU in this case is better + * than merging the two, which might lead to unnecessary invalidations. + */ +static inline +bool iommu_iotlb_gather_is_disjoint(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long start = iova, end = start + size - 1; + + return gather->end != 0 && + (end + 1 < gather->start || start > gather->end + 1); +} + + +/** + * iommu_iotlb_gather_add_range - Gather for address-based TLB invalidation + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to build arbitrarily-sized invalidation commands + * where only the address range matters, and simply minimising intermediate + * syncs is preferred. + */ +static inline void iommu_iotlb_gather_add_range(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long end = iova + size - 1; + + if (gather->start > iova) + gather->start = iova; + if (gather->end < end) + gather->end = end; +} + +/** + * iommu_iotlb_gather_add_page - Gather for page-based TLB invalidation + * @domain: IOMMU domain to be invalidated + * @gather: TLB gather data + * @iova: start of page to invalidate + * @size: size of page to invalidate + * + * Helper for IOMMU drivers to build invalidation commands based on individual + * pages, or with page size/table level hints which cannot be gathered if they + * differ. 
+ */ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) { - unsigned long start = iova, end = start + size - 1; - /* * If the new page is disjoint from the current range or is mapped at * a different granularity, then sync the TLB so that the gather * structure can be rewritten. */ - if (gather->pgsize != size || - end + 1 < gather->start || start > gather->end + 1) { - if (gather->pgsize) - iommu_iotlb_sync(domain, gather); - gather->pgsize = size; - } + if ((gather->pgsize && gather->pgsize != size) || + iommu_iotlb_gather_is_disjoint(gather, iova, size)) + iommu_iotlb_sync(domain, gather); - if (gather->end < end) - gather->end = end; + gather->pgsize = size; + iommu_iotlb_gather_add_range(gather, iova, size); +} - if (gather->start > start) - gather->start = start; +static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) +{ + return gather && gather->queued; } /* PCI device grouping function */ @@ -870,6 +947,11 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, { } +static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather) +{ + return false; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4a53c3ca86bd..b066024c62e3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -99,8 +99,6 @@ void memblock_discard(void); static inline void memblock_discard(void) {} #endif -phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 20151c4f1e0e..3096c9a0ee01 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -105,14 +105,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -struct lruvec_stat { - long count[NR_VM_NODE_STAT_ITEMS]; -}; - -struct batched_lruvec_stat { - s32 count[NR_VM_NODE_STAT_ITEMS]; -}; - /* * Bitmap and deferred work of shrinker::id corresponding to memcg-aware * shrinkers, which have elements charged to this memcg. @@ -123,24 +115,30 @@ struct shrinker_info { unsigned long *map; }; +struct lruvec_stats_percpu { + /* Local (CPU and cgroup) state */ + long state[NR_VM_NODE_STAT_ITEMS]; + + /* Delta calculation for lockless upward propagation */ + long state_prev[NR_VM_NODE_STAT_ITEMS]; +}; + +struct lruvec_stats { + /* Aggregated (CPU and subtree) state */ + long state[NR_VM_NODE_STAT_ITEMS]; + + /* Pending child counts during tree propagation */ + long state_pending[NR_VM_NODE_STAT_ITEMS]; +}; + /* * per-node information in memory controller. */ struct mem_cgroup_per_node { struct lruvec lruvec; - /* - * Legacy local VM stats. This should be struct lruvec_stat and - * cannot be optimized to struct batched_lruvec_stat. Because - * the threshold of the lruvec_stat_cpu can be as big as - * MEMCG_CHARGE_BATCH * PAGE_SIZE. It can fit into s32. But this - * filed has no upper limit. 
- */ - struct lruvec_stat __percpu *lruvec_stat_local; - - /* Subtree VM stats (batched updates) */ - struct batched_lruvec_stat __percpu *lruvec_stat_cpu; - atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; + struct lruvec_stats_percpu __percpu *lruvec_stats_percpu; + struct lruvec_stats lruvec_stats; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -595,13 +593,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #endif -static __always_inline bool memcg_stat_item_in_bytes(int idx) -{ - if (idx == MEMCG_PERCPU_B) - return true; - return vmstat_item_in_bytes(idx); -} - static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); @@ -693,13 +684,35 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); +static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) +{ + if (mem_cgroup_disabled()) + return 0; + return __mem_cgroup_charge(page, mm, gfp_mask); +} + int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void mem_cgroup_uncharge(struct page *page); -void mem_cgroup_uncharge_list(struct list_head *page_list); +void __mem_cgroup_uncharge(struct page *page); +static inline void mem_cgroup_uncharge(struct page *page) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge(page); +} + +void __mem_cgroup_uncharge_list(struct list_head *page_list); +static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge_list(page_list); +} void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); @@ -884,11 +897,6 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg) return !!(memcg->css.flags & CSS_ONLINE); } -/* - * For memory reclaim. 
- */ -int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); - void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int zid, int nr_pages); @@ -955,22 +963,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, local_irq_restore(flags); } +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) +{ + return READ_ONCE(memcg->vmstats.state[idx]); +} + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; - long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - x = atomic_long_read(&pn->lruvec_stat[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; + return READ_ONCE(pn->lruvec_stats.state[idx]); } static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, @@ -985,7 +992,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); for_each_possible_cpu(cpu) - x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); + x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -993,6 +1000,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return x; } +void mem_cgroup_flush_stats(void); + void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); @@ -1391,6 +1400,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, { } +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) +{ + return 0; +} + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { @@ -1403,6 +1417,10 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return node_page_state(lruvec_pgdat(lruvec), idx); } +static inline void mem_cgroup_flush_stats(void) +{ +} + static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { diff --git a/include/linux/memory.h b/include/linux/memory.h index 97e92e8b556a..d9a0b61cd432 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -90,7 +90,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); -extern struct memory_block *find_memory_block(struct mem_section *); +extern struct memory_block *find_memory_block(unsigned long section_nr); typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0aaf91b496e2..4091692bed8c 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -184,6 +184,14 @@ extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); +extern bool numa_demotion_enabled; + +static inline bool mpol_is_preferred_many(struct mempolicy *pol) +{ + return (pol->mode == MPOL_PREFERRED_MANY); +} + + #else struct mempolicy {}; @@ -292,5 +300,13 @@ static inline nodemask_t 
*policy_nodemask_current(gfp_t gfp) { return NULL; } + +#define numa_demotion_enabled false + +static inline bool mpol_is_preferred_many(struct mempolicy *pol) +{ + return false; +} + #endif /* CONFIG_NUMA */ #endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 23dadf7aeba8..326250996b4e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -28,6 +28,7 @@ enum migrate_reason { MR_NUMA_MISPLACED, MR_CONTIG_RANGE, MR_LONGTERM_PIN, + MR_DEMOTION, MR_TYPES }; @@ -41,7 +42,8 @@ extern int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, - unsigned long private, enum migrate_mode mode, int reason); + unsigned long private, enum migrate_mode mode, int reason, + unsigned int *ret_succeeded); extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); @@ -56,7 +58,7 @@ extern int migrate_page_move_mapping(struct address_space *mapping, static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, - int reason) + int reason, unsigned int *ret_succeeded) { return -ENOSYS; } static inline struct page *alloc_migration_target(struct page *page, unsigned long private) @@ -166,6 +168,14 @@ struct migrate_vma { int migrate_vma_setup(struct migrate_vma *args); void migrate_vma_pages(struct migrate_vma *migrate); void migrate_vma_finalize(struct migrate_vma *migrate); +int next_demotion_node(int node); + +#else /* CONFIG_MIGRATION disabled: */ + +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} #endif /* CONFIG_MIGRATION */ diff --git a/include/linux/mm.h b/include/linux/mm.h index e59646a5d44d..ed2552c9aeca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1216,18 +1216,10 @@ static inline void get_page(struct page *page) } bool __must_check try_grab_page(struct page *page, unsigned int flags); -__maybe_unused struct page *try_grab_compound_head(struct page *page, int refs, - unsigned int flags); +struct page *try_grab_compound_head(struct page *page, int refs, + unsigned int flags); - -static inline __must_check bool try_get_page(struct page *page) -{ - page = compound_head(page); - if (WARN_ON_ONCE(page_ref_count(page) <= 0)) - return false; - page_ref_inc(page); - return true; -} +struct page *try_get_compound_head(struct page *page, int refs); static inline void put_page(struct page *page) { @@ -1849,7 +1841,6 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct kvec; int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); -int get_kernel_page(unsigned long start, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); @@ -3121,7 +3112,7 @@ extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; -extern void shake_page(struct page *p, int access); +extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 
fcb535560028..1bd5f5955f9a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -846,6 +846,7 @@ typedef struct pglist_data { enum zone_type kcompactd_highest_zoneidx; wait_queue_head_t kcompactd_wait; struct task_struct *kcompactd; + bool proactive_compact_trigger; #endif /* * This is a per-node reserve of pages that are not available @@ -1342,7 +1343,6 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return NULL; return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } -extern unsigned long __section_nr(struct mem_section *ms); extern size_t mem_section_usage_size(void); /* @@ -1365,7 +1365,7 @@ extern size_t mem_section_usage_size(void); #define SECTION_TAINT_ZONE_DEVICE (1UL<<4) #define SECTION_MAP_LAST_BIT (1UL<<5) #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) -#define SECTION_NID_SHIFT 3 +#define SECTION_NID_SHIFT 6 static inline struct page *__section_mem_map_addr(struct mem_section *section) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ed02aa522263..5dcf446f42e5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -736,7 +736,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. */ -static inline int fault_in_pages_writeable(char __user *uaddr, int size) +static inline int fault_in_pages_writeable(char __user *uaddr, size_t size) { char __user *end = uaddr + size - 1; @@ -763,7 +763,7 @@ static inline int fault_in_pages_writeable(char __user *uaddr, int size) return 0; } -static inline int fault_in_pages_readable(const char __user *uaddr, int size) +static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) { volatile char c; const char __user *end = uaddr + size - 1; diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h index 207e1a317800..41df326583f9 100644 --- a/include/linux/platform_data/x86/clk-lpss.h +++ b/include/linux/platform_data/x86/clk-lpss.h @@ -15,6 +15,6 @@ struct lpss_clk_data { struct clk *clk; }; -extern int lpt_clk_init(void); +extern int lpss_atom_clk_init(void); #endif /* __CLK_LPSS_H */ diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 8ddc7860e131..ada3a0ab10bf 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -47,6 +47,7 @@ extern void pm_clk_remove(struct device *dev, const char *con_id); extern void pm_clk_remove_clk(struct device *dev, struct clk *clk); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); +extern int devm_pm_clk_create(struct device *dev); #else static inline bool pm_clk_no_clocks(struct device *dev) { @@ -83,6 +84,10 @@ static inline void pm_clk_remove(struct device *dev, const char *con_id) static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk) { } +static inline int devm_pm_clk_create(struct device *dev) +{ + return -EINVAL; +} #endif #ifdef CONFIG_HAVE_CLK diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index aab8b35e9f8a..222da43b7096 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -59,6 +59,8 @@ extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); +extern int devm_pm_runtime_enable(struct device *dev); + /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. * @dev: Target device. 
@@ -253,6 +255,8 @@ static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } + static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index ecf87484814f..266754a55327 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -39,6 +39,12 @@ struct sg_table { unsigned int orig_nents; /* original size of list */ }; +struct sg_append_table { + struct sg_table sgt; /* The scatter list table */ + struct scatterlist *prv; /* last populated sge in the table */ + unsigned int total_nents; /* Total entries in the table */ +}; + /* * Notes on SG table design. * @@ -280,19 +286,51 @@ typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); void __sg_free_table(struct sg_table *, unsigned int, unsigned int, - sg_free_fn *); + sg_free_fn *, unsigned int); void sg_free_table(struct sg_table *); +void sg_free_append_table(struct sg_append_table *sgt); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, - struct page **pages, unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - struct scatterlist *prv, unsigned int left_pages, - gfp_t gfp_mask); -int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, gfp_t gfp_mask); +int sg_alloc_append_table_from_pages(struct sg_append_table *sgt, + struct page **pages, unsigned int n_pages, + unsigned int offset, unsigned long size, + unsigned int max_segment, + unsigned int left_pages, gfp_t gfp_mask); +int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, + unsigned int n_pages, unsigned int offset, + unsigned long size, + unsigned int max_segment, gfp_t gfp_mask); + +/** + * sg_alloc_table_from_pages - Allocate and initialize an sg table from + * an array of pages + * @sgt: The sg table header to use + * @pages: Pointer to an array of page pointers + * @n_pages: Number of pages in the pages array + * @offset: Offset from start of the first page to the start of a buffer + * @size: Number of valid bytes in the buffer (after offset) + * @gfp_mask: GFP allocation mask + * + * Description: + * Allocate and initialize an sg table from a list of pages. Contiguous + * ranges of the pages are squashed into a single scatterlist node. A user + * may provide an offset at a start and a size of valid data in a buffer + * specified by the page array. The returned sg table is released by + * sg_free_table. 
+ * + * Returns: + * 0 on success, negative error on failure + */ +static inline int sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, + unsigned int n_pages, + unsigned int offset, + unsigned long size, gfp_t gfp_mask) +{ + return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, offset, + size, UINT_MAX, gfp_mask); +} #ifdef CONFIG_SGL_ALLOC struct scatterlist *sgl_alloc_order(unsigned long long length, diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e24b1fe348e3..5561486fddef 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -174,13 +174,13 @@ static inline gfp_t current_gfp_context(gfp_t flags) } #ifdef CONFIG_LOCKDEP -extern void __fs_reclaim_acquire(void); -extern void __fs_reclaim_release(void); +extern void __fs_reclaim_acquire(unsigned long ip); +extern void __fs_reclaim_release(unsigned long ip); extern void fs_reclaim_acquire(gfp_t gfp_mask); extern void fs_reclaim_release(gfp_t gfp_mask); #else -static inline void __fs_reclaim_acquire(void) { } -static inline void __fs_reclaim_release(void) { } +static inline void __fs_reclaim_acquire(unsigned long ip) { } +static inline void __fs_reclaim_release(unsigned long ip) { } static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } static inline void fs_reclaim_release(gfp_t gfp_mask) { } #endif @@ -306,7 +306,7 @@ set_active_memcg(struct mem_cgroup *memcg) { struct mem_cgroup *old; - if (in_interrupt()) { + if (!in_task()) { old = this_cpu_read(int_active_memcg); this_cpu_write(int_active_memcg, memcg); } else { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 8e775ce517bb..166158b6e917 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -18,6 +18,7 @@ struct shmem_inode_info { unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ + pgoff_t fallocend; /* highest fallocate endindex */ struct list_head shrinklist; /* shrinkable hpage inodes */ struct list_head swaplist; /* chain of maybes on swap */ struct shared_policy policy; /* NUMA memory alloc policy */ @@ -31,7 +32,7 @@ struct shmem_sb_info { struct percpu_counter used_blocks; /* How many are allocated */ unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ - spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ + raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ kuid_t uid; /* Mount uid for root directory */ @@ -85,7 +86,12 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse); -extern bool shmem_huge_enabled(struct vm_area_struct *vma); +extern bool shmem_is_huge(struct vm_area_struct *vma, + struct inode *inode, pgoff_t index); +static inline bool shmem_huge_enabled(struct vm_area_struct *vma) +{ + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); +} extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -93,9 +99,8 @@ extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, /* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ + SGP_NOALLOC, /* 
similar, but fail on hole or use fallocated page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ - SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */ - SGP_HUGE, /* like SGP_CACHE, huge pages preferred */ SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; @@ -119,6 +124,18 @@ static inline bool shmem_file(struct file *file) return shmem_mapping(file->f_mapping); } +/* + * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages + * beyond i_size's notion of EOF, which fallocate has committed to reserving: + * which split_huge_page() must therefore not delete. This use of a single + * "fallocend" per inode errs on the side of not deleting a reservation when + * in doubt: there are plenty of cases when it preserves unreserved pages. + */ +static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof) +{ + return max(eof, SHMEM_I(inode)->fallocend); +} + extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index f2645ec52520..60e66fc9b6bf 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -29,6 +29,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_NCPB 0x6270636E #define QCOM_SMD_RPM_OCMEM_PWR 0x706d636f #define QCOM_SMD_RPM_QPIC_CLK 0x63697071 +#define QCOM_SMD_RPM_QUP_CLK 0x707571 #define QCOM_SMD_RPM_SMPA 0x61706d73 #define QCOM_SMD_RPM_SMPB 0x62706d73 #define QCOM_SMD_RPM_SPDM 0x63707362 diff --git a/include/linux/swap.h b/include/linux/swap.h index 6f5a43251593..ba52f3a3478e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -408,7 +408,7 @@ static inline bool node_reclaim_enabled(void) extern void check_move_unevictable_pages(struct pagevec *pvec); -extern int kswapd_run(int nid); +extern void kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP @@ -721,7 +721,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); +extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); +static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) +{ + if (mem_cgroup_disabled()) + return; + __cgroup_throttle_swaprate(page, gfp_mask); +} #else static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) { @@ -730,8 +736,22 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); -extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); +extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); +static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) +{ + if (mem_cgroup_disabled()) + return 0; + return __mem_cgroup_try_charge_swap(page, entry); +} + +extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge_swap(entry, nr_pages); +} + extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool 
mem_cgroup_swap_full(struct page *page); #else diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 216854a5e513..b0cb2a9973f4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -2,6 +2,7 @@ #ifndef __LINUX_SWIOTLB_H #define __LINUX_SWIOTLB_H +#include <linux/device.h> #include <linux/dma-direction.h> #include <linux/init.h> #include <linux/types.h> @@ -72,7 +73,8 @@ extern enum swiotlb_force swiotlb_force; * range check to see if the memory was in fact allocated by this * API. * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and - * @end. This is command line adjustable via setup_io_tlb_npages. + * @end. For default swiotlb, this is command line adjustable via + * setup_io_tlb_npages. * @used: The number of used IO TLB block. * @list: The free list describing the number of free entries available * from each index. @@ -83,6 +85,8 @@ extern enum swiotlb_force swiotlb_force; * unmap calls. * @debugfs: The dentry to debugfs. * @late_alloc: %true if allocated using the page allocator + * @force_bounce: %true if swiotlb bouncing is forced + * @for_alloc: %true if the pool is used for memory allocation */ struct io_tlb_mem { phys_addr_t start; @@ -93,29 +97,42 @@ struct io_tlb_mem { spinlock_t lock; struct dentry *debugfs; bool late_alloc; + bool force_bounce; + bool for_alloc; struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; unsigned int list; - } slots[]; + } *slots; }; -extern struct io_tlb_mem *io_tlb_default_mem; +extern struct io_tlb_mem io_tlb_default_mem; -static inline bool is_swiotlb_buffer(phys_addr_t paddr) +static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { - struct io_tlb_mem *mem = io_tlb_default_mem; + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; return mem && paddr >= mem->start && paddr < mem->end; } +static inline bool is_swiotlb_force_bounce(struct device *dev) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + + return mem && mem->force_bounce; +} + void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); -bool is_swiotlb_active(void); +bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); #else #define swiotlb_force SWIOTLB_NO_FORCE -static inline bool is_swiotlb_buffer(phys_addr_t paddr) +static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +{ + return false; +} +static inline bool is_swiotlb_force_bounce(struct device *dev) { return false; } @@ -131,7 +148,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev) return SIZE_MAX; } -static inline bool is_swiotlb_active(void) +static inline bool is_swiotlb_active(struct device *dev) { return false; } @@ -144,4 +161,28 @@ static inline void swiotlb_adjust_size(unsigned long size) extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); +#ifdef CONFIG_DMA_RESTRICTED_POOL +struct page *swiotlb_alloc(struct device *dev, size_t size); +bool swiotlb_free(struct device *dev, struct page *page, size_t size); + +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return dev->dma_io_tlb_mem->for_alloc; +} +#else +static inline struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + return NULL; +} +static inline bool swiotlb_free(struct device *dev, struct page *page, + size_t size) +{ + return false; +} +static inline bool is_swiotlb_for_alloc(struct device *dev) +{ + return false; +} +#endif /* CONFIG_DMA_RESTRICTED_POOL */ + #endif 
/* __LINUX_SWIOTLB_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2b47584eb843..60a3ab0ad2cc 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -915,6 +915,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); +asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 331d2ccf0bcc..33cea484d1ad 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -60,16 +60,16 @@ extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, - bool *mmap_changing, __u64 mode); + atomic_t *mmap_changing, __u64 mode); extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, - bool *mmap_changing); + atomic_t *mmap_changing); extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, - unsigned long len, bool *mmap_changing); + unsigned long len, atomic_t *mmap_changing); extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, - bool enable_wp, bool *mmap_changing); + bool enable_wp, atomic_t *mmap_changing); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index ae0dd1948c2b..a185cc75ff52 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -33,6 +33,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, + PGDEMOTE_KSWAPD, + PGDEMOTE_DIRECT, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_DIRECT_THROTTLE, diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 6d28bc433c1c..6a2f51ebbfd3 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -37,7 +37,7 @@ extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); -extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); +extern struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr); extern int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 270677dc4f36..d1f65adf6a26 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -218,7 +218,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, size_t bytes); -int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages, +int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(void); bool cleanup_offline_cgwb(struct bdi_writeback 
*wb);
@@ -374,7 +374,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
-void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
+void wb_update_bandwidth(struct bdi_writeback *wb);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
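
A minimal caller-side sketch of the reworked scatterlist API introduced in the include/linux/scatterlist.h hunk above, assuming only the sg_alloc_table_from_pages_segment() signature shown in this diff; the function name example_build_sgt and the 64K segment cap are illustrative, not part of the patch:

#include <linux/scatterlist.h>
#include <linux/sizes.h>

/*
 * Build an sg table from an already-pinned page array, capping each
 * scatterlist segment at 64K. Passing UINT_MAX instead of SZ_64K would
 * reproduce what the new sg_alloc_table_from_pages() inline wrapper does.
 */
static int example_build_sgt(struct sg_table *sgt, struct page **pages,
			     unsigned int n_pages, unsigned long size)
{
	return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, 0,
						 size, SZ_64K, GFP_KERNEL);
}

The helper returns 0 or a negative errno; on success the table is later released with sg_free_table(), as the kernel-doc in the hunk above notes.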