197 files changed, 4158 insertions, 1516 deletions
diff --git a/CREDITS b/CREDITS
index c9273393fe14..7d397ee67524 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2541,6 +2541,10 @@ S: Ormond
 S: Victoria 3163
 S: Australia
 
+N: Eric Miao
+E: [email protected]
+D: MMP support
+
 N: Pauline Middelink
 E: [email protected]
 D: General low-level bug fixes, /proc fixes, identd support
@@ -4115,6 +4119,10 @@ S: 1507 145th Place SE #B5
 S: Bellevue, Washington 98007
 S: USA
 
+N: Haojian Zhuang
+E: [email protected]
+D: MMP support
+
 N: Richard Zidlicky
 E: [email protected], [email protected]
 W: http://www.geocities.com/rdzidlic
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 73318225a368..9605dbd4b5b5 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -145,6 +145,8 @@ What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/name
 		/sys/devices/system/cpu/cpuX/cpuidle/stateN/power
 		/sys/devices/system/cpu/cpuX/cpuidle/stateN/time
 		/sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/above
+		/sys/devices/system/cpu/cpuX/cpuidle/stateN/below
 Date:		September 2007
 KernelVersion:	v2.6.24
 Contact:	Linux power management list <[email protected]>
@@ -166,6 +168,11 @@ Description:
 
 		usage: (RO) Number of times this state was entered (a count).
 
+		above: (RO) Number of times this state was entered, but the
+		       observed CPU idle duration was too short for it (a count).
+
+		below: (RO) Number of times this state was entered, but the
+		       observed CPU idle duration was too long for it (a count).
 
 What:		/sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
 Date:		February 2008
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aefd358a5ca3..362a18cd68e1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -674,6 +674,9 @@
 	cpuidle.off=1	[CPU_IDLE]
 			disable the cpuidle sub-system
 
+	cpuidle.governor=
+			[CPU_IDLE] Name of the cpuidle governor to use.
+
 	cpufreq.off=1	[CPU_FREQ]
 			disable the cpufreq sub-system
 
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
new file mode 100644
index 000000000000..106379e2619f
--- /dev/null
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -0,0 +1,631 @@
+.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
+.. |cpufreq| replace:: :doc:`CPU Performance Scaling <cpufreq>`
+
+========================
+CPU Idle Time Management
+========================
+
+::
+
+ Copyright (c) 2018 Intel Corp., Rafael J. Wysocki <[email protected]>
+
+Concepts
+========
+
+Modern processors are generally able to enter states in which the execution of
+a program is suspended and instructions belonging to it are not fetched from
+memory or executed.  Those states are the *idle* states of the processor.
+
+Since part of the processor hardware is not used in idle states, entering them
+generally allows power drawn by the processor to be reduced and, in consequence,
+it is an opportunity to save energy.
+
+CPU idle time management is an energy-efficiency feature concerned with using
+the idle states of processors for this purpose.
+
+Logical CPUs
+------------
+
+CPU idle time management operates on CPUs as seen by the *CPU scheduler* (that
+is the part of the kernel responsible for the distribution of computational
+work in the system).  In its view, CPUs are *logical* units.  That is, they need
+not be separate physical entities and may just be interfaces appearing to
+software as individual single-core processors.  In other words, a CPU is an
+entity which appears to be fetching instructions that belong to one sequence
+(program) from memory and executing them, but it need not work this way
+physically.  Generally, three different cases can be considered here.
+
+First, if the whole processor can only follow one sequence of instructions (one
+program) at a time, it is a CPU.  In that case, if the hardware is asked to
+enter an idle state, that applies to the processor as a whole.
+
+Second, if the processor is multi-core, each core in it is able to follow at
+least one program at a time.  The cores need not be entirely independent of each
+other (for example, they may share caches), but still most of the time they
+work physically in parallel with each other, so if each of them executes only
+one program, those programs run mostly independently of each other at the same
+time.  The entire cores are CPUs in that case and if the hardware is asked to
+enter an idle state, that applies to the core that asked for it in the first
+place, but it also may apply to a larger unit (say a "package" or a "cluster")
+that the core belongs to (in fact, it may apply to an entire hierarchy of larger
+units containing the core).  Namely, if all of the cores in the larger unit
+except for one have been put into idle states at the "core level" and the
+remaining core asks the processor to enter an idle state, that may trigger it
+to put the whole larger unit into an idle state which also will affect the
+other cores in that unit.
+
+Finally, each core in a multi-core processor may be able to follow more than one
+program in the same time frame (that is, each core may be able to fetch
+instructions from multiple locations in memory and execute them in the same time
+frame, but not necessarily entirely in parallel with each other).  In that case
+the cores present themselves to software as "bundles" each consisting of
+multiple individual single-core "processors", referred to as *hardware threads*
+(or hyper-threads specifically on Intel hardware), that each can follow one
+sequence of instructions.  Then, the hardware threads are CPUs from the CPU idle
+time management perspective and if the processor is asked to enter an idle state
+by one of them, the hardware thread (or CPU) that asked for it is stopped, but
+nothing more happens, unless all of the other hardware threads within the same
+core also have asked the processor to enter an idle state.  In that situation,
+the core may be put into an idle state individually or a larger unit containing
+it may be put into an idle state as a whole (if the other cores within the
+larger unit are in idle states already).
+
+Idle CPUs
+---------
+
+Logical CPUs, simply referred to as "CPUs" in what follows, are regarded as
+*idle* by the Linux kernel when there are no tasks to run on them except for the
+special "idle" task.
+
+Tasks are the CPU scheduler's representation of work.  Each task consists of a
+sequence of instructions to execute, or code, data to be manipulated while
+running that code, and some context information that needs to be loaded into the
+processor every time the task's code is run by a CPU.  The CPU scheduler
+distributes work by assigning tasks to run to the CPUs present in the system.
+
+Tasks can be in various states.  In particular, they are *runnable* if there are
+no specific conditions preventing their code from being run by a CPU as long as
+there is a CPU available for that (for example, they are not waiting for any
+events to occur or similar).  When a task becomes runnable, the CPU scheduler
+assigns it to one of the available CPUs to run and if there are no more runnable
+tasks assigned to it, the CPU will load the given task's context and run its
+code (from the instruction following the last one executed so far, possibly by
+another CPU).  [If there are multiple runnable tasks assigned to one CPU
+simultaneously, they will be subject to prioritization and time sharing in order
+to allow them to make some progress over time.]
+
+The special "idle" task becomes runnable if there are no other runnable tasks
+assigned to the given CPU and the CPU is then regarded as idle.  In other words,
+in Linux idle CPUs run the code of the "idle" task called *the idle loop*.  That
+code may cause the processor to be put into one of its idle states, if they are
+supported, in order to save energy, but if the processor does not support any
+idle states, or there is not enough time to spend in an idle state before the
+next wakeup event, or there are strict latency constraints preventing any of the
+available idle states from being used, the CPU will simply execute more or less
+useless instructions in a loop until it is assigned a new task to run.
+
+
+.. _idle-loop:
+
+The Idle Loop
+=============
+
+The idle loop code takes two major steps in every iteration of it.  First, it
+calls into a code module referred to as the *governor* that belongs to the CPU
+idle time management subsystem called ``CPUIdle`` to select an idle state for
+the CPU to ask the hardware to enter.  Second, it invokes another code module
+from the ``CPUIdle`` subsystem, called the *driver*, to actually ask the
+processor hardware to enter the idle state selected by the governor.
+
+The role of the governor is to find an idle state most suitable for the
+conditions at hand.  For this purpose, idle states that the hardware can be
+asked to enter by logical CPUs are represented in an abstract way independent of
+the platform or the processor architecture and organized in a one-dimensional
+(linear) array.  That array has to be prepared and supplied by the ``CPUIdle``
+driver matching the platform the kernel is running on at the initialization
+time.  This allows ``CPUIdle`` governors to be independent of the underlying
+hardware and to work with any platforms that the Linux kernel can run on.
+
+Each idle state present in that array is characterized by two parameters to be
+taken into account by the governor, the *target residency* and the (worst-case)
+*exit latency*.  The target residency is the minimum time the hardware must
+spend in the given state, including the time needed to enter it (which may be
+substantial), in order to save more energy than it would save by entering one of
+the shallower idle states instead.  [The "depth" of an idle state roughly
+corresponds to the power drawn by the processor in that state.]  The exit
+latency, in turn, is the maximum time it will take a CPU asking the processor
+hardware to enter an idle state to start executing the first instruction after a
+wakeup from that state.  Note that in general the exit latency also must cover
+the time needed to enter the given state in case the wakeup occurs when the
+hardware is entering it and it must be entered completely to be exited in an
+ordered manner.
+
+There are two types of information that can influence the governor's decisions.
+First of all, the governor knows the time until the closest timer event.  That
+time is known exactly, because the kernel programs timers and it knows exactly
+when they will trigger, and it is the maximum time the hardware that the given
+CPU depends on can spend in an idle state, including the time necessary to enter
+and exit it.  However, the CPU may be woken up by a non-timer event at any time
+(in particular, before the closest timer triggers) and it generally is not known
+when that may happen.  The governor can only see how much time the CPU actually
+was idle after it has been woken up (that time will be referred to as the *idle
+duration* from now on) and it can use that information somehow along with the
+time until the closest timer to estimate the idle duration in the future.  How the
+governor uses that information depends on what algorithm is implemented by it
+and that is the primary reason for having more than one governor in the
+``CPUIdle`` subsystem.
+
+There are two ``CPUIdle`` governors available, ``menu`` and ``ladder``.  Which
+of them is used depends on the configuration of the kernel and in particular on
+whether or not the scheduler tick can be `stopped by the idle
+loop <idle-cpus-and-tick_>`_.  It is possible to change the governor at run time
+if the ``cpuidle_sysfs_switch`` command line parameter has been passed to the
+kernel, but that is not safe in general, so it should not be done on production
+systems (that may change in the future, though).  The name of the ``CPUIdle``
+governor currently used by the kernel can be read from the
+:file:`current_governor_ro` (or :file:`current_governor` if
+``cpuidle_sysfs_switch`` is present in the kernel command line) file under
+:file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
+
+Which ``CPUIdle`` driver is used, on the other hand, usually depends on the
+platform the kernel is running on, but there are platforms with more than one
+matching driver.  For example, there are two drivers that can work with the
+majority of Intel platforms, ``intel_idle`` and ``acpi_idle``, one with
+hardcoded idle states information and the other able to read that information
+from the system's ACPI tables, respectively.  Still, even in those cases, the
+driver chosen at the system initialization time cannot be replaced later, so the
+decision on which one of them to use has to be made early (on Intel platforms
+the ``acpi_idle`` driver will be used if ``intel_idle`` is disabled for some
+reason or if it does not recognize the processor).  The name of the ``CPUIdle``
+driver currently used by the kernel can be read from the :file:`current_driver`
+file under :file:`/sys/devices/system/cpu/cpuidle/` in ``sysfs``.
+
+
+.. _idle-cpus-and-tick:
+
+Idle CPUs and The Scheduler Tick
+================================
+
+The scheduler tick is a timer that triggers periodically in order to implement
+the time sharing strategy of the CPU scheduler.  Of course, if there are
+multiple runnable tasks assigned to one CPU at the same time, the only way to
+allow them to make reasonable progress in a given time frame is to make them
+share the available CPU time.  Namely, in rough approximation, each task is
+given a slice of the CPU time to run its code, subject to the scheduling class,
+prioritization and so on and when that time slice is used up, the CPU should be
+switched over to running (the code of) another task.  The currently running task
+may not want to give the CPU away voluntarily, however, and the scheduler tick
+is there to make the switch happen regardless.  That is not the only role of the
+tick, but it is the primary reason for using it.
+
+The scheduler tick is problematic from the CPU idle time management perspective,
+because it triggers periodically and relatively often (depending on the kernel
+configuration, the length of the tick period is between 1 ms and 10 ms).
+Thus, if the tick is allowed to trigger on idle CPUs, it will not make sense
+for them to ask the hardware to enter idle states with target residencies above
+the tick period length.  Moreover, in that case the idle duration of any CPU
+will never exceed the tick period length and the energy used for entering and
+exiting idle states due to the tick wakeups on idle CPUs will be wasted.
+
+Fortunately, it is not really necessary to allow the tick to trigger on idle
+CPUs, because (by definition) they have no tasks to run except for the special
+"idle" one.  In other words, from the CPU scheduler perspective, the only user
+of the CPU time on them is the idle loop.  Since the time of an idle CPU need
+not be shared between multiple runnable tasks, the primary reason for using the
+tick goes away if the given CPU is idle.  Consequently, it is possible to stop
+the scheduler tick entirely on idle CPUs in principle, even though that may not
+always be worth the effort.
+
+Whether or not it makes sense to stop the scheduler tick in the idle loop
+depends on what is expected by the governor.  First, if there is another
+(non-tick) timer due to trigger within the tick range, stopping the tick clearly
+would be a waste of time, even though the timer hardware may not need to be
+reprogrammed in that case.  Second, if the governor is expecting a non-timer
+wakeup within the tick range, stopping the tick is not necessary and it may even
+be harmful.  Namely, in that case the governor will select an idle state with
+the target residency within the time until the expected wakeup, so that state is
+going to be relatively shallow.  The governor really cannot select a deep idle
+state then, as that would contradict its own expectation of a wakeup in short
+order.  Now, if the wakeup really occurs shortly, stopping the tick would be a
+waste of time and in this case the timer hardware would need to be reprogrammed,
+which is expensive.  On the other hand, if the tick is stopped and the wakeup
+does not occur any time soon, the hardware may spend an indefinite amount of time
+in the shallow idle state selected by the governor, which will be a waste of
+energy.  Hence, if the governor is expecting a wakeup of any kind within the
+tick range, it is better to allow the tick to trigger.  Otherwise, however, the
+governor will select a relatively deep idle state, so the tick should be stopped
+so that it does not wake up the CPU too early.
+
+In any case, the governor knows what it is expecting and the decision on whether
+or not to stop the scheduler tick belongs to it.  Still, if the tick has been
+stopped already (in one of the previous iterations of the loop), it is better
+to leave it as is and the governor needs to take that into account.
+
+The kernel can be configured to disable stopping the scheduler tick in the idle
+loop altogether.  That can be done through the build-time configuration of it
+(by unsetting the ``CONFIG_NO_HZ_IDLE`` configuration option) or by passing
+``nohz=off`` to it in the command line.  In both cases, as the stopping of the
+scheduler tick is disabled, the governor's decisions regarding it are simply
+ignored by the idle loop code and the tick is never stopped.
+
+The systems that run kernels configured to allow the scheduler tick to be
+stopped on idle CPUs are referred to as *tickless* systems and they are
+generally regarded as more energy-efficient than the systems running kernels in
+which the tick cannot be stopped.  If the given system is tickless, it will use
+the ``menu`` governor by default and if it is not tickless, the default
+``CPUIdle`` governor on it will be ``ladder``.
+
+
+The ``menu`` Governor
+=====================
+
+The ``menu`` governor is the default ``CPUIdle`` governor for tickless systems.
+It is quite complex, but the basic principle of its design is straightforward.
+Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
+the CPU will ask the processor hardware to enter), it attempts to predict the
+idle duration and uses the predicted value for idle state selection.
+
+It first obtains the time until the closest timer event with the assumption
+that the scheduler tick will be stopped.  That time, referred to as the *sleep
+length* in what follows, is the upper bound on the time before the next CPU
+wakeup.  It is used to determine the sleep length range, which in turn is needed
+to get the sleep length correction factor.
+
+The ``menu`` governor maintains two arrays of sleep length correction factors.
+One of them is used when tasks previously running on the given CPU are waiting
+for some I/O operations to complete and the other one is used when that is not
+the case.  Each array contains several correction factor values that correspond
+to different sleep length ranges organized so that each range represented in the
+array is approximately 10 times wider than the previous one.
+
+The correction factor for the given sleep length range (determined before
+selecting the idle state for the CPU) is updated after the CPU has been woken
+up and the closer the sleep length is to the observed idle duration, the closer
+to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
+The sleep length is multiplied by the correction factor for the range that it
+falls into to obtain the first approximation of the predicted idle duration.
+
+Next, the governor uses a simple pattern recognition algorithm to refine its
+idle duration prediction.  Namely, it saves the last 8 observed idle duration
+values and, when predicting the idle duration next time, it computes the average
+and variance of them.  If the variance is small (smaller than 400 square
+milliseconds) or it is small relative to the average (the average is greater
+than 6 times the standard deviation), the average is regarded as the "typical
+interval" value.  Otherwise, the longest of the saved observed idle duration
+values is discarded and the computation is repeated for the remaining ones.
+Again, if the variance of them is small (in the above sense), the average is
+taken as the "typical interval" value and so on, until either the "typical
+interval" is determined or too many data points are disregarded, in which case
+the "typical interval" is assumed to equal "infinity" (the maximum unsigned
+integer value).  The "typical interval" computed this way is compared with the
+sleep length multiplied by the correction factor and the minimum of the two is
+taken as the predicted idle duration.
+
+Then, the governor computes an extra latency limit to help "interactive"
+workloads.  It uses the observation that if the exit latency of the selected
+idle state is comparable with the predicted idle duration, the total time spent
+in that state probably will be very short and the amount of energy to save by
+entering it will be relatively small, so likely it is better to avoid the
+overhead related to entering that state and exiting it.  Thus selecting a
+shallower state is likely to be a better option then.   The first approximation
+of the extra latency limit is the predicted idle duration itself which
+additionally is divided by a value depending on the number of tasks that
+previously ran on the given CPU and now they are waiting for I/O operations to
+complete.  The result of that division is compared with the latency limit coming
+from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
+framework and the minimum of the two is taken as the limit for the idle states'
+exit latency.
+
+Now, the governor is ready to walk the list of idle states and choose one of
+them.  For this purpose, it compares the target residency of each state with
+the predicted idle duration and the exit latency of it with the computed latency
+limit.  It selects the state with the target residency closest to the predicted
+idle duration, but still below it, and exit latency that does not exceed the
+limit.
+
+In the final step the governor may still need to refine the idle state selection
+if it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_.  That
+happens if the idle duration predicted by it is less than the tick period and
+the tick has not been stopped already (in a previous iteration of the idle
+loop).  Then, the sleep length used in the previous computations may not reflect
+the real time until the closest timer event and if it really is greater than
+that time, the governor may need to select a shallower state with a suitable
+target residency.
+
+
+.. _idle-states-representation:
+
+Representation of Idle States
+=============================
+
+For the CPU idle time management purposes all of the physical idle states
+supported by the processor have to be represented as a one-dimensional array of
+|struct cpuidle_state| objects each allowing an individual (logical) CPU to ask
+the processor hardware to enter an idle state of certain properties.  If there
+is a hierarchy of units in the processor, one |struct cpuidle_state| object can
+cover a combination of idle states supported by the units at different levels of
+the hierarchy.  In that case, the `target residency and exit latency parameters
+of it <idle-loop_>`_, must reflect the properties of the idle state at the
+deepest level (i.e. the idle state of the unit containing all of the other
+units).
+
+For example, take a processor with two cores in a larger unit referred to as
+a "module" and suppose that asking the hardware to enter a specific idle state
+(say "X") at the "core" level by one core will trigger the module to try to
+enter a specific idle state of its own (say "MX") if the other core is in idle
+state "X" already.  In other words, asking for idle state "X" at the "core"
+level gives the hardware a license to go as deep as to idle state "MX" at the
+"module" level, but there is no guarantee that this is going to happen (the core
+asking for idle state "X" may just end up in that state by itself instead).
+Then, the target residency of the |struct cpuidle_state| object representing
+idle state "X" must reflect the minimum time to spend in idle state "MX" of
+the module (including the time needed to enter it), because that is the minimum
+time the CPU needs to be idle to save any energy in case the hardware enters
+that state.  Analogously, the exit latency parameter of that object must cover
+the exit time of idle state "MX" of the module (and usually its entry time too),
+because that is the maximum delay between a wakeup signal and the time the CPU
+will start to execute the first new instruction (assuming that both cores in the
+module will always be ready to execute instructions as soon as the module
+becomes operational as a whole).
+
+There are processors without direct coordination between different levels of the
+hierarchy of units inside them, however.  In those cases asking for an idle
+state at the "core" level does not automatically affect the "module" level, for
+example, in any way and the ``CPUIdle`` driver is responsible for the entire
+handling of the hierarchy.  Then, the definition of the idle state objects is
+entirely up to the driver, but still the physical properties of the idle state
+that the processor hardware finally goes into must always follow the parameters
+used by the governor for idle state selection (for instance, the actual exit
+latency of that idle state must not exceed the exit latency parameter of the
+idle state object selected by the governor).
+
+In addition to the target residency and exit latency idle state parameters
+discussed above, the objects representing idle states each contain a few other
+parameters describing the idle state and a pointer to the function to run in
+order to ask the hardware to enter that state.  Also, for each
+|struct cpuidle_state| object, there is a corresponding
+:c:type:`struct cpuidle_state_usage <cpuidle_state_usage>` one containing usage
+statistics of the given idle state.  That information is exposed by the kernel
+via ``sysfs``.
+
+For each CPU in the system, there is a :file:`/sys/devices/system/cpu/cpu<N>/cpuidle/`
+directory in ``sysfs``, where the number ``<N>`` is assigned to the given
+CPU at the initialization time.  That directory contains a set of subdirectories
+called :file:`state0`, :file:`state1` and so on, up to the number of idle state
+objects defined for the given CPU minus one.  Each of these directories
+corresponds to one idle state object and the larger the number in its name, the
+deeper the (effective) idle state represented by it.  Each of them contains
+a number of files (attributes) representing the properties of the idle state
+object corresponding to it, as follows:
+
+``above``
+	Total number of times this idle state had been asked for, but the
+	observed idle duration was certainly too short to match its target
+	residency.
+
+``below``
+	Total number of times this idle state had been asked for, but certainly
+	a deeper idle state would have been a better match for the observed idle
+	duration.
+
+``desc``
+	Description of the idle state.
+
+``disable``
+	Whether or not this idle state is disabled.
+
+``latency``
+	Exit latency of the idle state in microseconds.
+
+``name``
+	Name of the idle state.
+
+``power``
+	Power drawn by hardware in this idle state in milliwatts (if specified,
+	0 otherwise).
+
+``residency``
+	Target residency of the idle state in microseconds.
+
+``time``
+	Total time spent in this idle state by the given CPU (as measured by the
+	kernel) in microseconds.
+
+``usage``
+	Total number of times the hardware has been asked by the given CPU to
+	enter this idle state.
+
+The :file:`desc` and :file:`name` files both contain strings.  The difference
+between them is that the name is expected to be more concise, while the
+description may be longer and it may contain white space or special characters.
+The other files listed above contain integer numbers.
+
+The :file:`disable` attribute is the only writeable one.  If it contains 1, the
+given idle state is disabled for this particular CPU, which means that the
+governor will never select it for this particular CPU and the ``CPUIdle``
+driver will never ask the hardware to enter it for that CPU as a result.
+However, disabling an idle state for one CPU does not prevent it from being
+asked for by the other CPUs, so it must be disabled for all of them in order to
+never be asked for by any of them.  [Note that, due to the way the ``ladder``
+governor is implemented, disabling an idle state prevents that governor from
+selecting any idle states deeper than the disabled one too.]
+
+If the :file:`disable` attribute contains 0, the given idle state is enabled for
+this particular CPU, but it still may be disabled for some or all of the other
+CPUs in the system at the same time.  Writing 1 to it causes the idle state to
+be disabled for this particular CPU and writing 0 to it allows the governor to
+take it into consideration for the given CPU and the driver to ask for it,
+unless that state was disabled globally in the driver (in which case it cannot
+be used at all).
+
+The :file:`power` attribute is not defined very well, especially for idle state
+objects representing combinations of idle states at different levels of the
+hierarchy of units in the processor, and it generally is hard to obtain idle
+state power numbers for complex hardware, so :file:`power` often contains 0 (not
+available) and if it contains a nonzero number, that number may not be very
+accurate and it should not be relied on for anything meaningful.
+
+The number in the :file:`time` file generally may be greater than the total time
+really spent by the given CPU in the given idle state, because it is measured by
+the kernel and it may not cover the cases in which the hardware refused to enter
+this idle state and entered a shallower one instead of it (or even it did not
+enter any idle state at all).  The kernel can only measure the time span between
+asking the hardware to enter an idle state and the subsequent wakeup of the CPU
+and it cannot say what really happened in the meantime at the hardware level.
+Moreover, if the idle state object in question represents a combination of idle
+states at different levels of the hierarchy of units in the processor,
+the kernel can never say how deep the hardware went down the hierarchy in any
+particular case.  For these reasons, the only reliable way to find out how
+much time has been spent by the hardware in different idle states supported by
+it is to use idle state residency counters in the hardware, if available.
+
+
+.. _cpu-pm-qos:
+
+Power Management Quality of Service for CPUs
+============================================
+
+The power management quality of service (PM QoS) framework in the Linux kernel
+allows kernel code and user space processes to set constraints on various
+energy-efficiency features of the kernel to prevent performance from dropping
+below a required level.  The PM QoS constraints can be set globally, in
+predefined categories referred to as PM QoS classes, or against individual
+devices.
+
+CPU idle time management can be affected by PM QoS in two ways, through the
+global constraint in the ``PM_QOS_CPU_DMA_LATENCY`` class and through the
+resume latency constraints for individual CPUs.  Kernel code (e.g. device
+drivers) can set both of them with the help of special internal interfaces
+provided by the PM QoS framework.  User space can modify the former by opening
+the :file:`cpu_dma_latency` special device file under :file:`/dev/` and writing
+a binary value (interpreted as a signed 32-bit integer) to it.  In turn, the
+resume latency constraint for a CPU can be modified by user space by writing a
+string (representing a signed 32-bit integer) to the
+:file:`power/pm_qos_resume_latency_us` file under
+:file:`/sys/devices/system/cpu/cpu<N>/` in ``sysfs``, where the CPU number
+``<N>`` is allocated at the system initialization time.  Negative values
+will be rejected in both cases and, also in both cases, the written integer
+number will be interpreted as a requested PM QoS constraint in microseconds.
+
+The requested value is not automatically applied as a new constraint, however,
+as it may be less restrictive (greater in this particular case) than another
+constraint previously requested by someone else.  For this reason, the PM QoS
+framework maintains a list of requests that have been made so far in each
+global class and for each device, aggregates them and applies the effective
+(minimum in this particular case) value as the new constraint.
+
+In fact, opening the :file:`cpu_dma_latency` special device file causes a new
+PM QoS request to be created and added to the priority list of requests in the
+``PM_QOS_CPU_DMA_LATENCY`` class and the file descriptor coming from the
+"open" operation represents that request.  If that file descriptor is then
+used for writing, the number written to it will be associated with the PM QoS
+request represented by it as a new requested constraint value.  Next, the
+priority list mechanism will be used to determine the new effective value of
+the entire list of requests and that effective value will be set as a new
+constraint.  Thus setting a new requested constraint value will only change the
+real constraint if the effective "list" value is affected by it.  In particular,
+for the ``PM_QOS_CPU_DMA_LATENCY`` class it only affects the real constraint if
+it is the minimum of the requested constraints in the list.  The process holding
+a file descriptor obtained by opening the :file:`cpu_dma_latency` special device
+file controls the PM QoS request associated with that file descriptor, but it
+controls this particular PM QoS request only.
+
+Closing the :file:`cpu_dma_latency` special device file or, more precisely, the
+file descriptor obtained while opening it, causes the PM QoS request associated
+with that file descriptor to be removed from the ``PM_QOS_CPU_DMA_LATENCY``
+class priority list and destroyed.  If that happens, the priority list mechanism
+will be used, again, to determine the new effective value for the whole list
+and that value will become the new real constraint.
+
+In turn, for each CPU there is only one resume latency PM QoS request
+associated with the :file:`power/pm_qos_resume_latency_us` file under
+:file:`/sys/devices/system/cpu/cpu<N>/` in ``sysfs`` and writing to it causes
+this single PM QoS request to be updated regardless of which user space
+process does that.  In other words, this PM QoS request is shared by the entire
+user space, so access to the file associated with it needs to be arbitrated
+to avoid confusion.  [Arguably, the only legitimate use of this mechanism in
+practice is to pin a process to the CPU in question and let it use the
+``sysfs`` interface to control the resume latency constraint for it.]  It
+still only is a request, however.  It is a member of a priority list used to
+determine the effective value to be set as the resume latency constraint for the
+CPU in question every time the list of requests is updated this way or another
+(there may be other requests coming from kernel code in that list).
+
+CPU idle time governors are expected to regard the minimum of the global
+effective ``PM_QOS_CPU_DMA_LATENCY`` class constraint and the effective
+resume latency constraint for the given CPU as the upper limit for the exit
+latency of the idle states they can select for that CPU.  They should never
+select any idle states with exit latency beyond that limit.
+
+
+Idle States Control Via Kernel Command Line
+===========================================
+
+In addition to the ``sysfs`` interface allowing individual idle states to be
+`disabled for individual CPUs <idle-states-representation_>`_, there are kernel
+command line parameters affecting CPU idle time management.
+
+The ``cpuidle.off=1`` kernel command line option can be used to disable the
+CPU idle time management entirely.  It does not prevent the idle loop from
+running on idle CPUs, but it prevents the CPU idle time governors and drivers
+from being invoked.  If it is added to the kernel command line, the idle loop
+will ask the hardware to enter idle states on idle CPUs via the CPU architecture
+support code that is expected to provide a default mechanism for this purpose.
+That default mechanism usually is the least common denominator for all of the
+processors implementing the architecture (i.e. CPU instruction set) in question,
+however, so it is rather crude and not very energy-efficient.  For this reason,
+it is not recommended for production use.
+
+The ``cpuidle.governor=`` kernel command line switch allows the ``CPUIdle``
+governor to use to be specified.  It has to be appended with a string matching
+the name of an available governor (e.g. ``cpuidle.governor=menu``) and that
+governor will be used instead of the default one.  It is possible to force
+the ``menu`` governor to be used on the systems that use the ``ladder`` governor
+by default this way, for example.
+
+The other kernel command line parameters controlling CPU idle time management
+described below are only relevant for the *x86* architecture and some of
+them affect Intel processors only.
+
+The *x86* architecture support code recognizes three kernel command line
+options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
+and ``idle=nomwait``.  The first two of them disable the ``acpi_idle`` and
+``intel_idle`` drivers altogether, which effectively causes the entire
+``CPUIdle`` subsystem to be disabled and makes the idle loop invoke the
+architecture support code to deal with idle CPUs.  How it does that depends on
+which of the two parameters is added to the kernel command line.  In the
+``idle=halt`` case, the architecture support code will use the ``HLT``
+instruction of the CPUs (which, as a rule, suspends the execution of the program
+and causes the hardware to attempt to enter the shallowest available idle state)
+for this purpose, and if ``idle=poll`` is used, idle CPUs will execute a
+more or less "lightweight" sequence of instructions in a tight loop.  [Note
+that using ``idle=poll`` is somewhat drastic in many cases, as preventing idle
+CPUs from saving almost any energy at all may not be the only effect of it.
+For example, on Intel hardware it effectively prevents CPUs from using
+P-states (see |cpufreq|) that require any number of CPUs in a package to be
+idle, so it very well may hurt single-thread computations performance as well as
+energy-efficiency.  Thus using it for performance reasons may not be a good idea
+at all.]
+
+The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
+``acpi_idle`` to be used (as long as all of the information needed by it is
+there in the system's ACPI tables), but it is not allowed to use the
+``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+
+In addition to the architecture-level kernel command line options affecting CPU
+idle time management, there are parameters affecting individual ``CPUIdle``
+drivers that can be passed to them via the kernel command line.  Specifically,
+the ``intel_idle.max_cstate=<n>`` and ``processor.max_cstate=<n>`` parameters,
+where ``<n>`` is an idle state index also used in the name of the given
+state's directory in ``sysfs`` (see
+`Representation of Idle States <idle-states-representation_>`_), cause the
+``intel_idle`` and ``acpi_idle`` drivers, respectively, to discard all of the
+idle states deeper than idle state ``<n>``.  In that case, they will never ask
+for any of those idle states or expose them to the governor.  [The behavior of
+the two drivers is different for ``<n>`` equal to ``0``.  Adding
+``intel_idle.max_cstate=0`` to the kernel command line disables the
+``intel_idle`` driver and allows ``acpi_idle`` to be used, whereas
+``processor.max_cstate=0`` is equivalent to ``processor.max_cstate=1``.
+Also, the ``acpi_idle`` driver is part of the ``processor`` kernel module that
+can be loaded separately and ``max_cstate=<n>`` can be passed to it as a module
+parameter when it is loaded.]
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ac6f5c597a56..ec0f7c111f65 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -495,7 +495,15 @@ on the following rules, regardless of the current operation mode of the driver:
 
  2. Each individual CPU is affected by its own per-policy limits (that is, it
     cannot be requested to run faster than its own per-policy maximum and it
-    cannot be requested to run slower than its own per-policy minimum).
+    cannot be requested to run slower than its own per-policy minimum). The
+    effective performance depends on whether the platform supports per core
+    P-states, whether hyper-threading is enabled and on current performance
+    requests from other CPUs. When the platform lacks per core P-states, the
+    effective performance can be more than the policy limits set on a CPU, if
+    other CPUs are requesting higher performance at that moment. Even with per
+    core P-states support, when hyper-threading is enabled, if the sibling CPU
+    is requesting higher performance, the other siblings will get higher
+    performance than their policy limits.
 
  3. The global and per-policy limits can be set independently.
 
diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst
index fa01bf083dfe..b6cef9b5e961 100644
--- a/Documentation/admin-guide/pm/working-state.rst
+++ b/Documentation/admin-guide/pm/working-state.rst
@@ -5,5 +5,6 @@ Working-State Power Management
 .. toctree::
    :maxdepth: 2
 
+   cpuidle
    cpufreq
    intel_pstate
diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index dbe96cb5558e..6a6d67acaf69 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -187,6 +187,8 @@ Takes xa_lock internally:
  * :c:func:`xa_erase_bh`
  * :c:func:`xa_erase_irq`
  * :c:func:`xa_cmpxchg`
+ * :c:func:`xa_cmpxchg_bh`
+ * :c:func:`xa_cmpxchg_irq`
  * :c:func:`xa_store_range`
  * :c:func:`xa_alloc`
  * :c:func:`xa_alloc_bh`
@@ -263,7 +265,8 @@ using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
 context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
 in the interrupt handler.  Some of the more common patterns have helper
 functions such as :c:func:`xa_store_bh`, :c:func:`xa_store_irq`,
-:c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
+:c:func:`xa_erase_bh`, :c:func:`xa_erase_irq`, :c:func:`xa_cmpxchg_bh`
+and :c:func:`xa_cmpxchg_irq`.
 
 Sometimes you need to protect access to the XArray with a mutex because
 that lock sits above another mutex in the locking hierarchy.  That does
diff --git a/Documentation/cpuidle/core.txt b/Documentation/cpuidle/core.txt
deleted file mode 100644
index 63ecc5dc9d8a..000000000000
--- a/Documentation/cpuidle/core.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
-		Supporting multiple CPU idle levels in kernel
-
-				cpuidle
-
-General Information:
-
-Various CPUs today support multiple idle levels that are differentiated
-by varying exit latencies and power consumption during idle.
-cpuidle is a generic in-kernel infrastructure that separates
-idle policy (governor) from idle mechanism (driver) and provides a
-standardized infrastructure to support independent development of
-governors and drivers.
-
-cpuidle resides under drivers/cpuidle.
-
-Boot options:
-"cpuidle_sysfs_switch"
-enables current_governor interface in /sys/devices/system/cpu/cpuidle/,
-which can be used to switch governors at run time. This boot option
-is meant for developer testing only. In normal usage, kernel picks the
-best governor based on governor ratings.
-SEE ALSO: sysfs.txt in this directory.
diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt
deleted file mode 100644
index d1587f434e7b..000000000000
--- a/Documentation/cpuidle/sysfs.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-
-
-		Supporting multiple CPU idle levels in kernel
-
-				cpuidle sysfs
-
-System global cpuidle related information and tunables are under
-/sys/devices/system/cpu/cpuidle
-
-The current interfaces in this directory has self-explanatory names:
-* current_driver
-* current_governor_ro
-
-With cpuidle_sysfs_switch boot option (meant for developer testing)
-following objects are visible instead.
-* current_driver
-* available_governors
-* current_governor
-In this case users can switch the governor at run time by writing
-to current_governor.
-
-
-Per logical CPU specific cpuidle information are under
-/sys/devices/system/cpu/cpuX/cpuidle
-for each online cpu X
-
---------------------------------------------------------------------------------
-# ls -lR /sys/devices/system/cpu/cpu0/cpuidle/
-/sys/devices/system/cpu/cpu0/cpuidle/:
-total 0
-drwxr-xr-x 2 root root 0 Feb  8 10:42 state0
-drwxr-xr-x 2 root root 0 Feb  8 10:42 state1
-drwxr-xr-x 2 root root 0 Feb  8 10:42 state2
-drwxr-xr-x 2 root root 0 Feb  8 10:42 state3
-
-/sys/devices/system/cpu/cpu0/cpuidle/state0:
-total 0
--r--r--r-- 1 root root 4096 Feb  8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
--r--r--r-- 1 root root 4096 Feb  8 10:42 latency
--r--r--r-- 1 root root 4096 Feb  8 10:42 name
--r--r--r-- 1 root root 4096 Feb  8 10:42 power
--r--r--r-- 1 root root 4096 Feb  8 10:42 residency
--r--r--r-- 1 root root 4096 Feb  8 10:42 time
--r--r--r-- 1 root root 4096 Feb  8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state1:
-total 0
--r--r--r-- 1 root root 4096 Feb  8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
--r--r--r-- 1 root root 4096 Feb  8 10:42 latency
--r--r--r-- 1 root root 4096 Feb  8 10:42 name
--r--r--r-- 1 root root 4096 Feb  8 10:42 power
--r--r--r-- 1 root root 4096 Feb  8 10:42 residency
--r--r--r-- 1 root root 4096 Feb  8 10:42 time
--r--r--r-- 1 root root 4096 Feb  8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state2:
-total 0
--r--r--r-- 1 root root 4096 Feb  8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
--r--r--r-- 1 root root 4096 Feb  8 10:42 latency
--r--r--r-- 1 root root 4096 Feb  8 10:42 name
--r--r--r-- 1 root root 4096 Feb  8 10:42 power
--r--r--r-- 1 root root 4096 Feb  8 10:42 residency
--r--r--r-- 1 root root 4096 Feb  8 10:42 time
--r--r--r-- 1 root root 4096 Feb  8 10:42 usage
-
-/sys/devices/system/cpu/cpu0/cpuidle/state3:
-total 0
--r--r--r-- 1 root root 4096 Feb  8 10:42 desc
--rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
--r--r--r-- 1 root root 4096 Feb  8 10:42 latency
--r--r--r-- 1 root root 4096 Feb  8 10:42 name
--r--r--r-- 1 root root 4096 Feb  8 10:42 power
--r--r--r-- 1 root root 4096 Feb  8 10:42 residency
--r--r--r-- 1 root root 4096 Feb  8 10:42 time
--r--r--r-- 1 root root 4096 Feb  8 10:42 usage
---------------------------------------------------------------------------------
-
-
-* desc : Small description about the idle state (string)
-* disable : Option to disable this idle state (bool) -> see note below
-* latency : Latency to exit out of this idle state (in microseconds)
-* residency : Time after which a state becomes more effecient than any
-  shallower state (in microseconds)
-* name : Name of the idle state (string)
-* power : Power consumed while in this idle state (in milliwatts)
-* time : Total time spent in this idle state (in microseconds)
-* usage : Number of times this state was entered (count)
-
-Note:
-The behavior and the effect of the disable variable depends on the
-implementation of a particular governor. In the ladder governor, for
-example, it is not coherent, i.e. if one is disabling a light state,
-then all deeper states are disabled as well, but the disable variable
-does not reflect it. Likewise, if one enables a deep state but a lighter
-state still is disabled, then this has no effect.
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt
new file mode 100644
index 000000000000..33856947c561
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.txt
@@ -0,0 +1,172 @@
+Qualcomm Technologies, Inc. CPUFREQ Bindings
+
+CPUFREQ HW is a hardware engine used by some Qualcomm Technologies, Inc. (QTI)
+SoCs to manage frequency in hardware. It is capable of controlling frequency
+for multiple clusters.
+
+Properties:
+- compatible
+	Usage:		required
+	Value type:	<string>
+	Definition:	must be "qcom,cpufreq-hw".
+
+- clocks
+	Usage:		required
+	Value type:	<phandle> From common clock binding.
+	Definition:	clock handle for XO clock and GPLL0 clock.
+
+- clock-names
+	Usage:		required
+	Value type:	<string> From common clock binding.
+	Definition:	must be "xo", "alternate".
+
+- reg
+	Usage:		required
+	Value type:	<prop-encoded-array>
+	Definition:	Addresses and sizes for the memory of the HW bases in
+			each frequency domain.
+- reg-names
+	Usage:		Optional
+	Value type:	<string>
+	Definition:	Frequency domain name i.e.
+			"freq-domain0", "freq-domain1".
+
+- #freq-domain-cells:
+	Usage:		required.
+	Definition:	Number of cells in a frequency domain specifier.
+
+* Property qcom,freq-domain
+Devices supporting freq-domain must set their "qcom,freq-domain" property with
+phandle to a cpufreq_hw followed by the Domain ID(0/1) in the CPU DT node.
+
+
+Example:
+
+Example 1: Dual-cluster, Quad-core per cluster. CPUs within a cluster switch
+DCVS state together.
+
+/ {
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		CPU0: cpu@0 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			next-level-cache = <&L2_0>;
+			qcom,freq-domain = <&cpufreq_hw 0>;
+			L2_0: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+				L3_0: l3-cache {
+				      compatible = "cache";
+				};
+			};
+		};
+
+		CPU1: cpu@100 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x100>;
+			enable-method = "psci";
+			next-level-cache = <&L2_100>;
+			qcom,freq-domain = <&cpufreq_hw 0>;
+			L2_100: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU2: cpu@200 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x200>;
+			enable-method = "psci";
+			next-level-cache = <&L2_200>;
+			qcom,freq-domain = <&cpufreq_hw 0>;
+			L2_200: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU3: cpu@300 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x300>;
+			enable-method = "psci";
+			next-level-cache = <&L2_300>;
+			qcom,freq-domain = <&cpufreq_hw 0>;
+			L2_300: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU4: cpu@400 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x400>;
+			enable-method = "psci";
+			next-level-cache = <&L2_400>;
+			qcom,freq-domain = <&cpufreq_hw 1>;
+			L2_400: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU5: cpu@500 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x500>;
+			enable-method = "psci";
+			next-level-cache = <&L2_500>;
+			qcom,freq-domain = <&cpufreq_hw 1>;
+			L2_500: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU6: cpu@600 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x600>;
+			enable-method = "psci";
+			next-level-cache = <&L2_600>;
+			qcom,freq-domain = <&cpufreq_hw 1>;
+			L2_600: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+
+		CPU7: cpu@700 {
+			device_type = "cpu";
+			compatible = "qcom,kryo385";
+			reg = <0x0 0x700>;
+			enable-method = "psci";
+			next-level-cache = <&L2_700>;
+			qcom,freq-domain = <&cpufreq_hw 1>;
+			L2_700: l2-cache {
+				compatible = "cache";
+				next-level-cache = <&L3_0>;
+			};
+		};
+	};
+
+ soc {
+	cpufreq_hw: cpufreq@17d43000 {
+		compatible = "qcom,cpufreq-hw";
+		reg = <0x17d43000 0x1400>, <0x17d45800 0x1400>;
+		reg-names = "freq-domain0", "freq-domain1";
+
+		clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
+		clock-names = "xo", "alternate";
+
+		#freq-domain-cells = <1>;
+	};
+}
diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 65a1d873196b..027358b91082 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1505,6 +1505,11 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     configuring a stateless hardware decoding pipeline for MPEG-2.
     The bitstream parameters are defined according to :ref:`mpeg2part2`.
 
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
 .. c:type:: v4l2_ctrl_mpeg2_slice_params
 
 .. cssclass:: longtable
@@ -1625,6 +1630,11 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     Specifies quantization matrices (as extracted from the bitstream) for the
     associated MPEG-2 slice data.
 
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
 .. c:type:: v4l2_ctrl_mpeg2_quantization
 
 .. cssclass:: longtable
diff --git a/MAINTAINERS b/MAINTAINERS
index 8119141a926f..3318f30903b2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1739,13 +1739,17 @@ ARM/Mediatek SoC support
 M:	Matthias Brugger <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
 L:	[email protected] (moderated for non-subscribers)
+W:	https://mtk.bcnfs.org/
+C:	irc://chat.freenode.net/linux-mediatek
 S:	Maintained
 F:	arch/arm/boot/dts/mt6*
 F:	arch/arm/boot/dts/mt7*
 F:	arch/arm/boot/dts/mt8*
 F:	arch/arm/mach-mediatek/
 F:	arch/arm64/boot/dts/mediatek/
+F:	drivers/soc/mediatek/
 N:	mtk
+N:	mt[678]
 K:	mediatek
 
 ARM/Mediatek USB3 PHY DRIVER
@@ -4843,6 +4847,7 @@ F:	include/uapi/drm/vmwgfx_drm.h
 
 DRM DRIVERS
 M:	David Airlie <[email protected]>
+M:	Daniel Vetter <[email protected]>
 L:	[email protected]
 T:	git git://anongit.freedesktop.org/drm/drm
 B:	https://bugs.freedesktop.org/
@@ -10005,12 +10010,9 @@ S:	Odd Fixes
 F:	drivers/media/radio/radio-miropcm20*
 
 MMP SUPPORT
-M:	Eric Miao <[email protected]>
-M:	Haojian Zhuang <[email protected]>
+R:	Lubomir Rintel <[email protected]>
 L:	[email protected] (moderated for non-subscribers)
-T:	git git://github.com/hzhuang1/linux.git
-T:	git git://git.linaro.org/people/ycmiao/pxa-linux.git
-S:	Maintained
+S:	Odd Fixes
 F:	arch/arm/boot/dts/mmp*
 F:	arch/arm/mach-mmp/
 
diff --git a/Makefile b/Makefile
index f2c3423c3062..56d5270f22f8 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Shy Crocodile
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index a37fd990bd55..4b5b1b244f86 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -634,6 +634,7 @@ setup_arch(char **cmdline_p)
 
 	/* Find our memory.  */
 	setup_memory(kernel_end);
+	memblock_set_bottom_up(true);
 
 	/* First guess at cpu cache sizes.  Do this before init_arch.  */
 	determine_cpu_caches(cpu->type);
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 74846553e3f1..d0b73371e985 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -144,14 +144,14 @@ setup_memory_node(int nid, void *kernel_end)
 	if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
 		panic("kernel loaded out of ram");
 
+	memblock_add(PFN_PHYS(node_min_pfn),
+		     (node_max_pfn - node_min_pfn) << PAGE_SHIFT);
+
 	/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
 	   Note that we round this down, not up - node memory
 	   has much larger alignment than 8Mb, so it's safe. */
 	node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
 
-	memblock_add(PFN_PHYS(node_min_pfn),
-		     (node_max_pfn - node_min_pfn) << PAGE_SHIFT);
-
 	NODE_DATA(nid)->node_start_pfn = node_min_pfn;
 	NODE_DATA(nid)->node_present_pages = node_max_pfn - node_min_pfn;
 
diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts
index f2a1d25eb6cf..83e0fbc4a1a1 100644
--- a/arch/arm/boot/dts/arm-realview-pb1176.dts
+++ b/arch/arm/boot/dts/arm-realview-pb1176.dts
@@ -45,7 +45,7 @@
 	};
 
 	/* The voltage to the MMC card is hardwired at 3.3V */
-	vmmc: fixedregulator@0 {
+	vmmc: regulator-vmmc {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmc";
 		regulator-min-microvolt = <3300000>;
@@ -53,7 +53,7 @@
 		regulator-boot-on;
         };
 
-	veth: fixedregulator@0 {
+	veth: regulator-veth {
 		compatible = "regulator-fixed";
 		regulator-name = "veth";
 		regulator-min-microvolt = <3300000>;
diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts
index 7f9cbdf33a51..2f6aa24a0b67 100644
--- a/arch/arm/boot/dts/arm-realview-pb11mp.dts
+++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts
@@ -145,7 +145,7 @@
 	};
 
 	/* The voltage to the MMC card is hardwired at 3.3V */
-	vmmc: fixedregulator@0 {
+	vmmc: regulator-vmmc {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmc";
 		regulator-min-microvolt = <3300000>;
@@ -153,7 +153,7 @@
 		regulator-boot-on;
         };
 
-	veth: fixedregulator@0 {
+	veth: regulator-veth {
 		compatible = "regulator-fixed";
 		regulator-name = "veth";
 		regulator-min-microvolt = <3300000>;
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
index 4adb85e66be3..93762244be7f 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
@@ -31,7 +31,7 @@
 
 	wifi_pwrseq: wifi-pwrseq {
 		compatible = "mmc-pwrseq-simple";
-		reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
index c318bcbc6ba7..89e6fd547c75 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
@@ -26,7 +26,7 @@
 
 	wifi_pwrseq: wifi-pwrseq {
 		compatible = "mmc-pwrseq-simple";
-		reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts
index d8aac4a2d02a..177d21fdeb28 100644
--- a/arch/arm/boot/dts/imx7d-nitrogen7.dts
+++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts
@@ -86,13 +86,17 @@
 		compatible = "regulator-fixed";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
-		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
-		clock-names = "slow";
 		regulator-name = "reg_wlan";
 		startup-delay-us = <70000>;
 		gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>;
 		enable-active-high;
 	};
+
+	usdhc2_pwrseq: usdhc2_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
+		clock-names = "ext_clock";
+	};
 };
 
 &adc1 {
@@ -375,6 +379,7 @@
 	bus-width = <4>;
 	non-removable;
 	vmmc-supply = <&reg_wlan>;
+	mmc-pwrseq = <&usdhc2_pwrseq>;
 	cap-power-off-card;
 	keep-power-in-suspend;
 	status = "okay";
diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi
index 21973eb55671..f27b3849d3ff 100644
--- a/arch/arm/boot/dts/imx7d-pico.dtsi
+++ b/arch/arm/boot/dts/imx7d-pico.dtsi
@@ -100,6 +100,19 @@
 		regulator-min-microvolt = <1800000>;
 		regulator-max-microvolt = <1800000>;
 	};
+
+	usdhc2_pwrseq: usdhc2_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
+		clock-names = "ext_clock";
+	};
+};
+
+&clks {
+	assigned-clocks = <&clks IMX7D_CLKO2_ROOT_SRC>,
+			  <&clks IMX7D_CLKO2_ROOT_DIV>;
+	assigned-clock-parents = <&clks IMX7D_CKIL>;
+	assigned-clock-rates = <0>, <32768>;
 };
 
 &i2c4 {
@@ -199,12 +212,13 @@
 
 &usdhc2 { /* Wifi SDIO */
 	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usdhc2>;
+	pinctrl-0 = <&pinctrl_usdhc2 &pinctrl_wifi_clk>;
 	no-1-8-v;
 	non-removable;
 	keep-power-in-suspend;
 	wakeup-source;
 	vmmc-supply = <&reg_ap6212>;
+	mmc-pwrseq = <&usdhc2_pwrseq>;
 	status = "okay";
 };
 
@@ -301,6 +315,12 @@
 };
 
 &iomuxc_lpsr {
+	pinctrl_wifi_clk: wificlkgrp {
+		fsl,pins = <
+			MX7D_PAD_LPSR_GPIO1_IO03__CCM_CLKO2	0x7d
+		>;
+	};
+
 	pinctrl_wdog: wdoggrp {
 		fsl,pins = <
 			MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B	0x74
diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
index 742d2946b08b..583a5a01642f 100644
--- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
@@ -314,8 +314,8 @@
 
 &reg_dldo3 {
 	regulator-always-on;
-	regulator-min-microvolt = <2500000>;
-	regulator-max-microvolt = <2500000>;
+	regulator-min-microvolt = <3300000>;
+	regulator-max-microvolt = <3300000>;
 	regulator-name = "vcc-pd";
 };
 
diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c
index 243a108a940b..fd0053e47a15 100644
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -110,7 +110,7 @@ int __init imx6sx_cpuidle_init(void)
 	 * except for power up sw2iso which need to be
 	 * larger than LDO ramp up time.
 	 */
-	imx_gpc_set_arm_power_up_timing(2, 1);
+	imx_gpc_set_arm_power_up_timing(0xf, 1);
 	imx_gpc_set_arm_power_down_timing(1, 1);
 
 	return cpuidle_register(&imx6sx_cpuidle_driver, NULL);
diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h
index 446edaeb78a7..a96abcf521b4 100644
--- a/arch/arm/mach-mmp/cputype.h
+++ b/arch/arm/mach-mmp/cputype.h
@@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void)
 #define cpu_is_pxa910()	(0)
 #endif
 
-#ifdef CONFIG_CPU_MMP2
+#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT)
 static inline int cpu_is_mmp2(void)
 {
-	return (((read_cpuid_id() >> 8) & 0xff) == 0x58);
+	return (((read_cpuid_id() >> 8) & 0xff) == 0x58) &&
+		(((mmp_chip_id & 0xfff) == 0x410) ||
+		 ((mmp_chip_id & 0xfff) == 0x610));
 }
 #else
 #define cpu_is_mmp2()	(0)
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
index 64632c873888..01ea662afba8 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
@@ -20,28 +20,24 @@
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x000>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x001>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu2: cpu@100 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x100>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu3: cpu@101 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x101>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
index 073610ac0a53..7d94c1fa592a 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
@@ -28,33 +28,6 @@
 		method = "smc";
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		idle_states {
-			entry_method = "arm,pcsi";
-
-			CPU_SLEEP_0: cpu-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x0010000>;
-				entry-latency-us = <80>;
-				exit-latency-us  = <160>;
-				min-residency-us = <320>;
-			};
-
-			CLUSTER_SLEEP_0: cluster-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x1010000>;
-				entry-latency-us = <500>;
-				exit-latency-us = <1000>;
-				min-residency-us = <2500>;
-			};
-		};
-	};
-
 	ap806 {
 		#address-cells = <2>;
 		#size-cells = <2>;
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index 5d6005c9b097..710c5c3d87d3 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -16,8 +16,13 @@
 	model = "Bananapi BPI-R64";
 	compatible = "bananapi,bpi-r64", "mediatek,mt7622";
 
+	aliases {
+		serial0 = &uart0;
+	};
+
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512";
+		stdout-path = "serial0:115200n8";
+		bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512";
 	};
 
 	cpus {
diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index dcad0869b84c..3f783348c66a 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -17,8 +17,13 @@
 	model = "MediaTek MT7622 RFB1 board";
 	compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622";
 
+	aliases {
+		serial0 = &uart0;
+	};
+
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512";
+		stdout-path = "serial0:115200n8";
+		bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512";
 	};
 
 	cpus {
diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index fe0c875f1d95..14a1028ca3a6 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -227,16 +227,6 @@
 		#reset-cells = <1>;
 	};
 
-	timer: timer@10004000 {
-		compatible = "mediatek,mt7622-timer",
-			     "mediatek,mt6577-timer";
-		reg = <0 0x10004000 0 0x80>;
-		interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&infracfg CLK_INFRA_APXGPT_PD>,
-			 <&topckgen CLK_TOP_RTC>;
-		clock-names = "system-clk", "rtc-clk";
-	};
-
 	scpsys: scpsys@10006000 {
 		compatible = "mediatek,mt7622-scpsys",
 			     "syscon";
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..f0a5c9531e8b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -35,15 +35,6 @@
 #define PCI_IO_SIZE		SZ_16M
 
 /*
- * Log2 of the upper bound of the size of a struct page. Used for sizing
- * the vmemmap region only, does not affect actual memory footprint.
- * We don't use sizeof(struct page) directly since taking its size here
- * requires its definition to be available at this point in the inclusion
- * chain, and it may not be a power of 2 in the first place.
- */
-#define STRUCT_PAGE_MAX_SHIFT	6
-
-/*
  * VMEMMAP_SIZE - allows the whole linear region to be covered by
  *                a struct page array
  */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a3ac26284845..a53704406099 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -429,9 +429,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 						   prot,
 						   __builtin_return_address(0));
 		if (addr) {
-			memset(addr, 0, size);
 			if (!coherent)
 				__dma_flush_area(page_to_virt(page), iosize);
+			memset(addr, 0, size);
 		} else {
 			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
 			dma_release_from_contiguous(dev, page,
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9b432d9fcada..0340e45655c6 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -610,14 +610,6 @@ void __init mem_init(void)
 	BUILD_BUG_ON(TASK_SIZE_32			> TASK_SIZE_64);
 #endif
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	/*
-	 * Make sure we chose the upper bound of sizeof(struct page)
-	 * correctly when sizing the VMEMMAP array.
-	 */
-	BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
-#endif
-
 	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;
 		/*
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 39354365f54a..ed9883169190 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -197,7 +197,7 @@ $(obj)/empty.c:
 $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
 	$(Q)cp $< $@
 
-$(obj)/serial.c: $(obj)/autoconf.h
+$(srctree)/$(src)/serial.c: $(obj)/autoconf.h
 
 $(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/%
 	$(Q)cp $< $@
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 32dfe6d083f3..9b9d17437373 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -15,7 +15,7 @@
 RELA = 7
 RELACOUNT = 0x6ffffff9
 
-	.text
+	.data
 	/* A procedure descriptor used when booting this as a COFF file.
 	 * When making COFF, this comes first in the link and we're
 	 * linked at 0x500000.
@@ -23,6 +23,8 @@ RELACOUNT = 0x6ffffff9
 	.globl	_zimage_start_opd
 _zimage_start_opd:
 	.long	0x500000, 0, 0, 0
+	.text
+	b	_zimage_start
 
 #ifdef __powerpc64__
 .balign 8
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 8bf1b6351716..16a49819da9a 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -26,6 +26,8 @@
 #include <asm/ptrace.h>
 #include <asm/reg.h>
 
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
 /*
  * Overload regs->result to specify whether we should use the MSR (result
  * is zero) or the SIAR (result is non zero).
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index a658091a19f9..3712152206f3 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y += bpf_perf_event.h
 generic-y += param.h
 generic-y += poll.h
 generic-y += resource.h
diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..b551b741653d
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 33b34a58fc62..5b9dce17f0c9 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -372,6 +372,8 @@ void __init find_legacy_serial_ports(void)
 
 	/* Now find out if one of these is out firmware console */
 	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (path == NULL)
+		path = of_get_property(of_chosen, "stdout-path", NULL);
 	if (path != NULL) {
 		stdout = of_find_node_by_path(path);
 		if (stdout)
@@ -595,8 +597,10 @@ static int __init check_legacy_serial_console(void)
 	/* We are getting a weird phandle from OF ... */
 	/* ... So use the full path instead */
 	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (name == NULL)
+		name = of_get_property(of_chosen, "stdout-path", NULL);
 	if (name == NULL) {
-		DBG(" no linux,stdout-path !\n");
+		DBG(" no stdout-path !\n");
 		return -ENODEV;
 	}
 	prom_stdout = of_find_node_by_path(name);
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index dab616a33b8d..f2197654be07 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -34,5 +34,10 @@ void arch_teardown_msi_irqs(struct pci_dev *dev)
 {
 	struct pci_controller *phb = pci_bus_to_host(dev->bus);
 
-	phb->controller_ops.teardown_msi_irqs(dev);
+	/*
+	 * We can be called even when arch_setup_msi_irqs() returns -ENOSYS,
+	 * so check the pointer again.
+	 */
+	if (phb->controller_ops.teardown_msi_irqs)
+		phb->controller_ops.teardown_msi_irqs(dev);
 }
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index afb819f4ca68..714c3480c52d 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3266,12 +3266,17 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 	user_exit();
 
 	if (test_thread_flag(TIF_SYSCALL_EMU)) {
-		ptrace_report_syscall(regs);
 		/*
+		 * A nonzero return code from tracehook_report_syscall_entry()
+		 * tells us to prevent the syscall execution, but we are not
+		 * going to execute it anyway.
+		 *
 		 * Returning -1 will skip the syscall execution. We want to
 		 * avoid clobbering any register also, thus, not 'gotoing'
 		 * skip label.
 		 */
+		if (tracehook_report_syscall_entry(regs))
+			;
 		return -1;
 	}
 
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index 2b74f8adf4d0..6aa41669ac1a 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -19,6 +19,7 @@
 #include <linux/hugetlb.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <asm/fixmap.h>
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 7a9886f98b0c..a5091c034747 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -188,15 +188,20 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
 
 	for (; start < end; start += page_size) {
-		void *p;
+		void *p = NULL;
 		int rc;
 
 		if (vmemmap_populated(start, page_size))
 			continue;
 
+		/*
+		 * Allocate from the altmap first if we have one. This may
+		 * fail due to alignment issues when using 16MB hugepages, so
+		 * fall back to system memory if the altmap allocation fails.
+		 */
 		if (altmap)
 			p = altmap_alloc_block_buf(page_size, altmap);
-		else
+		if (!p)
 			p = vmemmap_alloc_block_buf(page_size, node);
 		if (!p)
 			return -ENOMEM;
@@ -255,8 +260,15 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
 {
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 	unsigned long page_order = get_order(page_size);
+	unsigned long alt_start = ~0, alt_end = ~0;
+	unsigned long base_pfn;
 
 	start = _ALIGN_DOWN(start, page_size);
+	if (altmap) {
+		alt_start = altmap->base_pfn;
+		alt_end = altmap->base_pfn + altmap->reserve +
+			  altmap->free + altmap->alloc + altmap->align;
+	}
 
 	pr_debug("vmemmap_free %lx...%lx\n", start, end);
 
@@ -280,8 +292,9 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
 		page = pfn_to_page(addr >> PAGE_SHIFT);
 		section_base = pfn_to_page(vmemmap_section_start(start));
 		nr_pages = 1 << page_order;
+		base_pfn = PHYS_PFN(addr);
 
-		if (altmap) {
+		if (base_pfn >= alt_start && base_pfn < alt_end) {
 			vmem_altmap_free(altmap, nr_pages);
 		} else if (PageReserved(page)) {
 			/* allocated from bootmem */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 2e4bd32154b5..472b784f01eb 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -140,8 +140,7 @@ config IBMEBUS
 	  Bus device driver for GX bus based adapters.
 
 config PAPR_SCM
-	depends on PPC_PSERIES && MEMORY_HOTPLUG
-	select LIBNVDIMM
+	depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM
 	tristate "Support for the PAPR Storage Class Memory interface"
 	help
 	  Enable access to hypervisor provided storage class memory.
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ee9372b65ca5..7d6457ab5d34 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -55,7 +55,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
 	do {
 		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
 				p->blocks, BIND_ANY_ADDR, token);
-		token = be64_to_cpu(ret[0]);
+		token = ret[0];
 		cond_resched();
 	} while (rc == H_BUSY);
 
@@ -64,7 +64,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
 		return -ENXIO;
 	}
 
-	p->bound_addr = be64_to_cpu(ret[1]);
+	p->bound_addr = ret[1];
 
 	dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
 
@@ -82,7 +82,7 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
 	do {
 		rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
 				p->bound_addr, p->blocks, token);
-		token = be64_to_cpu(ret);
+		token = ret[0];
 		cond_resched();
 	} while (rc == H_BUSY);
 
@@ -223,6 +223,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 		goto err;
 	}
 
+	if (nvdimm_bus_check_dimm_count(p->bus, 1))
+		goto err;
+
 	/* now add the region */
 
 	memset(&mapping, 0, sizeof(mapping));
@@ -257,9 +260,12 @@ err:	nvdimm_bus_unregister(p->bus);
 
 static int papr_scm_probe(struct platform_device *pdev)
 {
-	uint32_t drc_index, metadata_size, unit_cap[2];
 	struct device_node *dn = pdev->dev.of_node;
+	u32 drc_index, metadata_size;
+	u64 blocks, block_size;
 	struct papr_scm_priv *p;
+	const char *uuid_str;
+	u64 uuid[2];
 	int rc;
 
 	/* check we have all the required DT properties */
@@ -268,8 +274,18 @@ static int papr_scm_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) {
-		dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn);
+	if (of_property_read_u64(dn, "ibm,block-size", &block_size)) {
+		dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) {
+		dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) {
+		dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn);
 		return -ENODEV;
 	}
 
@@ -282,8 +298,13 @@ static int papr_scm_probe(struct platform_device *pdev)
 
 	p->dn = dn;
 	p->drc_index = drc_index;
-	p->block_size = unit_cap[0];
-	p->blocks     = unit_cap[1];
+	p->block_size = block_size;
+	p->blocks = blocks;
+
+	/* Set cookies only need to be unique, so derive them from the unit GUID */
+	uuid_parse(uuid_str, (uuid_t *) uuid);
+	p->nd_set.cookie1 = uuid[0];
+	p->nd_set.cookie2 = uuid[1];
 
 	/* might be zero */
 	p->metadata_size = metadata_size;
@@ -296,7 +317,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 
 	/* setup the resource for the newly bound range */
 	p->res.start = p->bound_addr;
-	p->res.end   = p->bound_addr + p->blocks * p->block_size;
+	p->res.end   = p->bound_addr + p->blocks * p->block_size - 1;
 	p->res.name  = pdev->name;
 	p->res.flags = IORESOURCE_MEM;
 
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 98cb8c802b1a..4f7f235f15f8 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -24,6 +24,7 @@
 #define __IO_PREFIX     generic
 #include <asm/io_generic.h>
 #include <asm/io_trapped.h>
+#include <asm-generic/pci_iomap.h>
 #include <mach/mangle-port.h>
 
 #define __raw_writeb(v,a)	(__chk_io_ptr(a), *(volatile u8  __force *)(a) = (v))
diff --git a/block/bio.c b/block/bio.c
index 4f4d9884443b..4d86e90654b2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1261,7 +1261,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 		if (ret)
 			goto cleanup;
 	} else {
-		zero_fill_bio(bio);
+		if (bmd->is_our_pages)
+			zero_fill_bio(bio);
 		iov_iter_advance(iter, bio->bi_iter.bi_size);
 	}
 
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 13ba2011a306..a327bef07642 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -378,7 +378,7 @@ static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
 	struct page *page;
 	int order;
 
-	for (order = get_order(size); order > 0; order--) {
+	for (order = get_order(size); order >= 0; order--) {
 		page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
 		if (page) {
 			*nr_zones = min_t(unsigned int, *nr_zones,
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index d4e5610e09c5..9d66a47d32fb 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1034,6 +1034,18 @@ void acpi_ec_unblock_transactions(void)
 		acpi_ec_start(first_ec, true);
 }
 
+void acpi_ec_mark_gpe_for_wake(void)
+{
+	if (first_ec && !ec_no_wakeup)
+		acpi_mark_gpe_for_wake(NULL, first_ec->gpe);
+}
+
+void acpi_ec_set_gpe_wake_mask(u8 action)
+{
+	if (first_ec && !ec_no_wakeup)
+		acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
+}
+
 void acpi_ec_dispatch_gpe(void)
 {
 	if (first_ec)
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 530a3f675490..f59d0b9e2683 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -188,6 +188,8 @@ int acpi_ec_ecdt_probe(void);
 int acpi_ec_dsdt_probe(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
+void acpi_ec_mark_gpe_for_wake(void);
+void acpi_ec_set_gpe_wake_mask(u8 action);
 void acpi_ec_dispatch_gpe(void);
 int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 			      acpi_handle handle, acpi_ec_query_func func,
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 754d59f95500..403c4ff15349 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -940,6 +940,8 @@ static int lps0_device_attach(struct acpi_device *adev,
 
 		acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
 				  bitmask);
+
+		acpi_ec_mark_gpe_for_wake();
 	} else {
 		acpi_handle_debug(adev->handle,
 				  "_DSM function 0 evaluation failed\n");
@@ -968,16 +970,23 @@ static int acpi_s2idle_prepare(void)
 	if (lps0_device_handle) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
+
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE);
 	}
 
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
 
+	/* Change the configuration of GPEs to avoid spurious wakeup. */
+	acpi_enable_all_wakeup_gpes();
+	acpi_os_wait_events_complete();
 	return 0;
 }
 
 static void acpi_s2idle_wake(void)
 {
+	if (!lps0_device_handle)
+		return;
 
 	if (pm_debug_messages_on)
 		lpi_check_constraints();
@@ -996,8 +1005,7 @@ static void acpi_s2idle_wake(void)
 		 * takes too much time for EC wakeup events to survive, so look
 		 * for them now.
 		 */
-		if (lps0_device_handle)
-			acpi_ec_dispatch_gpe();
+		acpi_ec_dispatch_gpe();
 	}
 }
 
@@ -1017,10 +1025,14 @@ static void acpi_s2idle_sync(void)
 
 static void acpi_s2idle_restore(void)
 {
+	acpi_enable_all_runtime_gpes();
+
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
 
 	if (lps0_device_handle) {
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE);
+
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
 	}
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 7f38a92b444a..500de1dee967 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -239,6 +239,127 @@ static void genpd_update_accounting(struct generic_pm_domain *genpd)
 static inline void genpd_update_accounting(struct generic_pm_domain *genpd) {}
 #endif
 
+static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd,
+					   unsigned int state)
+{
+	struct generic_pm_domain_data *pd_data;
+	struct pm_domain_data *pdd;
+	struct gpd_link *link;
+
+	/* New requested state is same as Max requested state */
+	if (state == genpd->performance_state)
+		return state;
+
+	/* New requested state is higher than Max requested state */
+	if (state > genpd->performance_state)
+		return state;
+
+	/* Traverse all devices within the domain */
+	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
+		pd_data = to_gpd_data(pdd);
+
+		if (pd_data->performance_state > state)
+			state = pd_data->performance_state;
+	}
+
+	/*
+	 * Traverse all sub-domains within the domain. This can be
+	 * done without any additional locking as the link->performance_state
+	 * field is protected by the master genpd->lock, which is already taken.
+	 *
+	 * Also note that link->performance_state (subdomain's performance state
+	 * requirement to master domain) is different from
+	 * link->slave->performance_state (current performance state requirement
+	 * of the devices/sub-domains of the subdomain) and so can have a
+	 * different value.
+	 *
+	 * Note that we also take vote from powered-off sub-domains into account
+	 * as the same is done for devices right now.
+	 */
+	list_for_each_entry(link, &genpd->master_links, master_node) {
+		if (link->performance_state > state)
+			state = link->performance_state;
+	}
+
+	return state;
+}
+
+static int _genpd_set_performance_state(struct generic_pm_domain *genpd,
+					unsigned int state, int depth)
+{
+	struct generic_pm_domain *master;
+	struct gpd_link *link;
+	int master_state, ret;
+
+	if (state == genpd->performance_state)
+		return 0;
+
+	/* Propagate to masters of genpd */
+	list_for_each_entry(link, &genpd->slave_links, slave_node) {
+		master = link->master;
+
+		if (!master->set_performance_state)
+			continue;
+
+		/* Find master's performance state */
+		ret = dev_pm_opp_xlate_performance_state(genpd->opp_table,
+							 master->opp_table,
+							 state);
+		if (unlikely(ret < 0))
+			goto err;
+
+		master_state = ret;
+
+		genpd_lock_nested(master, depth + 1);
+
+		link->prev_performance_state = link->performance_state;
+		link->performance_state = master_state;
+		master_state = _genpd_reeval_performance_state(master,
+						master_state);
+		ret = _genpd_set_performance_state(master, master_state, depth + 1);
+		if (ret)
+			link->performance_state = link->prev_performance_state;
+
+		genpd_unlock(master);
+
+		if (ret)
+			goto err;
+	}
+
+	ret = genpd->set_performance_state(genpd, state);
+	if (ret)
+		goto err;
+
+	genpd->performance_state = state;
+	return 0;
+
+err:
+	/* Encountered an error, let's roll back */
+	list_for_each_entry_continue_reverse(link, &genpd->slave_links,
+					     slave_node) {
+		master = link->master;
+
+		if (!master->set_performance_state)
+			continue;
+
+		genpd_lock_nested(master, depth + 1);
+
+		master_state = link->prev_performance_state;
+		link->performance_state = master_state;
+
+		master_state = _genpd_reeval_performance_state(master,
+						master_state);
+		if (_genpd_set_performance_state(master, master_state, depth + 1)) {
+			pr_err("%s: Failed to roll back to %d performance state\n",
+			       master->name, master_state);
+		}
+
+		genpd_unlock(master);
+	}
+
+	return ret;
+}
+
 /**
  * dev_pm_genpd_set_performance_state- Set performance state of device's power
  * domain.
@@ -257,10 +378,9 @@ static inline void genpd_update_accounting(struct generic_pm_domain *genpd) {}
 int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 {
 	struct generic_pm_domain *genpd;
-	struct generic_pm_domain_data *gpd_data, *pd_data;
-	struct pm_domain_data *pdd;
+	struct generic_pm_domain_data *gpd_data;
 	unsigned int prev;
-	int ret = 0;
+	int ret;
 
 	genpd = dev_to_genpd(dev);
 	if (IS_ERR(genpd))
@@ -281,47 +401,11 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 	prev = gpd_data->performance_state;
 	gpd_data->performance_state = state;
 
-	/* New requested state is same as Max requested state */
-	if (state == genpd->performance_state)
-		goto unlock;
-
-	/* New requested state is higher than Max requested state */
-	if (state > genpd->performance_state)
-		goto update_state;
-
-	/* Traverse all devices within the domain */
-	list_for_each_entry(pdd, &genpd->dev_list, list_node) {
-		pd_data = to_gpd_data(pdd);
-
-		if (pd_data->performance_state > state)
-			state = pd_data->performance_state;
-	}
-
-	if (state == genpd->performance_state)
-		goto unlock;
-
-	/*
-	 * We aren't propagating performance state changes of a subdomain to its
-	 * masters as we don't have hardware that needs it. Over that, the
-	 * performance states of subdomain and its masters may not have
-	 * one-to-one mapping and would require additional information. We can
-	 * get back to this once we have hardware that needs it. For that
-	 * reason, we don't have to consider performance state of the subdomains
-	 * of genpd here.
-	 */
-
-update_state:
-	if (genpd_status_on(genpd)) {
-		ret = genpd->set_performance_state(genpd, state);
-		if (ret) {
-			gpd_data->performance_state = prev;
-			goto unlock;
-		}
-	}
-
-	genpd->performance_state = state;
+	state = _genpd_reeval_performance_state(genpd, state);
+	ret = _genpd_set_performance_state(genpd, state, 0);
+	if (ret)
+		gpd_data->performance_state = prev;
 
-unlock:
 	genpd_unlock(genpd);
 
 	return ret;
@@ -347,15 +431,6 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
 		return ret;
 
 	elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
-
-	if (unlikely(genpd->set_performance_state)) {
-		ret = genpd->set_performance_state(genpd, genpd->performance_state);
-		if (ret) {
-			pr_warn("%s: Failed to set performance state %d (%d)\n",
-				genpd->name, genpd->performance_state, ret);
-		}
-	}
-
 	if (elapsed_ns <= genpd->states[state_idx].power_on_latency_ns)
 		return ret;
 
@@ -1907,12 +1982,21 @@ int of_genpd_add_provider_simple(struct device_node *np,
 				ret);
 			goto unlock;
 		}
+
+		/*
+		 * Save table for faster processing while setting performance
+		 * state.
+		 */
+		genpd->opp_table = dev_pm_opp_get_opp_table(&genpd->dev);
+		WARN_ON(!genpd->opp_table);
 	}
 
 	ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
 	if (ret) {
-		if (genpd->set_performance_state)
+		if (genpd->set_performance_state) {
+			dev_pm_opp_put_opp_table(genpd->opp_table);
 			dev_pm_opp_of_remove_table(&genpd->dev);
+		}
 
 		goto unlock;
 	}
@@ -1965,6 +2049,13 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 					i, ret);
 				goto error;
 			}
+
+			/*
+			 * Save table for faster processing while setting
+			 * performance state.
+			 */
+			genpd->opp_table = dev_pm_opp_get_opp_table_indexed(&genpd->dev, i);
+			WARN_ON(!genpd->opp_table);
 		}
 
 		genpd->provider = &np->fwnode;
@@ -1989,8 +2080,10 @@ error:
 		genpd->provider = NULL;
 		genpd->has_provider = false;
 
-		if (genpd->set_performance_state)
+		if (genpd->set_performance_state) {
+			dev_pm_opp_put_opp_table(genpd->opp_table);
 			dev_pm_opp_of_remove_table(&genpd->dev);
+		}
 	}
 
 	mutex_unlock(&gpd_list_lock);
@@ -2024,6 +2117,7 @@ void of_genpd_del_provider(struct device_node *np)
 					if (!gpd->set_performance_state)
 						continue;
 
+					dev_pm_opp_put_opp_table(gpd->opp_table);
 					dev_pm_opp_of_remove_table(&gpd->dev);
 				}
 			}
@@ -2338,7 +2432,7 @@ EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 					 unsigned int index)
 {
-	struct device *genpd_dev;
+	struct device *virt_dev;
 	int num_domains;
 	int ret;
 
@@ -2352,31 +2446,31 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 		return NULL;
 
 	/* Allocate and register device on the genpd bus. */
-	genpd_dev = kzalloc(sizeof(*genpd_dev), GFP_KERNEL);
-	if (!genpd_dev)
+	virt_dev = kzalloc(sizeof(*virt_dev), GFP_KERNEL);
+	if (!virt_dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev_set_name(genpd_dev, "genpd:%u:%s", index, dev_name(dev));
-	genpd_dev->bus = &genpd_bus_type;
-	genpd_dev->release = genpd_release_dev;
+	dev_set_name(virt_dev, "genpd:%u:%s", index, dev_name(dev));
+	virt_dev->bus = &genpd_bus_type;
+	virt_dev->release = genpd_release_dev;
 
-	ret = device_register(genpd_dev);
+	ret = device_register(virt_dev);
 	if (ret) {
-		kfree(genpd_dev);
+		kfree(virt_dev);
 		return ERR_PTR(ret);
 	}
 
 	/* Try to attach the device to the PM domain at the specified index. */
-	ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index, false);
+	ret = __genpd_dev_pm_attach(virt_dev, dev->of_node, index, false);
 	if (ret < 1) {
-		device_unregister(genpd_dev);
+		device_unregister(virt_dev);
 		return ret ? ERR_PTR(ret) : NULL;
 	}
 
-	pm_runtime_enable(genpd_dev);
-	genpd_queue_power_off_work(dev_to_genpd(genpd_dev));
+	pm_runtime_enable(virt_dev);
+	genpd_queue_power_off_work(dev_to_genpd(virt_dev));
 
-	return genpd_dev;
+	return virt_dev;
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
 
@@ -2521,52 +2615,36 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states);
 
 /**
- * of_genpd_opp_to_performance_state- Gets performance state of device's
- * power domain corresponding to a DT node's "required-opps" property.
+ * pm_genpd_opp_to_performance_state - Gets performance state of the genpd from its OPP node.
  *
- * @dev: Device for which the performance-state needs to be found.
- * @np: DT node where the "required-opps" property is present. This can be
- *	the device node itself (if it doesn't have an OPP table) or a node
- *	within the OPP table of a device (if device has an OPP table).
+ * @genpd_dev: Genpd's device for which the performance-state needs to be found.
+ * @opp: struct dev_pm_opp of the OPP for which we need to find performance
+ *	state.
  *
- * Returns performance state corresponding to the "required-opps" property of
- * a DT node. This calls platform specific genpd->opp_to_performance_state()
- * callback to translate power domain OPP to performance state.
+ * Returns performance state encoded in the OPP of the genpd. This calls
+ * platform specific genpd->opp_to_performance_state() callback to translate
+ * power domain OPP to performance state.
  *
  * Returns performance state on success and 0 on failure.
  */
-unsigned int of_genpd_opp_to_performance_state(struct device *dev,
-					       struct device_node *np)
+unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev,
+					       struct dev_pm_opp *opp)
 {
-	struct generic_pm_domain *genpd;
-	struct dev_pm_opp *opp;
-	int state = 0;
+	struct generic_pm_domain *genpd = NULL;
+	int state;
 
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return 0;
+	genpd = container_of(genpd_dev, struct generic_pm_domain, dev);
 
-	if (unlikely(!genpd->set_performance_state))
+	if (unlikely(!genpd->opp_to_performance_state))
 		return 0;
 
 	genpd_lock(genpd);
-
-	opp = of_dev_pm_opp_find_required_opp(&genpd->dev, np);
-	if (IS_ERR(opp)) {
-		dev_err(dev, "Failed to find required OPP: %ld\n",
-			PTR_ERR(opp));
-		goto unlock;
-	}
-
 	state = genpd->opp_to_performance_state(genpd, opp);
-	dev_pm_opp_put(opp);
-
-unlock:
 	genpd_unlock(genpd);
 
 	return state;
 }
-EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state);
+EXPORT_SYMBOL_GPL(pm_genpd_opp_to_performance_state);
 
 static int __init genpd_bus_init(void)
 {
@@ -2671,7 +2749,7 @@ exit:
 	return 0;
 }
 
-static int genpd_summary_show(struct seq_file *s, void *data)
+static int summary_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd;
 	int ret = 0;
@@ -2694,7 +2772,7 @@ static int genpd_summary_show(struct seq_file *s, void *data)
 	return ret;
 }
 
-static int genpd_status_show(struct seq_file *s, void *data)
+static int status_show(struct seq_file *s, void *data)
 {
 	static const char * const status_lookup[] = {
 		[GPD_STATE_ACTIVE] = "on",
@@ -2721,7 +2799,7 @@ exit:
 	return ret;
 }
 
-static int genpd_sub_domains_show(struct seq_file *s, void *data)
+static int sub_domains_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 	struct gpd_link *link;
@@ -2738,7 +2816,7 @@ static int genpd_sub_domains_show(struct seq_file *s, void *data)
 	return ret;
 }
 
-static int genpd_idle_states_show(struct seq_file *s, void *data)
+static int idle_states_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 	unsigned int i;
@@ -2767,7 +2845,7 @@ static int genpd_idle_states_show(struct seq_file *s, void *data)
 	return ret;
 }
 
-static int genpd_active_time_show(struct seq_file *s, void *data)
+static int active_time_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 	ktime_t delta = 0;
@@ -2787,7 +2865,7 @@ static int genpd_active_time_show(struct seq_file *s, void *data)
 	return ret;
 }
 
-static int genpd_total_idle_time_show(struct seq_file *s, void *data)
+static int total_idle_time_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 	ktime_t delta = 0, total = 0;
@@ -2815,7 +2893,7 @@ static int genpd_total_idle_time_show(struct seq_file *s, void *data)
 }
 
 
-static int genpd_devices_show(struct seq_file *s, void *data)
+static int devices_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 	struct pm_domain_data *pm_data;
@@ -2841,7 +2919,7 @@ static int genpd_devices_show(struct seq_file *s, void *data)
 	return ret;
 }
 
-static int genpd_perf_state_show(struct seq_file *s, void *data)
+static int perf_state_show(struct seq_file *s, void *data)
 {
 	struct generic_pm_domain *genpd = s->private;
 
@@ -2854,37 +2932,14 @@ static int genpd_perf_state_show(struct seq_file *s, void *data)
 	return 0;
 }
 
-#define define_genpd_open_function(name) \
-static int genpd_##name##_open(struct inode *inode, struct file *file) \
-{ \
-	return single_open(file, genpd_##name##_show, inode->i_private); \
-}
-
-define_genpd_open_function(summary);
-define_genpd_open_function(status);
-define_genpd_open_function(sub_domains);
-define_genpd_open_function(idle_states);
-define_genpd_open_function(active_time);
-define_genpd_open_function(total_idle_time);
-define_genpd_open_function(devices);
-define_genpd_open_function(perf_state);
-
-#define define_genpd_debugfs_fops(name) \
-static const struct file_operations genpd_##name##_fops = { \
-	.open = genpd_##name##_open, \
-	.read = seq_read, \
-	.llseek = seq_lseek, \
-	.release = single_release, \
-}
-
-define_genpd_debugfs_fops(summary);
-define_genpd_debugfs_fops(status);
-define_genpd_debugfs_fops(sub_domains);
-define_genpd_debugfs_fops(idle_states);
-define_genpd_debugfs_fops(active_time);
-define_genpd_debugfs_fops(total_idle_time);
-define_genpd_debugfs_fops(devices);
-define_genpd_debugfs_fops(perf_state);
+DEFINE_SHOW_ATTRIBUTE(summary);
+DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(sub_domains);
+DEFINE_SHOW_ATTRIBUTE(idle_states);
+DEFINE_SHOW_ATTRIBUTE(active_time);
+DEFINE_SHOW_ATTRIBUTE(total_idle_time);
+DEFINE_SHOW_ATTRIBUTE(devices);
+DEFINE_SHOW_ATTRIBUTE(perf_state);
 
 static int __init genpd_debug_init(void)
 {
@@ -2897,7 +2952,7 @@ static int __init genpd_debug_init(void)
 		return -ENOMEM;
 
 	d = debugfs_create_file("pm_genpd_summary", S_IRUGO,
-			genpd_debugfs_dir, NULL, &genpd_summary_fops);
+			genpd_debugfs_dir, NULL, &summary_fops);
 	if (!d)
 		return -ENOMEM;
 
@@ -2907,20 +2962,20 @@ static int __init genpd_debug_init(void)
 			return -ENOMEM;
 
 		debugfs_create_file("current_state", 0444,
-				d, genpd, &genpd_status_fops);
+				d, genpd, &status_fops);
 		debugfs_create_file("sub_domains", 0444,
-				d, genpd, &genpd_sub_domains_fops);
+				d, genpd, &sub_domains_fops);
 		debugfs_create_file("idle_states", 0444,
-				d, genpd, &genpd_idle_states_fops);
+				d, genpd, &idle_states_fops);
 		debugfs_create_file("active_time", 0444,
-				d, genpd, &genpd_active_time_fops);
+				d, genpd, &active_time_fops);
 		debugfs_create_file("total_idle_time", 0444,
-				d, genpd, &genpd_total_idle_time_fops);
+				d, genpd, &total_idle_time_fops);
 		debugfs_create_file("devices", 0444,
-				d, genpd, &genpd_devices_fops);
+				d, genpd, &devices_fops);
 		if (genpd->set_performance_state)
 			debugfs_create_file("perf_state", 0444,
-					    d, genpd, &genpd_perf_state_fops);
+					    d, genpd, &perf_state_fops);
 	}
 
 	return 0;
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index beb85c31f3fa..70624695b6d5 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -8,6 +8,8 @@
  */
 
 #include <linux/sched/mm.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
 #include <linux/export.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_wakeirq.h>
@@ -93,7 +95,7 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status)
 static void pm_runtime_deactivate_timer(struct device *dev)
 {
 	if (dev->power.timer_expires > 0) {
-		del_timer(&dev->power.suspend_timer);
+		hrtimer_cancel(&dev->power.suspend_timer);
 		dev->power.timer_expires = 0;
 	}
 }
@@ -124,12 +126,11 @@ static void pm_runtime_cancel_pending(struct device *dev)
  * This function may be called either with or without dev->power.lock held.
  * Either way it can be racy, since power.last_busy may be updated at any time.
  */
-unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
+u64 pm_runtime_autosuspend_expiration(struct device *dev)
 {
 	int autosuspend_delay;
-	long elapsed;
-	unsigned long last_busy;
-	unsigned long expires = 0;
+	u64 last_busy, expires = 0;
+	u64 now = ktime_to_ns(ktime_get());
 
 	if (!dev->power.use_autosuspend)
 		goto out;
@@ -139,19 +140,9 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
 		goto out;
 
 	last_busy = READ_ONCE(dev->power.last_busy);
-	elapsed = jiffies - last_busy;
-	if (elapsed < 0)
-		goto out;	/* jiffies has wrapped around. */
 
-	/*
-	 * If the autosuspend_delay is >= 1 second, align the timer by rounding
-	 * up to the nearest second.
-	 */
-	expires = last_busy + msecs_to_jiffies(autosuspend_delay);
-	if (autosuspend_delay >= 1000)
-		expires = round_jiffies(expires);
-	expires += !expires;
-	if (elapsed >= expires - last_busy)
+	expires = last_busy + (u64)autosuspend_delay * NSEC_PER_MSEC;
+	if (expires <= now)
 		expires = 0;	/* Already expired. */
 
  out:
@@ -515,7 +506,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	/* If the autosuspend_delay time hasn't expired yet, reschedule. */
 	if ((rpmflags & RPM_AUTO)
 	    && dev->power.runtime_status != RPM_SUSPENDING) {
-		unsigned long expires = pm_runtime_autosuspend_expiration(dev);
+		u64 expires = pm_runtime_autosuspend_expiration(dev);
 
 		if (expires != 0) {
 			/* Pending requests need to be canceled. */
@@ -528,10 +519,20 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 			 * expire; pm_suspend_timer_fn() will take care of the
 			 * rest.
 			 */
-			if (!(dev->power.timer_expires && time_before_eq(
-			    dev->power.timer_expires, expires))) {
+			if (!(dev->power.timer_expires &&
+					dev->power.timer_expires <= expires)) {
+				/*
+				 * We add a slack of 25% to gather wakeups
+				 * without sacrificing the granularity.
+				 */
+				u64 slack = (u64)READ_ONCE(dev->power.autosuspend_delay) *
+						    (NSEC_PER_MSEC >> 2);
+
 				dev->power.timer_expires = expires;
-				mod_timer(&dev->power.suspend_timer, expires);
+				hrtimer_start_range_ns(&dev->power.suspend_timer,
+						ns_to_ktime(expires),
+						slack,
+						HRTIMER_MODE_ABS);
 			}
 			dev->power.timer_autosuspends = 1;
 			goto out;
@@ -895,23 +896,25 @@ static void pm_runtime_work(struct work_struct *work)
  *
  * Check if the time is right and queue a suspend request.
  */
-static void pm_suspend_timer_fn(struct timer_list *t)
+static enum hrtimer_restart  pm_suspend_timer_fn(struct hrtimer *timer)
 {
-	struct device *dev = from_timer(dev, t, power.suspend_timer);
+	struct device *dev = container_of(timer, struct device, power.suspend_timer);
 	unsigned long flags;
-	unsigned long expires;
+	u64 expires;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
 
 	expires = dev->power.timer_expires;
 	/* If 'expire' is after 'jiffies' we've been called too early. */
-	if (expires > 0 && !time_after(expires, jiffies)) {
+	if (expires > 0 && expires <= ktime_to_ns(ktime_get())) {
 		dev->power.timer_expires = 0;
 		rpm_suspend(dev, dev->power.timer_autosuspends ?
 		    (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
 	}
 
 	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	return HRTIMER_NORESTART;
 }
 
 /**
@@ -922,6 +925,7 @@ static void pm_suspend_timer_fn(struct timer_list *t)
 int pm_schedule_suspend(struct device *dev, unsigned int delay)
 {
 	unsigned long flags;
+	ktime_t expires;
 	int retval;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
@@ -938,10 +942,10 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 	/* Other scheduled or pending requests need to be canceled. */
 	pm_runtime_cancel_pending(dev);
 
-	dev->power.timer_expires = jiffies + msecs_to_jiffies(delay);
-	dev->power.timer_expires += !dev->power.timer_expires;
+	expires = ktime_add(ktime_get(), ms_to_ktime(delay));
+	dev->power.timer_expires = ktime_to_ns(expires);
 	dev->power.timer_autosuspends = 0;
-	mod_timer(&dev->power.suspend_timer, dev->power.timer_expires);
+	hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS);
 
  out:
 	spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -1491,7 +1495,8 @@ void pm_runtime_init(struct device *dev)
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
-	timer_setup(&dev->power.suspend_timer, pm_suspend_timer_fn, 0);
+	hrtimer_init(&dev->power.suspend_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	dev->power.suspend_timer.function = pm_suspend_timer_fn;
 
 	init_waitqueue_head(&dev->power.wait_queue);
 }
diff --git a/drivers/clk/qcom/gcc-qcs404.c b/drivers/clk/qcom/gcc-qcs404.c
index ef1b267cb058..64da032bb9ed 100644
--- a/drivers/clk/qcom/gcc-qcs404.c
+++ b/drivers/clk/qcom/gcc-qcs404.c
@@ -297,7 +297,7 @@ static struct clk_alpha_pll gpll0_out_main = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gpll0_out_main",
 			.parent_names = (const char *[])
-					{ "gpll0_sleep_clk_src" },
+					{ "cxo" },
 			.num_parents = 1,
 			.ops = &clk_alpha_pll_ops,
 		},
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 4e1131ef85ae..688f10227793 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -114,6 +114,17 @@ config ARM_QCOM_CPUFREQ_KRYO
 
 	  If in doubt, say N.
 
+config ARM_QCOM_CPUFREQ_HW
+	tristate "QCOM CPUFreq HW driver"
+	depends on ARCH_QCOM || COMPILE_TEST
+	help
+	  Support for the CPUFreq HW driver.
+	  Some QCOM chipsets have a HW engine to offload the steps
+	  necessary for changing the frequency of the CPUs. Firmware loaded
+	  in this engine exposes a programming interface to the OS.
+	  The driver implements the cpufreq interface for this HW engine.
+	  Say Y if you want to support CPUFreq HW.
+
 config ARM_S3C_CPUFREQ
 	bool
 	help
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index d5ee4562ed06..08c071be2491 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
 obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ)	+= omap-cpufreq.o
 obj-$(CONFIG_ARM_PXA2xx_CPUFREQ)	+= pxa2xx-cpufreq.o
 obj-$(CONFIG_PXA3xx)			+= pxa3xx-cpufreq.o
+obj-$(CONFIG_ARM_QCOM_CPUFREQ_HW)	+= qcom-cpufreq-hw.o
 obj-$(CONFIG_ARM_QCOM_CPUFREQ_KRYO)	+= qcom-cpufreq-kryo.o
 obj-$(CONFIG_ARM_S3C2410_CPUFREQ)	+= s3c2410-cpufreq.o
 obj-$(CONFIG_ARM_S3C2412_CPUFREQ)	+= s3c2412-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index dbf82f36d270..33c309a08c64 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -123,8 +123,6 @@ static void nforce2_write_pll(int pll)
 	/* Now write the value in all 64 registers */
 	for (temp = 0; temp <= 0x3f; temp++)
 		pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
-
-	return;
 }
 
 /**
@@ -438,4 +436,3 @@ static void __exit nforce2_exit(void)
 
 module_init(nforce2_init);
 module_exit(nforce2_exit);
-
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index dd5440d3372d..80c5bf590acb 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 #include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <asm/io.h>
 #include <linux/uaccess.h>
 #include <asm/pal.h>
@@ -28,7 +27,6 @@ MODULE_AUTHOR("Venkatesh Pallipadi");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
 
-
 struct cpufreq_acpi_io {
 	struct acpi_processor_performance	acpi_data;
 	unsigned int				resume;
@@ -348,10 +346,7 @@ acpi_cpufreq_exit (void)
 	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
-	return;
 }
 
-
 late_initcall(acpi_cpufreq_init);
 module_exit(acpi_cpufreq_exit);
-
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index d8c3595e9023..9fedf627e000 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -177,22 +177,16 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 	/* scaling down?  scale voltage after frequency */
 	if (new_freq < old_freq) {
 		ret = regulator_set_voltage_tol(arm_reg, volt, 0);
-		if (ret) {
+		if (ret)
 			dev_warn(cpu_dev,
 				 "failed to scale vddarm down: %d\n", ret);
-			ret = 0;
-		}
 		ret = regulator_set_voltage_tol(soc_reg, imx6_soc_volt[index], 0);
-		if (ret) {
+		if (ret)
 			dev_warn(cpu_dev, "failed to scale vddsoc down: %d\n", ret);
-			ret = 0;
-		}
 		if (!IS_ERR(pu_reg)) {
 			ret = regulator_set_voltage_tol(pu_reg, imx6_soc_volt[index], 0);
-			if (ret) {
+			if (ret)
 				dev_warn(cpu_dev, "failed to scale vddpu down: %d\n", ret);
-				ret = 0;
-			}
 		}
 	}
 
@@ -411,9 +405,10 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	if (of_machine_is_compatible("fsl,imx6ul") ||
 	    of_machine_is_compatible("fsl,imx6ull")) {
 		ret = imx6ul_opp_check_speed_grading(cpu_dev);
-		if (ret == -EPROBE_DEFER)
-			return ret;
 		if (ret) {
+			if (ret == -EPROBE_DEFER)
+				return ret;
+
 			dev_err(cpu_dev, "failed to read ocotp: %d\n",
 				ret);
 			return ret;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9578312e43f2..106402b89961 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -830,6 +830,28 @@ skip_epp:
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
+static void intel_pstate_hwp_force_min_perf(int cpu)
+{
+	u64 value;
+	int min_perf;
+
+	value = all_cpu_data[cpu]->hwp_req_cached;
+	value &= ~GENMASK_ULL(31, 0);
+	min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);
+
+	/* Set hwp_max = hwp_min */
+	value |= HWP_MAX_PERF(min_perf);
+	value |= HWP_MIN_PERF(min_perf);
+
+	/* Set EPP/EPB to min */
+	if (static_cpu_has(X86_FEATURE_HWP_EPP))
+		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
+	else
+		intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);
+
+	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
+}
+
 static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu_data = all_cpu_data[policy->cpu];
@@ -2084,10 +2106,12 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 	pr_debug("CPU %d exiting\n", policy->cpu);
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
-	if (hwp_active)
+	if (hwp_active) {
 		intel_pstate_hwp_save_state(policy);
-	else
+		intel_pstate_hwp_force_min_perf(policy->cpu);
+	} else {
 		intel_cpufreq_stop_cpu(policy);
+	}
 }
 
 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index be623dd7b9f2..1d32a863332d 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -411,6 +411,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpunode)
 		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
 		pfunc_vdnap0_complete =
 			pmf_find_function(root, "slewing-done");
+		of_node_put(root);
 		if (pfunc_set_vdnap0 == NULL ||
 		    pfunc_vdnap0_complete == NULL) {
 			pr_err("Can't find required platform function\n");
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index bf6519cf64bc..7e7ad3879c4e 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -253,18 +253,18 @@ static int init_powernv_pstates(void)
 
 	if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
 		pr_warn("ibm,pstate-min node not found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
 		pr_warn("ibm,pstate-max node not found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
 				 &pstate_nominal)) {
 		pr_warn("ibm,pstate-nominal not found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	if (of_property_read_u32(power_mgt, "ibm,pstate-ultra-turbo",
@@ -293,14 +293,14 @@ next:
 	pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
 	if (!pstate_ids) {
 		pr_warn("ibm,pstate-ids not found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
 				      &len_freqs);
 	if (!pstate_freqs) {
 		pr_warn("ibm,pstate-frequencies-mhz not found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	if (len_ids != len_freqs) {
@@ -311,7 +311,7 @@ next:
 	nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
 	if (!nr_pstates) {
 		pr_warn("No PStates found\n");
-		return -ENODEV;
+		goto out;
 	}
 
 	powernv_pstate_info.nr_pstates = nr_pstates;
@@ -352,7 +352,12 @@ next:
 
 	/* End of list marker entry */
 	powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
+
+	of_node_put(power_mgt);
 	return 0;
+out:
+	of_node_put(power_mgt);
+	return -ENODEV;
 }
 
 /* Returns the CPU frequency corresponding to the pstate_id. */
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
new file mode 100644
index 000000000000..d83939a1b3d4
--- /dev/null
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#define LUT_MAX_ENTRIES			40U
+#define LUT_SRC				GENMASK(31, 30)
+#define LUT_L_VAL			GENMASK(7, 0)
+#define LUT_CORE_COUNT			GENMASK(18, 16)
+#define LUT_ROW_SIZE			32
+#define CLK_HW_DIV			2
+
+/* Register offsets */
+#define REG_ENABLE			0x0
+#define REG_LUT_TABLE			0x110
+#define REG_PERF_STATE			0x920
+
+static unsigned long cpu_hw_rate, xo_rate;
+static struct platform_device *global_pdev;
+
+static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
+					unsigned int index)
+{
+	void __iomem *perf_state_reg = policy->driver_data;
+
+	writel_relaxed(index, perf_state_reg);
+
+	return 0;
+}
+
+static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
+{
+	void __iomem *perf_state_reg;
+	struct cpufreq_policy *policy;
+	unsigned int index;
+
+	policy = cpufreq_cpu_get_raw(cpu);
+	if (!policy)
+		return 0;
+
+	perf_state_reg = policy->driver_data;
+
+	index = readl_relaxed(perf_state_reg);
+	index = min(index, LUT_MAX_ENTRIES - 1);
+
+	return policy->freq_table[index].frequency;
+}
+
+static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
+						unsigned int target_freq)
+{
+	void __iomem *perf_state_reg = policy->driver_data;
+	int index;
+
+	index = policy->cached_resolved_idx;
+	if (index < 0)
+		return 0;
+
+	writel_relaxed(index, perf_state_reg);
+
+	return policy->freq_table[index].frequency;
+}
+
+static int qcom_cpufreq_hw_read_lut(struct device *dev,
+				    struct cpufreq_policy *policy,
+				    void __iomem *base)
+{
+	u32 data, src, lval, i, core_count, prev_cc = 0, prev_freq = 0, freq;
+	unsigned int max_cores = cpumask_weight(policy->cpus);
+	struct cpufreq_frequency_table	*table;
+
+	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
+		data = readl_relaxed(base + REG_LUT_TABLE + i * LUT_ROW_SIZE);
+		src = FIELD_GET(LUT_SRC, data);
+		lval = FIELD_GET(LUT_L_VAL, data);
+		core_count = FIELD_GET(LUT_CORE_COUNT, data);
+
+		if (src)
+			freq = xo_rate * lval / 1000;
+		else
+			freq = cpu_hw_rate / 1000;
+
+		/* Ignore boosts in the middle of the table */
+		if (core_count != max_cores) {
+			table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		} else {
+			table[i].frequency = freq;
+			dev_dbg(dev, "index=%d freq=%d, core_count %d\n", i,
+				freq, core_count);
+		}
+
+		/*
+		 * Two of the same frequencies with the same core counts means
+		 * end of table
+		 */
+		if (i > 0 && prev_freq == freq && prev_cc == core_count) {
+			struct cpufreq_frequency_table *prev = &table[i - 1];
+
+			/*
+			 * Only treat the last frequency that might be a boost
+			 * as the boost frequency
+			 */
+			if (prev_cc != max_cores) {
+				prev->frequency = prev_freq;
+				prev->flags = CPUFREQ_BOOST_FREQ;
+			}
+
+			break;
+		}
+
+		prev_cc = core_count;
+		prev_freq = freq;
+	}
+
+	table[i].frequency = CPUFREQ_TABLE_END;
+	policy->freq_table = table;
+
+	return 0;
+}
+
+static void qcom_get_related_cpus(int index, struct cpumask *m)
+{
+	struct device_node *cpu_np;
+	struct of_phandle_args args;
+	int cpu, ret;
+
+	for_each_possible_cpu(cpu) {
+		cpu_np = of_cpu_device_node_get(cpu);
+		if (!cpu_np)
+			continue;
+
+		ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
+						 "#freq-domain-cells", 0,
+						 &args);
+		of_node_put(cpu_np);
+		if (ret < 0)
+			continue;
+
+		if (index == args.args[0])
+			cpumask_set_cpu(cpu, m);
+	}
+}
+
+static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
+{
+	struct device *dev = &global_pdev->dev;
+	struct of_phandle_args args;
+	struct device_node *cpu_np;
+	struct resource *res;
+	void __iomem *base;
+	int ret, index;
+
+	cpu_np = of_cpu_device_node_get(policy->cpu);
+	if (!cpu_np)
+		return -EINVAL;
+
+	ret = of_parse_phandle_with_args(cpu_np, "qcom,freq-domain",
+					 "#freq-domain-cells", 0, &args);
+	of_node_put(cpu_np);
+	if (ret)
+		return ret;
+
+	index = args.args[0];
+
+	res = platform_get_resource(global_pdev, IORESOURCE_MEM, index);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	/* HW should be in enabled state to proceed */
+	if (!(readl_relaxed(base + REG_ENABLE) & 0x1)) {
+		dev_err(dev, "Domain-%d cpufreq hardware not enabled\n", index);
+		ret = -ENODEV;
+		goto error;
+	}
+
+	qcom_get_related_cpus(index, policy->cpus);
+	if (!cpumask_weight(policy->cpus)) {
+		dev_err(dev, "Domain-%d failed to get related CPUs\n", index);
+		ret = -ENOENT;
+		goto error;
+	}
+
+	policy->driver_data = base + REG_PERF_STATE;
+
+	ret = qcom_cpufreq_hw_read_lut(dev, policy, base);
+	if (ret) {
+		dev_err(dev, "Domain-%d failed to read LUT\n", index);
+		goto error;
+	}
+
+	policy->fast_switch_possible = true;
+
+	return 0;
+error:
+	devm_iounmap(dev, base);
+	return ret;
+}
+
+static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
+{
+	void __iomem *base = policy->driver_data - REG_PERF_STATE;
+
+	kfree(policy->freq_table);
+	devm_iounmap(&global_pdev->dev, base);
+
+	return 0;
+}
+
+static struct freq_attr *qcom_cpufreq_hw_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	&cpufreq_freq_attr_scaling_boost_freqs,
+	NULL
+};
+
+static struct cpufreq_driver cpufreq_qcom_hw_driver = {
+	.flags		= CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
+			  CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
+	.verify		= cpufreq_generic_frequency_table_verify,
+	.target_index	= qcom_cpufreq_hw_target_index,
+	.get		= qcom_cpufreq_hw_get,
+	.init		= qcom_cpufreq_hw_cpu_init,
+	.exit		= qcom_cpufreq_hw_cpu_exit,
+	.fast_switch    = qcom_cpufreq_hw_fast_switch,
+	.name		= "qcom-cpufreq-hw",
+	.attr		= qcom_cpufreq_hw_attr,
+};
+
+static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
+{
+	struct clk *clk;
+	int ret;
+
+	clk = clk_get(&pdev->dev, "xo");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	xo_rate = clk_get_rate(clk);
+	clk_put(clk);
+
+	clk = clk_get(&pdev->dev, "alternate");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	cpu_hw_rate = clk_get_rate(clk) / CLK_HW_DIV;
+	clk_put(clk);
+
+	global_pdev = pdev;
+
+	ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver);
+	if (ret)
+		dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n");
+	else
+		dev_dbg(&pdev->dev, "QCOM CPUFreq HW driver initialized\n");
+
+	return ret;
+}
+
+static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
+{
+	return cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
+}
+
+static const struct of_device_id qcom_cpufreq_hw_match[] = {
+	{ .compatible = "qcom,cpufreq-hw" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match);
+
+static struct platform_driver qcom_cpufreq_hw_driver = {
+	.probe = qcom_cpufreq_hw_driver_probe,
+	.remove = qcom_cpufreq_hw_driver_remove,
+	.driver = {
+		.name = "qcom-cpufreq-hw",
+		.of_match_table = qcom_cpufreq_hw_match,
+	},
+};
+
+static int __init qcom_cpufreq_hw_init(void)
+{
+	return platform_driver_register(&qcom_cpufreq_hw_driver);
+}
+subsys_initcall(qcom_cpufreq_hw_init);
+
+static void __exit qcom_cpufreq_hw_exit(void)
+{
+	platform_driver_unregister(&qcom_cpufreq_hw_driver);
+}
+module_exit(qcom_cpufreq_hw_exit);
+
+MODULE_DESCRIPTION("QCOM CPUFREQ HW Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
index 4d976e8dbb2f..0df87b6480fe 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
@@ -63,18 +63,7 @@ static int board_show(struct seq_file *seq, void *p)
 	return 0;
 }
 
-static int fops_board_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, board_show, NULL);
-}
-
-static const struct file_operations fops_board = {
-	.open		= fops_board_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
+DEFINE_SHOW_ATTRIBUTE(board);
 
 static int info_show(struct seq_file *seq, void *p)
 {
@@ -105,18 +94,7 @@ static int info_show(struct seq_file *seq, void *p)
 	return 0;
 }
 
-static int fops_info_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, info_show, NULL);
-}
-
-static const struct file_operations fops_info = {
-	.open		= fops_info_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
+DEFINE_SHOW_ATTRIBUTE(info);
 
 static int io_show(struct seq_file *seq, void *p)
 {
@@ -162,19 +140,7 @@ static int io_show(struct seq_file *seq, void *p)
 	return 0;
 }
 
-static int fops_io_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, io_show, NULL);
-}
-
-static const struct file_operations fops_io = {
-	.open		= fops_io_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
-
+DEFINE_SHOW_ATTRIBUTE(io);
 
 static int __init s3c_freq_debugfs_init(void)
 {
@@ -185,13 +151,13 @@ static int __init s3c_freq_debugfs_init(void)
 	}
 
 	dbgfs_file_io = debugfs_create_file("io-timing", S_IRUGO, dbgfs_root,
-					    NULL, &fops_io);
+					    NULL, &io_fops);
 
 	dbgfs_file_info = debugfs_create_file("info", S_IRUGO, dbgfs_root,
-					      NULL, &fops_info);
+					      NULL, &info_fops);
 
 	dbgfs_file_board = debugfs_create_file("board", S_IRUGO, dbgfs_root,
-					       NULL, &fops_board);
+					       NULL, &board_fops);
 
 	return 0;
 }
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index db2ede565f1a..b44476a1b7ad 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -167,6 +167,7 @@ static int __init bl_idle_init(void)
 {
 	int ret;
 	struct device_node *root = of_find_node_by_path("/");
+	const struct of_device_id *match_id;
 
 	if (!root)
 		return -ENODEV;
@@ -174,7 +175,11 @@ static int __init bl_idle_init(void)
 	/*
 	 * Initialize the driver just for a compliant set of machines
 	 */
-	if (!of_match_node(compatible_machine_match, root))
+	match_id = of_match_node(compatible_machine_match, root);
+
+	of_node_put(root);
+
+	if (!match_id)
 		return -ENODEV;
 
 	if (!mcpm_is_available())
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 4a97446f66d8..7f108309e871 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -202,7 +202,6 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	struct cpuidle_state *target_state = &drv->states[index];
 	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
 	ktime_t time_start, time_end;
-	s64 diff;
 
 	/*
 	 * Tell the time framework to switch to a broadcast timer because our
@@ -248,6 +247,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		local_irq_enable();
 
 	if (entered_state >= 0) {
+		s64 diff, delay = drv->states[entered_state].exit_latency;
+		int i;
+
 		/*
 		 * Update cpuidle counters
 		 * This can be moved to within driver enter routine,
@@ -260,6 +262,33 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 		dev->last_residency = (int)diff;
 		dev->states_usage[entered_state].time += dev->last_residency;
 		dev->states_usage[entered_state].usage++;
+
+		if (diff < drv->states[entered_state].target_residency) {
+			for (i = entered_state - 1; i >= 0; i--) {
+				if (drv->states[i].disabled ||
+				    dev->states_usage[i].disable)
+					continue;
+
+				/* Shallower states are enabled, so update. */
+				dev->states_usage[entered_state].above++;
+				break;
+			}
+		} else if (diff > delay) {
+			for (i = entered_state + 1; i < drv->state_count; i++) {
+				if (drv->states[i].disabled ||
+				    dev->states_usage[i].disable)
+					continue;
+
+				/*
+				 * Update if a deeper state would have been a
+				 * better match for the observed idle duration.
+				 */
+				if (diff - delay >= drv->states[i].target_residency)
+					dev->states_usage[entered_state].below++;
+
+				break;
+			}
+		}
 	} else {
 		dev->last_residency = 0;
 	}
@@ -702,4 +731,5 @@ static int __init cpuidle_init(void)
 }
 
 module_param(off, int, 0444);
+module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444);
 core_initcall(cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 2965ab32a583..d6613101af92 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -7,6 +7,7 @@
 #define __DRIVER_CPUIDLE_H
 
 /* For internal use only */
+extern char param_governor[];
 extern struct cpuidle_governor *cpuidle_curr_governor;
 extern struct list_head cpuidle_governors;
 extern struct list_head cpuidle_detected_devices;
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 9fed1b829292..bb93e5cf6a4a 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -11,10 +11,13 @@
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
 #include <linux/mutex.h>
+#include <linux/module.h>
 #include <linux/pm_qos.h>
 
 #include "cpuidle.h"
 
+char param_governor[CPUIDLE_NAME_LEN];
+
 LIST_HEAD(cpuidle_governors);
 struct cpuidle_governor *cpuidle_curr_governor;
 
@@ -86,9 +89,17 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 	mutex_lock(&cpuidle_lock);
 	if (__cpuidle_find_governor(gov->name) == NULL) {
 		ret = 0;
 		list_add_tail(&gov->governor_list, &cpuidle_governors);
+		/*
+		 * Switch to the new governor when none is active yet, when it
+		 * is the one named by the "governor" parameter, or when it
+		 * outranks a current governor that was not named by it.
+		 */
 		if (!cpuidle_curr_governor ||
-		    cpuidle_curr_governor->rating < gov->rating)
+		    !strncasecmp(param_governor, gov->name, CPUIDLE_NAME_LEN) ||
+		    (cpuidle_curr_governor->rating < gov->rating &&
+		     strncasecmp(param_governor, cpuidle_curr_governor->name,
+				 CPUIDLE_NAME_LEN)))
 			cpuidle_switch_governor(gov);
 	}
 	mutex_unlock(&cpuidle_lock);
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 85792d371add..b17d153e724f 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -20,8 +20,21 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
-		u64 limit = (u64)drv->states[1].target_residency * NSEC_PER_USEC;
 		unsigned int loop_count = 0;
+		/*
+		 * Default to one tick when no deeper state is enabled; keep it
+		 * in nanoseconds, the unit used for the per-state limit below.
+		 */
+		u64 limit = (u64)TICK_USEC * NSEC_PER_USEC;
+		int i;
+
+		for (i = 1; i < drv->state_count; i++) {
+			if (drv->states[i].disabled || dev->states_usage[i].disable)
+				continue;
+
+			limit = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+			break;
+		}
 
 		while (!need_resched()) {
 			cpu_relax();
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index e754c7aae7f7..eb20adb5de23 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -301,6 +301,8 @@ define_show_state_str_function(name)
 define_show_state_str_function(desc)
 define_show_state_ull_function(disable)
 define_store_state_ull_function(disable)
+define_show_state_ull_function(above)
+define_show_state_ull_function(below)
 
 define_one_state_ro(name, show_state_name);
 define_one_state_ro(desc, show_state_desc);
@@ -310,6 +312,8 @@ define_one_state_ro(power, show_state_power_usage);
 define_one_state_ro(usage, show_state_usage);
 define_one_state_ro(time, show_state_time);
 define_one_state_rw(disable, show_state_disable, store_state_disable);
+define_one_state_ro(above, show_state_above);
+define_one_state_ro(below, show_state_below);
 
 static struct attribute *cpuidle_state_default_attrs[] = {
 	&attr_name.attr,
@@ -320,6 +324,8 @@ static struct attribute *cpuidle_state_default_attrs[] = {
 	&attr_usage.attr,
 	&attr_time.attr,
 	&attr_disable.attr,
+	&attr_above.attr,
+	&attr_below.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 8816c697b205..387f1cf1dc20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -330,7 +330,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 			case CHIP_TOPAZ:
 				if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
 				    ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
-				    ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) {
+				    ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) ||
+				    ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
+				    ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
 					info->is_kicker = true;
 					strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
 				} else
@@ -351,7 +353,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 				if (type == CGS_UCODE_ID_SMU) {
 					if (((adev->pdev->device == 0x67ef) &&
 					     ((adev->pdev->revision == 0xe0) ||
-					      (adev->pdev->revision == 0xe2) ||
 					      (adev->pdev->revision == 0xe5))) ||
 					    ((adev->pdev->device == 0x67ff) &&
 					     ((adev->pdev->revision == 0xcf) ||
@@ -359,8 +360,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					      (adev->pdev->revision == 0xff)))) {
 						info->is_kicker = true;
 						strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
-					} else
+					} else if ((adev->pdev->device == 0x67ef) &&
+						   (adev->pdev->revision == 0xe2)) {
+						info->is_kicker = true;
+						strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+					} else {
 						strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+					}
 				} else if (type == CGS_UCODE_ID_SMU_SK) {
 					strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
 				}
@@ -375,17 +381,35 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					      (adev->pdev->revision == 0xe7) ||
 					      (adev->pdev->revision == 0xef))) ||
 					    ((adev->pdev->device == 0x6fdf) &&
-					     (adev->pdev->revision == 0xef))) {
+					     ((adev->pdev->revision == 0xef) ||
+					      (adev->pdev->revision == 0xff)))) {
 						info->is_kicker = true;
 						strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
-					} else
+					} else if ((adev->pdev->device == 0x67df) &&
+						   ((adev->pdev->revision == 0xe1) ||
+						    (adev->pdev->revision == 0xf7))) {
+						info->is_kicker = true;
+						strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+					} else {
 						strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+					}
 				} else if (type == CGS_UCODE_ID_SMU_SK) {
 					strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
 				}
 				break;
 			case CHIP_POLARIS12:
-				strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+				if (((adev->pdev->device == 0x6987) &&
+				     ((adev->pdev->revision == 0xc0) ||
+				      (adev->pdev->revision == 0xc3))) ||
+				    ((adev->pdev->device == 0x6981) &&
+				     ((adev->pdev->revision == 0x00) ||
+				      (adev->pdev->revision == 0x01) ||
+				      (adev->pdev->revision == 0x10)))) {
+					info->is_kicker = true;
+					strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+				} else {
+					strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+				}
 				break;
 			case CHIP_VEGAM:
 				strcpy(fw_name, "amdgpu/vegam_smc.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 663043c8f0f5..0acc8dee2cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -124,14 +124,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* skip guilty context job */
 	if (atomic_read(&p->ctx->guilty) == 1) {
 		ret = -ECANCELED;
 		goto free_chunk;
 	}
 
-	mutex_lock(&p->ctx->lock);
-
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8de55f7f1a3a..74b611e8a1b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -872,7 +872,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x6869, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	/* Vega 12 */
 	{0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
@@ -885,6 +891,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	/* Raven */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index a9f18ea7e354..e4ded890b1cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -337,12 +337,19 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x6864, &vega10_device_info },	/* Vega10 */
 	{ 0x6867, &vega10_device_info },	/* Vega10 */
 	{ 0x6868, &vega10_device_info },	/* Vega10 */
+	{ 0x6869, &vega10_device_info },	/* Vega10 */
+	{ 0x686A, &vega10_device_info },	/* Vega10 */
+	{ 0x686B, &vega10_device_info },	/* Vega10 */
 	{ 0x686C, &vega10_vf_device_info },	/* Vega10  vf*/
+	{ 0x686D, &vega10_device_info },	/* Vega10 */
+	{ 0x686E, &vega10_device_info },	/* Vega10 */
+	{ 0x686F, &vega10_device_info },	/* Vega10 */
 	{ 0x687F, &vega10_device_info },	/* Vega10 */
 	{ 0x66a0, &vega20_device_info },	/* Vega20 */
 	{ 0x66a1, &vega20_device_info },	/* Vega20 */
 	{ 0x66a2, &vega20_device_info },	/* Vega20 */
 	{ 0x66a3, &vega20_device_info },	/* Vega20 */
+	{ 0x66a4, &vega20_device_info },	/* Vega20 */
 	{ 0x66a7, &vega20_device_info },	/* Vega20 */
 	{ 0x66af, &vega20_device_info }		/* Vega20 */
 };
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 3367dd30cdd0..3b7fce5d7258 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -130,7 +130,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	data->registry_data.disable_auto_wattman = 1;
 	data->registry_data.auto_wattman_debug = 0;
 	data->registry_data.auto_wattman_sample_period = 100;
-	data->registry_data.fclk_gfxclk_ratio = 0x3F6CCCCD;
+	data->registry_data.fclk_gfxclk_ratio = 0;
 	data->registry_data.auto_wattman_threshold = 50;
 	data->registry_data.gfxoff_controlled_by_driver = 1;
 	data->gfxoff_allowed = false;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
index 62f36ba2435b..c1a99dfe4913 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
@@ -386,6 +386,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_AgmResetPsm                 ((uint16_t) 0x403)
 #define PPSMC_MSG_ReadVftCell                 ((uint16_t) 0x404)
 
+#define PPSMC_MSG_ApplyAvfsCksOffVoltage      ((uint16_t) 0x415)
+
 #define PPSMC_MSG_GFX_CU_PG_ENABLE            ((uint16_t) 0x280)
 #define PPSMC_MSG_GFX_CU_PG_DISABLE           ((uint16_t) 0x281)
 #define PPSMC_MSG_GetCurrPkgPwr               ((uint16_t) 0x282)
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 872d3824337b..a1e0ac9ae248 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -1985,6 +1985,12 @@ int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
 
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
 
+	/* Apply avfs cks-off voltages to avoid the overshoot
+	 * when switching to the highest sclk frequency
+	 */
+	if (data->apply_avfs_cks_off_voltage)
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index 99d5e4f98f49..a6edd5df33b0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -37,10 +37,13 @@ MODULE_FIRMWARE("amdgpu/fiji_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_k2_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_k2_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_smc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/vegam_smc.bin");
 MODULE_FIRMWARE("amdgpu/vega10_smc.bin");
 MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin");
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
index 481896fb712a..85e6736f0a32 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -235,7 +235,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
 		plane->bpp = skl_pixel_formats[fmt].bpp;
 		plane->drm_format = skl_pixel_formats[fmt].drm_format;
 	} else {
-		plane->tiled = !!(val & DISPPLANE_TILED);
+		plane->tiled = val & DISPPLANE_TILED;
 		fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK);
 		plane->bpp = bdw_pixel_formats[fmt].bpp;
 		plane->drm_format = bdw_pixel_formats[fmt].drm_format;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ffdbbac4400e..47062ee979cf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1444,6 +1444,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_uncore_sanitize(dev_priv);
 
+	intel_gt_init_workarounds(dev_priv);
 	i915_gem_load_init_fences(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9102571e9692..872a2e159a5f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -67,6 +67,7 @@
 #include "intel_ringbuffer.h"
 #include "intel_uncore.h"
 #include "intel_wopcm.h"
+#include "intel_workarounds.h"
 #include "intel_uc.h"
 
 #include "i915_gem.h"
@@ -1805,6 +1806,7 @@ struct drm_i915_private {
 	int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
 
 	struct i915_workarounds workarounds;
+	struct i915_wa_list gt_wa_list;
 
 	struct i915_frontbuffer_tracking fb_tracking;
 
@@ -2148,6 +2150,8 @@ struct drm_i915_private {
 		struct delayed_work idle_work;
 
 		ktime_t last_init_time;
+
+		struct i915_vma *scratch;
 	} gt;
 
 	/* perform PHY state sanity checks? */
@@ -3870,4 +3874,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
 		return I915_HWS_CSB_WRITE_INDEX;
 }
 
+static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
+{
+	return i915_ggtt_offset(i915->gt.scratch);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0c8aa57ce83b..6ae9a6080cc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5305,7 +5305,7 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	intel_gt_workarounds_apply(dev_priv);
+	intel_gt_apply_workarounds(dev_priv);
 
 	i915_gem_init_swizzling(dev_priv);
 
@@ -5500,6 +5500,44 @@ err_active:
 	goto out_ctx;
 }
 
+static int
+i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	obj = i915_gem_object_create_stolen(i915, size);
+	if (!obj)
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj)) {
+		DRM_ERROR("Failed to allocate scratch page\n");
+		return PTR_ERR(obj);
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (ret)
+		goto err_unref;
+
+	i915->gt.scratch = vma;
+	return 0;
+
+err_unref:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+static void i915_gem_fini_scratch(struct drm_i915_private *i915)
+{
+	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -5546,12 +5584,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_scratch(dev_priv,
+				    IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_ggtt;
 	}
 
+	ret = i915_gem_contexts_init(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_scratch;
+	}
+
 	ret = intel_engines_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
@@ -5624,6 +5669,8 @@ err_pm:
 err_context:
 	if (ret != -EIO)
 		i915_gem_contexts_fini(dev_priv);
+err_scratch:
+	i915_gem_fini_scratch(dev_priv);
 err_ggtt:
 err_unlock:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -5675,8 +5722,11 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
+	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	intel_wa_list_free(&dev_priv->gt_wa_list);
+
 	intel_cleanup_gt_powersave(dev_priv);
 
 	intel_uc_fini_misc(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d4fac09095f8..1aaccbe7e1de 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1268,7 +1268,7 @@ relocate_entry(struct i915_vma *vma,
 		else if (gen >= 4)
 			len = 4;
 		else
-			len = 6;
+			len = 3;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
@@ -1309,11 +1309,6 @@ relocate_entry(struct i915_vma *vma,
 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 			*batch++ = addr;
 			*batch++ = target_offset;
-
-			/* And again for good measure (blb/pnv) */
-			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-			*batch++ = addr;
-			*batch++ = target_offset;
 		}
 
 		goto out;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3eb33e000d6f..db4128d6c09b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1495,7 +1495,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			if (HAS_BROKEN_CS_TLB(i915))
 				ee->wa_batchbuffer =
 					i915_error_object_create(i915,
-								 engine->scratch);
+								 i915->gt.scratch);
 			request_record_user_bo(request, ee);
 
 			ee->ctx =
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 217ed3ee1cab..76b5f94ea6cb 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -490,46 +490,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 }
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size)
-{
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	int ret;
-
-	WARN_ON(engine->scratch);
-
-	obj = i915_gem_object_create_stolen(engine->i915, size);
-	if (!obj)
-		obj = i915_gem_object_create_internal(engine->i915, size);
-	if (IS_ERR(obj)) {
-		DRM_ERROR("Failed to allocate scratch page\n");
-		return PTR_ERR(obj);
-	}
-
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unref;
-	}
-
-	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (ret)
-		goto err_unref;
-
-	engine->scratch = vma;
-	return 0;
-
-err_unref:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
-{
-	i915_vma_unpin_and_release(&engine->scratch, 0);
-}
-
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
@@ -704,8 +664,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	intel_engine_cleanup_scratch(engine);
-
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -720,6 +678,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
+
+	intel_wa_list_free(&engine->wa_list);
 }
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 37c94a54efcb..58d1d3d47dd3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -442,8 +442,13 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * may not be visible to the HW prior to the completion of the UC
 	 * register write and that we may begin execution from the context
 	 * before its image is complete leading to invalid PD chasing.
+	 *
+	 * Furthermore, Braswell, at least, wants a full mb to be sure that
+	 * the writes are coherent in memory (visible to the GPU) prior to
+	 * execution, and not just visible to other CPUs (as is the result of
+	 * wmb).
 	 */
-	wmb();
+	mb();
 	return ce->lrc_desc;
 }
 
@@ -1443,9 +1448,10 @@ static int execlists_request_alloc(struct i915_request *request)
 static u32 *
 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
+	/* NB no one else is allowed to scribble over scratch + 256! */
 	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	*batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1459,7 +1465,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 
 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = i915_scratch_offset(engine->i915) + 256;
 	*batch++ = 0;
 
 	return batch;
@@ -1496,7 +1502,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 				       PIPE_CONTROL_GLOBAL_GTT_IVB |
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_QW_WRITE,
-				       i915_ggtt_offset(engine->scratch) +
+				       i915_scratch_offset(engine->i915) +
 				       2 * CACHELINE_BYTES);
 
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1573,7 +1579,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 					       PIPE_CONTROL_GLOBAL_GTT_IVB |
 					       PIPE_CONTROL_CS_STALL |
 					       PIPE_CONTROL_QW_WRITE,
-					       i915_ggtt_offset(engine->scratch)
+					       i915_scratch_offset(engine->i915)
 					       + 2 * CACHELINE_BYTES);
 	}
 
@@ -1793,6 +1799,8 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
 
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
+	intel_engine_apply_workarounds(engine);
+
 	intel_mocs_init_engine(engine);
 
 	intel_engine_reset_breadcrumbs(engine);
@@ -2139,7 +2147,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
 {
 	struct intel_engine_cs *engine = request->engine;
 	u32 scratch_addr =
-		i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+		i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
 	bool vf_flush_wa = false, dc_flush_wa = false;
 	u32 *cs, flags = 0;
 	int len;
@@ -2476,10 +2484,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
-	if (ret)
-		goto err_cleanup_common;
-
 	ret = intel_init_workaround_bb(engine);
 	if (ret) {
 		/*
@@ -2491,11 +2495,9 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 			  ret);
 	}
 
-	return 0;
+	intel_engine_init_workarounds(engine);
 
-err_cleanup_common:
-	intel_engine_cleanup_common(engine);
-	return ret;
+	return 0;
 }
 
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 187bb0ceb4ac..1f8d2a66c791 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -69,19 +69,28 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 static int
 gen2_render_ring_flush(struct i915_request *rq, u32 mode)
 {
+	unsigned int num_store_dw;
 	u32 cmd, *cs;
 
 	cmd = MI_FLUSH;
-
+	num_store_dw = 0;
 	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_READ_FLUSH;
+	if (mode & EMIT_FLUSH)
+		num_store_dw = 4;
 
-	cs = intel_ring_begin(rq, 2);
+	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
 	*cs++ = cmd;
-	*cs++ = MI_NOOP;
+	while (num_store_dw--) {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*cs++ = i915_scratch_offset(rq->i915);
+		*cs++ = 0;
+	}
+	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
 	intel_ring_advance(rq, cs);
 
 	return 0;
@@ -150,8 +159,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 	 */
 	if (mode & EMIT_INVALIDATE) {
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 
@@ -159,8 +167,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 			*cs++ = MI_FLUSH;
 
 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
-		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
-			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -212,8 +219,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 static int
 intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
 
 	cs = intel_ring_begin(rq, 6);
@@ -246,8 +252,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
 static int
 gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 	int ret;
 
@@ -316,8 +321,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 static int
 gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 scratch_addr =
-		i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs, flags = 0;
 
 	/*
@@ -971,7 +975,7 @@ i965_emit_bb_start(struct i915_request *rq,
 }
 
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
-#define I830_BATCH_LIMIT (256*1024)
+#define I830_BATCH_LIMIT SZ_256K
 #define I830_TLB_ENTRIES (2)
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
@@ -979,7 +983,9 @@ i830_emit_bb_start(struct i915_request *rq,
 		   u64 offset, u32 len,
 		   unsigned int dispatch_flags)
 {
-	u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
+	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+
+	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -1437,7 +1443,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
 	struct intel_ring *ring;
-	unsigned int size;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1462,21 +1467,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 	GEM_BUG_ON(engine->buffer);
 	engine->buffer = ring;
 
-	size = PAGE_SIZE;
-	if (HAS_BROKEN_CS_TLB(engine->i915))
-		size = I830_WA_SIZE;
-	err = intel_engine_create_scratch(engine, size);
-	if (err)
-		goto err_unpin;
-
 	err = intel_engine_init_common(engine);
 	if (err)
-		goto err_scratch;
+		goto err_unpin;
 
 	return 0;
 
-err_scratch:
-	intel_engine_cleanup_scratch(engine);
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
@@ -1550,7 +1546,7 @@ static int flush_pd_dir(struct i915_request *rq)
 	/* Stall until the page table load is complete */
 	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-	*cs++ = i915_ggtt_offset(engine->scratch);
+	*cs++ = i915_scratch_offset(rq->i915);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -1659,7 +1655,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			/* Insert a delay before the next switch! */
 			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = i915_ggtt_offset(engine->scratch);
+			*cs++ = i915_scratch_offset(rq->i915);
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2dfa585712c2..767a7192c969 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -15,6 +15,7 @@
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 #include "intel_gpu_commands.h"
+#include "intel_workarounds.h"
 
 struct drm_printer;
 struct i915_sched_attr;
@@ -440,7 +441,7 @@ struct intel_engine_cs {
 
 	struct intel_hw_status_page status_page;
 	struct i915_ctx_workarounds wa_ctx;
-	struct i915_vma *scratch;
+	struct i915_wa_list wa_list;
 
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
@@ -898,10 +899,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine,
-				unsigned int size);
-void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
-
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 4bcdeaf8d98f..6e580891db96 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -48,6 +48,20 @@
  * - Public functions to init or apply the given workaround type.
  */
 
+static void wa_init_start(struct i915_wa_list *wal, const char *name)
+{
+	wal->name = name;
+}
+
+static void wa_init_finish(struct i915_wa_list *wal)
+{
+	if (!wal->count)
+		return;
+
+	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
+			 wal->count, wal->name);
+}
+
 static void wa_add(struct drm_i915_private *i915,
 		   i915_reg_t reg, const u32 mask, const u32 val)
 {
@@ -580,160 +594,175 @@ int intel_ctx_workarounds_emit(struct i915_request *rq)
 	return 0;
 }
 
-static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+/* Append a copy of @wa to @wal, growing the array 16 entries at a time. */
+static void wal_add(struct i915_wa_list *wal, const struct i915_wa *wa)
+{
+	const unsigned int grow = 1 << 4;
+
+	GEM_BUG_ON(!is_power_of_2(grow));
+
+	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
+		struct i915_wa *list;
+
+		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
+				     GFP_KERNEL);
+		if (!list) {
+			DRM_ERROR("No space for workaround init!\n");
+			return;
+		}
+
+		if (wal->list)
+			memcpy(list, wal->list, sizeof(*wa) * wal->count);
+		kfree(wal->list); /* free the outgrown array */
+		wal->list = list;
+	}
+
+	wal->list[wal->count++] = *wa;
+}
+
+static void
+wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+	struct i915_wa wa = {
+		.reg = reg,
+		.mask = val,
+		.val = _MASKED_BIT_ENABLE(val)
+	};
+
+	wal_add(wal, &wa);
+}
+
+static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+		   u32 val)
 {
+	struct i915_wa wa = {
+		.reg = reg,
+		.mask = mask,
+		.val = val
+	};
+
+	wal_add(wal, &wa);
 }
 
-static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void
+wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
+	wa_write_masked_or(wal, reg, ~0, val);
 }
 
-static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void
+wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
-		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+	wa_write_masked_or(wal, reg, val, val);
+}
 
-	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+static void gen9_gt_workarounds_init(struct drm_i915_private *i915)
+{
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
 	/* WaDisableKillLogic:bxt,skl,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   ECOCHK_DIS_TLB);
+	if (!IS_COFFEELAKE(i915))
+		wa_write_or(wal,
+			    GAM_ECOCHK,
+			    ECOCHK_DIS_TLB);
 
-	if (HAS_LLC(dev_priv)) {
+	if (HAS_LLC(i915)) {
 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 		 *
 		 * Must match Display Engine. See
 		 * WaCompressedResourceDisplayNewHashMode.
 		 */
-		I915_WRITE(MMCD_MISC_CTRL,
-			   I915_READ(MMCD_MISC_CTRL) |
-			   MMCD_PCLA |
-			   MMCD_HOTSPOT_EN);
+		wa_write_or(wal,
+			    MMCD_MISC_CTRL,
+			    MMCD_PCLA | MMCD_HOTSPOT_EN);
 	}
 
 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   BDW_DISABLE_HDC_INVALIDATION);
-
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 val = I915_READ(GEN8_L3SQCREG1);
-
-		val &= ~L3_PRIO_CREDITS_MASK;
-		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-		I915_WRITE(GEN8_L3SQCREG1, val);
-	}
-
-	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-	I915_WRITE(GEN8_L3SQCREG4,
-		   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	wa_write_or(wal,
+		    GAM_ECOCHK,
+		    BDW_DISABLE_HDC_INVALIDATION);
 }
 
-static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void skl_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	gen9_gt_workarounds_apply(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
-	/* WaEnableGapsTsvCreditFix:skl */
-	I915_WRITE(GEN8_GARBCNTL,
-		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+	gen9_gt_workarounds_init(i915);
 
 	/* WaDisableGafsUnitClkGating:skl */
-	I915_WRITE(GEN7_UCGCTL4,
-		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+	wa_write_or(wal,
+		    GEN7_UCGCTL4,
+		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:skl */
-	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-			   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
+		wa_write_or(wal,
+			    GEN9_GAMT_ECO_REG_RW_IA,
+			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
-static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void bxt_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	gen9_gt_workarounds_apply(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
-	/* WaDisablePooledEuLoadBalancingFix:bxt */
-	I915_WRITE(FF_SLICE_CS_CHICKEN2,
-		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
+	gen9_gt_workarounds_init(i915);
 
 	/* WaInPlaceDecompressionHang:bxt */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	wa_write_or(wal,
+		    GEN9_GAMT_ECO_REG_RW_IA,
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
-static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void kbl_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	gen9_gt_workarounds_apply(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
-	/* WaEnableGapsTsvCreditFix:kbl */
-	I915_WRITE(GEN8_GARBCNTL,
-		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+	gen9_gt_workarounds_init(i915);
 
 	/* WaDisableDynamicCreditSharing:kbl */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   I915_READ(GAMT_CHKN_BIT_REG) |
-			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
+		wa_write_or(wal,
+			    GAMT_CHKN_BIT_REG,
+			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
 
 	/* WaDisableGafsUnitClkGating:kbl */
-	I915_WRITE(GEN7_UCGCTL4,
-		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+	wa_write_or(wal,
+		    GEN7_UCGCTL4,
+		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:kbl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	/* WaKBLVECSSemaphoreWaitPoll:kbl */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
-		struct intel_engine_cs *engine;
-		unsigned int tmp;
-
-		for_each_engine(engine, dev_priv, tmp) {
-			if (engine->id == RCS)
-				continue;
-
-			I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
-		}
-	}
+	wa_write_or(wal,
+		    GEN9_GAMT_ECO_REG_RW_IA,
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
-static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void glk_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	gen9_gt_workarounds_apply(dev_priv);
+	gen9_gt_workarounds_init(i915);
 }
 
-static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void cfl_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	gen9_gt_workarounds_apply(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
-	/* WaEnableGapsTsvCreditFix:cfl */
-	I915_WRITE(GEN8_GARBCNTL,
-		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+	gen9_gt_workarounds_init(i915);
 
 	/* WaDisableGafsUnitClkGating:cfl */
-	I915_WRITE(GEN7_UCGCTL4,
-		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+	wa_write_or(wal,
+		    GEN7_UCGCTL4,
+		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:cfl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	wa_write_or(wal,
+		    GEN9_GAMT_ECO_REG_RW_IA,
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
 static void wa_init_mcr(struct drm_i915_private *dev_priv)
 {
 	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
-	u32 mcr;
+	struct i915_wa_list *wal = &dev_priv->gt_wa_list;
 	u32 mcr_slice_subslice_mask;
 
 	/*
@@ -770,8 +799,6 @@ static void wa_init_mcr(struct drm_i915_private *dev_priv)
 		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
 	}
 
-	mcr = I915_READ(GEN8_MCR_SELECTOR);
-
 	if (INTEL_GEN(dev_priv) >= 11)
 		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
 					  GEN11_MCR_SUBSLICE_MASK;
@@ -789,148 +816,170 @@ static void wa_init_mcr(struct drm_i915_private *dev_priv)
 	 * occasions, such as INSTDONE, where this value is dependent
 	 * on s/ss combo, the read should be done with read_subslice_reg.
 	 */
-	mcr &= ~mcr_slice_subslice_mask;
-	mcr |= intel_calculate_mcr_s_ss_select(dev_priv);
-	I915_WRITE(GEN8_MCR_SELECTOR, mcr);
+	wa_write_masked_or(wal,
+			   GEN8_MCR_SELECTOR,
+			   mcr_slice_subslice_mask,
+			   intel_calculate_mcr_s_ss_select(dev_priv));
 }
 
-static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void cnl_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	wa_init_mcr(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
+
+	wa_init_mcr(i915);
 
 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   I915_READ(GAMT_CHKN_BIT_REG) |
-			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
+		wa_write_or(wal,
+			    GAMT_CHKN_BIT_REG,
+			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
 
 	/* WaInPlaceDecompressionHang:cnl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	/* WaEnablePreemptionGranularityControlByUMD:cnl */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	wa_write_or(wal,
+		    GEN9_GAMT_ECO_REG_RW_IA,
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
-static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+static void icl_gt_workarounds_init(struct drm_i915_private *i915)
 {
-	wa_init_mcr(dev_priv);
+	struct i915_wa_list *wal = &i915->gt_wa_list;
 
-	/* This is not an Wa. Enable for better image quality */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
+	wa_init_mcr(i915);
 
 	/* WaInPlaceDecompressionHang:icl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-					    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
-
-	/* WaPipelineFlushCoherentLines:icl */
-	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
-				   GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-	/* Wa_1405543622:icl
-	 * Formerly known as WaGAPZPriorityScheme
-	 */
-	I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
-				  GEN11_ARBITRATION_PRIO_ORDER_MASK);
-
-	/* Wa_1604223664:icl
-	 * Formerly known as WaL3BankAddressHashing
-	 */
-	I915_WRITE(GEN8_GARBCNTL,
-		   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
-		   GEN11_HASH_CTRL_EXCL_BIT0);
-	I915_WRITE(GEN11_GLBLINVL,
-		   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
-		   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
+	wa_write_or(wal,
+		    GEN9_GAMT_ECO_REG_RW_IA,
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 
 	/* WaModifyGamTlbPartitioning:icl */
-	I915_WRITE(GEN11_GACB_PERF_CTRL,
-		   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
-		   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
-
-	/* Wa_1405733216:icl
-	 * Formerly known as WaDisableCleanEvicts
-	 */
-	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
-				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
+	wa_write_masked_or(wal,
+			   GEN11_GACB_PERF_CTRL,
+			   GEN11_HASH_CTRL_MASK,
+			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
 
 	/* Wa_1405766107:icl
 	 * Formerly known as WaCL2SFHalfMaxAlloc
 	 */
-	I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
-				      GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
-				      GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
+	wa_write_or(wal,
+		    GEN11_LSN_UNSLCVC,
+		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
+		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
 
 	/* Wa_220166154:icl
 	 * Formerly known as WaDisCtxReload
 	 */
-	I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
-					   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
+	wa_write_or(wal,
+		    GEN8_GAMW_ECO_DEV_RW_IA,
+		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
 
 	/* Wa_1405779004:icl (pre-prod) */
-	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
-		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
-			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
-			   MSCUNIT_CLKGATE_DIS);
+	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+		wa_write_or(wal,
+			    SLICE_UNIT_LEVEL_CLKGATE,
+			    MSCUNIT_CLKGATE_DIS);
 
 	/* Wa_1406680159:icl */
-	I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
-		   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
-		   GWUNIT_CLKGATE_DIS);
-
-	/* Wa_1604302699:icl */
-	I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
-		   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
-		   GEN11_I2M_WRITE_DISABLE);
+	wa_write_or(wal,
+		    SUBSLICE_UNIT_LEVEL_CLKGATE,
+		    GWUNIT_CLKGATE_DIS);
 
 	/* Wa_1406838659:icl (pre-prod) */
-	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
-		I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
-			   I915_READ(INF_UNIT_LEVEL_CLKGATE) |
-			   CGPSF_CLKGATE_DIS);
-
-	/* WaForwardProgressSoftReset:icl */
-	I915_WRITE(GEN10_SCRATCH_LNCF2,
-		   I915_READ(GEN10_SCRATCH_LNCF2) |
-		   PMFLUSHDONE_LNICRSDROP |
-		   PMFLUSH_GAPL3UNBLOCK |
-		   PMFLUSHDONE_LNEBLK);
+	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+		wa_write_or(wal,
+			    INF_UNIT_LEVEL_CLKGATE,
+			    CGPSF_CLKGATE_DIS);
 
 	/* Wa_1406463099:icl
 	 * Formerly known as WaGamTlbPendError
 	 */
-	I915_WRITE(GAMT_CHKN_BIT_REG,
-		   I915_READ(GAMT_CHKN_BIT_REG) |
-		   GAMT_CHKN_DISABLE_L3_COH_PIPE);
+	wa_write_or(wal,
+		    GAMT_CHKN_BIT_REG,
+		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
 }
 
-void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+void intel_gt_init_workarounds(struct drm_i915_private *i915)
 {
-	if (INTEL_GEN(dev_priv) < 8)
+	struct i915_wa_list *wal = &i915->gt_wa_list;
+
+	wa_init_start(wal, "GT");
+
+	if (INTEL_GEN(i915) < 8)
 		return;
-	else if (IS_BROADWELL(dev_priv))
-		bdw_gt_workarounds_apply(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		chv_gt_workarounds_apply(dev_priv);
-	else if (IS_SKYLAKE(dev_priv))
-		skl_gt_workarounds_apply(dev_priv);
-	else if (IS_BROXTON(dev_priv))
-		bxt_gt_workarounds_apply(dev_priv);
-	else if (IS_KABYLAKE(dev_priv))
-		kbl_gt_workarounds_apply(dev_priv);
-	else if (IS_GEMINILAKE(dev_priv))
-		glk_gt_workarounds_apply(dev_priv);
-	else if (IS_COFFEELAKE(dev_priv))
-		cfl_gt_workarounds_apply(dev_priv);
-	else if (IS_CANNONLAKE(dev_priv))
-		cnl_gt_workarounds_apply(dev_priv);
-	else if (IS_ICELAKE(dev_priv))
-		icl_gt_workarounds_apply(dev_priv);
+	else if (IS_BROADWELL(i915))
+		return;
+	else if (IS_CHERRYVIEW(i915))
+		return;
+	else if (IS_SKYLAKE(i915))
+		skl_gt_workarounds_init(i915);
+	else if (IS_BROXTON(i915))
+		bxt_gt_workarounds_init(i915);
+	else if (IS_KABYLAKE(i915))
+		kbl_gt_workarounds_init(i915);
+	else if (IS_GEMINILAKE(i915))
+		glk_gt_workarounds_init(i915);
+	else if (IS_COFFEELAKE(i915))
+		cfl_gt_workarounds_init(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_gt_workarounds_init(i915);
+	else if (IS_ICELAKE(i915))
+		icl_gt_workarounds_init(i915);
 	else
-		MISSING_CASE(INTEL_GEN(dev_priv));
+		MISSING_CASE(INTEL_GEN(i915));
+
+	wa_init_finish(wal);
+}
+
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct drm_i915_private *dev_priv,
+		   const struct i915_wa_list *wal)
+{
+	enum forcewake_domains fw = 0;
+	struct i915_wa *wa;
+	unsigned int i;
+
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+		fw |= intel_uncore_forcewake_for_reg(dev_priv,
+						     wa->reg,
+						     FW_REG_READ |
+						     FW_REG_WRITE);
+
+	return fw;
+}
+
+static void
+wa_list_apply(struct drm_i915_private *dev_priv, const struct i915_wa_list *wal)
+{
+	enum forcewake_domains fw;
+	unsigned long flags;
+	struct i915_wa *wa;
+	unsigned int i;
+
+	if (!wal->count)
+		return;
+
+	fw = wal_get_fw_for_rmw(dev_priv, wal);
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+	intel_uncore_forcewake_get__locked(dev_priv, fw);
+
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+		u32 val = I915_READ_FW(wa->reg);
+
+		val &= ~wa->mask;
+		val |= wa->val;
+
+		I915_WRITE_FW(wa->reg, val);
+	}
+
+	intel_uncore_forcewake_put__locked(dev_priv, fw);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
+
+	DRM_DEBUG_DRIVER("Applied %u %s workarounds\n", wal->count, wal->name);
+}
+
+void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv)
+{
+	wa_list_apply(dev_priv, &dev_priv->gt_wa_list);
 }
 
 struct whitelist {
@@ -1077,6 +1126,146 @@ void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
 	whitelist_apply(engine, whitelist_build(engine, &w));
 }
 
+static void rcs_engine_wa_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_wa_list *wal = &engine->wa_list;
+
+	if (IS_ICELAKE(i915)) {
+		/* This is not a Wa. Enable for better image quality */
+		wa_masked_en(wal,
+			     _3D_CHICKEN3,
+			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
+
+		/* WaPipelineFlushCoherentLines:icl */
+		wa_write_or(wal,
+			    GEN8_L3SQCREG4,
+			    GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+		/*
+		 * Wa_1405543622:icl
+		 * Formerly known as WaGAPZPriorityScheme
+		 */
+		wa_write_or(wal,
+			    GEN8_GARBCNTL,
+			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
+
+		/*
+		 * Wa_1604223664:icl
+		 * Formerly known as WaL3BankAddressHashing
+		 */
+		wa_write_masked_or(wal,
+				   GEN8_GARBCNTL,
+				   GEN11_HASH_CTRL_EXCL_MASK,
+				   GEN11_HASH_CTRL_EXCL_BIT0);
+		wa_write_masked_or(wal,
+				   GEN11_GLBLINVL,
+				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
+				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
+
+		/*
+		 * Wa_1405733216:icl
+		 * Formerly known as WaDisableCleanEvicts
+		 */
+		wa_write_or(wal,
+			    GEN8_L3SQCREG4,
+			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
+
+		/* Wa_1604302699:icl */
+		wa_write_or(wal,
+			    GEN10_L3_CHICKEN_MODE_REGISTER,
+			    GEN11_I2M_WRITE_DISABLE);
+
+		/* WaForwardProgressSoftReset:icl */
+		wa_write_or(wal,
+			    GEN10_SCRATCH_LNCF2,
+			    PMFLUSHDONE_LNICRSDROP |
+			    PMFLUSH_GAPL3UNBLOCK |
+			    PMFLUSHDONE_LNEBLK);
+	}
+
+	if (IS_GEN9(i915) || IS_CANNONLAKE(i915)) {
+		/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,cnl */
+		wa_masked_en(wal,
+			     GEN7_FF_SLICE_CS_CHICKEN1,
+			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
+	}
+
+	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
+		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
+		wa_write_or(wal,
+			    GEN8_GARBCNTL,
+			    GEN9_GAPS_TSV_CREDIT_DISABLE);
+	}
+
+	if (IS_BROXTON(i915)) {
+		/* WaDisablePooledEuLoadBalancingFix:bxt */
+		wa_masked_en(wal,
+			     FF_SLICE_CS_CHICKEN2,
+			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
+	}
+
+	if (IS_GEN9(i915)) {
+		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
+		wa_masked_en(wal,
+			     GEN9_CSFE_CHICKEN1_RCS,
+			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
+
+		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
+		wa_write_or(wal,
+			    BDW_SCRATCH1,
+			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+		if (IS_GEN9_LP(i915))
+			wa_write_masked_or(wal,
+					   GEN8_L3SQCREG1,
+					   L3_PRIO_CREDITS_MASK,
+					   L3_GENERAL_PRIO_CREDITS(62) |
+					   L3_HIGH_PRIO_CREDITS(2));
+
+		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
+		wa_write_or(wal,
+			    GEN8_L3SQCREG4,
+			    GEN8_LQSC_FLUSH_COHERENT_LINES);
+	}
+}
+
+static void xcs_engine_wa_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct i915_wa_list *wal = &engine->wa_list;
+
+	/* WaKBLVECSSemaphoreWaitPoll:kbl */
+	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
+		wa_write(wal,
+			 RING_SEMA_WAIT_POLL(engine->mmio_base),
+			 1);
+	}
+}
+
+void intel_engine_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct i915_wa_list *wal = &engine->wa_list;
+
+	if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
+		return;
+
+	wa_init_start(wal, engine->name);
+
+	if (engine->id == RCS)
+		rcs_engine_wa_init(engine);
+	else
+		xcs_engine_wa_init(engine);
+
+	wa_init_finish(wal);
+}
+
+void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
+{
+	wa_list_apply(engine->i915, &engine->wa_list);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_workarounds.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
index b11d0623e626..979695a53964 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -7,11 +7,35 @@
 #ifndef _I915_WORKAROUNDS_H_
 #define _I915_WORKAROUNDS_H_
 
+#include <linux/slab.h>
+
+struct i915_wa {
+	i915_reg_t	  reg;
+	u32		  mask;
+	u32		  val;
+};
+
+struct i915_wa_list {
+	const char	*name;
+	struct i915_wa	*list;
+	unsigned int	count;
+};
+
+static inline void intel_wa_list_free(struct i915_wa_list *wal)
+{
+	kfree(wal->list);
+	memset(wal, 0, sizeof(*wal));
+}
+
 int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv);
 int intel_ctx_workarounds_emit(struct i915_request *rq);
 
-void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv);
+void intel_gt_init_workarounds(struct drm_i915_private *dev_priv);
+void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv);
 
 void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);
 
+void intel_engine_init_workarounds(struct intel_engine_cs *engine);
+void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
+
 #endif
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 66df1b177959..27b507eb4a99 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -818,10 +818,13 @@ static int mtk_dsi_create_conn_enc(struct drm_device *drm, struct mtk_dsi *dsi)
 	dsi->encoder.possible_crtcs = 1;
 
 	/* If there's a bridge, attach to it and let it create the connector */
-	ret = drm_bridge_attach(&dsi->encoder, dsi->bridge, NULL);
-	if (ret) {
-		DRM_ERROR("Failed to attach bridge to drm\n");
-
+	if (dsi->bridge) {
+		ret = drm_bridge_attach(&dsi->encoder, dsi->bridge, NULL);
+		if (ret) {
+			DRM_ERROR("Failed to attach bridge to drm\n");
+			goto err_encoder_cleanup;
+		}
+	} else {
 		/* Otherwise create our own connector and attach to a panel */
 		ret = mtk_dsi_create_connector(drm, dsi);
 		if (ret)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 6cbbae3f438b..db1bf7f88c1f 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -198,6 +198,22 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 /******************************************************************************
  * EVO channel helpers
  *****************************************************************************/
+static void
+evo_flush(struct nv50_dmac *dmac)
+{
+	/* Push buffer fetches are not coherent with BAR1, we need to ensure
+	 * writes have been flushed right through to VRAM before writing PUT.
+	 */
+	if (dmac->push.type & NVIF_MEM_VRAM) {
+		struct nvif_device *device = dmac->base.device;
+		nvif_wr32(&device->object, 0x070000, 0x00000001);
+		nvif_msec(device, 2000,
+			if (!(nvif_rd32(&device->object, 0x070000) & 0x00000002))
+				break;
+		);
+	}
+}
+
 u32 *
 evo_wait(struct nv50_dmac *evoc, int nr)
 {
@@ -208,6 +224,7 @@ evo_wait(struct nv50_dmac *evoc, int nr)
 	mutex_lock(&dmac->lock);
 	if (put + nr >= (PAGE_SIZE / 4) - 8) {
 		dmac->ptr[put] = 0x20000000;
+		evo_flush(dmac);
 
 		nvif_wr32(&dmac->base.user, 0x0000, 0x00000000);
 		if (nvif_msec(device, 2000,
@@ -230,17 +247,7 @@ evo_kick(u32 *push, struct nv50_dmac *evoc)
 {
 	struct nv50_dmac *dmac = evoc;
 
-	/* Push buffer fetches are not coherent with BAR1, we need to ensure
-	 * writes have been flushed right through to VRAM before writing PUT.
-	 */
-	if (dmac->push.type & NVIF_MEM_VRAM) {
-		struct nvif_device *device = dmac->base.device;
-		nvif_wr32(&device->object, 0x070000, 0x00000001);
-		nvif_msec(device, 2000,
-			if (!(nvif_rd32(&device->object, 0x070000) & 0x00000002))
-				break;
-		);
-	}
+	evo_flush(dmac);
 
 	nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2);
 	mutex_unlock(&dmac->lock);
@@ -1264,6 +1271,7 @@ nv50_mstm_del(struct nv50_mstm **pmstm)
 {
 	struct nv50_mstm *mstm = *pmstm;
 	if (mstm) {
+		drm_dp_mst_topology_mgr_destroy(&mstm->mgr);
 		kfree(*pmstm);
 		*pmstm = NULL;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 2b2baf6e0e0d..d2928d43f29a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1171,10 +1171,16 @@ nouveau_platform_device_create(const struct nvkm_device_tegra_func *func,
 		goto err_free;
 	}
 
+	err = nouveau_drm_device_init(drm);
+	if (err)
+		goto err_put;
+
 	platform_set_drvdata(pdev, drm);
 
 	return drm;
 
+err_put:
+	drm_dev_put(drm);
 err_free:
 	nvkm_device_del(pdevice);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 941f35233b1f..5864cb452c5c 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -448,11 +448,6 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static void rockchip_drm_platform_shutdown(struct platform_device *pdev)
-{
-	rockchip_drm_platform_remove(pdev);
-}
-
 static const struct of_device_id rockchip_drm_dt_ids[] = {
 	{ .compatible = "rockchip,display-subsystem", },
 	{ /* sentinel */ },
@@ -462,7 +457,6 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids);
 static struct platform_driver rockchip_drm_platform_driver = {
 	.probe = rockchip_drm_platform_probe,
 	.remove = rockchip_drm_platform_remove,
-	.shutdown = rockchip_drm_platform_shutdown,
 	.driver = {
 		.name = "rockchip-drm",
 		.of_match_table = rockchip_drm_dt_ids,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 61a84b958d67..d7a2dfb8ee9b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -49,6 +49,8 @@
 
 #define VMWGFX_REPO "In Tree"
 
+#define VMWGFX_VALIDATION_MEM_GRAN (16*PAGE_SIZE)
+
 
 /**
  * Fully encoded drm commands. Might move to vmw_drm.h
@@ -918,7 +920,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		spin_unlock(&dev_priv->cap_lock);
 	}
 
-
+	vmw_validation_mem_init_ttm(dev_priv, VMWGFX_VALIDATION_MEM_GRAN);
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 59f614225bcd..aca974b14b55 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -606,6 +606,9 @@ struct vmw_private {
 
 	struct vmw_cmdbuf_man *cman;
 	DECLARE_BITMAP(irqthread_pending, VMW_IRQTHREAD_MAX);
+
+	/* Validation memory reservation */
+	struct vmw_validation_mem vvm;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -846,6 +849,8 @@ extern int vmw_ttm_global_init(struct vmw_private *dev_priv);
 extern void vmw_ttm_global_release(struct vmw_private *dev_priv);
 extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
 
+extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
+					size_t gran);
 /**
  * TTM buffer object driver - vmwgfx_ttm_buffer.c
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 5a6b70ba137a..f2d13a72c05d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1738,7 +1738,6 @@ static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
 				      void *buf)
 {
 	struct vmw_buffer_object *vmw_bo;
-	int ret;
 
 	struct {
 		uint32_t header;
@@ -1748,7 +1747,6 @@ static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
 	return vmw_translate_guest_ptr(dev_priv, sw_context,
 				       &cmd->body.ptr,
 				       &vmw_bo);
-	return ret;
 }
 
 
@@ -3837,6 +3835,8 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	struct sync_file *sync_file = NULL;
 	DECLARE_VAL_CONTEXT(val_ctx, &sw_context->res_ht, 1);
 
+	vmw_validation_set_val_mem(&val_ctx, &dev_priv->vvm);
+
 	if (flags & DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 7b1e5a5cbd2c..f88247046721 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -96,3 +96,39 @@ void vmw_ttm_global_release(struct vmw_private *dev_priv)
 	drm_global_item_unref(&dev_priv->bo_global_ref.ref);
 	drm_global_item_unref(&dev_priv->mem_global_ref);
 }
+
+/* struct vmw_validation_mem callback */
+static int vmw_vmt_reserve(struct vmw_validation_mem *m, size_t size)
+{
+	static struct ttm_operation_ctx ctx = {.interruptible = false,
+					       .no_wait_gpu = false};
+	struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm);
+
+	return ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, &ctx);
+}
+
+/* struct vmw_validation_mem callback: release @size via ttm_mem_global_free() */
+static void vmw_vmt_unreserve(struct vmw_validation_mem *m, size_t size)
+{
+	struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm);
+
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_validation_mem_init_ttm - Interface the validation memory tracker
+ * to ttm.
+ * @dev_priv: Pointer to struct vmw_private. We take a struct vmw_private
+ * rather than a struct vmw_validation_mem to make sure that the assumption
+ * in the callbacks, that the struct vmw_validation_mem is embedded in a
+ * struct vmw_private, holds true.
+ * @gran: The recommended allocation granularity
+ */
+void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, size_t gran)
+{
+	struct vmw_validation_mem *vvm = &dev_priv->vvm;
+
+	vvm->reserve_mem = vmw_vmt_reserve;
+	vvm->unreserve_mem = vmw_vmt_unreserve;
+	vvm->gran = gran;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 184025fa938e..f116f092e00b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -104,11 +104,25 @@ void *vmw_validation_mem_alloc(struct vmw_validation_context *ctx,
 		return NULL;
 
 	if (ctx->mem_size_left < size) {
-		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		struct page *page;
 
+		if (ctx->vm && ctx->vm_size_left < PAGE_SIZE) {
+			int ret = ctx->vm->reserve_mem(ctx->vm, ctx->vm->gran);
+
+			if (ret)
+				return NULL;
+
+			ctx->vm_size_left += ctx->vm->gran;
+			ctx->total_mem += ctx->vm->gran;
+		}
+
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (!page)
 			return NULL;
 
+		if (ctx->vm)
+			ctx->vm_size_left -= PAGE_SIZE;
+
 		list_add_tail(&page->lru, &ctx->page_list);
 		ctx->page_address = page_address(page);
 		ctx->mem_size_left = PAGE_SIZE;
@@ -138,6 +152,11 @@ static void vmw_validation_mem_free(struct vmw_validation_context *ctx)
 	}
 
 	ctx->mem_size_left = 0;
+	if (ctx->vm && ctx->total_mem) {
+		ctx->vm->unreserve_mem(ctx->vm, ctx->total_mem);
+		ctx->total_mem = 0;
+		ctx->vm_size_left = 0;
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index b57e3292c386..3b396fea40d7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -34,6 +34,21 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 
 /**
+ * struct vmw_validation_mem - Custom interface to provide memory reservations
+ * for the validation code.
+ * @reserve_mem: Callback to reserve memory
+ * @unreserve_mem: Callback to unreserve memory
+ * @gran: Reservation granularity. Contains a hint as to how much memory
+ * should be reserved in each call to @reserve_mem(). A slow implementation
+ * may want reservation to be done in large batches.
+ */
+struct vmw_validation_mem {
+	int (*reserve_mem)(struct vmw_validation_mem *m, size_t size);
+	void (*unreserve_mem)(struct vmw_validation_mem *m, size_t size);
+	size_t gran;
+};
+
+/**
  * struct vmw_validation_context - Per command submission validation context
  * @ht: Hash table used to find resource- or buffer object duplicates
  * @resource_list: List head for resource validation metadata
@@ -47,6 +62,10 @@
  * buffer objects
  * @mem_size_left: Free memory left in the last page in @page_list
  * @page_address: Kernel virtual address of the last page in @page_list
+ * @vm: A pointer to the memory reservation interface or NULL if no
+ * memory reservation is needed.
+ * @vm_size_left: Amount of reserved memory that so far has not been allocated.
+ * @total_mem: Amount of reserved memory.
  */
 struct vmw_validation_context {
 	struct drm_open_hash *ht;
@@ -59,6 +78,9 @@ struct vmw_validation_context {
 	unsigned int merge_dups;
 	unsigned int mem_size_left;
 	u8 *page_address;
+	struct vmw_validation_mem *vm;
+	size_t vm_size_left;
+	size_t total_mem;
 };
 
 struct vmw_buffer_object;
@@ -102,6 +124,21 @@ vmw_validation_has_bos(struct vmw_validation_context *ctx)
 }
 
 /**
+ * vmw_validation_set_val_mem - Register a validation mem object for
+ * validation memory reservation
+ * @ctx: The validation context
+ * @vm: Pointer to a struct vmw_validation_mem
+ *
+ * Must be set before the first attempt to allocate validation memory.
+ */
+static inline void
+vmw_validation_set_val_mem(struct vmw_validation_context *ctx,
+			   struct vmw_validation_mem *vm)
+{
+	ctx->vm = vm;
+}
+
+/**
  * vmw_validation_set_ht - Register a hash table for duplicate finding
  * @ctx: The validation context
  * @ht: Pointer to a hash table to use for duplicate finding
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ed35c9a9a110..27519eb8ee63 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -17,6 +17,9 @@
 #ifndef HID_IDS_H_FILE
 #define HID_IDS_H_FILE
 
+#define USB_VENDOR_ID_258A		0x258a
+#define USB_DEVICE_ID_258A_6A88		0x6a88
+
 #define USB_VENDOR_ID_3M		0x0596
 #define USB_DEVICE_ID_3M1968		0x0500
 #define USB_DEVICE_ID_3M2256		0x0502
@@ -941,6 +944,10 @@
 #define USB_VENDOR_ID_REALTEK		0x0bda
 #define USB_DEVICE_ID_REALTEK_READER	0x0152
 
+#define USB_VENDOR_ID_RETROUSB		0xf000
+#define USB_DEVICE_ID_RETROUSB_SNES_RETROPAD	0x0003
+#define USB_DEVICE_ID_RETROUSB_SNES_RETROPORT	0x00f1
+
 #define USB_VENDOR_ID_ROCCAT		0x1e7d
 #define USB_DEVICE_ID_ROCCAT_ARVO	0x30d4
 #define USB_DEVICE_ID_ROCCAT_ISKU	0x319c
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index 1882a4ab0f29..98b059d79bc8 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -42,6 +42,7 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
 
 static const struct hid_device_id ite_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index c85a79986b6a..94088c0ed68a 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -137,6 +137,8 @@ static const struct hid_device_id hid_quirks[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003), HID_QUIRK_NOGET },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008), HID_QUIRK_NOGET },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_REALTEK, USB_DEVICE_ID_REALTEK_READER), HID_QUIRK_NO_INIT_REPORTS },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPAD), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_RETROUSB, USB_DEVICE_ID_RETROUSB_SNES_RETROPORT), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RUMBLEPAD), HID_QUIRK_BADPAD },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD2), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS },
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 25d43c8f1c2a..558de0b9895c 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -267,6 +267,9 @@ is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
 	struct net_device *cookie_ndev = cookie;
 	bool match = false;
 
+	if (!rdma_ndev)
+		return false;
+
 	rcu_read_lock();
 	if (netif_is_bond_master(cookie_ndev) &&
 	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9b20479dc710..7e6d70936c63 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -12500,7 +12500,8 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	}
 
 	/* allocate space for the counter values */
-	dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
+	dd->cntrs = kcalloc(dd->ndevcntrs + num_driver_cntrs, sizeof(u64),
+			    GFP_KERNEL);
 	if (!dd->cntrs)
 		goto bail;
 
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 1401b6ea4a28..2b882347d0c2 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -155,6 +155,8 @@ struct hfi1_ib_stats {
 extern struct hfi1_ib_stats hfi1_stats;
 extern const struct pci_error_handlers hfi1_pci_err_handler;
 
+extern int num_driver_cntrs;
+
 /*
  * First-cut criterion for "device is active" is
  * two thousand dwords combined Tx, Rx traffic per
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 6f3bc4dab858..1a016248039f 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -340,6 +340,13 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
 	default:
 		break;
 	}
+
+	/*
+	 * System latency between send and schedule is large enough that
+	 * call_send must be forced to true when length <= piothreshold.
+	 */
+	if (wqe->length <= piothreshold)
+		*call_send = true;
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 48e11e510358..a365089a9305 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1479,7 +1479,7 @@ static const char * const driver_cntr_names[] = {
 static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
 static const char **dev_cntr_names;
 static const char **port_cntr_names;
-static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
 static int num_dev_cntrs;
 static int num_port_cntrs;
 static int cntr_names_initialized;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 61aab7c0c513..45c421c87100 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1066,7 +1066,9 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 
 	err = uverbs_get_flags32(&access, attrs,
 				 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
-				 IB_ACCESS_SUPPORTED);
+				 IB_ACCESS_LOCAL_WRITE |
+				 IB_ACCESS_REMOTE_WRITE |
+				 IB_ACCESS_REMOTE_READ);
 	if (err)
 		return err;
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 2cc3d69ab6f6..4dc6cc640ce0 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -506,14 +506,13 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u64 io_virt, size_t bcnt, u32 *bytes_mapped)
 {
+	int npages = 0, current_seq, page_shift, ret, np;
+	bool implicit = false;
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
-	int npages = 0, page_shift, np;
 	u64 start_idx, page_mask;
 	struct ib_umem_odp *odp;
-	int current_seq;
 	size_t size;
-	int ret;
 
 	if (!odp_mr->page_list) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
@@ -521,7 +520,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 		if (IS_ERR(odp))
 			return PTR_ERR(odp);
 		mr = odp->private;
-
+		implicit = true;
 	} else {
 		odp = odp_mr;
 	}
@@ -600,7 +599,7 @@ next_mr:
 
 out:
 	if (ret == -EAGAIN) {
-		if (mr->parent || !odp->dying) {
+		if (implicit || !odp->dying) {
 			unsigned long timeout =
 				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 5936de71883f..6fc93834da44 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 	bool dirty_flag;
 	*result = true;
 
+	if (from_cblock(cmd->cache_blocks) == 0)
+		/* Nothing to do */
+		return 0;
+
 	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
 				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
 	if (r) {
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 0bd8d498b3b9..dadd9696340c 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
 struct dm_thin_new_mapping;
 
 /*
- * The pool runs in 4 modes.  Ordered in degraded order for comparisons.
+ * Pool modes, ordered from least to most degraded so they can be compared.
  */
 enum pool_mode {
 	PM_WRITE,		/* metadata may be changed */
@@ -282,9 +282,38 @@ struct pool {
 	mempool_t mapping_pool;
 };
 
-static enum pool_mode get_pool_mode(struct pool *pool);
 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
 
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+	return pool->pf.mode;
+}
+
+static void notify_of_pool_mode_change(struct pool *pool)
+{
+	const char *descs[] = {
+		"write",
+		"out-of-data-space",
+		"read-only",
+		"read-only",
+		"fail"
+	};
+	const char *extra_desc = NULL;
+	enum pool_mode mode = get_pool_mode(pool);
+
+	if (mode == PM_OUT_OF_DATA_SPACE) {
+		if (!pool->pf.error_if_no_space)
+			extra_desc = " (queue IO)";
+		else
+			extra_desc = " (error IO)";
+	}
+
+	dm_table_event(pool->ti->table);
+	DMINFO("%s: switching pool to %s%s mode",
+	       dm_device_name(pool->pool_md),
+	       descs[(int)mode], extra_desc ? : "");
+}
+
 /*
  * Target context for a pool.
  */
@@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
 }
 
-static void notify_of_pool_mode_change_to_oods(struct pool *pool);
-
 /*
  * We're holding onto IO to allow userland time to react.  After the
  * timeout either the pool will have been resized (and thus back in
@@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)
 
 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 		pool->pf.error_if_no_space = true;
-		notify_of_pool_mode_change_to_oods(pool);
+		notify_of_pool_mode_change(pool);
 		error_retry_list_with_code(pool, BLK_STS_NOSPC);
 	}
 }
@@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
 
 /*----------------------------------------------------------------*/
 
-static enum pool_mode get_pool_mode(struct pool *pool)
-{
-	return pool->pf.mode;
-}
-
-static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
-{
-	dm_table_event(pool->ti->table);
-	DMINFO("%s: switching pool to %s mode",
-	       dm_device_name(pool->pool_md), new_mode);
-}
-
-static void notify_of_pool_mode_change_to_oods(struct pool *pool)
-{
-	if (!pool->pf.error_if_no_space)
-		notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
-	else
-		notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
-}
-
 static bool passdown_enabled(struct pool_c *pt)
 {
 	return pt->adjusted_pf.discard_passdown;
@@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	switch (new_mode) {
 	case PM_FAIL:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "failure");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
@@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 
 	case PM_OUT_OF_METADATA_SPACE:
 	case PM_READ_ONLY:
-		if (!is_read_only_pool_mode(old_mode))
-			notify_of_pool_mode_change(pool, "read-only");
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_bio_success;
@@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		 * alarming rate.  Adjust your low water mark if you're
 		 * frequently seeing this mode.
 		 */
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change_to_oods(pool);
 		pool->out_of_data_space = true;
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
@@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		break;
 
 	case PM_WRITE:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "write");
 		if (old_mode == PM_OUT_OF_DATA_SPACE)
 			cancel_delayed_work_sync(&pool->no_space_timeout);
 		pool->out_of_data_space = false;
@@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 	 * doesn't cause an unexpected mode transition on resume.
 	 */
 	pt->adjusted_pf.mode = new_mode;
+
+	if (old_mode != new_mode)
+		notify_of_pool_mode_change(pool);
 }
 
 static void abort_transaction(struct pool *pool)
@@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 981154e59461..6af5babe6837 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -20,7 +20,6 @@ struct dmz_bioctx {
 	struct dm_zone		*zone;
 	struct bio		*bio;
 	refcount_t		ref;
-	blk_status_t		status;
 };
 
 /*
@@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
 
-	if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
-		bioctx->status = status;
-	bio_endio(bio);
+	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
+		bio->bi_status = status;
+
+	if (refcount_dec_and_test(&bioctx->ref)) {
+		struct dm_zone *zone = bioctx->zone;
+
+		if (zone) {
+			if (bio->bi_status != BLK_STS_OK &&
+			    bio_op(bio) == REQ_OP_WRITE &&
+			    dmz_is_seq(zone))
+				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
+			dmz_deactivate_zone(zone);
+		}
+		bio_endio(bio);
+	}
 }
 
 /*
- * Partial clone read BIO completion callback. This terminates the
+ * Completion callback for an internally cloned target BIO. This terminates the
  * target BIO when there are no more references to its context.
  */
-static void dmz_read_bio_end_io(struct bio *bio)
+static void dmz_clone_endio(struct bio *clone)
 {
-	struct dmz_bioctx *bioctx = bio->bi_private;
-	blk_status_t status = bio->bi_status;
+	struct dmz_bioctx *bioctx = clone->bi_private;
+	blk_status_t status = clone->bi_status;
 
-	bio_put(bio);
+	bio_put(clone);
 	dmz_bio_endio(bioctx->bio, status);
 }
 
 /*
- * Issue a BIO to a zone. The BIO may only partially process the
+ * Issue a clone of a target BIO. The clone may only partially process the
  * original target BIO.
  */
-static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
-			       struct bio *bio, sector_t chunk_block,
-			       unsigned int nr_blocks)
+static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
+			  struct bio *bio, sector_t chunk_block,
+			  unsigned int nr_blocks)
 {
 	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-	sector_t sector;
 	struct bio *clone;
 
-	/* BIO remap sector */
-	sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-
-	/* If the read is not partial, there is no need to clone the BIO */
-	if (nr_blocks == dmz_bio_blocks(bio)) {
-		/* Setup and submit the BIO */
-		bio->bi_iter.bi_sector = sector;
-		refcount_inc(&bioctx->ref);
-		generic_make_request(bio);
-		return 0;
-	}
-
-	/* Partial BIO: we need to clone the BIO */
 	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
 	if (!clone)
 		return -ENOMEM;
 
-	/* Setup the clone */
-	clone->bi_iter.bi_sector = sector;
+	bio_set_dev(clone, dmz->dev->bdev);
+	clone->bi_iter.bi_sector =
+		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
 	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
-	clone->bi_end_io = dmz_read_bio_end_io;
+	clone->bi_end_io = dmz_clone_endio;
 	clone->bi_private = bioctx;
 
 	bio_advance(bio, clone->bi_iter.bi_size);
 
-	/* Submit the clone */
 	refcount_inc(&bioctx->ref);
 	generic_make_request(clone);
 
+	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
+		zone->wp_block += nr_blocks;
+
 	return 0;
 }
 
@@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 		if (nr_blocks) {
 			/* Valid blocks found: read them */
 			nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
-			ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
+			ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
 			if (ret)
 				return ret;
 			chunk_block += nr_blocks;
@@ -229,25 +229,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
 }
 
 /*
- * Issue a write BIO to a zone.
- */
-static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
-				 struct bio *bio, sector_t chunk_block,
-				 unsigned int nr_blocks)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	/* Setup and submit the BIO */
-	bio_set_dev(bio, dmz->dev->bdev);
-	bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-	refcount_inc(&bioctx->ref);
-	generic_make_request(bio);
-
-	if (dmz_is_seq(zone))
-		zone->wp_block += nr_blocks;
-}
-
-/*
  * Write blocks directly in a data zone, at the write pointer.
  * If a buffer zone is assigned, invalidate the blocks written
  * in place.
@@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
 	 * Validate the blocks in the data zone and invalidate
@@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
 		return -EROFS;
 
 	/* Submit write */
-	dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;
 
 	/*
 	 * Validate the blocks in the buffer zone
@@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	bioctx->zone = NULL;
 	bioctx->bio = bio;
 	refcount_set(&bioctx->ref, 1);
-	bioctx->status = BLK_STS_OK;
 
 	/* Set the BIO pending in the flush list */
 	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
@@ -624,35 +608,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 }
 
 /*
- * Completed target BIO processing.
- */
-static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	if (bioctx->status == BLK_STS_OK && *error)
-		bioctx->status = *error;
-
-	if (!refcount_dec_and_test(&bioctx->ref))
-		return DM_ENDIO_INCOMPLETE;
-
-	/* Done */
-	bio->bi_status = bioctx->status;
-
-	if (bioctx->zone) {
-		struct dm_zone *zone = bioctx->zone;
-
-		if (*error && bio_op(bio) == REQ_OP_WRITE) {
-			if (dmz_is_seq(zone))
-				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
-		}
-		dmz_deactivate_zone(zone);
-	}
-
-	return DM_ENDIO_DONE;
-}
-
-/*
  * Get zoned device information.
  */
 static int dmz_get_zoned_device(struct dm_target *ti, char *path)
@@ -946,7 +901,6 @@ static struct target_type dmz_type = {
 	.ctr		 = dmz_ctr,
 	.dtr		 = dmz_dtr,
 	.map		 = dmz_map,
-	.end_io		 = dmz_end_io,
 	.io_hints	 = dmz_io_hints,
 	.prepare_ioctl	 = dmz_prepare_ioctl,
 	.postsuspend	 = dmz_suspend,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c510179a7f84..63a7c416b224 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 		return ret;
 	}
 
+	blk_queue_split(md->queue, &bio);
+
 	init_clone_info(&ci, md, map, bio);
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 8add62a18293..102eb35fcf3f 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -110,6 +110,19 @@ config MEDIA_CONTROLLER_DVB
 
 	  This is currently experimental.
 
+config MEDIA_CONTROLLER_REQUEST_API
+	bool "Enable Media controller Request API (EXPERIMENTAL)"
+	depends on MEDIA_CONTROLLER && STAGING_MEDIA
+	default n
+	---help---
+	  DO NOT ENABLE THIS OPTION UNLESS YOU KNOW WHAT YOU'RE DOING.
+
+	  This option enables the Request API for the Media controller and V4L2
+	  interfaces. It is currently needed by a few stateless codec drivers.
+
+	  There is currently no intention to provide API or ABI stability for
+	  this new API.
+
 #
 # Video4Linux support
 #	Only enables if one of the V4L2 types (ATV, webcam, radio) is selected
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index 975ff5669f72..8ff8722cb6b1 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -947,7 +947,7 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 	}
 	atomic_dec(&q->owned_by_drv_count);
 
-	if (vb->req_obj.req) {
+	if (state != VB2_BUF_STATE_QUEUED && vb->req_obj.req) {
 		/* This is not supported at the moment */
 		WARN_ON(state == VB2_BUF_STATE_REQUEUEING);
 		media_request_object_unbind(&vb->req_obj);
@@ -1359,8 +1359,12 @@ static void vb2_req_release(struct media_request_object *obj)
 {
 	struct vb2_buffer *vb = container_of(obj, struct vb2_buffer, req_obj);
 
-	if (vb->state == VB2_BUF_STATE_IN_REQUEST)
+	if (vb->state == VB2_BUF_STATE_IN_REQUEST) {
 		vb->state = VB2_BUF_STATE_DEQUEUED;
+		if (vb->request)
+			media_request_put(vb->request);
+		vb->request = NULL;
+	}
 }
 
 static const struct media_request_object_ops vb2_core_req_ops = {
@@ -1528,6 +1532,18 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
 			return ret;
 
 		vb->state = VB2_BUF_STATE_IN_REQUEST;
+
+		/*
+		 * Increment the refcount and store the request.
+		 * The request refcount is decremented again when the
+		 * buffer is dequeued. This is to prevent vb2_buffer_done()
+		 * from freeing the request from interrupt context, which can
+		 * happen if the application closed the request fd after
+		 * queueing the request.
+		 */
+		media_request_get(req);
+		vb->request = req;
+
 		/* Fill buffer information for the userspace */
 		if (pb) {
 			call_void_bufop(q, copy_timestamp, vb, pb);
@@ -1749,10 +1765,6 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
 			call_void_memop(vb, unmap_dmabuf, vb->planes[i].mem_priv);
 			vb->planes[i].dbuf_mapped = 0;
 		}
-	if (vb->req_obj.req) {
-		media_request_object_unbind(&vb->req_obj);
-		media_request_object_put(&vb->req_obj);
-	}
 	call_void_bufop(q, init_buffer, vb);
 }
 
@@ -1797,6 +1809,14 @@ int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
 	/* go back to dequeued state */
 	__vb2_dqbuf(vb);
 
+	if (WARN_ON(vb->req_obj.req)) {
+		media_request_object_unbind(&vb->req_obj);
+		media_request_object_put(&vb->req_obj);
+	}
+	if (vb->request)
+		media_request_put(vb->request);
+	vb->request = NULL;
+
 	dprintk(2, "dqbuf of buffer %d, with state %d\n",
 			vb->index, vb->state);
 
@@ -1903,6 +1923,14 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
 			vb->prepared = false;
 		}
 		__vb2_dqbuf(vb);
+
+		if (vb->req_obj.req) {
+			media_request_object_unbind(&vb->req_obj);
+			media_request_object_put(&vb->req_obj);
+		}
+		if (vb->request)
+			media_request_put(vb->request);
+		vb->request = NULL;
 	}
 }
 
@@ -1940,10 +1968,8 @@ int vb2_core_streamon(struct vb2_queue *q, unsigned int type)
 		if (ret)
 			return ret;
 		ret = vb2_start_streaming(q);
-		if (ret) {
-			__vb2_queue_cancel(q);
+		if (ret)
 			return ret;
-		}
 	}
 
 	q->streaming = 1;
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index a17033ab2c22..1d35aeabfd85 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -333,10 +333,10 @@ static int vb2_fill_vb2_v4l2_buffer(struct vb2_buffer *vb, struct v4l2_buffer *b
 }
 
 static int vb2_queue_or_prepare_buf(struct vb2_queue *q, struct media_device *mdev,
-				    struct v4l2_buffer *b,
-				    const char *opname,
+				    struct v4l2_buffer *b, bool is_prepare,
 				    struct media_request **p_req)
 {
+	const char *opname = is_prepare ? "prepare_buf" : "qbuf";
 	struct media_request *req;
 	struct vb2_v4l2_buffer *vbuf;
 	struct vb2_buffer *vb;
@@ -378,6 +378,9 @@ static int vb2_queue_or_prepare_buf(struct vb2_queue *q, struct media_device *md
 			return ret;
 	}
 
+	if (is_prepare)
+		return 0;
+
 	if (!(b->flags & V4L2_BUF_FLAG_REQUEST_FD)) {
 		if (q->uses_requests) {
 			dprintk(1, "%s: queue uses requests\n", opname);
@@ -631,8 +634,10 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps)
 		*caps |= V4L2_BUF_CAP_SUPPORTS_USERPTR;
 	if (q->io_modes & VB2_DMABUF)
 		*caps |= V4L2_BUF_CAP_SUPPORTS_DMABUF;
+#ifdef CONFIG_MEDIA_CONTROLLER_REQUEST_API
 	if (q->supports_requests)
 		*caps |= V4L2_BUF_CAP_SUPPORTS_REQUESTS;
+#endif
 }
 
 int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
@@ -657,7 +662,7 @@ int vb2_prepare_buf(struct vb2_queue *q, struct media_device *mdev,
 	if (b->flags & V4L2_BUF_FLAG_REQUEST_FD)
 		return -EINVAL;
 
-	ret = vb2_queue_or_prepare_buf(q, mdev, b, "prepare_buf", NULL);
+	ret = vb2_queue_or_prepare_buf(q, mdev, b, true, NULL);
 
 	return ret ? ret : vb2_core_prepare_buf(q, b->index, b);
 }
@@ -729,7 +734,7 @@ int vb2_qbuf(struct vb2_queue *q, struct media_device *mdev,
 		return -EBUSY;
 	}
 
-	ret = vb2_queue_or_prepare_buf(q, mdev, b, "qbuf", &req);
+	ret = vb2_queue_or_prepare_buf(q, mdev, b, false, &req);
 	if (ret)
 		return ret;
 	ret = vb2_core_qbuf(q, b->index, b, req);
diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index bed24372e61f..b8ec88612df7 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -381,10 +381,14 @@ static long media_device_get_topology(struct media_device *mdev, void *arg)
 static long media_device_request_alloc(struct media_device *mdev,
 				       int *alloc_fd)
 {
+#ifdef CONFIG_MEDIA_CONTROLLER_REQUEST_API
 	if (!mdev->ops || !mdev->ops->req_validate || !mdev->ops->req_queue)
 		return -ENOTTY;
 
 	return media_request_alloc(mdev, alloc_fd);
+#else
+	return -ENOTTY;
+#endif
 }
 
 static long copy_arg_from_user(void *karg, void __user *uarg, unsigned int cmd)
diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
index 013cdebecbc4..13fb69c58967 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -997,11 +997,18 @@ static int vicodec_start_streaming(struct vb2_queue *q,
 
 	q_data->sequence = 0;
 
-	if (!V4L2_TYPE_IS_OUTPUT(q->type))
+	if (!V4L2_TYPE_IS_OUTPUT(q->type)) {
+		if (!ctx->is_enc) {
+			state->width = q_data->width;
+			state->height = q_data->height;
+		}
 		return 0;
+	}
 
-	state->width = q_data->width;
-	state->height = q_data->height;
+	if (ctx->is_enc) {
+		state->width = q_data->width;
+		state->height = q_data->height;
+	}
 	state->ref_frame.width = state->ref_frame.height = 0;
 	state->ref_frame.luma = kvmalloc(size + 2 * size / chroma_div,
 					 GFP_KERNEL);
diff --git a/drivers/media/platform/vivid/vivid-sdr-cap.c b/drivers/media/platform/vivid/vivid-sdr-cap.c
index dcdc80e272c2..9acc709b0740 100644
--- a/drivers/media/platform/vivid/vivid-sdr-cap.c
+++ b/drivers/media/platform/vivid/vivid-sdr-cap.c
@@ -276,8 +276,6 @@ static int sdr_cap_start_streaming(struct vb2_queue *vq, unsigned count)
 
 		list_for_each_entry_safe(buf, tmp, &dev->sdr_cap_active, list) {
 			list_del(&buf->list);
-			v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req,
-						   &dev->ctrl_hdl_sdr_cap);
 			vb2_buffer_done(&buf->vb.vb2_buf,
 					VB2_BUF_STATE_QUEUED);
 		}
diff --git a/drivers/media/platform/vivid/vivid-vbi-cap.c b/drivers/media/platform/vivid/vivid-vbi-cap.c
index 903cebeb5ce5..d666271bdaed 100644
--- a/drivers/media/platform/vivid/vivid-vbi-cap.c
+++ b/drivers/media/platform/vivid/vivid-vbi-cap.c
@@ -204,8 +204,6 @@ static int vbi_cap_start_streaming(struct vb2_queue *vq, unsigned count)
 
 		list_for_each_entry_safe(buf, tmp, &dev->vbi_cap_active, list) {
 			list_del(&buf->list);
-			v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req,
-						   &dev->ctrl_hdl_vbi_cap);
 			vb2_buffer_done(&buf->vb.vb2_buf,
 					VB2_BUF_STATE_QUEUED);
 		}
diff --git a/drivers/media/platform/vivid/vivid-vbi-out.c b/drivers/media/platform/vivid/vivid-vbi-out.c
index 9357c07e30d6..cd56476902a2 100644
--- a/drivers/media/platform/vivid/vivid-vbi-out.c
+++ b/drivers/media/platform/vivid/vivid-vbi-out.c
@@ -96,8 +96,6 @@ static int vbi_out_start_streaming(struct vb2_queue *vq, unsigned count)
 
 		list_for_each_entry_safe(buf, tmp, &dev->vbi_out_active, list) {
 			list_del(&buf->list);
-			v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req,
-						   &dev->ctrl_hdl_vbi_out);
 			vb2_buffer_done(&buf->vb.vb2_buf,
 					VB2_BUF_STATE_QUEUED);
 		}
diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c
index 9c8e8be81ce3..673772cd17d6 100644
--- a/drivers/media/platform/vivid/vivid-vid-cap.c
+++ b/drivers/media/platform/vivid/vivid-vid-cap.c
@@ -243,8 +243,6 @@ static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count)
 
 		list_for_each_entry_safe(buf, tmp, &dev->vid_cap_active, list) {
 			list_del(&buf->list);
-			v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req,
-						   &dev->ctrl_hdl_vid_cap);
 			vb2_buffer_done(&buf->vb.vb2_buf,
 					VB2_BUF_STATE_QUEUED);
 		}
diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c
index aaf13f03d5d4..628eae154ee7 100644
--- a/drivers/media/platform/vivid/vivid-vid-out.c
+++ b/drivers/media/platform/vivid/vivid-vid-out.c
@@ -162,8 +162,6 @@ static int vid_out_start_streaming(struct vb2_queue *vq, unsigned count)
 
 		list_for_each_entry_safe(buf, tmp, &dev->vid_out_active, list) {
 			list_del(&buf->list);
-			v4l2_ctrl_request_complete(buf->vb.vb2_buf.req_obj.req,
-						   &dev->ctrl_hdl_vid_out);
 			vb2_buffer_done(&buf->vb.vb2_buf,
 					VB2_BUF_STATE_QUEUED);
 		}
diff --git a/drivers/media/platform/vsp1/vsp1_lif.c b/drivers/media/platform/vsp1/vsp1_lif.c
index 0b18f0bd7419..8b0a26335d70 100644
--- a/drivers/media/platform/vsp1/vsp1_lif.c
+++ b/drivers/media/platform/vsp1/vsp1_lif.c
@@ -95,7 +95,7 @@ static void lif_configure_stream(struct vsp1_entity *entity,
 	format = vsp1_entity_get_pad_format(&lif->entity, lif->entity.config,
 					    LIF_PAD_SOURCE);
 
-	switch (entity->vsp1->version & VI6_IP_VERSION_SOC_MASK) {
+	switch (entity->vsp1->version & VI6_IP_VERSION_MODEL_MASK) {
 	case VI6_IP_VERSION_MODEL_VSPD_GEN2:
 	case VI6_IP_VERSION_MODEL_VSPD_V2H:
 		hbth = 1536;
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 5f2b033a7a42..10b8d94edbef 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1563,7 +1563,7 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 	u64 offset;
 	s64 val;
 
-	switch (ctrl->type) {
+	switch ((u32)ctrl->type) {
 	case V4L2_CTRL_TYPE_INTEGER:
 		return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl);
 	case V4L2_CTRL_TYPE_INTEGER64:
@@ -2232,7 +2232,7 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	is_array = nr_of_dims > 0;
 
 	/* Prefill elem_size for all types handled by std_type_ops */
-	switch (type) {
+	switch ((u32)type) {
 	case V4L2_CTRL_TYPE_INTEGER64:
 		elem_size = sizeof(s64);
 		break;
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index c35b5b08bb33..111934838da2 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -472,7 +472,7 @@ out:
 static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
 			       struct mmc_blk_ioc_data *idata)
 {
-	struct mmc_command cmd = {};
+	struct mmc_command cmd = {}, sbc = {};
 	struct mmc_data data = {};
 	struct mmc_request mrq = {};
 	struct scatterlist sg;
@@ -550,10 +550,15 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
 	}
 
 	if (idata->rpmb) {
-		err = mmc_set_blockcount(card, data.blocks,
-			idata->ic.write_flag & (1 << 31));
-		if (err)
-			return err;
+		sbc.opcode = MMC_SET_BLOCK_COUNT;
+		/*
+		 * We don't do any blockcount validation because the max size
+		 * may be increased by a future standard. We just copy the
+		 * 'Reliable Write' bit here.
+		 */
+		sbc.arg = data.blocks | (idata->ic.write_flag & BIT(31));
+		sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+		mrq.sbc = &sbc;
 	}
 
 	if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_SANITIZE_START) &&
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index adf32682f27a..c60a7625b1fa 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -104,6 +104,7 @@ struct mmc_omap_slot {
 	unsigned int		vdd;
 	u16			saved_con;
 	u16			bus_mode;
+	u16			power_mode;
 	unsigned int		fclk_freq;
 
 	struct tasklet_struct	cover_tasklet;
@@ -1157,7 +1158,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct mmc_omap_slot *slot = mmc_priv(mmc);
 	struct mmc_omap_host *host = slot->host;
 	int i, dsor;
-	int clk_enabled;
+	int clk_enabled, init_stream;
 
 	mmc_omap_select_slot(slot, 0);
 
@@ -1167,6 +1168,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		slot->vdd = ios->vdd;
 
 	clk_enabled = 0;
+	init_stream = 0;
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF:
 		mmc_omap_set_power(slot, 0, ios->vdd);
@@ -1174,13 +1176,17 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	case MMC_POWER_UP:
 		/* Cannot touch dsor yet, just power up MMC */
 		mmc_omap_set_power(slot, 1, ios->vdd);
+		slot->power_mode = ios->power_mode;
 		goto exit;
 	case MMC_POWER_ON:
 		mmc_omap_fclk_enable(host, 1);
 		clk_enabled = 1;
 		dsor |= 1 << 11;
+		if (slot->power_mode != MMC_POWER_ON)
+			init_stream = 1;
 		break;
 	}
+	slot->power_mode = ios->power_mode;
 
 	if (slot->bus_mode != ios->bus_mode) {
 		if (slot->pdata->set_bus_mode != NULL)
@@ -1196,7 +1202,7 @@ static void mmc_omap_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	for (i = 0; i < 2; i++)
 		OMAP_MMC_WRITE(host, CON, dsor);
 	slot->saved_con = dsor;
-	if (ios->power_mode == MMC_POWER_ON) {
+	if (init_stream) {
 		/* worst case at 400kHz, 80 cycles makes 200 microsecs */
 		int usecs = 250;
 
@@ -1234,6 +1240,7 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
 	slot->host = host;
 	slot->mmc = mmc;
 	slot->id = id;
+	slot->power_mode = MMC_POWER_UNDEFINED;
 	slot->pdata = &host->pdata->slots[id];
 
 	host->slots[id] = slot;
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 88347ce78f23..d264391616f9 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -288,9 +288,9 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	struct device *dev = omap_host->dev;
 	struct mmc_ios *ios = &mmc->ios;
 	u32 start_window = 0, max_window = 0;
+	bool dcrc_was_enabled = false;
 	u8 cur_match, prev_match = 0;
 	u32 length = 0, max_len = 0;
-	u32 ier = host->ier;
 	u32 phase_delay = 0;
 	int ret = 0;
 	u32 reg;
@@ -317,9 +317,10 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	 * during the tuning procedure. So disable it during the
 	 * tuning procedure.
 	 */
-	ier &= ~SDHCI_INT_DATA_CRC;
-	sdhci_writel(host, ier, SDHCI_INT_ENABLE);
-	sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
+	if (host->ier & SDHCI_INT_DATA_CRC) {
+		host->ier &= ~SDHCI_INT_DATA_CRC;
+		dcrc_was_enabled = true;
+	}
 
 	while (phase_delay <= MAX_PHASE_DELAY) {
 		sdhci_omap_set_dll(omap_host, phase_delay);
@@ -366,6 +367,9 @@ tuning_error:
 
 ret:
 	sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+	/* Reenable forbidden interrupt */
+	if (dcrc_was_enabled)
+		host->ier |= SDHCI_INT_DATA_CRC;
 	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	return ret;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 99bdae53fa2e..451b08a818a9 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -216,8 +216,12 @@ void sdhci_reset(struct sdhci_host *host, u8 mask)
 	timeout = ktime_add_ms(ktime_get(), 100);
 
 	/* hw clears the bit when it's done */
-	while (sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask) {
-		if (ktime_after(ktime_get(), timeout)) {
+	while (1) {
+		bool timedout = ktime_after(ktime_get(), timeout);
+
+		if (!(sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask))
+			break;
+		if (timedout) {
 			pr_err("%s: Reset 0x%x never completed.\n",
 				mmc_hostname(host->mmc), (int)mask);
 			sdhci_dumpregs(host);
@@ -1608,9 +1612,13 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
 
 	/* Wait max 20 ms */
 	timeout = ktime_add_ms(ktime_get(), 20);
-	while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
-		& SDHCI_CLOCK_INT_STABLE)) {
-		if (ktime_after(ktime_get(), timeout)) {
+	while (1) {
+		bool timedout = ktime_after(ktime_get(), timeout);
+
+		clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+		if (clk & SDHCI_CLOCK_INT_STABLE)
+			break;
+		if (timedout) {
 			pr_err("%s: Internal clock never stabilised.\n",
 			       mmc_hostname(host->mmc));
 			sdhci_dumpregs(host);
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 2c2df4e4fc14..e5507add8f04 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -196,12 +196,12 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	if (IS_ERR(opp_table))
 		return 0;
 
-	count = opp_table->regulator_count;
-
 	/* Regulator may not be required for the device */
-	if (!count)
+	if (!opp_table->regulators)
 		goto put_opp_table;
 
+	count = opp_table->regulator_count;
+
 	uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL);
 	if (!uV)
 		goto put_opp_table;
@@ -548,44 +548,6 @@ _generic_set_opp_clk_only(struct device *dev, struct clk *clk,
 	return ret;
 }
 
-static inline int
-_generic_set_opp_domain(struct device *dev, struct clk *clk,
-			unsigned long old_freq, unsigned long freq,
-			unsigned int old_pstate, unsigned int new_pstate)
-{
-	int ret;
-
-	/* Scaling up? Scale domain performance state before frequency */
-	if (freq > old_freq) {
-		ret = dev_pm_genpd_set_performance_state(dev, new_pstate);
-		if (ret)
-			return ret;
-	}
-
-	ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq);
-	if (ret)
-		goto restore_domain_state;
-
-	/* Scaling down? Scale domain performance state after frequency */
-	if (freq < old_freq) {
-		ret = dev_pm_genpd_set_performance_state(dev, new_pstate);
-		if (ret)
-			goto restore_freq;
-	}
-
-	return 0;
-
-restore_freq:
-	if (_generic_set_opp_clk_only(dev, clk, freq, old_freq))
-		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
-			__func__, old_freq);
-restore_domain_state:
-	if (freq > old_freq)
-		dev_pm_genpd_set_performance_state(dev, old_pstate);
-
-	return ret;
-}
-
 static int _generic_set_opp_regulator(const struct opp_table *opp_table,
 				      struct device *dev,
 				      unsigned long old_freq,
@@ -635,6 +597,84 @@ restore_voltage:
 	return ret;
 }
 
+/* Fill set_opp_data and call the platform's custom set_opp() helper */
+static int _set_opp_custom(const struct opp_table *opp_table,
+			   struct device *dev, unsigned long old_freq,
+			   unsigned long freq,
+			   struct dev_pm_opp_supply *old_supply,
+			   struct dev_pm_opp_supply *new_supply)
+{
+	struct dev_pm_set_opp_data *data = opp_table->set_opp_data;
+	int size = sizeof(*old_supply) * opp_table->regulator_count;
+
+	data->regulators = opp_table->regulators;
+	data->regulator_count = opp_table->regulator_count;
+	data->clk = opp_table->clk;
+	data->dev = dev;
+
+	/* The caller passes NULL (not an ERR_PTR) when there is no old OPP */
+	data->old_opp.rate = old_freq;
+	if (!old_supply)
+		memset(data->old_opp.supplies, 0, size);
+	else
+		memcpy(data->old_opp.supplies, old_supply, size);
+
+	data->new_opp.rate = freq;
+	memcpy(data->new_opp.supplies, new_supply, size);
+
+	return opp_table->set_opp(data);
+}
+
+/*
+ * Set the performance state of each PM domain required by @opp. Returns 0 or
+ * the first error. This is only called for PM domain for now.
+ */
+static int _set_required_opps(struct device *dev,
+			      struct opp_table *opp_table,
+			      struct dev_pm_opp *opp)
+{
+	struct opp_table **required_opp_tables = opp_table->required_opp_tables;
+	struct device **genpd_virt_devs = opp_table->genpd_virt_devs;
+	unsigned int pstate;
+	int i, ret = 0;
+
+	if (!required_opp_tables)
+		return 0;
+
+	/* Single genpd case */
+	if (!genpd_virt_devs) {
+		pstate = opp->required_opps[0]->pstate;
+		ret = dev_pm_genpd_set_performance_state(dev, pstate);
+		if (ret)
+			dev_err(dev, "Failed to set performance state of %s: %u (%d)\n",
+				dev_name(dev), pstate, ret);
+		return ret;
+	}
+
+	/*
+	 * Multiple genpd case: acquire genpd_virt_dev_lock to make sure we
+	 * don't use a genpd_dev after it is freed from another thread.
+	 */
+	mutex_lock(&opp_table->genpd_virt_dev_lock);
+
+	for (i = 0; i < opp_table->required_opp_count; i++) {
+		pstate = opp->required_opps[i]->pstate;
+
+		if (!genpd_virt_devs[i])
+			continue;
+
+		ret = dev_pm_genpd_set_performance_state(genpd_virt_devs[i], pstate);
+		if (ret) {
+			dev_err(dev, "Failed to set performance state of %s: %u (%d)\n",
+				dev_name(genpd_virt_devs[i]), pstate, ret);
+			break;
+		}
+	}
+	mutex_unlock(&opp_table->genpd_virt_dev_lock);
+
+	return ret;
+}
+
 /**
  * dev_pm_opp_set_rate() - Configure new OPP based on frequency
  * @dev:	 device for which we do this operation
@@ -649,7 +689,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 	unsigned long freq, old_freq;
 	struct dev_pm_opp *old_opp, *opp;
 	struct clk *clk;
-	int ret, size;
+	int ret;
 
 	if (unlikely(!target_freq)) {
 		dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
@@ -702,44 +742,34 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", __func__,
 		old_freq, freq);
 
-	/* Only frequency scaling */
-	if (!opp_table->regulators) {
-		/*
-		 * We don't support devices with both regulator and
-		 * domain performance-state for now.
-		 */
-		if (opp_table->genpd_performance_state)
-			ret = _generic_set_opp_domain(dev, clk, old_freq, freq,
-						      IS_ERR(old_opp) ? 0 : old_opp->pstate,
-						      opp->pstate);
-		else
-			ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq);
-	} else if (!opp_table->set_opp) {
+	/* Scaling up? Configure required OPPs before frequency */
+	if (freq > old_freq) {
+		ret = _set_required_opps(dev, opp_table, opp);
+		if (ret)
+			goto put_opp;
+	}
+
+	if (opp_table->set_opp) {
+		ret = _set_opp_custom(opp_table, dev, old_freq, freq,
+				      IS_ERR(old_opp) ? NULL : old_opp->supplies,
+				      opp->supplies);
+	} else if (opp_table->regulators) {
 		ret = _generic_set_opp_regulator(opp_table, dev, old_freq, freq,
 						 IS_ERR(old_opp) ? NULL : old_opp->supplies,
 						 opp->supplies);
 	} else {
-		struct dev_pm_set_opp_data *data;
-
-		data = opp_table->set_opp_data;
-		data->regulators = opp_table->regulators;
-		data->regulator_count = opp_table->regulator_count;
-		data->clk = clk;
-		data->dev = dev;
-
-		data->old_opp.rate = old_freq;
-		size = sizeof(*opp->supplies) * opp_table->regulator_count;
-		if (IS_ERR(old_opp))
-			memset(data->old_opp.supplies, 0, size);
-		else
-			memcpy(data->old_opp.supplies, old_opp->supplies, size);
-
-		data->new_opp.rate = freq;
-		memcpy(data->new_opp.supplies, opp->supplies, size);
+		/* Only frequency scaling */
+		ret = _generic_set_opp_clk_only(dev, clk, old_freq, freq);
+	}
 
-		ret = opp_table->set_opp(data);
+	/* Scaling down? Configure required OPPs after frequency */
+	if (!ret && freq < old_freq) {
+		ret = _set_required_opps(dev, opp_table, opp);
+		if (ret)
+			dev_err(dev, "Failed to set required opps: %d\n", ret);
 	}
 
+put_opp:
 	dev_pm_opp_put(opp);
 put_old_opp:
 	if (!IS_ERR(old_opp))
@@ -810,8 +840,12 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 		return NULL;
 
 	mutex_init(&opp_table->lock);
+	mutex_init(&opp_table->genpd_virt_dev_lock);
 	INIT_LIST_HEAD(&opp_table->dev_list);
 
+	/* Mark regulator count uninitialized */
+	opp_table->regulator_count = -1;
+
 	opp_dev = _add_opp_dev(dev, opp_table);
 	if (!opp_dev) {
 		kfree(opp_table);
@@ -888,6 +922,8 @@ static void _opp_table_kref_release(struct kref *kref)
 	struct opp_table *opp_table = container_of(kref, struct opp_table, kref);
 	struct opp_device *opp_dev, *temp;
 
+	_of_clear_opp_table(opp_table);
+
 	/* Release clk */
 	if (!IS_ERR(opp_table->clk))
 		clk_put(opp_table->clk);
@@ -905,6 +941,7 @@ static void _opp_table_kref_release(struct kref *kref)
 		_remove_opp_dev(opp_dev, opp_table);
 	}
 
+	mutex_destroy(&opp_table->genpd_virt_dev_lock);
 	mutex_destroy(&opp_table->lock);
 	list_del(&opp_table->node);
 	kfree(opp_table);
@@ -961,6 +998,7 @@ static void _opp_kref_release(struct kref *kref)
 	 * frequency/voltage list.
 	 */
 	blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_REMOVE, opp);
+	_of_opp_free_required_opps(opp_table, opp);
 	opp_debug_remove_one(opp);
 	list_del(&opp->node);
 	kfree(opp);
@@ -1028,7 +1066,7 @@ struct dev_pm_opp *_opp_allocate(struct opp_table *table)
 	int count, supply_size;
 
 	/* Allocate space for at least one supply */
-	count = table->regulator_count ? table->regulator_count : 1;
+	count = table->regulator_count > 0 ? table->regulator_count : 1;
 	supply_size = sizeof(*opp->supplies) * count;
 
 	/* allocate new OPP node and supplies structures */
@@ -1049,6 +1087,9 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 	struct regulator *reg;
 	int i;
 
+	if (!opp_table->regulators)
+		return true;
+
 	for (i = 0; i < opp_table->regulator_count; i++) {
 		reg = opp_table->regulators[i];
 
@@ -1333,7 +1374,7 @@ static int _allocate_set_opp_data(struct opp_table *opp_table)
 	struct dev_pm_set_opp_data *data;
 	int len, count = opp_table->regulator_count;
 
-	if (WARN_ON(!count))
+	if (WARN_ON(!opp_table->regulators))
 		return -EINVAL;
 
 	/* space for set_opp_data */
@@ -1430,7 +1471,7 @@ free_regulators:
 
 	kfree(opp_table->regulators);
 	opp_table->regulators = NULL;
-	opp_table->regulator_count = 0;
+	opp_table->regulator_count = -1;
 err:
 	dev_pm_opp_put_opp_table(opp_table);
 
@@ -1459,7 +1500,7 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
 
 	kfree(opp_table->regulators);
 	opp_table->regulators = NULL;
-	opp_table->regulator_count = 0;
+	opp_table->regulator_count = -1;
 
 put_opp_table:
 	dev_pm_opp_put_opp_table(opp_table);
@@ -1587,6 +1628,155 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table)
 EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper);
 
 /**
+ * dev_pm_opp_set_genpd_virt_dev - Set virtual genpd device for an index
+ * @dev: Consumer device for which the genpd device is getting set.
+ * @virt_dev: virtual genpd device.
+ * @index: index.
+ *
+ * Multiple generic power domains for a device are supported with the help of
+ * virtual genpd devices, which are created for each consumer device - genpd
+ * pair. These are the device structures which are attached to the power domain
+ * and are required by the OPP core to set the performance state of the genpd.
+ *
+ * This helper will normally be called by the consumer driver of the device
+ * "dev", as only that has details of the genpd devices.
+ *
+ * This helper needs to be called once for each of those virtual devices, but
+ * only if multiple domains are available for a device. Otherwise the original
+ * device structure will be used instead by the OPP core.
+ */
+struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev,
+						struct device *virt_dev,
+						int index)
+{
+	struct opp_table *opp_table;
+
+	opp_table = dev_pm_opp_get_opp_table(dev);
+	if (!opp_table)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&opp_table->genpd_virt_dev_lock);
+
+	/* Reject out-of-range indexes and slots which are already taken */
+	if (unlikely(!opp_table->genpd_virt_devs || index < 0 ||
+		     index >= opp_table->required_opp_count ||
+		     opp_table->genpd_virt_devs[index])) {
+		dev_err(dev, "Invalid request to set required device\n");
+		/* Unlock first: the put may free the table and its lock */
+		mutex_unlock(&opp_table->genpd_virt_dev_lock);
+		dev_pm_opp_put_opp_table(opp_table);
+		return ERR_PTR(-EINVAL);
+	}
+
+	opp_table->genpd_virt_devs[index] = virt_dev;
+	mutex_unlock(&opp_table->genpd_virt_dev_lock);
+
+	return opp_table;
+}
+
+/**
+ * dev_pm_opp_put_genpd_virt_dev() - Releases resources blocked for genpd device.
+ * @opp_table: OPP table returned by dev_pm_opp_set_genpd_virt_dev().
+ * @virt_dev: virtual genpd device.
+ *
+ * This releases the resource previously acquired with a call to
+ * dev_pm_opp_set_genpd_virt_dev(). The consumer driver shall call this helper
+ * if it doesn't want OPP core to update performance state of a power domain
+ * anymore.
+ */
+void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table,
+				   struct device *virt_dev)
+{
+	bool found = false;
+	int i;
+
+	/*
+	 * Acquire genpd_virt_dev_lock to make sure virt_dev isn't getting
+	 * used in parallel.
+	 */
+	mutex_lock(&opp_table->genpd_virt_dev_lock);
+	for (i = 0; i < opp_table->required_opp_count && !found; i++) {
+		if (opp_table->genpd_virt_devs[i] != virt_dev)
+			continue;
+
+		opp_table->genpd_virt_devs[i] = NULL;
+		/* Drop the vote */
+		dev_pm_genpd_set_performance_state(virt_dev, 0);
+		found = true;
+	}
+	mutex_unlock(&opp_table->genpd_virt_dev_lock);
+
+	/* Put the table only after unlocking: this may free it and its lock */
+	if (found)
+		dev_pm_opp_put_opp_table(opp_table);
+	else
+		dev_err(virt_dev, "Failed to find required device entry\n");
+}
+
+/**
+ * dev_pm_opp_xlate_performance_state() - Find required OPP's pstate for src_table.
+ * @src_table: OPP table which has dst_table as one of its required OPP table.
+ * @dst_table: Required OPP table of the src_table.
+ * @pstate: Current performance state of the src_table.
+ *
+ * This returns pstate of the OPP (present in @dst_table) pointed out by the
+ * "required-opps" property of the OPP (present in @src_table) which has
+ * performance state set to @pstate.
+ *
+ * Return: Zero or positive performance state on success, otherwise negative
+ * value on errors.
+ */
+int dev_pm_opp_xlate_performance_state(struct opp_table *src_table,
+				       struct opp_table *dst_table,
+				       unsigned int pstate)
+{
+	struct dev_pm_opp *cur;
+	int xlated = -EINVAL;
+	bool matched = false;
+	int idx = 0;
+
+	if (!pstate)
+		return 0;
+
+	/*
+	 * When the genpd and its master map their performance states one to
+	 * one, neither of them has the "required-opps" property set and the
+	 * pstate of the src_table is returned as it is. Otherwise the property
+	 * in the src_table points to one of the OPPs in the dst_table.
+	 */
+	if (!src_table->required_opp_count)
+		return pstate;
+
+	/* Find the index of dst_table among the required tables */
+	while (idx < src_table->required_opp_count &&
+	       src_table->required_opp_tables[idx]->np != dst_table->np)
+		idx++;
+
+	if (unlikely(idx == src_table->required_opp_count)) {
+		pr_err("%s: Couldn't find matching OPP table (%p: %p)\n",
+		       __func__, src_table, dst_table);
+		return -EINVAL;
+	}
+
+	mutex_lock(&src_table->lock);
+
+	list_for_each_entry(cur, &src_table->opp_list, node) {
+		if (cur->pstate != pstate)
+			continue;
+		xlated = cur->required_opps[idx]->pstate;
+		matched = true;
+		break;
+	}
+
+	if (!matched)
+		pr_err("%s: Couldn't find matching OPP (%p: %p)\n", __func__,
+		       src_table, dst_table);
+
+	mutex_unlock(&src_table->lock);
+	return xlated;
+}
+
+/**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
  * @freq:	Frequency in Hz for this OPP
@@ -1612,6 +1802,9 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	if (!opp_table)
 		return -ENOMEM;
 
+	/* Fix regulator count for dynamic OPPs */
+	opp_table->regulator_count = 1;
+
 	ret = _opp_add_v1(opp_table, dev, freq, u_volt, true);
 	if (ret)
 		dev_pm_opp_put_opp_table(opp_table);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 38a08805a30c..06f0f632ec47 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -73,6 +73,167 @@ struct opp_table *_managed_opp(struct device *dev, int index)
 	return managed_table;
 }
 
+/* The caller must call dev_pm_opp_put() on the returned OPP once done */
+static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
+					  struct device_node *opp_np)
+{
+	struct dev_pm_opp *iter, *match = NULL;
+
+	lockdep_assert_held(&opp_table_lock);
+
+	mutex_lock(&opp_table->lock);
+	list_for_each_entry(iter, &opp_table->opp_list, node) {
+		if (iter->np != opp_np)
+			continue;
+
+		/* Take the reference while the list is still locked */
+		dev_pm_opp_get(iter);
+		match = iter;
+		break;
+	}
+	mutex_unlock(&opp_table->lock);
+
+	return match;
+}
+
+/* Resolve phandle @index of @np's "required-opps"; the caller puts the node */
+static struct device_node *of_parse_required_opp(struct device_node *np,
+						 int index)
+{
+	struct device_node *node = of_parse_phandle(np, "required-opps", index);
+
+	if (likely(node))
+		return node;
+
+	pr_err("%s: Unable to parse required-opps: %pOF, index: %d\n",
+	       __func__, np, index);
+	return NULL;
+}
+
+/* The caller must call dev_pm_opp_put_opp_table() on the table returned */
+static struct opp_table *_find_table_of_opp_np(struct device_node *opp_np)
+{
+	struct device_node *parent_np;
+	struct opp_table *table;
+
+	lockdep_assert_held(&opp_table_lock);
+
+	parent_np = of_get_parent(opp_np);
+	if (!parent_np)
+		return ERR_PTR(-ENODEV);
+
+	/* Only the address is compared below, so the node can be put here */
+	of_node_put(parent_np);
+
+	list_for_each_entry(table, &opp_tables, node) {
+		if (table->np != parent_np)
+			continue;
+		_get_opp_table_kref(table);
+		return table;
+	}
+
+	/* No table in opp_tables matches the parent node */
+	return ERR_PTR(-ENODEV);
+}
+
+/* Undo _opp_table_alloc_required_tables(): put tables and free arrays */
+static void _opp_table_free_required_tables(struct opp_table *opp_table)
+{
+	struct opp_table **tables = opp_table->required_opp_tables;
+	struct device **virt_devs = opp_table->genpd_virt_devs;
+	int idx = 0;
+
+	if (!tables)
+		return;
+
+	/* Population stops at the first failure, so only a prefix is valid */
+	while (idx < opp_table->required_opp_count &&
+	       !IS_ERR_OR_NULL(tables[idx])) {
+		dev_pm_opp_put_opp_table(tables[idx]);
+		idx++;
+	}
+
+	kfree(tables);
+	kfree(virt_devs);
+
+	opp_table->required_opp_tables = NULL;
+	opp_table->genpd_virt_devs = NULL;
+	opp_table->required_opp_count = 0;
+}
+
+/*
+ * Populate all devices and opp tables which are part of "required-opps" list.
+ * Checking only the first OPP node should be enough.
+ */
+static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
+					     struct device *dev,
+					     struct device_node *opp_np)
+{
+	struct opp_table **required_opp_tables, *table;
+	struct device **genpd_virt_devs = NULL;
+	struct device_node *required_np, *np;
+	int count, i;
+
+	/* Traversing the first OPP node is all we need */
+	np = of_get_next_available_child(opp_np, NULL);
+	if (!np) {
+		dev_err(dev, "Empty OPP table\n");
+		return;
+	}
+
+	/* A negative errno means the property is missing or malformed */
+	count = of_count_phandle_with_args(np, "required-opps", NULL);
+	if (count <= 0)
+		goto put_np;
+
+	if (count > 1) {
+		genpd_virt_devs = kcalloc(count, sizeof(*genpd_virt_devs),
+					GFP_KERNEL);
+		if (!genpd_virt_devs)
+			goto put_np;
+	}
+
+	required_opp_tables = kcalloc(count, sizeof(*required_opp_tables),
+				      GFP_KERNEL);
+	if (!required_opp_tables) {
+		kfree(genpd_virt_devs);
+		goto put_np;
+	}
+
+	opp_table->genpd_virt_devs = genpd_virt_devs;
+	opp_table->required_opp_tables = required_opp_tables;
+	opp_table->required_opp_count = count;
+
+	for (i = 0; i < count; i++) {
+		required_np = of_parse_required_opp(np, i);
+		if (!required_np)
+			goto free_required_tables;
+
+		table = _find_table_of_opp_np(required_np);
+		required_opp_tables[i] = table;
+
+		/*
+		 * We only support genpd's OPPs in the "required-opps" for now,
+		 * as we don't know much about other cases. Report any other
+		 * table before putting the node, which the message prints.
+		 */
+		if (!IS_ERR(table) && !table->is_genpd)
+			dev_err(dev, "required-opp doesn't belong to genpd: %pOF\n",
+				required_np);
+		of_node_put(required_np);
+
+		if (IS_ERR(table) || !table->is_genpd)
+			goto free_required_tables;
+	}
+
+	goto put_np;
+
+free_required_tables:
+	_opp_table_free_required_tables(opp_table);
+put_np:
+	of_node_put(np);
+}
+
 void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
 			int index)
 {
@@ -92,6 +253,9 @@ void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
 	of_property_read_u32(np, "voltage-tolerance",
 			     &opp_table->voltage_tolerance_v1);
 
+	if (of_find_property(np, "#power-domain-cells", NULL))
+		opp_table->is_genpd = true;
+
 	/* Get OPP table node */
 	opp_np = _opp_of_get_opp_desc_node(np, index);
 	of_node_put(np);
@@ -106,9 +270,86 @@ void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
 
 	opp_table->np = opp_np;
 
+	_opp_table_alloc_required_tables(opp_table, dev, opp_np);
 	of_node_put(opp_np);
 }
 
+/* Drop the required OPP tables held by @opp_table */
+void _of_clear_opp_table(struct opp_table *opp_table)
+{
+	_opp_table_free_required_tables(opp_table);
+}
+
+/* Release everything acquired by _of_opp_alloc_required_opps() for @opp */
+void _of_opp_free_required_opps(struct opp_table *opp_table,
+				struct dev_pm_opp *opp)
+{
+	struct dev_pm_opp **reqs = opp->required_opps;
+	int idx = 0;
+
+	if (!reqs)
+		return;
+
+	/*
+	 * The array is filled in order, so a partially populated one ends at
+	 * the first empty slot.
+	 */
+	while (idx < opp_table->required_opp_count && reqs[idx]) {
+		/* Put the reference back */
+		dev_pm_opp_put(reqs[idx]);
+		idx++;
+	}
+
+	kfree(reqs);
+	opp->required_opps = NULL;
+}
+
+/*
+ * Look up every OPP listed in the "required-opps" property of @opp and store
+ * them, with a reference held, in opp->required_opps.
+ */
+static int _of_opp_alloc_required_opps(struct opp_table *opp_table,
+				       struct dev_pm_opp *opp)
+{
+	int idx, nr_required = opp_table->required_opp_count;
+	struct dev_pm_opp **slots;
+	struct opp_table *req_table;
+	struct device_node *req_np;
+
+	if (!nr_required)
+		return 0;
+
+	slots = kcalloc(nr_required, sizeof(*slots), GFP_KERNEL);
+	if (!slots)
+		return -ENOMEM;
+
+	/* Publish the array first so the error path can free a partial one */
+	opp->required_opps = slots;
+
+	for (idx = 0; idx < nr_required; idx++) {
+		req_table = opp_table->required_opp_tables[idx];
+		req_np = of_parse_required_opp(opp->np, idx);
+		if (unlikely(!req_np))
+			goto err_free;
+
+		slots[idx] = _find_opp_of_np(req_table, req_np);
+		of_node_put(req_np);
+		if (!slots[idx]) {
+			pr_err("%s: Unable to find required OPP node: %pOF (%d)\n",
+			       __func__, opp->np, idx);
+			goto err_free;
+		}
+	}
+
+	return 0;
+
+err_free:
+	/* Every failure inside the loop is reported as -ENODEV */
+	_of_opp_free_required_opps(opp_table, opp);
+
+	return -ENODEV;
+}
+
 static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
 			      struct device_node *np)
 {
@@ -150,12 +391,10 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 			      struct opp_table *opp_table)
 {
 	u32 *microvolt, *microamp = NULL;
-	int supplies, vcount, icount, ret, i, j;
+	int supplies = opp_table->regulator_count, vcount, icount, ret, i, j;
 	struct property *prop = NULL;
 	char name[NAME_MAX];
 
-	supplies = opp_table->regulator_count ? opp_table->regulator_count : 1;
-
 	/* Search for "opp-microvolt-<name>" */
 	if (opp_table->prop_name) {
 		snprintf(name, sizeof(name), "opp-microvolt-%s",
@@ -170,7 +409,13 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 
 		/* Missing property isn't a problem, but an invalid entry is */
 		if (!prop) {
-			if (!opp_table->regulator_count)
+			if (unlikely(supplies == -1)) {
+				/* Initialize regulator_count */
+				opp_table->regulator_count = 0;
+				return 0;
+			}
+
+			if (!supplies)
 				return 0;
 
 			dev_err(dev, "%s: opp-microvolt missing although OPP managing regulators\n",
@@ -179,6 +424,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 		}
 	}
 
+	if (unlikely(supplies == -1)) {
+		/* Initialize regulator_count */
+		supplies = opp_table->regulator_count = 1;
+	} else if (unlikely(!supplies)) {
+		dev_err(dev, "%s: opp-microvolt wasn't expected\n", __func__);
+		return -EINVAL;
+	}
+
 	vcount = of_property_count_u32_elems(opp->np, name);
 	if (vcount < 0) {
 		dev_err(dev, "%s: Invalid %s property (%d)\n",
@@ -326,8 +579,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 	ret = of_property_read_u64(np, "opp-hz", &rate);
 	if (ret < 0) {
 		/* "opp-hz" is optional for devices like power domains. */
-		if (!of_find_property(dev->of_node, "#power-domain-cells",
-				      NULL)) {
+		if (!opp_table->is_genpd) {
 			dev_err(dev, "%s: opp-hz not found\n", __func__);
 			goto free_opp;
 		}
@@ -354,21 +606,26 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 	new_opp->dynamic = false;
 	new_opp->available = true;
 
+	ret = _of_opp_alloc_required_opps(opp_table, new_opp);
+	if (ret)
+		goto free_opp;
+
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 		new_opp->clock_latency_ns = val;
 
-	new_opp->pstate = of_genpd_opp_to_performance_state(dev, np);
-
 	ret = opp_parse_supplies(new_opp, dev, opp_table);
 	if (ret)
-		goto free_opp;
+		goto free_required_opps;
+
+	if (opp_table->is_genpd)
+		new_opp->pstate = pm_genpd_opp_to_performance_state(dev, new_opp);
 
 	ret = _opp_add(dev, new_opp, opp_table, rate_not_available);
 	if (ret) {
 		/* Don't return error for duplicate OPPs */
 		if (ret == -EBUSY)
 			ret = 0;
-		goto free_opp;
+		goto free_required_opps;
 	}
 
 	/* OPP to select on device suspend */
@@ -398,6 +655,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 	blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADD, new_opp);
 	return new_opp;
 
+free_required_opps:
+	_of_opp_free_required_opps(opp_table, new_opp);
 free_opp:
 	_opp_free(new_opp);
 
@@ -727,58 +986,48 @@ put_cpu_node:
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
 
 /**
- * of_dev_pm_opp_find_required_opp() - Search for required OPP.
- * @dev: The device whose OPP node is referenced by the 'np' DT node.
+ * of_get_required_opp_performance_state() - Search for required OPP and return its performance state.
  * @np: Node that contains the "required-opps" property.
+ * @index: Index of the phandle to parse.
  *
- * Returns the OPP of the device 'dev', whose phandle is present in the "np"
- * node. Although the "required-opps" property supports having multiple
- * phandles, this helper routine only parses the very first phandle in the list.
- *
- * Return: Matching opp, else returns ERR_PTR in case of error and should be
- * handled using IS_ERR.
+ * Returns the performance state of the OPP pointed to by the "required-opps"
+ * property at @index in @np.
  *
- * The callers are required to call dev_pm_opp_put() for the returned OPP after
- * use.
+ * Return: Zero or positive performance state on success, otherwise negative
+ * value on errors.
  */
-struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev,
-						   struct device_node *np)
+int of_get_required_opp_performance_state(struct device_node *np, int index)
 {
-	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+	struct dev_pm_opp *opp;
 	struct device_node *required_np;
 	struct opp_table *opp_table;
+	int pstate = -EINVAL;
 
-	opp_table = _find_opp_table(dev);
-	if (IS_ERR(opp_table))
-		return ERR_CAST(opp_table);
+	required_np = of_parse_required_opp(np, index);
+	if (!required_np)
+		return -EINVAL;
 
-	required_np = of_parse_phandle(np, "required-opps", 0);
-	if (unlikely(!required_np)) {
-		dev_err(dev, "Unable to parse required-opps\n");
-		goto put_opp_table;
+	opp_table = _find_table_of_opp_np(required_np);
+	if (IS_ERR(opp_table)) {
+		pr_err("%s: Failed to find required OPP table %pOF: %ld\n",
+		       __func__, np, PTR_ERR(opp_table));
+		goto put_required_np;
 	}
 
-	mutex_lock(&opp_table->lock);
-
-	list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
-		if (temp_opp->available && temp_opp->np == required_np) {
-			opp = temp_opp;
-
-			/* Increment the reference count of OPP */
-			dev_pm_opp_get(opp);
-			break;
-		}
+	opp = _find_opp_of_np(opp_table, required_np);
+	if (opp) {
+		pstate = opp->pstate;
+		dev_pm_opp_put(opp);
 	}
 
-	mutex_unlock(&opp_table->lock);
+	dev_pm_opp_put_opp_table(opp_table);
 
+put_required_np:
 	of_node_put(required_np);
-put_opp_table:
-	dev_pm_opp_put_opp_table(opp_table);
 
-	return opp;
+	return pstate;
 }
-EXPORT_SYMBOL_GPL(of_dev_pm_opp_find_required_opp);
+EXPORT_SYMBOL_GPL(of_get_required_opp_performance_state);
 
 /**
  * dev_pm_opp_get_of_node() - Gets the DT node corresponding to an opp
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 9c6544b4f4f9..e24d81497375 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -63,6 +63,7 @@ extern struct list_head opp_tables;
  * @supplies:	Power supplies voltage/current values
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
+ * @required_opps: List of OPPs that are required by this OPP.
  * @opp_table:	points back to the opp_table struct this opp belongs to
  * @np:		OPP's device node.
  * @dentry:	debugfs dentry pointer (per opp)
@@ -84,6 +85,7 @@ struct dev_pm_opp {
 
 	unsigned long clock_latency_ns;
 
+	struct dev_pm_opp **required_opps;
 	struct opp_table *opp_table;
 
 	struct device_node *np;
@@ -133,13 +135,21 @@ enum opp_table_access {
  * @parsed_static_opps: True if OPPs are initialized from DT.
  * @shared_opp: OPP is shared between multiple devices.
  * @suspend_opp: Pointer to OPP to be used during device suspend.
+ * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
+ * @genpd_virt_devs: List of virtual devices for multiple genpd support.
+ * @required_opp_tables: List of device OPP tables that are required by OPPs in
+ *		this table.
+ * @required_opp_count: Number of required devices.
  * @supported_hw: Array of version number to support.
  * @supported_hw_count: Number of elements in supported_hw array.
  * @prop_name: A name to postfix to many DT properties, while parsing them.
  * @clk: Device's clock handle
  * @regulators: Supply regulators
- * @regulator_count: Number of power supply regulators
+ * @regulator_count: Number of power supply regulators. Its value can be -1
+ * (uninitialized), 0 (no opp-microvolt property) or > 0 (has opp-microvolt
+ * property).
  * @genpd_performance_state: Device's power domain support performance state.
+ * @is_genpd: Marks if the OPP table belongs to a genpd.
  * @set_opp: Platform specific set_opp callback
  * @set_opp_data: Data to be passed to set_opp callback
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
@@ -171,13 +181,19 @@ struct opp_table {
 	enum opp_table_access shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
+	struct mutex genpd_virt_dev_lock;
+	struct device **genpd_virt_devs;
+	struct opp_table **required_opp_tables;
+	unsigned int required_opp_count;
+
 	unsigned int *supported_hw;
 	unsigned int supported_hw_count;
 	const char *prop_name;
 	struct clk *clk;
 	struct regulator **regulators;
-	unsigned int regulator_count;
+	int regulator_count;
 	bool genpd_performance_state;
+	bool is_genpd;
 
 	int (*set_opp)(struct dev_pm_set_opp_data *data);
 	struct dev_pm_set_opp_data *set_opp_data;
@@ -206,10 +222,16 @@ void _put_opp_list_kref(struct opp_table *opp_table);
 
 #ifdef CONFIG_OF
 void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index);
+void _of_clear_opp_table(struct opp_table *opp_table);
 struct opp_table *_managed_opp(struct device *dev, int index);
+void _of_opp_free_required_opps(struct opp_table *opp_table,
+				struct dev_pm_opp *opp);
 #else
 static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index) {}
+static inline void _of_clear_opp_table(struct opp_table *opp_table) {}
 static inline struct opp_table *_managed_opp(struct device *dev, int index) { return NULL; }
+static inline void _of_opp_free_required_opps(struct opp_table *opp_table,
+					      struct dev_pm_opp *opp) {}
 #endif
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index 53d449076dee..ea87d739f534 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -191,7 +191,8 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin,
 		case PIN_CONFIG_BIAS_DISABLE:
 			dev_dbg(pc->dev, "pin %u: disable bias\n", pin);
 
-			meson_calc_reg_and_bit(bank, pin, REG_PULL, &reg, &bit);
+			meson_calc_reg_and_bit(bank, pin, REG_PULLEN, &reg,
+					       &bit);
 			ret = regmap_update_bits(pc->reg_pullen, reg,
 						 BIT(bit), 0);
 			if (ret)
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660.c b/drivers/pinctrl/qcom/pinctrl-sdm660.c
index 6838b38555a1..1bfb0ae6b387 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm660.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm660.c
@@ -33,7 +33,7 @@ enum {
 	}
 
 
-#define PINGROUP(id, base, f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
+#define PINGROUP(id, _tile, f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
 	{					        \
 		.name = "gpio" #id,			\
 		.pins = gpio##id##_pins,		\
@@ -51,11 +51,12 @@ enum {
 			msm_mux_##f9			\
 		},				        \
 		.nfuncs = 10,				\
-		.ctl_reg = base + REG_SIZE * id,	\
-		.io_reg = base + 0x4 + REG_SIZE * id,		\
-		.intr_cfg_reg = base + 0x8 + REG_SIZE * id,		\
-		.intr_status_reg = base + 0xc + REG_SIZE * id,	\
-		.intr_target_reg = base + 0x8 + REG_SIZE * id,	\
+		.ctl_reg = REG_SIZE * id,		\
+		.io_reg = 0x4 + REG_SIZE * id,		\
+		.intr_cfg_reg = 0x8 + REG_SIZE * id,	\
+		.intr_status_reg = 0xc + REG_SIZE * id,	\
+		.intr_target_reg = 0x8 + REG_SIZE * id,	\
+		.tile = _tile,			\
 		.mux_bit = 2,			\
 		.pull_bit = 0,			\
 		.drv_bit = 6,			\
@@ -82,6 +83,7 @@ enum {
 		.intr_cfg_reg = 0,			\
 		.intr_status_reg = 0,			\
 		.intr_target_reg = 0,			\
+		.tile = NORTH,				\
 		.mux_bit = -1,				\
 		.pull_bit = pull,			\
 		.drv_bit = drv,				\
@@ -1397,13 +1399,13 @@ static const struct msm_pingroup sdm660_groups[] = {
 	PINGROUP(111, SOUTH, _, _, _, _, _, _, _, _, _),
 	PINGROUP(112, SOUTH, _, _, _, _, _, _, _, _, _),
 	PINGROUP(113, SOUTH, _, _, _, _, _, _, _, _, _),
-	SDC_QDSD_PINGROUP(sdc1_clk, 0x99a000, 13, 6),
-	SDC_QDSD_PINGROUP(sdc1_cmd, 0x99a000, 11, 3),
-	SDC_QDSD_PINGROUP(sdc1_data, 0x99a000, 9, 0),
-	SDC_QDSD_PINGROUP(sdc2_clk, 0x99b000, 14, 6),
-	SDC_QDSD_PINGROUP(sdc2_cmd, 0x99b000, 11, 3),
-	SDC_QDSD_PINGROUP(sdc2_data, 0x99b000, 9, 0),
-	SDC_QDSD_PINGROUP(sdc1_rclk, 0x99a000, 15, 0),
+	SDC_QDSD_PINGROUP(sdc1_clk, 0x9a000, 13, 6),
+	SDC_QDSD_PINGROUP(sdc1_cmd, 0x9a000, 11, 3),
+	SDC_QDSD_PINGROUP(sdc1_data, 0x9a000, 9, 0),
+	SDC_QDSD_PINGROUP(sdc2_clk, 0x9b000, 14, 6),
+	SDC_QDSD_PINGROUP(sdc2_cmd, 0x9b000, 11, 3),
+	SDC_QDSD_PINGROUP(sdc2_data, 0x9b000, 9, 0),
+	SDC_QDSD_PINGROUP(sdc1_rclk, 0x9a000, 15, 0),
 };
 
 static const struct msm_pinctrl_soc_data sdm660_pinctrl = {
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
index 6624499eae72..4ada80317a3b 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c
@@ -568,7 +568,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = {
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 11),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 1)),	/* PH_EINT11 */
+		  SUNXI_FUNCTION_IRQ_BANK(0x6, 2, 11)),	/* PH_EINT11 */
 };
 
 static const struct sunxi_pinctrl_desc sun8i_a83t_pinctrl_data = {
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index 1360a7fa542c..c96c01e09740 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * OMAP SmartReflex Voltage Control
  *
@@ -11,10 +12,6 @@
  *
  * Copyright (C) 2007 Texas Instruments, Inc.
  * Lesly A M <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -37,7 +34,6 @@
 static LIST_HEAD(sr_list);
 
 static struct omap_sr_class_data *sr_class;
-static struct omap_sr_pmic_data *sr_pmic_data;
 static struct dentry		*sr_dbg_dir;
 
 static inline void sr_write_reg(struct omap_sr *sr, unsigned offset, u32 value)
@@ -780,25 +776,6 @@ void omap_sr_disable_reset_volt(struct voltagedomain *voltdm)
 	sr_class->disable(sr, 1);
 }
 
-/**
- * omap_sr_register_pmic() - API to register pmic specific info.
- * @pmic_data:	The structure containing pmic specific data.
- *
- * This API is to be called from the PMIC specific code to register with
- * smartreflex driver pmic specific info. Currently the only info required
- * is the smartreflex init on the PMIC side.
- */
-void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data)
-{
-	if (!pmic_data) {
-		pr_warn("%s: Trying to register NULL PMIC data structure with smartreflex\n",
-			__func__);
-		return;
-	}
-
-	sr_pmic_data = pmic_data;
-}
-
 /* PM Debug FS entries to enable and disable smartreflex. */
 static int omap_sr_autocomp_show(void *data, u64 *val)
 {
@@ -1010,8 +987,7 @@ static int omap_sr_remove(struct platform_device *pdev)
 
 	if (sr_info->autocomp_active)
 		sr_stop_vddautocomp(sr_info);
-	if (sr_info->dbg_dir)
-		debugfs_remove_recursive(sr_info->dbg_dir);
+	debugfs_remove_recursive(sr_info->dbg_dir);
 
 	pm_runtime_disable(&pdev->dev);
 	list_del(&sr_info->node);
@@ -1065,17 +1041,6 @@ static int __init sr_init(void)
 {
 	int ret = 0;
 
-	/*
-	 * sr_init is a late init. If by then a pmic specific API is not
-	 * registered either there is no need for anything to be done on
-	 * the PMIC side or somebody has forgotten to register a PMIC
-	 * handler. Warn for the second condition.
-	 */
-	if (sr_pmic_data && sr_pmic_data->sr_pmic_init)
-		sr_pmic_data->sr_pmic_init();
-	else
-		pr_warn("%s: No PMIC hook to init smartreflex\n", __func__);
-
 	ret = platform_driver_register(&smartreflex_driver);
 	if (ret) {
 		pr_err("%s: platform driver register failed for SR\n",
diff --git a/drivers/staging/media/sunxi/cedrus/Kconfig b/drivers/staging/media/sunxi/cedrus/Kconfig
index a7a34e89c42d..3252efa422f9 100644
--- a/drivers/staging/media/sunxi/cedrus/Kconfig
+++ b/drivers/staging/media/sunxi/cedrus/Kconfig
@@ -3,6 +3,7 @@ config VIDEO_SUNXI_CEDRUS
 	depends on VIDEO_DEV && VIDEO_V4L2 && MEDIA_CONTROLLER
 	depends on HAS_DMA
 	depends on OF
+	depends on MEDIA_CONTROLLER_REQUEST_API
 	select SUNXI_SRAM
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
index 32adbcbe6175..07520a2ce179 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
@@ -255,10 +255,10 @@ int cedrus_hw_probe(struct cedrus_dev *dev)
 
 	res = platform_get_resource(dev->pdev, IORESOURCE_MEM, 0);
 	dev->base = devm_ioremap_resource(dev->dev, res);
-	if (!dev->base) {
+	if (IS_ERR(dev->base)) {
 		v4l2_err(&dev->v4l2_dev, "Failed to map registers\n");
 
-		ret = -ENOMEM;
+		ret = PTR_ERR(dev->base);
 		goto err_sram;
 	}
 
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index c4111a98f1a7..2d26ae80e202 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -424,7 +424,7 @@ static int hi3660_thermal_probe(struct hisi_thermal_data *data)
 	struct platform_device *pdev = data->pdev;
 	struct device *dev = &pdev->dev;
 
-	data->nr_sensors = 2;
+	data->nr_sensors = 1;
 
 	data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) *
 				    data->nr_sensors, GFP_KERNEL);
@@ -589,7 +589,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
 			return ret;
 		}
 
-		ret = platform_get_irq_byname(pdev, sensor->irq_name);
+		ret = platform_get_irq(pdev, 0);
 		if (ret < 0)
 			return ret;
 
diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c
index 47623da0f91b..bbd73c5a4a4e 100644
--- a/drivers/thermal/st/stm_thermal.c
+++ b/drivers/thermal/st/stm_thermal.c
@@ -241,8 +241,8 @@ static int stm_thermal_read_factory_settings(struct stm_thermal_sensor *sensor)
 		sensor->t0 = TS1_T0_VAL1;
 
 	/* Retrieve fmt0 and put it on Hz */
-	sensor->fmt0 = ADJUST * readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET)
-					      & TS1_FMT0_MASK;
+	sensor->fmt0 = ADJUST * (readl_relaxed(sensor->base +
+				 DTS_T0VALR1_OFFSET) & TS1_FMT0_MASK);
 
 	/* Retrieve ramp coefficient */
 	sensor->ramp_coeff = readl_relaxed(sensor->base + DTS_RAMPVALR_OFFSET) &
@@ -532,6 +532,10 @@ static int stm_thermal_prepare(struct stm_thermal_sensor *sensor)
 	if (ret)
 		return ret;
 
+	ret = stm_thermal_read_factory_settings(sensor);
+	if (ret)
+		goto thermal_unprepare;
+
 	ret = stm_thermal_calibration(sensor);
 	if (ret)
 		goto thermal_unprepare;
@@ -636,10 +640,6 @@ static int stm_thermal_probe(struct platform_device *pdev)
 	/* Populate sensor */
 	sensor->base = base;
 
-	ret = stm_thermal_read_factory_settings(sensor);
-	if (ret)
-		return ret;
-
 	sensor->clk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(sensor->clk)) {
 		dev_err(&pdev->dev, "%s: failed to fetch PCLK clock\n",
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 678b27063198..f9ef0673a083 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -562,7 +562,30 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 		goto err_alloc;
 	}
 
-	if (!data->levels) {
+	if (data->levels) {
+		/*
+		 * For the DT case, data->levels is filled only when brightness
+		 * levels are defined. For the non-DT case, data->levels can
+		 * come from platform data, although that is unusual.
+		 */
+		for (i = 0; i <= data->max_brightness; i++) {
+			if (data->levels[i] > pb->scale)
+				pb->scale = data->levels[i];
+
+			pb->levels = data->levels;
+		}
+	} else if (!data->max_brightness) {
+		/*
+		 * If no brightness levels are provided and max_brightness is
+		 * not set, use the default brightness table. For the DT case,
+		 * max_brightness is set to 0 when brightness levels are not
+		 * specified. For the non-DT case, max_brightness is usually
+		 * set to some value.
+		 */
+
+		/* Get the PWM period (in nanoseconds) */
+		pwm_get_state(pb->pwm, &state);
+
 		ret = pwm_backlight_brightness_default(&pdev->dev, data,
 						       state.period);
 		if (ret < 0) {
@@ -570,13 +593,19 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 				"failed to setup default brightness table\n");
 			goto err_alloc;
 		}
-	}
 
-	for (i = 0; i <= data->max_brightness; i++) {
-		if (data->levels[i] > pb->scale)
-			pb->scale = data->levels[i];
+		for (i = 0; i <= data->max_brightness; i++) {
+			if (data->levels[i] > pb->scale)
+				pb->scale = data->levels[i];
 
-		pb->levels = data->levels;
+			pb->levels = data->levels;
+		}
+	} else {
+		/*
+		 * That only happens for the non-DT case, where platform data
+		 * sets the max_brightness value.
+		 */
+		pb->scale = data->max_brightness;
 	}
 
 	pb->lth_brightness = data->lth_brightness * (state.period / pb->scale);
diff --git a/fs/aio.c b/fs/aio.c
index 97f983592925..aac9659381d2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -45,6 +45,7 @@
 
 #include <asm/kmap_types.h>
 #include <linux/uaccess.h>
+#include <linux/nospec.h>
 
 #include "internal.h"
 
@@ -1038,6 +1039,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	if (!table || id >= table->nr)
 		goto out;
 
+	id = array_index_nospec(id, table->nr);
 	ctx = rcu_dereference(table->table[id]);
 	if (ctx && ctx->user_id == ctx_id) {
 		if (percpu_ref_tryget_live(&ctx->users))
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b5ecd6f50360..4e9a7cc488da 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -563,8 +563,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",noacl");
 #endif
 
-	if (fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM)
-		seq_puts(m, ",nocopyfrom");
+	if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
+		seq_puts(m, ",copyfrom");
 
 	if (fsopt->mds_namespace)
 		seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c005a5400f2e..79a265ba9200 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -42,7 +42,9 @@
 #define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
 #define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
 
-#define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
+#define CEPH_MOUNT_OPT_DEFAULT			\
+	(CEPH_MOUNT_OPT_DCACHE |		\
+	 CEPH_MOUNT_OPT_NOCOPYFROM)
 
 #define ceph_set_mount_opt(fsc, opt) \
 	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 47395b0c3b35..e909678afa2d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1119,8 +1119,10 @@ static int fuse_permission(struct inode *inode, int mask)
 	if (fc->default_permissions ||
 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
 		struct fuse_inode *fi = get_fuse_inode(inode);
+		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
 
-		if (time_before64(fi->i_time, get_jiffies_64())) {
+		if (perm_mask & READ_ONCE(fi->inval_mask) ||
+		    time_before64(fi->i_time, get_jiffies_64())) {
 			refreshed = true;
 
 			err = fuse_perm_getattr(inode, mask);
@@ -1241,7 +1243,7 @@ static int fuse_dir_open(struct inode *inode, struct file *file)
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
 {
-	fuse_release_common(file, FUSE_RELEASEDIR);
+	fuse_release_common(file, true);
 
 	return 0;
 }
@@ -1249,7 +1251,25 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
 			  int datasync)
 {
-	return fuse_fsync_common(file, start, end, datasync, 1);
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	if (fc->no_fsyncdir)
+		return 0;
+
+	inode_lock(inode);
+	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
+	if (err == -ENOSYS) {
+		fc->no_fsyncdir = 1;
+		err = 0;
+	}
+	inode_unlock(inode);
+
+	return err;
 }
 
 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b52f9baaa3e7..ffaffe18352a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -89,12 +89,12 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 	iput(req->misc.release.inode);
 }
 
-static void fuse_file_put(struct fuse_file *ff, bool sync)
+static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 {
 	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
 
-		if (ff->fc->no_open) {
+		if (ff->fc->no_open && !isdir) {
 			/*
 			 * Drop the release request when client does not
 			 * implement 'open'
@@ -247,10 +247,11 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
 	req->in.args[0].value = inarg;
 }
 
-void fuse_release_common(struct file *file, int opcode)
+void fuse_release_common(struct file *file, bool isdir)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_req *req = ff->reserved_req;
+	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 
 	fuse_prepare_release(ff, file->f_flags, opcode);
 
@@ -272,7 +273,7 @@ void fuse_release_common(struct file *file, int opcode)
 	 * synchronous RELEASE is allowed (and desirable) in this case
 	 * because the server can be trusted not to screw up.
 	 */
-	fuse_file_put(ff, ff->fc->destroy_req != NULL);
+	fuse_file_put(ff, ff->fc->destroy_req != NULL, isdir);
 }
 
 static int fuse_open(struct inode *inode, struct file *file)
@@ -288,7 +289,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 	if (fc->writeback_cache)
 		write_inode_now(inode, 1);
 
-	fuse_release_common(file, FUSE_RELEASE);
+	fuse_release_common(file, false);
 
 	/* return value is ignored by VFS */
 	return 0;
@@ -302,7 +303,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags)
 	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
 	 * synchronous, we are fine with not doing igrab() here"
 	 */
-	fuse_file_put(ff, true);
+	fuse_file_put(ff, true, false);
 }
 EXPORT_SYMBOL_GPL(fuse_sync_release);
 
@@ -441,13 +442,30 @@ static int fuse_flush(struct file *file, fl_owner_t id)
 }
 
 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
-		      int datasync, int isdir)
+		      int datasync, int opcode)
 {
 	struct inode *inode = file->f_mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
 	FUSE_ARGS(args);
 	struct fuse_fsync_in inarg;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.fsync_flags = datasync ? 1 : 0;
+	args.in.h.opcode = opcode;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	return fuse_simple_request(fc, &args);
+}
+
+static int fuse_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
 	if (is_bad_inode(inode))
@@ -479,34 +497,18 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
 	if (err)
 		goto out;
 
-	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+	if (fc->no_fsync)
 		goto out;
 
-	memset(&inarg, 0, sizeof(inarg));
-	inarg.fh = ff->fh;
-	inarg.fsync_flags = datasync ? 1 : 0;
-	args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
-	args.in.h.nodeid = get_node_id(inode);
-	args.in.numargs = 1;
-	args.in.args[0].size = sizeof(inarg);
-	args.in.args[0].value = &inarg;
-	err = fuse_simple_request(fc, &args);
+	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
 	if (err == -ENOSYS) {
-		if (isdir)
-			fc->no_fsyncdir = 1;
-		else
-			fc->no_fsync = 1;
+		fc->no_fsync = 1;
 		err = 0;
 	}
 out:
 	inode_unlock(inode);
-	return err;
-}
 
-static int fuse_fsync(struct file *file, loff_t start, loff_t end,
-		      int datasync)
-{
-	return fuse_fsync_common(file, start, end, datasync, 0);
+	return err;
 }
 
 void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -807,7 +809,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
 		put_page(page);
 	}
 	if (req->ff)
-		fuse_file_put(req->ff, false);
+		fuse_file_put(req->ff, false, false);
 }
 
 static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1460,7 +1462,7 @@ static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
 		__free_page(req->pages[i]);
 
 	if (req->ff)
-		fuse_file_put(req->ff, false);
+		fuse_file_put(req->ff, false, false);
 }
 
 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
@@ -1619,7 +1621,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
 	ff = __fuse_write_file_get(fc, fi);
 	err = fuse_flush_times(inode, ff);
 	if (ff)
-		fuse_file_put(ff, 0);
+		fuse_file_put(ff, false, false);
 
 	return err;
 }
@@ -1940,7 +1942,7 @@ static int fuse_writepages(struct address_space *mapping,
 		err = 0;
 	}
 	if (data.ff)
-		fuse_file_put(data.ff, false);
+		fuse_file_put(data.ff, false, false);
 
 	kfree(data.orig_pages);
 out:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e9f712e81c7d..2f2c92e6f8cb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -822,13 +822,13 @@ void fuse_sync_release(struct fuse_file *ff, int flags);
 /**
  * Send RELEASE or RELEASEDIR request
  */
-void fuse_release_common(struct file *file, int opcode);
+void fuse_release_common(struct file *file, bool isdir);
 
 /**
  * Send FSYNC or FSYNCDIR request
  */
 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
-		      int datasync, int isdir);
+		      int datasync, int opcode);
 
 /**
  * Notify poll wakeup
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0b94b23b02d4..568abed20eb2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -115,7 +115,7 @@ static void fuse_i_callback(struct rcu_head *head)
 static void fuse_destroy_inode(struct inode *inode)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	if (S_ISREG(inode->i_mode)) {
+	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
 		WARN_ON(!list_empty(&fi->write_files));
 		WARN_ON(!list_empty(&fi->queued_writes));
 	}
@@ -1068,6 +1068,7 @@ void fuse_dev_free(struct fuse_dev *fud)
 
 		fuse_conn_put(fc);
 	}
+	kfree(fud->pq.processing);
 	kfree(fud);
 }
 EXPORT_SYMBOL_GPL(fuse_dev_free);
diff --git a/fs/iomap.c b/fs/iomap.c
index d6bc98ae8d35..5bc172f3dfe8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page)
 	atomic_set(&iop->read_count, 0);
 	atomic_set(&iop->write_count, 0);
 	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+
+	/*
+	 * migrate_page_move_mapping() assumes that pages with private data have
+	 * their count elevated by 1.
+	 */
+	get_page(page);
 	set_page_private(page, (unsigned long)iop);
 	SetPagePrivate(page);
 	return iop;
@@ -132,6 +138,7 @@ iomap_page_release(struct page *page)
 	WARN_ON_ONCE(atomic_read(&iop->write_count));
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
+	put_page(page);
 	kfree(iop);
 }
 
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index c6289147c787..82c129bfe58d 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -651,6 +651,18 @@ static int ovl_symlink(struct inode *dir, struct dentry *dentry,
 	return ovl_create_object(dentry, S_IFLNK, 0, link);
 }
 
+static int ovl_set_link_redirect(struct dentry *dentry)
+{
+	const struct cred *old_cred;
+	int err;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	err = ovl_set_redirect(dentry, false);
+	revert_creds(old_cred);
+
+	return err;
+}
+
 static int ovl_link(struct dentry *old, struct inode *newdir,
 		    struct dentry *new)
 {
@@ -670,7 +682,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 		goto out_drop_write;
 
 	if (ovl_is_metacopy_dentry(old)) {
-		err = ovl_set_redirect(old, false);
+		err = ovl_set_link_redirect(old);
 		if (err)
 			goto out_drop_write;
 	}
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 8fa37cd7818a..54e5d17d7f3e 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -754,9 +754,8 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 		goto out;
 	}
 
-	/* Otherwise, get a connected non-upper dir or disconnected non-dir */
-	if (d_is_dir(origin.dentry) &&
-	    (origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+	/* Find origin.dentry again with ovl_acceptable() layer check */
+	if (d_is_dir(origin.dentry)) {
 		dput(origin.dentry);
 		origin.dentry = NULL;
 		err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
@@ -769,6 +768,7 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 			goto out_err;
 	}
 
+	/* Get a connected non-upper dir or disconnected non-dir */
 	dentry = ovl_get_dentry(sb, NULL, &origin, index);
 
 out:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 6bcc9dedc342..3b7ed5d2279c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -286,22 +286,13 @@ int ovl_permission(struct inode *inode, int mask)
 	if (err)
 		return err;
 
-	/* No need to do any access on underlying for special files */
-	if (special_file(realinode->i_mode))
-		return 0;
-
-	/* No need to access underlying for execute */
-	mask &= ~MAY_EXEC;
-	if ((mask & (MAY_READ | MAY_WRITE)) == 0)
-		return 0;
-
-	/* Lower files get copied up, so turn write access into read */
-	if (!upperinode && mask & MAY_WRITE) {
+	old_cred = ovl_override_creds(inode->i_sb);
+	if (!upperinode &&
+	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
 		mask &= ~(MAY_WRITE | MAY_APPEND);
+		/* Make sure mounter can read file for copy up later */
 		mask |= MAY_READ;
 	}
-
-	old_cred = ovl_override_creds(inode->i_sb);
 	err = inode_permission(realinode, mask);
 	revert_creds(old_cred);
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index cd58939dc977..7a85e609fc27 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1566,7 +1566,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		cond_resched();
 
 		BUG_ON(!vma_can_userfault(vma));
-		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
@@ -1575,6 +1574,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		if (!vma->vm_userfaultfd_ctx.ctx)
 			goto skip;
 
+		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
+
 		if (vma->vm_start > start)
 			start = vma->vm_start;
 		vma_end = min(end, vma->vm_end);
diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 827e4d3bbc7a..8cc7b09c1bc7 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -16,6 +16,7 @@
 #define __ASM_GENERIC_FIXMAP_H
 
 #include <linux/bug.h>
+#include <linux/mm_types.h>
 
 #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index faed7a8977e8..4dff74f48d4b 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -33,6 +33,8 @@ struct cpuidle_state_usage {
 	unsigned long long	disable;
 	unsigned long long	usage;
 	unsigned long long	time; /* in US */
+	unsigned long long	above; /* Number of times it's been too deep */
+	unsigned long long	below; /* Number of times it's been too shallow */
 #ifdef CONFIG_SUSPEND
 	unsigned long long	s2idle_usage;
 	unsigned long long	s2idle_time; /* in US */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..2c471a2c43fa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -206,6 +206,11 @@ struct page {
 #endif
 } _struct_page_alignment;
 
+/*
+ * Used for sizing the vmemmap region on some architectures
+ */
+#define STRUCT_PAGE_MAX_SHIFT	(order_base_2(sizeof(struct page)))
+
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 847705a6d0ec..db023a92f3a4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -783,6 +783,12 @@ void memory_present(int nid, unsigned long start, unsigned long end);
 static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
 #endif
 
+#if defined(CONFIG_SPARSEMEM)
+void memblocks_present(void);
+#else
+static inline void memblocks_present(void) {}
+#endif
+
 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
 int local_memory_node(int node_id);
 #else
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e723b78d8357..0bd9de116826 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -26,6 +26,7 @@
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/timer.h>
+#include <linux/hrtimer.h>
 #include <linux/completion.h>
 
 /*
@@ -608,7 +609,7 @@ struct dev_pm_info {
 	unsigned int		should_wakeup:1;
 #endif
 #ifdef CONFIG_PM
-	struct timer_list	suspend_timer;
+	struct hrtimer		suspend_timer;
 	unsigned long		timer_expires;
 	struct work_struct	work;
 	wait_queue_head_t	wait_queue;
@@ -631,7 +632,7 @@ struct dev_pm_info {
 	enum rpm_status		runtime_status;
 	int			runtime_error;
 	int			autosuspend_delay;
-	unsigned long		last_busy;
+	u64			last_busy;
 	unsigned long		active_jiffies;
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 3b5d7280e52e..dd364abb649a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -73,6 +73,7 @@ struct genpd_power_state {
 
 struct genpd_lock_ops;
 struct dev_pm_opp;
+struct opp_table;
 
 struct generic_pm_domain {
 	struct device dev;
@@ -94,6 +95,7 @@ struct generic_pm_domain {
 	unsigned int performance_state;	/* Aggregated max performance state */
 	int (*power_off)(struct generic_pm_domain *domain);
 	int (*power_on)(struct generic_pm_domain *domain);
+	struct opp_table *opp_table;	/* OPP table of the genpd */
 	unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,
 						 struct dev_pm_opp *opp);
 	int (*set_performance_state)(struct generic_pm_domain *genpd,
@@ -134,6 +136,10 @@ struct gpd_link {
 	struct list_head master_node;
 	struct generic_pm_domain *slave;
 	struct list_head slave_node;
+
+	/* Sub-domain's per-master domain performance state */
+	unsigned int performance_state;
+	unsigned int prev_performance_state;
 };
 
 struct gpd_timing_data {
@@ -258,8 +264,8 @@ int of_genpd_add_subdomain(struct of_phandle_args *parent,
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
 int of_genpd_parse_idle_states(struct device_node *dn,
 			       struct genpd_power_state **states, int *n);
-unsigned int of_genpd_opp_to_performance_state(struct device *dev,
-				struct device_node *np);
+unsigned int pm_genpd_opp_to_performance_state(struct device *genpd_dev,
+					       struct dev_pm_opp *opp);
 
 int genpd_dev_pm_attach(struct device *dev);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
@@ -300,8 +306,8 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn,
 }
 
 static inline unsigned int
-of_genpd_opp_to_performance_state(struct device *dev,
-				  struct device_node *np)
+pm_genpd_opp_to_performance_state(struct device *genpd_dev,
+				  struct dev_pm_opp *opp)
 {
 	return 0;
 }
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5d399eeef172..0a2a88e5a383 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -126,6 +126,9 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name);
 void dev_pm_opp_put_clkname(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
 void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table);
+struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index);
+void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev);
+int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -272,6 +275,18 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const
 
 static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {}
 
+static inline struct opp_table *dev_pm_opp_set_genpd_virt_dev(struct device *dev, struct device *virt_dev, int index)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static inline void dev_pm_opp_put_genpd_virt_dev(struct opp_table *opp_table, struct device *virt_dev) {}
+
+static inline int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate)
+{
+	return -ENOTSUPP;
+}
+
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	return -ENOTSUPP;
@@ -305,8 +320,8 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
-struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np);
 struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp);
+int of_get_required_opp_performance_state(struct device_node *np, int index);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -341,13 +356,13 @@ static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device
 	return NULL;
 }
 
-static inline struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np)
+static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
 {
 	return NULL;
 }
-static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
+static inline int of_get_required_opp_performance_state(struct device_node *np, int index)
 {
-	return NULL;
+	return -ENOTSUPP;
 }
 #endif
 
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index f0fc4700b6ff..54af4eef169f 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -51,7 +51,7 @@ extern void pm_runtime_no_callbacks(struct device *dev);
 extern void pm_runtime_irq_safe(struct device *dev);
 extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
 extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
-extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
+extern u64 pm_runtime_autosuspend_expiration(struct device *dev);
 extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
@@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev)
 
 static inline void pm_runtime_mark_last_busy(struct device *dev)
 {
-	WRITE_ONCE(dev->power.last_busy, jiffies);
+	WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get()));
 }
 
 static inline bool pm_runtime_is_irq_safe(struct device *dev)
@@ -168,7 +168,7 @@ static inline void __pm_runtime_use_autosuspend(struct device *dev,
 						bool use) {}
 static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
 						int delay) {}
-static inline unsigned long pm_runtime_autosuspend_expiration(
+static inline u64 pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 						bool enable){}
diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h
index 7b81dad712de..d0b37e937037 100644
--- a/include/linux/power/smartreflex.h
+++ b/include/linux/power/smartreflex.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * OMAP Smartreflex Defines and Routines
  *
@@ -11,10 +12,6 @@
  *
  * Copyright (C) 2007 Texas Instruments, Inc.
  * Lesly A M <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __POWER_SMARTREFLEX_H
@@ -303,9 +300,6 @@ void omap_sr_enable(struct voltagedomain *voltdm);
 void omap_sr_disable(struct voltagedomain *voltdm);
 void omap_sr_disable_reset_volt(struct voltagedomain *voltdm);
 
-/* API to register the pmic specific data with the smartreflex driver. */
-void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data);
-
 /* Smartreflex driver hooks to be called from Smartreflex class driver */
 int sr_enable(struct omap_sr *sr, unsigned long volt);
 void sr_disable(struct omap_sr *sr);
@@ -320,7 +314,5 @@ static inline void omap_sr_enable(struct voltagedomain *voltdm) {}
 static inline void omap_sr_disable(struct voltagedomain *voltdm) {}
 static inline void omap_sr_disable_reset_volt(
 		struct voltagedomain *voltdm) {}
-static inline void omap_sr_register_pmic(
-		struct omap_sr_pmic_data *pmic_data) {}
 #endif
 #endif
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 564892e19f8c..f492e21c4aa2 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -554,6 +554,60 @@ static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index,
 }
 
 /**
+ * xa_cmpxchg_bh() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_cmpxchg() except it disables softirqs
+ * while holding the array lock.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.  May sleep if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+static inline void *xa_cmpxchg_bh(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_bh(xa);
+	curr = __xa_cmpxchg(xa, index, old, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return curr;
+}
+
+/**
+ * xa_cmpxchg_irq() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_cmpxchg() except it disables interrupts
+ * while holding the array lock.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.  May sleep if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_irq(xa);
+	curr = __xa_cmpxchg(xa, index, old, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return curr;
+}
+
+/**
  * xa_insert() - Store this entry in the XArray unless another entry is
  *			already present.
  * @xa: XArray.
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
new file mode 100644
index 000000000000..d21f40edc09e
--- /dev/null
+++ b/include/media/mpeg2-ctrls.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * These are the MPEG2 state controls for use with stateless MPEG-2
+ * codec drivers.
+ *
+ * It turns out that these structs are not stable yet and will undergo
+ * more changes. So keep them private until they are stable and ready to
+ * become part of the official public API.
+ */
+
+#ifndef _MPEG2_CTRLS_H_
+#define _MPEG2_CTRLS_H_
+
+#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_MPEG_BASE+250)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION		(V4L2_CID_MPEG_BASE+251)
+
+/* enum v4l2_ctrl_type type values */
+#define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
+#define	V4L2_CTRL_TYPE_MPEG2_QUANTIZATION 0x0104
+
+#define V4L2_MPEG2_PICTURE_CODING_TYPE_I	1
+#define V4L2_MPEG2_PICTURE_CODING_TYPE_P	2
+#define V4L2_MPEG2_PICTURE_CODING_TYPE_B	3
+#define V4L2_MPEG2_PICTURE_CODING_TYPE_D	4
+
+struct v4l2_mpeg2_sequence {
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */
+	__u16	horizontal_size;
+	__u16	vertical_size;
+	__u32	vbv_buffer_size;
+
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
+	__u8	profile_and_level_indication;
+	__u8	progressive_sequence;
+	__u8	chroma_format;
+	__u8	pad;
+};
+
+struct v4l2_mpeg2_picture {
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */
+	__u8	picture_coding_type;
+
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture coding extension */
+	__u8	f_code[2][2];
+	__u8	intra_dc_precision;
+	__u8	picture_structure;
+	__u8	top_field_first;
+	__u8	frame_pred_frame_dct;
+	__u8	concealment_motion_vectors;
+	__u8	q_scale_type;
+	__u8	intra_vlc_format;
+	__u8	alternate_scan;
+	__u8	repeat_first_field;
+	__u8	progressive_frame;
+	__u8	pad;
+};
+
+struct v4l2_ctrl_mpeg2_slice_params {
+	__u32	bit_size;
+	__u32	data_bit_offset;
+
+	struct v4l2_mpeg2_sequence sequence;
+	struct v4l2_mpeg2_picture picture;
+
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Slice */
+	__u8	quantiser_scale_code;
+
+	__u8	backward_ref_index;
+	__u8	forward_ref_index;
+	__u8	pad;
+};
+
+struct v4l2_ctrl_mpeg2_quantization {
+	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
+	__u8	load_intra_quantiser_matrix;
+	__u8	load_non_intra_quantiser_matrix;
+	__u8	load_chroma_intra_quantiser_matrix;
+	__u8	load_chroma_non_intra_quantiser_matrix;
+
+	__u8	intra_quantiser_matrix[64];
+	__u8	non_intra_quantiser_matrix[64];
+	__u8	chroma_intra_quantiser_matrix[64];
+	__u8	chroma_non_intra_quantiser_matrix[64];
+};
+
+#endif
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 83ce0593b275..d63cf227b0ab 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -22,6 +22,12 @@
 #include <linux/videodev2.h>
 #include <media/media-request.h>
 
+/*
+ * Include the mpeg2 stateless codec compound control definitions.
+ * This will move to the public headers once this API is fully stable.
+ */
+#include <media/mpeg2-ctrls.h>
+
 /* forward references */
 struct file;
 struct v4l2_ctrl_handler;
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index e86981d615ae..4a737b2c610b 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -239,6 +239,7 @@ struct vb2_queue;
  * @num_planes:		number of planes in the buffer
  *			on an internal driver queue.
  * @timestamp:		frame timestamp in ns.
+ * @request:		the request this buffer is associated with.
  * @req_obj:		used to bind this buffer to a request. This
  *			request object has a refcount.
  */
@@ -249,6 +250,7 @@ struct vb2_buffer {
 	unsigned int		memory;
 	unsigned int		num_planes;
 	u64			timestamp;
+	struct media_request	*request;
 	struct media_request_object	req_obj;
 
 	/* private: internal use only
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 3eb5a4c3d60a..ae366b87426a 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -752,6 +752,15 @@
 
 #define ABS_MISC		0x28
 
+/*
+ * 0x2e is reserved and should not be used in input drivers.
+ * It was used by HID as ABS_MISC+6 and userspace needs to detect if
+ * the next ABS_* event is correct or is just ABS_MISC + n.
+ * We define here ABS_RESERVED so userspace can rely on it and detect
+ * the situation described above.
+ */
+#define ABS_RESERVED		0x2e
+
 #define ABS_MT_SLOT		0x2f	/* MT slot being modified */
 #define ABS_MT_TOUCH_MAJOR	0x30	/* Major axis of touching ellipse */
 #define ABS_MT_TOUCH_MINOR	0x31	/* Minor axis (omit if circular) */
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 998983a6e6b7..3dcfc6148f99 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -404,9 +404,6 @@ enum v4l2_mpeg_video_multi_slice_mode {
 #define V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE		(V4L2_CID_MPEG_BASE+228)
 #define V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME		(V4L2_CID_MPEG_BASE+229)
 
-#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_MPEG_BASE+250)
-#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION		(V4L2_CID_MPEG_BASE+251)
-
 #define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
 #define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
 #define V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP		(V4L2_CID_MPEG_BASE+302)
@@ -1097,69 +1094,4 @@ enum v4l2_detect_md_mode {
 #define V4L2_CID_DETECT_MD_THRESHOLD_GRID	(V4L2_CID_DETECT_CLASS_BASE + 3)
 #define V4L2_CID_DETECT_MD_REGION_GRID		(V4L2_CID_DETECT_CLASS_BASE + 4)
 
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_I	1
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_P	2
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_B	3
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_D	4
-
-struct v4l2_mpeg2_sequence {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */
-	__u16	horizontal_size;
-	__u16	vertical_size;
-	__u32	vbv_buffer_size;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
-	__u8	profile_and_level_indication;
-	__u8	progressive_sequence;
-	__u8	chroma_format;
-	__u8	pad;
-};
-
-struct v4l2_mpeg2_picture {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */
-	__u8	picture_coding_type;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture coding extension */
-	__u8	f_code[2][2];
-	__u8	intra_dc_precision;
-	__u8	picture_structure;
-	__u8	top_field_first;
-	__u8	frame_pred_frame_dct;
-	__u8	concealment_motion_vectors;
-	__u8	q_scale_type;
-	__u8	intra_vlc_format;
-	__u8	alternate_scan;
-	__u8	repeat_first_field;
-	__u8	progressive_frame;
-	__u8	pad;
-};
-
-struct v4l2_ctrl_mpeg2_slice_params {
-	__u32	bit_size;
-	__u32	data_bit_offset;
-
-	struct v4l2_mpeg2_sequence sequence;
-	struct v4l2_mpeg2_picture picture;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Slice */
-	__u8	quantiser_scale_code;
-
-	__u8	backward_ref_index;
-	__u8	forward_ref_index;
-	__u8	pad;
-};
-
-struct v4l2_ctrl_mpeg2_quantization {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
-	__u8	load_intra_quantiser_matrix;
-	__u8	load_non_intra_quantiser_matrix;
-	__u8	load_chroma_intra_quantiser_matrix;
-	__u8	load_chroma_non_intra_quantiser_matrix;
-
-	__u8	intra_quantiser_matrix[64];
-	__u8	non_intra_quantiser_matrix[64];
-	__u8	chroma_intra_quantiser_matrix[64];
-	__u8	chroma_non_intra_quantiser_matrix[64];
-};
-
 #endif
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c8e8ff810190..2ba2ad0e23fb 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1622,8 +1622,6 @@ struct v4l2_ext_control {
 		__u8 __user *p_u8;
 		__u16 __user *p_u16;
 		__u32 __user *p_u32;
-		struct v4l2_ctrl_mpeg2_slice_params __user *p_mpeg2_slice_params;
-		struct v4l2_ctrl_mpeg2_quantization __user *p_mpeg2_quantization;
 		void __user *ptr;
 	};
 } __attribute__ ((packed));
@@ -1669,8 +1667,6 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_U8	     = 0x0100,
 	V4L2_CTRL_TYPE_U16	     = 0x0101,
 	V4L2_CTRL_TYPE_U32	     = 0x0102,
-	V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS = 0x0103,
-	V4L2_CTRL_TYPE_MPEG2_QUANTIZATION = 0x0104,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
diff --git a/init/Kconfig b/init/Kconfig
index cf5b5a0dcbc2..ed9352513c32 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -515,8 +515,8 @@ config PSI_DEFAULT_DISABLED
 	depends on PSI
 	help
 	  If set, pressure stall information tracking will be disabled
-	  per default but can be enabled through passing psi_enable=1
-	  on the kernel commandline during boot.
+	  by default but can be enabled by passing psi=1 on the
+	  kernel command line during boot.
 
 endmenu # "CPU/Task time and stats accounting"
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 35b50823d83b..98e76cad128b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -318,23 +318,12 @@ static int suspend_stats_show(struct seq_file *s, void *unused)
 
 	return 0;
 }
-
-static int suspend_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, suspend_stats_show, NULL);
-}
-
-static const struct file_operations suspend_stats_operations = {
-	.open           = suspend_stats_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(suspend_stats);
 
 static int __init pm_debugfs_init(void)
 {
 	debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO,
-			NULL, NULL, &suspend_stats_operations);
+			NULL, NULL, &suspend_stats_fops);
 	return 0;
 }
 
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 86d72ffb811b..b7a82502857a 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -184,7 +184,7 @@ static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value)
 	c->target_value = value;
 }
 
-static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused)
+static int pm_qos_debug_show(struct seq_file *s, void *unused)
 {
 	struct pm_qos_object *qos = (struct pm_qos_object *)s->private;
 	struct pm_qos_constraints *c;
@@ -245,18 +245,7 @@ out:
 	return 0;
 }
 
-static int pm_qos_dbg_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pm_qos_dbg_show_requests,
-			   inode->i_private);
-}
-
-static const struct file_operations pm_qos_debug_fops = {
-	.open           = pm_qos_dbg_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(pm_qos_debug);
 
 /**
  * pm_qos_update_target - manages the constraints list and calls the notifiers
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 5e54cbcae673..22bd8980f32f 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Scheduler code and data structures related to cpufreq.
  *
  * Copyright (C) 2016, Intel Corporation
  * Author: Rafael J. Wysocki <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include "sched.h"
 
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3fffad3bc8a8..626ddd4ffa43 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * CPUFreq governor based on scheduler-provided CPU utilization data.
  *
  * Copyright (C) 2016, Intel Corporation
  * Author: Rafael J. Wysocki <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 77734451cb05..e23eb9fc77aa 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5460,6 +5460,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
 	if (ops->flags & FTRACE_OPS_FL_ENABLED)
 		ftrace_shutdown(ops, 0);
 	ops->flags |= FTRACE_OPS_FL_DELETED;
+	ftrace_free_filter(ops);
 	mutex_unlock(&ftrace_lock);
 }
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 84a65173b1e9..5574e862de8d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -570,11 +570,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
 		}
 	}
 
+	kfree(op_stack);
+	kfree(inverts);
 	return prog;
 out_free:
 	kfree(op_stack);
-	kfree(prog_stack);
 	kfree(inverts);
+	kfree(prog_stack);
 	return ERR_PTR(ret);
 }
 
@@ -1718,6 +1720,7 @@ static int create_filter(struct trace_event_call *call,
 	err = process_preds(call, filter_string, *filterp, pe);
 	if (err && set_str)
 		append_filter_err(pe, *filterp);
+	create_filter_finish(pe);
 
 	return err;
 }
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 2152d1e530cb..cd12ecb66eb9 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -732,8 +732,10 @@ int set_trigger_filter(char *filter_str,
 
 	/* The filter is for the 'trigger' event, not the triggered event */
 	ret = create_event_filter(file->event_call, filter_str, false, &filter);
-	if (ret)
-		goto out;
+	/*
+	 * Even if create_event_filter() fails, the filter still needs to be
+	 * freed; the calling code will do that through data->filter.
+	 */
  assign:
 	tmp = rcu_access_pointer(data->filter);
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1106bb6aa01e..14d51548bea6 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -784,11 +784,11 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
 	while (radix_tree_is_internal_node(node)) {
 		unsigned offset;
 
-		if (node == RADIX_TREE_RETRY)
-			goto restart;
 		parent = entry_to_node(node);
 		offset = radix_tree_descend(parent, &node, index);
 		slot = parent->slots + offset;
+		if (node == RADIX_TREE_RETRY)
+			goto restart;
 		if (parent->shift == 0)
 			break;
 	}
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 0598e86af8fc..4676c0a1eeca 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -28,23 +28,28 @@ void xa_dump(const struct xarray *xa) { }
 } while (0)
 #endif
 
+static void *xa_mk_index(unsigned long index)
+{
+	return xa_mk_value(index & LONG_MAX);
+}
+
 static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
+	return xa_store(xa, index, xa_mk_index(index), gfp);
 }
 
 static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	u32 id = 0;
 
-	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index),
 				gfp) != 0);
 	XA_BUG_ON(xa, id != index);
 }
 
 static void xa_erase_index(struct xarray *xa, unsigned long index)
 {
-	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
+	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index));
 	XA_BUG_ON(xa, xa_load(xa, index) != NULL);
 }
 
@@ -118,7 +123,7 @@ static noinline void check_xas_retry(struct xarray *xa)
 
 	xas_set(&xas, 0);
 	xas_for_each(&xas, entry, ULONG_MAX) {
-		xas_store(&xas, xa_mk_value(xas.xa_index));
+		xas_store(&xas, xa_mk_index(xas.xa_index));
 	}
 	xas_unlock(&xas);
 
@@ -196,7 +201,7 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 		XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL));
 		xa_set_mark(xa, index + 2, XA_MARK_1);
 		XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL));
-		xa_store_order(xa, index, order, xa_mk_value(index),
+		xa_store_order(xa, index, order, xa_mk_index(index),
 				GFP_KERNEL);
 		for (i = base; i < next; i++) {
 			XA_STATE(xas, xa, i);
@@ -405,7 +410,7 @@ static noinline void check_xas_erase(struct xarray *xa)
 			xas_set(&xas, j);
 			do {
 				xas_lock(&xas);
-				xas_store(&xas, xa_mk_value(j));
+				xas_store(&xas, xa_mk_index(j));
 				xas_unlock(&xas);
 			} while (xas_nomem(&xas, GFP_KERNEL));
 		}
@@ -423,7 +428,7 @@ static noinline void check_xas_erase(struct xarray *xa)
 		xas_set(&xas, 0);
 		j = i;
 		xas_for_each(&xas, entry, ULONG_MAX) {
-			XA_BUG_ON(xa, entry != xa_mk_value(j));
+			XA_BUG_ON(xa, entry != xa_mk_index(j));
 			xas_store(&xas, NULL);
 			j++;
 		}
@@ -440,17 +445,17 @@ static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
 	unsigned long min = index & ~((1UL << order) - 1);
 	unsigned long max = min + (1UL << order);
 
-	xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL);
-	XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(index));
-	XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(index));
+	xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL);
+	XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_index(index));
+	XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_index(index));
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
 
 	xas_lock(&xas);
-	XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index));
+	XA_BUG_ON(xa, xas_store(&xas, xa_mk_index(min)) != xa_mk_index(index));
 	xas_unlock(&xas);
-	XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min));
-	XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min));
+	XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_index(min));
+	XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_index(min));
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
 
@@ -471,6 +476,32 @@ static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
 	xas_unlock(&xas);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
+
+static noinline void check_multi_store_3(struct xarray *xa, unsigned long index,
+		unsigned int order)
+{
+	XA_STATE(xas, xa, 0);
+	void *entry;
+	int n = 0;	/* entries seen across both walks below */
+
+	xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL);
+
+	xas_lock(&xas);
+	xas_for_each(&xas, entry, ULONG_MAX) {	/* walk from index 0 */
+		XA_BUG_ON(xa, entry != xa_mk_index(index));
+		n++;
+	}
+	XA_BUG_ON(xa, n != 1);	/* the one stored entry is visited once */
+	xas_set(&xas, index + 1);	/* restart the walk at index + 1 */
+	xas_for_each(&xas, entry, ULONG_MAX) {
+		XA_BUG_ON(xa, entry != xa_mk_index(index));
+		n++;
+	}
+	XA_BUG_ON(xa, n != 2);	/* and exactly once more by the second walk */
+	xas_unlock(&xas);
+
+	xa_destroy(xa);
+}
 #endif
 
 static noinline void check_multi_store(struct xarray *xa)
@@ -523,15 +554,15 @@ static noinline void check_multi_store(struct xarray *xa)
 
 	for (i = 0; i < max_order; i++) {
 		for (j = 0; j < max_order; j++) {
-			xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL);
-			xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL);
+			xa_store_order(xa, 0, i, xa_mk_index(i), GFP_KERNEL);
+			xa_store_order(xa, 0, j, xa_mk_index(j), GFP_KERNEL);
 
 			for (k = 0; k < max_order; k++) {
 				void *entry = xa_load(xa, (1UL << k) - 1);
 				if ((i < k) && (j < k))
 					XA_BUG_ON(xa, entry != NULL);
 				else
-					XA_BUG_ON(xa, entry != xa_mk_value(j));
+					XA_BUG_ON(xa, entry != xa_mk_index(j));
 			}
 
 			xa_erase(xa, 0);
@@ -545,6 +576,11 @@ static noinline void check_multi_store(struct xarray *xa)
 		check_multi_store_1(xa, (1UL << i) + 1, i);
 	}
 	check_multi_store_2(xa, 4095, 9);
+
+	for (i = 1; i < 20; i++) {
+		check_multi_store_3(xa, 0, i);
+		check_multi_store_3(xa, 1UL << i, i);
+	}
 #endif
 }
 
@@ -587,16 +623,25 @@ static noinline void check_xa_alloc(void)
 	xa_destroy(&xa0);
 
 	id = 0xfffffffeU;
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != 0);
 	XA_BUG_ON(&xa0, id != 0xfffffffeU);
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != 0);
 	XA_BUG_ON(&xa0, id != 0xffffffffU);
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != -ENOSPC);
 	XA_BUG_ON(&xa0, id != 0xffffffffU);
 	xa_destroy(&xa0);
+
+	id = 10;
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id),
+				GFP_KERNEL) != -ENOSPC);
+	XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0);
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id),
+				GFP_KERNEL) != -ENOSPC);
+	xa_erase_index(&xa0, 3);
+	XA_BUG_ON(&xa0, !xa_empty(&xa0));
 }
 
 static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
@@ -610,11 +655,11 @@ retry:
 	xas_lock(&xas);
 	xas_for_each_conflict(&xas, entry) {
 		XA_BUG_ON(xa, !xa_is_value(entry));
-		XA_BUG_ON(xa, entry < xa_mk_value(start));
-		XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1));
+		XA_BUG_ON(xa, entry < xa_mk_index(start));
+		XA_BUG_ON(xa, entry > xa_mk_index(start + (1UL << order) - 1));
 		count++;
 	}
-	xas_store(&xas, xa_mk_value(start));
+	xas_store(&xas, xa_mk_index(start));
 	xas_unlock(&xas);
 	if (xas_nomem(&xas, GFP_KERNEL)) {
 		count = 0;
@@ -622,9 +667,9 @@ retry:
 	}
 	XA_BUG_ON(xa, xas_error(&xas));
 	XA_BUG_ON(xa, count != present);
-	XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start));
+	XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start));
 	XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
-			xa_mk_value(start));
+			xa_mk_index(start));
 	xa_erase_index(xa, start);
 }
 
@@ -703,7 +748,7 @@ static noinline void check_multi_find_2(struct xarray *xa)
 		for (j = 0; j < index; j++) {
 			XA_STATE(xas, xa, j + index);
 			xa_store_index(xa, index - 1, GFP_KERNEL);
-			xa_store_order(xa, index, i, xa_mk_value(index),
+			xa_store_order(xa, index, i, xa_mk_index(index),
 					GFP_KERNEL);
 			rcu_read_lock();
 			xas_for_each(&xas, entry, ULONG_MAX) {
@@ -778,7 +823,7 @@ static noinline void check_find_2(struct xarray *xa)
 		j = 0;
 		index = 0;
 		xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
-			XA_BUG_ON(xa, xa_mk_value(index) != entry);
+			XA_BUG_ON(xa, xa_mk_index(index) != entry);
 			XA_BUG_ON(xa, index != j++);
 		}
 	}
@@ -786,10 +831,34 @@ static noinline void check_find_2(struct xarray *xa)
 	xa_destroy(xa);
 }
 
+static noinline void check_find_3(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 0);
+	unsigned long i, j, k;	/* i: indices stored so far, j: start index, k: max index */
+	void *entry;
+
+	for (i = 0; i < 100; i++) {
+		for (j = 0; j < 100; j++) {
+			for (k = 0; k < 100; k++) {
+				xas_set(&xas, j);
+				xas_for_each_marked(&xas, entry, k, XA_MARK_0)
+					;	/* empty body: only the final xas state is checked */
+				if (j > k)	/* walk started beyond its own max */
+					XA_BUG_ON(xa,
+						xas.xa_node != XAS_RESTART);
+			}
+		}
+		xa_store_index(xa, i, GFP_KERNEL);	/* add one more marked index per outer pass */
+		xa_set_mark(xa, i, XA_MARK_0);
+	}
+	xa_destroy(xa);
+}
+
 static noinline void check_find(struct xarray *xa)
 {
 	check_find_1(xa);
 	check_find_2(xa);
+	check_find_3(xa);
 	check_multi_find(xa);
 	check_multi_find_2(xa);
 }
@@ -829,11 +898,11 @@ static noinline void check_find_entry(struct xarray *xa)
 			for (index = 0; index < (1UL << (order + 5));
 			     index += (1UL << order)) {
 				xa_store_order(xa, index, order,
-						xa_mk_value(index), GFP_KERNEL);
+						xa_mk_index(index), GFP_KERNEL);
 				XA_BUG_ON(xa, xa_load(xa, index) !=
-						xa_mk_value(index));
+						xa_mk_index(index));
 				XA_BUG_ON(xa, xa_find_entry(xa,
-						xa_mk_value(index)) != index);
+						xa_mk_index(index)) != index);
 			}
 			XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
 			xa_destroy(xa);
@@ -844,7 +913,7 @@ static noinline void check_find_entry(struct xarray *xa)
 	XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
 	xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
 	XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
-	XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_value(LONG_MAX)) != -1);
+	XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1);
 	xa_erase_index(xa, ULONG_MAX);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
@@ -864,7 +933,7 @@ static noinline void check_move_small(struct xarray *xa, unsigned long idx)
 			XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
 		XA_BUG_ON(xa, xas.xa_index != i);
 		if (i == 0 || i == idx)
-			XA_BUG_ON(xa, entry != xa_mk_value(i));
+			XA_BUG_ON(xa, entry != xa_mk_index(i));
 		else
 			XA_BUG_ON(xa, entry != NULL);
 	}
@@ -878,7 +947,7 @@ static noinline void check_move_small(struct xarray *xa, unsigned long idx)
 			XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
 		XA_BUG_ON(xa, xas.xa_index != i);
 		if (i == 0 || i == idx)
-			XA_BUG_ON(xa, entry != xa_mk_value(i));
+			XA_BUG_ON(xa, entry != xa_mk_index(i));
 		else
 			XA_BUG_ON(xa, entry != NULL);
 	} while (i > 0);
@@ -909,7 +978,7 @@ static noinline void check_move(struct xarray *xa)
 	do {
 		void *entry = xas_prev(&xas);
 		i--;
-		XA_BUG_ON(xa, entry != xa_mk_value(i));
+		XA_BUG_ON(xa, entry != xa_mk_index(i));
 		XA_BUG_ON(xa, i != xas.xa_index);
 	} while (i != 0);
 
@@ -918,7 +987,7 @@ static noinline void check_move(struct xarray *xa)
 
 	do {
 		void *entry = xas_next(&xas);
-		XA_BUG_ON(xa, entry != xa_mk_value(i));
+		XA_BUG_ON(xa, entry != xa_mk_index(i));
 		XA_BUG_ON(xa, i != xas.xa_index);
 		i++;
 	} while (i < (1 << 16));
@@ -934,7 +1003,7 @@ static noinline void check_move(struct xarray *xa)
 		void *entry = xas_prev(&xas);
 		i--;
 		if ((i < (1 << 8)) || (i >= (1 << 15)))
-			XA_BUG_ON(xa, entry != xa_mk_value(i));
+			XA_BUG_ON(xa, entry != xa_mk_index(i));
 		else
 			XA_BUG_ON(xa, entry != NULL);
 		XA_BUG_ON(xa, i != xas.xa_index);
@@ -946,7 +1015,7 @@ static noinline void check_move(struct xarray *xa)
 	do {
 		void *entry = xas_next(&xas);
 		if ((i < (1 << 8)) || (i >= (1 << 15)))
-			XA_BUG_ON(xa, entry != xa_mk_value(i));
+			XA_BUG_ON(xa, entry != xa_mk_index(i));
 		else
 			XA_BUG_ON(xa, entry != NULL);
 		XA_BUG_ON(xa, i != xas.xa_index);
@@ -976,7 +1045,7 @@ static noinline void xa_store_many_order(struct xarray *xa,
 		if (xas_error(&xas))
 			goto unlock;
 		for (i = 0; i < (1U << order); i++) {
-			XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i)));
+			XA_BUG_ON(xa, xas_store(&xas, xa_mk_index(index + i)));
 			xas_next(&xas);
 		}
 unlock:
@@ -1031,9 +1100,9 @@ static noinline void check_create_range_4(struct xarray *xa,
 		if (xas_error(&xas))
 			goto unlock;
 		for (i = 0; i < (1UL << order); i++) {
-			void *old = xas_store(&xas, xa_mk_value(base + i));
+			void *old = xas_store(&xas, xa_mk_index(base + i));
 			if (xas.xa_index == index)
-				XA_BUG_ON(xa, old != xa_mk_value(base + i));
+				XA_BUG_ON(xa, old != xa_mk_index(base + i));
 			else
 				XA_BUG_ON(xa, old != NULL);
 			xas_next(&xas);
@@ -1085,10 +1154,10 @@ static noinline void __check_store_range(struct xarray *xa, unsigned long first,
 		unsigned long last)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL);
+	xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL);
 
-	XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first));
-	XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first));
+	XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_index(first));
+	XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_index(first));
 	XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL);
 
@@ -1195,7 +1264,7 @@ static noinline void check_account(struct xarray *xa)
 		XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
 		rcu_read_unlock();
 
-		xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order),
+		xa_store_order(xa, 1 << order, order, xa_mk_index(1UL << order),
 				GFP_KERNEL);
 		XA_BUG_ON(xa, xas.xa_node->count != xas.xa_node->nr_values * 2);
 
diff --git a/lib/xarray.c b/lib/xarray.c
index bbacca576593..5f3f9311de89 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1131,7 +1131,7 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
 		entry = xa_head(xas->xa);
 		xas->xa_node = NULL;
 		if (xas->xa_index > max_index(entry))
-			goto bounds;
+			goto out;
 		if (!xa_is_node(entry)) {
 			if (xa_marked(xas->xa, mark))
 				return entry;
@@ -1180,11 +1180,9 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
 	}
 
 out:
-	if (!max)
+	if (xas->xa_index > max)
 		goto max;
-bounds:
-	xas->xa_node = XAS_BOUNDS;
-	return NULL;
+	return set_bounds(xas);
 max:
 	xas->xa_node = XAS_RESTART;
 	return NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 705a3e9cc910..a80832487981 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1248,10 +1248,11 @@ void free_huge_page(struct page *page)
 		(struct hugepage_subpool *)page_private(page);
 	bool restore_reserve;
 
-	set_page_private(page, 0);
-	page->mapping = NULL;
 	VM_BUG_ON_PAGE(page_count(page), page);
 	VM_BUG_ON_PAGE(page_mapcount(page), page);
+
+	set_page_private(page, 0);
+	page->mapping = NULL;
 	restore_reserve = PagePrivate(page);
 	ClearPagePrivate(page);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 9a2d5ae81ae1..81ae63ca78d0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1727,7 +1727,7 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
 	return -1;
 }
 
-bool __init memblock_is_reserved(phys_addr_t addr)
+bool __init_memblock memblock_is_reserved(phys_addr_t addr)
 {
 	return memblock_search(&memblock.reserved, addr) != -1;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 921f80488bb3..5d07e0b1352f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -661,9 +661,7 @@ static int shmem_free_swap(struct address_space *mapping,
 {
 	void *old;
 
-	xa_lock_irq(&mapping->i_pages);
-	old = __xa_cmpxchg(&mapping->i_pages, index, radswap, NULL, 0);
-	xa_unlock_irq(&mapping->i_pages);
+	old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0);
 	if (old != radswap)
 		return -ENOENT;
 	free_swap_and_cache(radix_to_swp_entry(radswap));
diff --git a/mm/sparse.c b/mm/sparse.c
index 33307fc05c4d..3abc8cc50201 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -240,6 +240,22 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 }
 
 /*
+ * Mark all memblocks as present using memory_present(). This is a
+ * convenience function that is useful for a number of arches
+ * to mark all of the system's memory as present during initialization.
+ */
+void __init memblocks_present(void)
+{
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		memory_present(memblock_get_region_node(reg),
+			       memblock_region_memory_base_pfn(reg),
+			       memblock_region_memory_end_pfn(reg));
+	}
+}
+
+/*
  * Subtle, we encode the real pfn into the mem_map such that
  * the identity pfn - section_mem_map will return the actual
  * physical page frame number.
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 8081b6cf67d2..34414c6efad6 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -47,8 +47,8 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
 	$xs	= "[0-9a-f ]";	# hex character or space
 	$funcre = qr/^$x* <(.*)>:$/;
 	if ($arch eq 'aarch64') {
-		#ffffffc0006325cc:       a9bb7bfd        stp     x29, x30, [sp,#-80]!
-		$re = qr/^.*stp.*sp,\#-([0-9]{1,8})\]\!/o;
+		#ffffffc0006325cc:       a9bb7bfd        stp     x29, x30, [sp, #-80]!
+		$re = qr/^.*stp.*sp, \#-([0-9]{1,8})\]\!/o;
 	} elsif ($arch eq 'arm') {
 		#c0008ffc:	e24dd064	sub	sp, sp, #100	; 0x64
 		$re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o;
diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
index 5056fb3b897d..e559c6294c39 100755
--- a/scripts/spdxcheck.py
+++ b/scripts/spdxcheck.py
@@ -168,6 +168,7 @@ class id_parser(object):
         self.curline = 0
         try:
             for line in fd:
+                line = line.decode(locale.getpreferredencoding(False), errors='ignore')
                 self.curline += 1
                 if self.curline > maxlines:
                     break
@@ -249,12 +250,13 @@ if __name__ == '__main__':
 
     try:
         if len(args.path) and args.path[0] == '-':
-            parser.parse_lines(sys.stdin, args.maxlines, '-')
+            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
+            parser.parse_lines(stdin, args.maxlines, '-')
         else:
             if args.path:
                 for p in args.path:
                     if os.path.isfile(p):
-                        parser.parse_lines(open(p), args.maxlines, p)
+                        parser.parse_lines(open(p, 'rb'), args.maxlines, p)
                     elif os.path.isdir(p):
                         scan_git_subtree(repo.head.reference.commit.tree, p)
                     else:
diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c
index 64c3cb0fb926..654a50319198 100644
--- a/sound/firewire/fireface/ff-protocol-ff400.c
+++ b/sound/firewire/fireface/ff-protocol-ff400.c
@@ -30,7 +30,7 @@ static int ff400_get_clock(struct snd_ff *ff, unsigned int *rate,
 	int err;
 
 	err = snd_fw_transaction(ff->unit, TCODE_READ_QUADLET_REQUEST,
-				 FF400_SYNC_STATUS, &reg, sizeof(reg), 0);
+				 FF400_CLOCK_CONFIG, &reg, sizeof(reg), 0);
 	if (err < 0)
 		return err;
 	data = le32_to_cpu(reg);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8d75597028ee..15021c839372 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5520,6 +5520,9 @@ enum {
 	ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
 	ALC295_FIXUP_HP_AUTO_MUTE,
 	ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
+	ALC294_FIXUP_ASUS_MIC,
+	ALC294_FIXUP_ASUS_HEADSET_MIC,
+	ALC294_FIXUP_ASUS_SPK,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6392,6 +6395,8 @@ static const struct hda_fixup alc269_fixups[] = {
 	[ALC285_FIXUP_LENOVO_HEADPHONE_NOISE] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc285_fixup_invalidate_dacs,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_THINKPAD_ACPI
 	},
 	[ALC295_FIXUP_HP_AUTO_MUTE] = {
 		.type = HDA_FIXUP_FUNC,
@@ -6406,6 +6411,36 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MIC
 	},
+	[ALC294_FIXUP_ASUS_MIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x13, 0x90a60160 }, /* use as internal mic */
+			{ 0x19, 0x04a11120 }, /* use as headset mic, without its own jack detect */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+	},
+	[ALC294_FIXUP_ASUS_HEADSET_MIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a1113c }, /* use as headset mic, without its own jack detect */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+	},
+	[ALC294_FIXUP_ASUS_SPK] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* Set EAPD high */
+			{ 0x20, AC_VERB_SET_COEF_INDEX, 0x40 },
+			{ 0x20, AC_VERB_SET_PROC_COEF, 0x8800 },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6548,6 +6583,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
+	SND_PCI_QUIRK(0x1043, 0x14a1, "ASUS UX533FD", ALC294_FIXUP_ASUS_SPK),
 	SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
 	SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
@@ -7155,6 +7191,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 	SND_HDA_PIN_QUIRK(0x10ec0293, 0x1028, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC292_STANDARD_PINS,
 		{0x13, 0x90a60140}),
+	SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_MIC,
+		{0x14, 0x90170110},
+		{0x1b, 0x90a70130},
+		{0x21, 0x04211020}),
+	SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_SPK,
+		{0x12, 0x90a60130},
+		{0x17, 0x90170110},
+		{0x21, 0x04211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC295_STANDARD_PINS,
 		{0x17, 0x21014020},
@@ -7227,6 +7271,37 @@ static void alc269_fill_coef(struct hda_codec *codec)
 	alc_update_coef_idx(codec, 0x4, 0, 1<<11);
 }
 
+static void alc294_hp_init(struct hda_codec *codec)
+{
+	struct alc_spec *spec = codec->spec;
+	hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0];
+	int i, val;
+
+	if (!hp_pin)	/* hp_pins[0] is unset: skip the whole sequence */
+		return;
+
+	snd_hda_codec_write(codec, hp_pin, 0,
+			    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+
+	msleep(100);
+
+	snd_hda_codec_write(codec, hp_pin, 0,
+			    AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0);
+
+	alc_update_coef_idx(codec, 0x6f, 0x000f, 0);/* Set HP depop to manual mode */
+	alc_update_coefex_idx(codec, 0x58, 0x00, 0x8000, 0x8000); /* HP depop procedure start */
+
+	/* Wait for the depop procedure to finish: poll at most 20 times, 50 ms apart, while bit 0x0080 is set */
+	val = alc_read_coefex_idx(codec, 0x58, 0x01);
+	for (i = 0; i < 20 && val & 0x0080; i++) {
+		msleep(50);
+		val = alc_read_coefex_idx(codec, 0x58, 0x01);
+	}
+	/* Set HP depop to auto mode */
+	alc_update_coef_idx(codec, 0x6f, 0x000f, 0x000b);
+	msleep(50);
+}
+
 /*
  */
 static int patch_alc269(struct hda_codec *codec)
@@ -7352,6 +7427,7 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->codec_variant = ALC269_TYPE_ALC294;
 		spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
+		alc294_hp_init(codec);
 		break;
 	case 0x10ec0300:
 		spec->codec_variant = ALC269_TYPE_ALC300;
@@ -7363,6 +7439,7 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->codec_variant = ALC269_TYPE_ALC700;
 		spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */
+		alc294_hp_init(codec);
 		break;
 
 	}
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index db66a952c173..fd8765af19bb 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -89,6 +89,7 @@ endif
 localedir ?=	/usr/share/locale
 docdir ?=       /usr/share/doc/packages/cpupower
 confdir ?=      /etc/
+bash_completion_dir ?= /usr/share/bash-completion/completions
 
 # Toolchain: what tools do we use, and what options do they need:
 
@@ -96,7 +97,8 @@ CP = cp -fpR
 INSTALL = /usr/bin/install -c
 INSTALL_PROGRAM = ${INSTALL}
 INSTALL_DATA  = ${INSTALL} -m 644
-INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+#bash completion scripts get sourced and so they should be rw only.
+INSTALL_SCRIPT = ${INSTALL} -m 644
 
 # If you are running a cross compiler, you may want to set this
 # to something more interesting, like "arm-linux-".  If you want
@@ -288,6 +290,8 @@ install-lib:
 install-tools:
 	$(INSTALL) -d $(DESTDIR)${bindir}
 	$(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir}
+	$(INSTALL) -d $(DESTDIR)${bash_completion_dir}
+	$(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower'
 
 install-man:
 	$(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
diff --git a/tools/power/cpupower/cpupower-completion.sh b/tools/power/cpupower/cpupower-completion.sh
new file mode 100644
index 000000000000..e10839cfcfc1
--- /dev/null
+++ b/tools/power/cpupower/cpupower-completion.sh
@@ -0,0 +1,128 @@
+# -*- shell-script -*-
+# bash completion script for cpupower
+# Taken from git.git's completion script.
+
+_cpupower_commands="frequency-info frequency-set idle-info idle-set set info monitor"
+
+_frequency_info ()
+{
+	local flags="-f -w -l -d -p -g -a -s -y -o -m -n --freq --hwfreq --hwlimits --driver --policy --governors --related-cpus --affected-cpus --stats --latency --proc --human --no-rounding"
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		frequency-info) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered only right after the subcommand
+	esac
+}
+
+_frequency_set ()
+{
+	local flags="-f -g --freq --governor -d --min -u --max -r --related"
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		-f| --freq | -d | --min | -u | --max)	# frequency-valued options
+		if [ -d /sys/devices/system/cpu/cpufreq/ ] ; then	# candidates come from the first policy* directory listed
+			COMPREPLY=($(compgen -W '$(cat $(ls -d /sys/devices/system/cpu/cpufreq/policy* | head -1)/scaling_available_frequencies)' -- "$cur"))
+		fi ;;
+		-g| --governor)	# governor-valued options
+		if [ -d /sys/devices/system/cpu/cpufreq/ ] ; then	# candidates come from the first policy* directory listed
+			COMPREPLY=($(compgen -W '$(cat $(ls -d /sys/devices/system/cpu/cpufreq/policy* | head -1)/scaling_available_governors)' -- "$cur"))
+		fi;;
+		frequency-set) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered right after the subcommand
+	esac
+}
+
+_idle_info()
+{
+	local flags="-f --silent"
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		idle-info) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered only right after the subcommand
+	esac
+}
+
+_idle_set()
+{
+	local flags="-d --disable -e --enable -D --disable-by-latency -E --enable-all"
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		idle-set) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered only right after the subcommand
+	esac
+}
+
+_set()
+{
+	local flags="--perf-bias -b"	# whitespace-separated word list for compgen -W (no commas)
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		set) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered only right after the subcommand
+	esac
+}
+
+_monitor()
+{
+	local flags="-l -m -i -c -v"
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev" in
+		monitor) COMPREPLY=($(compgen -W "$flags" -- "$cur")) ;;	# flags are offered only right after the subcommand
+	esac
+}
+
+_taskset()
+{
+	local prev_to_prev="${COMP_WORDS[COMP_CWORD-2]}"	# two words before the one being completed
+	local prev="${COMP_WORDS[COMP_CWORD-1]}"	# word just before the one being completed
+	local cur="${COMP_WORDS[COMP_CWORD]}"	# word being completed
+	case "$prev_to_prev" in
+		-c|--cpu) COMPREPLY=($(compgen -W "$_cpupower_commands" -- "$cur")) ;;	# "-c <arg> <cur>": offer subcommand names
+	esac
+	case "$prev" in	# a subcommand was just typed: hand over to its completer
+		frequency-info) _frequency_info ;;
+		frequency-set) _frequency_set ;;
+		idle-info) _idle_info ;;
+		idle-set) _idle_set ;;
+		set) _set ;;
+		monitor) _monitor ;;
+	esac
+
+}
+
+_cpupower ()
+{
+	local i	# NOTE(review): never referenced in this function
+	local c=1
+	local command
+
+	while test $c -lt $COMP_CWORD; do
+		if test $c == 1; then	# only the first word after "cpupower" is taken as the command
+			command="${COMP_WORDS[c]}"
+		fi
+		c=$((++c))
+	done
+
+	# Complete name of subcommand if the user has not finished typing it yet.
+	if test $c -eq $COMP_CWORD -a -z "$command"; then
+		COMPREPLY=($(compgen -W "help -v --version -c --cpu $_cpupower_commands" -- "${COMP_WORDS[COMP_CWORD]}"))
+		return
+	fi
+
+	# Complete arguments to subcommands.
+	case "$command" in
+		-v|--version) return ;;
+		-c|--cpu) _taskset ;;
+		help) COMPREPLY=($(compgen -W "$_cpupower_commands" -- "${COMP_WORDS[COMP_CWORD]}")) ;;
+		frequency-info) _frequency_info ;;
+		frequency-set) _frequency_set ;;
+		idle-info) _idle_info ;;
+		idle-set) _idle_set ;;
+		set) _set ;;
+		monitor) _monitor ;;
+	esac
+}
+
+complete -o bashdefault -o default -F _cpupower cpupower 2>/dev/null \
+    || complete -o default -F _cpupower cpupower
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 84e2b648e622..2fa3c5757bcb 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -585,9 +585,9 @@ current_max_cpu = 0
 
 read_trace_data(filename)
 
-clear_trace_file()
-# Free the memory
 if interval:
+    clear_trace_file()
+    # Free the memory
     free_trace_buffer()
 
 if graph_data_present == False:
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 328f62e6ea02..9327c0ddc3a5 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1,6 +1,6 @@
 /*
  * turbostat -- show CPU frequency and C-state residency
- * on modern Intel turbo-capable processors.
+ * on modern Intel and AMD processors.
  *
  * Copyright (c) 2013 Intel Corporation.
  * Len Brown <[email protected]>
@@ -71,6 +71,8 @@ unsigned int do_irtl_snb;
 unsigned int do_irtl_hsw;
 unsigned int units = 1000000;	/* MHz etc */
 unsigned int genuine_intel;
+unsigned int authentic_amd;
+unsigned int max_level, max_extended_level;
 unsigned int has_invariant_tsc;
 unsigned int do_nhm_platform_info;
 unsigned int no_MSR_MISC_PWR_MGMT;
@@ -1667,30 +1669,51 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 
 void get_apic_id(struct thread_data *t)
 {
-	unsigned int eax, ebx, ecx, edx, max_level;
+	unsigned int eax, ebx, ecx, edx;
 
-	eax = ebx = ecx = edx = 0;
+	if (DO_BIC(BIC_APIC)) {
+		eax = ebx = ecx = edx = 0;
+		__cpuid(1, eax, ebx, ecx, edx);
 
-	if (!genuine_intel)
+		t->apic_id = (ebx >> 24) & 0xff;	/* CPUID leaf 1, EBX[31:24] */
+	}
+
+	if (!DO_BIC(BIC_X2APIC))
 		return;
 
-	__cpuid(0, max_level, ebx, ecx, edx);
+	if (authentic_amd) {
+		unsigned int topology_extensions;
 
-	__cpuid(1, eax, ebx, ecx, edx);
-	t->apic_id = (ebx >> 24) & 0xf;
+		if (max_extended_level < 0x8000001e)
+			return;
 
-	if (max_level < 0xb)
+		eax = ebx = ecx = edx = 0;
+		__cpuid(0x80000001, eax, ebx, ecx, edx);
+		topology_extensions = ecx & (1 << 22);
+
+		if (topology_extensions == 0)
+			return;
+
+		eax = ebx = ecx = edx = 0;
+		__cpuid(0x8000001e, eax, ebx, ecx, edx);
+
+		t->x2apic_id = eax;	/* CPUID leaf 0x8000001e, EAX */
 		return;
+	}
 
-	if (!DO_BIC(BIC_X2APIC))
+	if (!genuine_intel)
+		return;
+
+	if (max_level < 0xb)
 		return;
 
 	ecx = 0;
 	__cpuid(0xb, eax, ebx, ecx, edx);
 	t->x2apic_id = edx;
 
-	if (debug && (t->apic_id != t->x2apic_id))
-		fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
+	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
+		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
+				t->cpu_id, t->apic_id, t->x2apic_id);
 }
 
 /*
@@ -1953,11 +1976,12 @@ done:
 #define PCL_7S 11 /* PC7 Shrink */
 #define PCL__8 12 /* PC8 */
 #define PCL__9 13 /* PC9 */
-#define PCLUNL 14 /* Unlimited */
+#define PCL_10 14 /* PC10 */
+#define PCLUNL 15 /* Unlimited */
 
 int pkg_cstate_limit = PCLUKN;
 char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
-	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
+	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"};
 
 int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
@@ -1965,7 +1989,7 @@ int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S,
 int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
 int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
 
@@ -3113,13 +3137,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 	bclk = discover_bclk(family, model);
 
 	switch (model) {
-	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
-	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
-	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
-	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
 		pkg_cstate_limits = nhm_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
@@ -3131,16 +3150,11 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		break;
 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
 	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		pkg_cstate_limits = hsw_pkg_cstate_limits;
 		has_misc_feature_control = 1;
@@ -3159,13 +3173,12 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		no_MSR_MISC_PWR_MGMT = 1;
 		break;
 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		pkg_cstate_limits = phi_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
-		pkg_cstate_limits = bxt_pkg_cstate_limits;
+		pkg_cstate_limits = glm_pkg_cstate_limits;
 		break;
 	default:
 		return 0;
@@ -3220,7 +3233,6 @@ int is_bdx(unsigned int family, unsigned int model)
 
 	switch (model) {
 	case INTEL_FAM6_BROADWELL_X:
-	case INTEL_FAM6_BROADWELL_XEON_D:
 		return 1;
 	}
 	return 0;
@@ -3246,9 +3258,7 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model)
 	switch (model) {
 	/* Nehalem compatible, but do not include turbo-ratio limit support */
 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
-	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		return 0;
 	default:
 		return 1;
@@ -3303,7 +3313,6 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
 
 	switch (model) {
 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		return 1;
 	default:
 		return 0;
@@ -3337,21 +3346,15 @@ int has_config_tdp(unsigned int family, unsigned int model)
 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
 	case INTEL_FAM6_HASWELL_X:	/* HSX */
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 
 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		return 1;
 	default:
 		return 0;
@@ -3744,9 +3747,7 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
 	switch (model) {
 	case INTEL_FAM6_HASWELL_X:	/* HSX */
 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		return (rapl_dram_energy_units = 15.3 / 1000000);
 	default:
 		return (rapl_energy_units);
@@ -3775,7 +3776,6 @@ void rapl_probe(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SANDYBRIDGE:
 	case INTEL_FAM6_IVYBRIDGE:
 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
@@ -3799,9 +3799,6 @@ void rapl_probe(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_PkgWatt);
 		break;
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		BIC_PRESENT(BIC_PKG__);
@@ -3820,10 +3817,8 @@ void rapl_probe(unsigned int family, unsigned int model)
 		break;
 	case INTEL_FAM6_HASWELL_X:	/* HSX */
 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
@@ -3916,7 +3911,6 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 
 	switch (model) {
 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
 		do_gfx_perf_limit_reasons = 1;
 	case INTEL_FAM6_HASWELL_X:	/* HSX */
@@ -4128,16 +4122,11 @@ int has_snb_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
 	case INTEL_FAM6_HASWELL_X:	/* HSW */
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
-	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
@@ -4166,12 +4155,9 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
 		return 0;
 
 	switch (model) {
-	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
+	case INTEL_FAM6_HASWELL_CORE:
 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
@@ -4195,9 +4181,6 @@ int has_skl_msrs(unsigned int family, unsigned int model)
 
 	switch (model) {
 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		return 1;
 	}
@@ -4222,7 +4205,6 @@ int is_knl(unsigned int family, unsigned int model)
 		return 0;
 	switch (model) {
 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
-	case INTEL_FAM6_XEON_PHI_KNM:
 		return 1;
 	}
 	return 0;
@@ -4436,18 +4418,56 @@ void decode_c6_demotion_policy_msr(void)
 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
 }
 
+/*
+ * Fold CPU models that turbostat treats identically onto a single representative model.
+ */
+unsigned int intel_model_duplicates(unsigned int model)
+{
+
+	switch(model) {
+	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
+	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
+	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
+		return INTEL_FAM6_NEHALEM;
+
+	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
+	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
+		return INTEL_FAM6_NEHALEM_EX;
+
+	case INTEL_FAM6_XEON_PHI_KNM:
+		return INTEL_FAM6_XEON_PHI_KNL;
+
+	case INTEL_FAM6_HASWELL_ULT:
+		return INTEL_FAM6_HASWELL_CORE;
+
+	case INTEL_FAM6_BROADWELL_X:
+	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
+		return INTEL_FAM6_BROADWELL_X;
+
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		return INTEL_FAM6_SKYLAKE_MOBILE;
+	}
+	return model;	/* no duplicate listed above: the model is returned unchanged */
+}
 void process_cpuid()
 {
-	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
-	unsigned int fms, family, model, stepping;
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
 	unsigned int has_turbo;
 
 	eax = ebx = ecx = edx = 0;
 
 	__cpuid(0, max_level, ebx, ecx, edx);
 
-	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
 		genuine_intel = 1;
+	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+		authentic_amd = 1;
 
 	if (!quiet)
 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
@@ -4461,25 +4481,8 @@ void process_cpuid()
 		family += (fms >> 20) & 0xff;
 	if (family >= 6)
 		model += ((fms >> 16) & 0xf) << 4;
-
-	if (!quiet) {
-		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
-			max_level, family, model, stepping, family, model, stepping);
-		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
-			ecx & (1 << 0) ? "SSE3" : "-",
-			ecx & (1 << 3) ? "MONITOR" : "-",
-			ecx & (1 << 6) ? "SMX" : "-",
-			ecx & (1 << 7) ? "EIST" : "-",
-			ecx & (1 << 8) ? "TM2" : "-",
-			edx & (1 << 4) ? "TSC" : "-",
-			edx & (1 << 5) ? "MSR" : "-",
-			edx & (1 << 22) ? "ACPI-TM" : "-",
-			edx & (1 << 28) ? "HT" : "-",
-			edx & (1 << 29) ? "TM" : "-");
-	}
-
-	if (!(edx & (1 << 5)))
-		errx(1, "CPUID: no MSR");
+	ecx_flags = ecx;
+	edx_flags = edx;
 
 	/*
 	 * check max extended function levels of CPUID.
@@ -4489,6 +4492,27 @@ void process_cpuid()
 	ebx = ecx = edx = 0;
 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
 
+	if (!quiet) {
+		fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+			max_level, max_extended_level, family, model, stepping, family, model, stepping);
+		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
+			ecx_flags & (1 << 0) ? "SSE3" : "-",
+			ecx_flags & (1 << 3) ? "MONITOR" : "-",
+			ecx_flags & (1 << 6) ? "SMX" : "-",
+			ecx_flags & (1 << 7) ? "EIST" : "-",
+			ecx_flags & (1 << 8) ? "TM2" : "-",
+			edx_flags & (1 << 4) ? "TSC" : "-",
+			edx_flags & (1 << 5) ? "MSR" : "-",
+			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
+			edx_flags & (1 << 28) ? "HT" : "-",
+			edx_flags & (1 << 29) ? "TM" : "-");
+	}
+	if (genuine_intel)
+		model = intel_model_duplicates(model);
+
+	if (!(edx_flags & (1 << 5)))
+		errx(1, "CPUID: no MSR");
+
 	if (max_extended_level >= 0x80000007) {
 
 		/*
@@ -4576,9 +4600,6 @@ void process_cpuid()
 			if (crystal_hz == 0)
 				switch(model) {
 				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
-				case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
-				case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
-				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 					crystal_hz = 24000000;	/* 24.0 MHz */
 					break;
 				case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
@@ -4860,6 +4881,8 @@ void topology_probe()
 		return;
 
 	for (i = 0; i <= topo.max_cpu_num; ++i) {
+		if (cpu_is_not_present(i))
+			continue;
 		fprintf(outf,
 			"cpu %d pkg %d node %d lnode %d core %d thread %d\n",
 			i, cpus[i].physical_package_id,
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index acf1afa01c5b..397d6b612502 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -7,6 +7,7 @@ LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder xarray
 CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+	 regression4.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
 ifndef SHIFT
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 77a44c54998f..7a22d6e3732e 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -308,6 +308,7 @@ int main(int argc, char **argv)
 	regression1_test();
 	regression2_test();
 	regression3_test();
+	regression4_test();
 	iteration_test(0, 10 + 90 * long_run);
 	iteration_test(7, 10 + 90 * long_run);
 	single_thread_tests(long_run);
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
index 3c8a1584e9ee..135145af18b7 100644
--- a/tools/testing/radix-tree/regression.h
+++ b/tools/testing/radix-tree/regression.h
@@ -5,5 +5,6 @@
 void regression1_test(void);
 void regression2_test(void);
 void regression3_test(void);
+void regression4_test(void);
 
 #endif
diff --git a/tools/testing/radix-tree/regression4.c b/tools/testing/radix-tree/regression4.c
new file mode 100644
index 000000000000..cf4e5aba6b08
--- /dev/null
+++ b/tools/testing/radix-tree/regression4.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static pthread_barrier_t worker_barrier;
+static int obj0, obj1;
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+
+static void *reader_fn(void *arg)	/* reader thread body; arg is unused, always returns NULL */
+{
+	int i;
+	void *entry;
+
+	rcu_register_thread();
+	pthread_barrier_wait(&worker_barrier);	/* start only once both workers have arrived */
+
+	for (i = 0; i < 1000000; i++) {
+		rcu_read_lock();
+		entry = radix_tree_lookup(&mt_tree, 0);
+		rcu_read_unlock();
+		if (entry != &obj0) {	/* index 0 must look up as &obj0 on every iteration */
+			printf("iteration %d bad entry = %p\n", i, entry);
+			abort();
+		}
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+static void *writer_fn(void *arg)	/* writer thread body; arg is unused, always returns NULL */
+{
+	int i;
+
+	rcu_register_thread();
+	pthread_barrier_wait(&worker_barrier);	/* start only once both workers have arrived */
+
+	for (i = 0; i < 1000000; i++) {	/* repeatedly add and remove index 1; index 0 is never touched here */
+		radix_tree_insert(&mt_tree, 1, &obj1);	/* return value is not checked */
+		radix_tree_delete(&mt_tree, 1);
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+void regression4_test(void)	/* runs reader_fn and writer_fn concurrently against mt_tree */
+{
+	pthread_t reader, writer;
+
+	printv(1, "regression test 4 starting\n");
+
+	radix_tree_insert(&mt_tree, 0, &obj0);	/* the entry reader_fn expects to find at index 0 */
+	pthread_barrier_init(&worker_barrier, NULL, 2);	/* count of 2: one reader plus one writer */
+
+	if (pthread_create(&reader, NULL, reader_fn, NULL) ||
+	    pthread_create(&writer, NULL, writer_fn, NULL)) {
+		perror("pthread_create");	/* NOTE(review): pthread_create() reports errors via its return value; errno may be stale -- confirm */
+		exit(1);
+	}
+
+	if (pthread_join(reader, NULL) || pthread_join(writer, NULL)) {
+		perror("pthread_join");	/* NOTE(review): same concern as above for pthread_join() -- confirm */
+		exit(1);
+	}
+
+	printv(1, "regression test 4 passed\n");	/* reached only if reader_fn never called abort() */
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e1473234968d..c9a2abf8be1b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2731,9 +2731,14 @@ TEST(syscall_restart)
 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 	ASSERT_EQ(true, WIFSTOPPED(status));
 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
-	/* Verify signal delivery came from parent now. */
 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
-	EXPECT_EQ(getpid(), info.si_pid);
+	/*
+	 * There is no siginfo on SIGSTOP any more, so we can't verify
+	 * signal delivery came from parent now (getpid() == info.si_pid).
+	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
+	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
+	 */
+	EXPECT_EQ(SIGSTOP, info.si_signo);
 
 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
 	ASSERT_EQ(0, kill(child_pid, SIGCONT));