1009 files changed, 30284 insertions, 9956 deletions
@@ -29,6 +29,7 @@ Alexandre Belloni <[email protected]> <alexandre.belloni@free-electr
 Alexei Starovoitov <[email protected]> <[email protected]>
 Alexei Starovoitov <[email protected]> <[email protected]>
 Alexei Starovoitov <[email protected]> <[email protected]>
+Alex Hung <[email protected]> <[email protected]>
 Alex Shi <[email protected]> <[email protected]>
 Alex Shi <[email protected]> <[email protected]>
 Alex Shi <[email protected]> <[email protected]>
@@ -382,6 +383,7 @@ Santosh Shilimkar <[email protected]>
 Santosh Shilimkar <[email protected]>
 Sarangdhar Joshi <[email protected]>
 Sascha Hauer <[email protected]>
+Satya Priya <[email protected]> <[email protected]>
 S.Çağlar Onur <[email protected]>
 Sean Christopherson <[email protected]> <[email protected]>
 Sean Nyekjaer <[email protected]> <[email protected]>
@@ -389,6 +391,7 @@ Sebastian Reichel <[email protected]> <[email protected]>
 Sebastian Reichel <[email protected]> <[email protected]>
 Sedat Dilek <[email protected]> <[email protected]>
 Seth Forshee <[email protected]> <[email protected]>
+Shannon Nelson <[email protected]> <[email protected]>
 Shiraz Hashim <[email protected]> <[email protected]>
 Shuah Khan <[email protected]> <[email protected]>
 Shuah Khan <[email protected]> <[email protected]>
@@ -416,6 +419,7 @@ TripleX Chung <[email protected]> <[email protected]>
 TripleX Chung <[email protected]> <[email protected]>
 Tsuneo Yoshioka <[email protected]>
 Tycho Andersen <[email protected]> <[email protected]>
+Tzung-Bi Shih <[email protected]> <[email protected]>
 Uwe Kleine-König <[email protected]>
 Uwe Kleine-König <[email protected]>
 Uwe Kleine-König <[email protected]>
@@ -2452,6 +2452,10 @@ S: 482 Shadowgraph Dr.
 S: San Jose, CA 95110
 S: USA
 
+N: Michal Marek
+D: Kbuild Maintainer 2009-2017
+
 N: Martin Mares
 W: http://www.ucw.cz/~mj/
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774..42af9ca0127e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6959,3 +6959,14 @@
 			memory, and other data can't be written using
 			xmon commands.
 			off	xmon is disabled.
+
+	amd_pstate=	[X86]
+			disable
+			  Do not enable amd_pstate as the default
+			  scaling driver for the supported processors
+			passive
+			  Use amd_pstate as a scaling driver; the driver
+			  requests a desired performance on this abstract
+			  scale, and the power management firmware translates
+			  the requests into actual hardware states (core
+			  frequency, data fabric and memory clocks, etc.)
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 8f3d30c5a0d8..06e23538f79c 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors.
 Kernel Module Options for ``amd-pstate``
 =========================================
 
-.. _shared_mem:
-
-``shared_mem``
-Use a module param (shared_mem) to enable related processors manually with
-**amd_pstate.shared_mem=1**.
-Due to the performance issue on the processors with `Shared Memory Support
-<perf_cap_>`_, we disable it presently and will re-enable this by default
-once we address performance issue with this solution.
-
-To check whether the current processor is using `Full MSR Support <perf_cap_>`_
-or `Shared Memory Support <perf_cap_>`_ : ::
-
-  ray@hr-test1:~$ lscpu | grep cppc
-  Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
-
-If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
-<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
+Passive Mode
+------------
+
+``amd_pstate=passive``
+
+It will be enabled if ``amd_pstate=passive`` is passed on the kernel command line.
+In this mode, the ``amd_pstate`` driver specifies a desired QoS target in the CPPC
+performance scale as a relative number. This can be expressed as a percentage of
+nominal performance (infrastructure max). Below the nominal sustained performance
+level, desired performance expresses the average performance level of the processor
+subject to the Performance Reduction Tolerance register. Above the nominal
+performance level, the processor must provide at least the nominal performance
+requested and go higher if current operating conditions allow.
 
 ``cpupower`` tool support for ``amd-pstate``
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 17e774d96c5e..cec2371173d7 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -332,13 +332,14 @@ avoid defining types with 'bpf\_' prefix to not be broken in future releases.
 In other words, no backwards compatibility is guaranteed if one using a type
 in BTF with 'bpf\_' prefix.
 
-Q: What is the compatibility story for special BPF types in local kptrs?
-------------------------------------------------------------------------
-Q: Same as above, but for local kptrs (i.e. pointers to objects allocated using
-bpf_obj_new for user defined structures). Will the kernel preserve backwards
+Q: What is the compatibility story for special BPF types in allocated objects?
+------------------------------------------------------------------------------
+Q: Same as above, but for allocated objects (i.e. objects allocated using
+bpf_obj_new for user defined types). Will the kernel preserve backwards
 compatibility for these features?
 A: NO.
 
 Unlike map value types, there are no stability guarantees for this case. The
-whole local kptr API itself is unstable (since it is exposed through kfuncs).
+whole API to work with allocated objects and any support for special fields
+inside them is unstable (since it is exposed through kfuncs).
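
As a rough illustration of what working with such allocated objects looks like
today, the sketch below uses the experimental ``bpf_obj_new()`` and
``bpf_obj_drop()`` wrappers from ``tools/testing/selftests/bpf/bpf_experimental.h``;
given the instability described above, treat it as illustrative only, not a
stable interface.

.. code-block:: c

    /* Sketch only: bpf_obj_new()/bpf_obj_drop() are unstable kfunc wrappers
     * provided by the selftests' bpf_experimental.h header.
     */
    struct node_data {
            long key;
            long data;
    };

    SEC("tc")
    int alloc_and_drop(void *ctx)
    {
            struct node_data *n;

            n = bpf_obj_new(typeof(*n));
            if (!n)
                    return 0;
            n->key = 1;
            /* Allocated objects must be released (or stored) before exit. */
            bpf_obj_drop(n);
            return 0;
    }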
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 761474bd7fe6..03d4993eda6f 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -44,6 +44,33 @@ is a guarantee that the reported issue will be overlooked.**
 Submitting patches
 ==================
 
+Q: How do I run BPF CI on my changes before sending them out for review?
+------------------------------------------------------------------------
+A: BPF CI is GitHub based and hosted at https://github.com/kernel-patches/bpf.
+While GitHub also provides a CLI that can be used to accomplish the same
+results, here we focus on the UI based workflow.
+
+The following steps lay out how to start a CI run for your patches:
+
+- Create a fork of the aforementioned repository in your own account (one time
+  action)
+
+- Clone the fork locally, check out a new branch tracking either the bpf-next
+  or bpf branch, and apply your to-be-tested patches on top of it
+
+- Push the local branch to your fork and create a pull request against
+  kernel-patches/bpf's bpf-next_base or bpf_base branch, respectively
+
+Shortly after the pull request has been created, the CI workflow will run. Note
+that CI capacity is shared with checks of patches submitted upstream, so
+depending on utilization the run can take a while to finish.
+
+Note furthermore that both base branches (bpf-next_base and bpf_base) will be
+updated as patches are pushed to the respective upstream branches they track.
+As such, an attempt will automatically be made to rebase your patch set as
+well. This behavior can result in a CI run being aborted and restarted with
+the new baseline.
+
 Q: To which mailing list do I need to submit my BPF patches?
 ------------------------------------------------------------
 A: Please submit your BPF patches to the bpf kernel mailing list:
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index cf8722f96090..7cd7c5415a99 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -1062,4 +1062,9 @@ format.::
 7. Testing
 ==========
 
-Kernel bpf selftest `test_btf.c` provides extensive set of BTF-related tests.
+The kernel BPF selftest `tools/testing/selftests/bpf/prog_tests/btf.c`_
+provides an extensive set of BTF-related tests.
+
+.. Links
+.. _tools/testing/selftests/bpf/prog_tests/btf.c:
+   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/btf.c
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 1b50de1983ee..1088d44634d6 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -29,6 +29,7 @@ that goes into great technical depth about the BPF Architecture.
   clang-notes
   linux-notes
   other
+  redirect
 
 .. only:: subproject and html
 
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 0f858156371d..90774479ab7a 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -72,6 +72,30 @@ argument as its size. By default, without __sz annotation, the size of the type
 of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
 pointer.
 
+2.2.2 __k Annotation
+--------------------
+
+This annotation is only understood for scalar arguments. It indicates that the
+verifier must check that the scalar argument is a known constant which is not
+a size parameter, and whose value is relevant to the safety of the
+program.
+
+An example is given below::
+
+        void *bpf_obj_new(u32 local_type_id__k, ...)
+        {
+        ...
+        }
+
+Here, bpf_obj_new uses the local_type_id argument to find out the size of that
+type ID in the program's BTF and returns a sized pointer to it. Each type ID
+will have a distinct size, hence it is crucial to treat each such call as
+distinct when values don't match during verifier state pruning checks.
+
+Hence, whenever a constant scalar argument is accepted by a kfunc which is not
+a size parameter, and the value of the constant matters for program safety,
+the __k suffix should be used.
+
 .. _BPF_kfunc_nodef:
 
 2.3 Using an existing kernel function
@@ -137,22 +161,20 @@ KF_ACQUIRE and KF_RET_NULL flags.
 --------------------------
 
 The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
-indicates that the all pointer arguments will always have a guaranteed lifetime,
-and pointers to kernel objects are always passed to helpers in their unmodified
-form (as obtained from acquire kfuncs).
-
-It can be used to enforce that a pointer to a refcounted object acquired from a
-kfunc or BPF helper is passed as an argument to this kfunc without any
-modifications (e.g. pointer arithmetic) such that it is trusted and points to
-the original object.
-
-Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs,
-but those can have a non-zero offset.
-
-This flag is often used for kfuncs that operate (change some property, perform
-some operation) on an object that was obtained using an acquire kfunc. Such
-kfuncs need an unchanged pointer to ensure the integrity of the operation being
-performed on the expected object.
+indicates that all pointer arguments are valid, and that all pointers to
+BTF objects have been passed in their unmodified form (that is, at a zero
+offset, and without having been obtained from walking another pointer).
+
+There are two types of pointers to kernel objects which are considered "valid":
+
+1. Pointers which are passed as tracepoint or struct_ops callback arguments.
+2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.
+
+Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
+KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
+
+The definition of "valid" pointers is subject to change at any time, and has
+absolutely no ABI stability guarantees.
 
 2.4.6 KF_SLEEPABLE flag
 -----------------------
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
index 3722537d1384..f9b3b252e28f 100644
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -1,5 +1,7 @@
 .. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
+.. _libbpf:
+
 libbpf
 ======
 
@@ -7,6 +9,7 @@ libbpf
    :maxdepth: 1
 
    API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
+   program_types
    libbpf_naming_convention
    libbpf_build
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
new file mode 100644
index 000000000000..ad4d4d5eecb0
--- /dev/null
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+.. _program_types_and_elf:
+
+Program Types and ELF Sections
+==============================
+
+The table below lists the program types, their attach types where relevant and the ELF section
+names supported by libbpf for them. The ELF section names follow these rules:
+
+- ``type`` is an exact match, e.g.
``SEC("socket")`` +- ``type+`` means it can be either exact ``SEC("type")`` or well-formed ``SEC("type/extras")`` + with a '``/``' separator between ``type`` and ``extras``. + +When ``extras`` are specified, they provide details of how to auto-attach the BPF program. The +format of ``extras`` depends on the program type, e.g. ``SEC("tracepoint/<category>/<name>")`` +for tracepoints or ``SEC("usdt/<path>:<provider>:<name>")`` for USDT probes. The extras are +described in more detail in the footnotes. + + ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| Program Type | Attach Type | ELF Section Name | Sleepable | ++===========================================+========================================+==================================+===========+ +| ``BPF_PROG_TYPE_CGROUP_DEVICE`` | ``BPF_CGROUP_DEVICE`` | ``cgroup/dev`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_CGROUP_SKB`` | | ``cgroup/skb`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET_EGRESS`` | ``cgroup_skb/egress`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET_INGRESS`` | ``cgroup_skb/ingress`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_CGROUP_SOCKOPT`` | ``BPF_CGROUP_GETSOCKOPT`` | ``cgroup/getsockopt`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_SETSOCKOPT`` | ``cgroup/setsockopt`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_CGROUP_SOCK_ADDR`` | ``BPF_CGROUP_INET4_BIND`` | ``cgroup/bind4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET4_CONNECT`` | ``cgroup/connect4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET4_GETPEERNAME`` | ``cgroup/getpeername4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET4_GETSOCKNAME`` | ``cgroup/getsockname4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET6_BIND`` | ``cgroup/bind6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET6_CONNECT`` | ``cgroup/connect6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET6_GETPEERNAME`` | ``cgroup/getpeername6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET6_GETSOCKNAME`` | ``cgroup/getsockname6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UDP4_RECVMSG`` | ``cgroup/recvmsg4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UDP4_SENDMSG`` | ``cgroup/sendmsg4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UDP6_RECVMSG`` | 
``cgroup/recvmsg6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET6_POST_BIND`` | ``cgroup/post_bind6`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET_SOCK_CREATE`` | ``cgroup/sock_create`` | | ++ + +----------------------------------+-----------+ +| | | ``cgroup/sock`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_INET_SOCK_RELEASE`` | ``cgroup/sock_release`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_CGROUP_SYSCTL`` | ``BPF_CGROUP_SYSCTL`` | ``cgroup/sysctl`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_EXT`` | | ``freplace+`` [#fentry]_ | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_FLOW_DISSECTOR`` | ``BPF_FLOW_DISSECTOR`` | ``flow_dissector`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_KPROBE`` | | ``kprobe+`` [#kprobe]_ | | ++ + +----------------------------------+-----------+ +| | | ``kretprobe+`` [#kprobe]_ | | ++ + +----------------------------------+-----------+ +| | | ``ksyscall+`` [#ksyscall]_ | | ++ + +----------------------------------+-----------+ +| | | ``kretsyscall+`` [#ksyscall]_ | | ++ + +----------------------------------+-----------+ +| | | ``uprobe+`` [#uprobe]_ | | ++ + +----------------------------------+-----------+ +| | | ``uprobe.s+`` [#uprobe]_ | Yes | ++ + +----------------------------------+-----------+ +| | | ``uretprobe+`` [#uprobe]_ | | ++ + +----------------------------------+-----------+ +| | | ``uretprobe.s+`` [#uprobe]_ | Yes | ++ + +----------------------------------+-----------+ +| | | ``usdt+`` [#usdt]_ | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | | ++ + +----------------------------------+-----------+ +| | | ``kretprobe.multi+`` [#kpmulti]_ | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_LSM`` | ``BPF_LSM_CGROUP`` | ``lsm_cgroup+`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_LSM_MAC`` | ``lsm+`` [#lsm]_ | | ++ + +----------------------------------+-----------+ +| | | ``lsm.s+`` [#lsm]_ | Yes | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| 
``BPF_PROG_TYPE_LWT_IN`` | | ``lwt_in`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_LWT_OUT`` | | ``lwt_out`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_LWT_SEG6LOCAL`` | | ``lwt_seg6local`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | | ++ + +----------------------------------+-----------+ +| | | ``raw_tracepoint.w+`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_RAW_TRACEPOINT`` | | ``raw_tp+`` [#rawtp]_ | | ++ + +----------------------------------+-----------+ +| | | ``raw_tracepoint+`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | | ++ + +----------------------------------+-----------+ +| | | ``tc`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SK_MSG`` | ``BPF_SK_MSG_VERDICT`` | ``sk_msg`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SK_REUSEPORT`` | ``BPF_SK_REUSEPORT_SELECT_OR_MIGRATE`` | ``sk_reuseport/migrate`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_SK_REUSEPORT_SELECT`` | ``sk_reuseport`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SK_SKB`` | | ``sk_skb`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_SK_SKB_STREAM_PARSER`` | ``sk_skb/stream_parser`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_SK_SKB_STREAM_VERDICT`` | ``sk_skb/stream_verdict`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SOCKET_FILTER`` | | ``socket`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | | 
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_TRACEPOINT`` | | ``tp+`` [#tp]_ | | ++ + +----------------------------------+-----------+ +| | | ``tracepoint+`` [#tp]_ | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_TRACING`` | ``BPF_MODIFY_RETURN`` | ``fmod_ret+`` [#fentry]_ | | ++ + +----------------------------------+-----------+ +| | | ``fmod_ret.s+`` [#fentry]_ | Yes | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_TRACE_FENTRY`` | ``fentry+`` [#fentry]_ | | ++ + +----------------------------------+-----------+ +| | | ``fentry.s+`` [#fentry]_ | Yes | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_TRACE_FEXIT`` | ``fexit+`` [#fentry]_ | | ++ + +----------------------------------+-----------+ +| | | ``fexit.s+`` [#fentry]_ | Yes | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_TRACE_ITER`` | ``iter+`` [#iter]_ | | ++ + +----------------------------------+-----------+ +| | | ``iter.s+`` [#iter]_ | Yes | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_TRACE_RAW_TP`` | ``tp_btf+`` [#fentry]_ | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ +| ``BPF_PROG_TYPE_XDP`` | ``BPF_XDP_CPUMAP`` | ``xdp.frags/cpumap`` | | ++ + +----------------------------------+-----------+ +| | | ``xdp/cpumap`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_XDP_DEVMAP`` | ``xdp.frags/devmap`` | | ++ + +----------------------------------+-----------+ +| | | ``xdp/devmap`` | | ++ +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_XDP`` | ``xdp.frags`` | | ++ + +----------------------------------+-----------+ +| | | ``xdp`` | | ++-------------------------------------------+----------------------------------------+----------------------------------+-----------+ + + +.. rubric:: Footnotes + +.. [#fentry] The ``fentry`` attach format is ``fentry[.s]/<function>``. +.. [#kprobe] The ``kprobe`` attach format is ``kprobe/<function>[+<offset>]``. Valid + characters for ``function`` are ``a-zA-Z0-9_.`` and ``offset`` must be a valid + non-negative integer. +.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``. +.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``. +.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``. +.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern`` + supports ``*`` and ``?`` wildcards. Valid characters for pattern are + ``a-zA-Z0-9_.*?``. +.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``. +.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``. 
+.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``. +.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``. diff --git a/Documentation/bpf/map_array.rst b/Documentation/bpf/map_array.rst index b2cceb6c696b..f2f51a53e8ae 100644 --- a/Documentation/bpf/map_array.rst +++ b/Documentation/bpf/map_array.rst @@ -32,7 +32,11 @@ Usage Kernel BPF ---------- -.. c:function:: +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) Array elements can be retrieved using the ``bpf_map_lookup_elem()`` helper. @@ -40,7 +44,11 @@ This helper returns a pointer into the array element, so to avoid data races with userspace reading the value, the user must use primitives like ``__sync_fetch_and_add()`` when updating the value in-place. -.. c:function:: +bpf_map_update_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) Array elements can be updated using the ``bpf_map_update_elem()`` helper. @@ -53,7 +61,7 @@ To clear an array element, you may use ``bpf_map_update_elem()`` to insert a zero value to that index. Per CPU Array -~~~~~~~~~~~~~ +------------- Values stored in ``BPF_MAP_TYPE_ARRAY`` can be accessed by multiple programs across different CPUs. To restrict storage to a single CPU, you may use a @@ -63,7 +71,11 @@ When using a ``BPF_MAP_TYPE_PERCPU_ARRAY`` the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers automatically access the slot for the current CPU. -.. c:function:: +bpf_map_lookup_percpu_elem() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the array @@ -119,7 +131,7 @@ This example BPF program shows how to access an array element. index = ip.protocol; value = bpf_map_lookup_elem(&my_map, &index); if (value) - __sync_fetch_and_add(&value, skb->len); + __sync_fetch_and_add(value, skb->len); return 0; } diff --git a/Documentation/bpf/map_bloom_filter.rst b/Documentation/bpf/map_bloom_filter.rst new file mode 100644 index 000000000000..c82487f2fe0d --- /dev/null +++ b/Documentation/bpf/map_bloom_filter.rst @@ -0,0 +1,174 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright (C) 2022 Red Hat, Inc. + +========================= +BPF_MAP_TYPE_BLOOM_FILTER +========================= + +.. note:: + - ``BPF_MAP_TYPE_BLOOM_FILTER`` was introduced in kernel version 5.16 + +``BPF_MAP_TYPE_BLOOM_FILTER`` provides a BPF bloom filter map. Bloom +filters are a space-efficient probabilistic data structure used to +quickly test whether an element exists in a set. In a bloom filter, +false positives are possible whereas false negatives are not. + +The bloom filter map does not have keys, only values. When the bloom +filter map is created, it must be created with a ``key_size`` of 0. The +bloom filter map supports two operations: + +- push: adding an element to the map +- peek: determining whether an element is present in the map + +BPF programs must use ``bpf_map_push_elem`` to add an element to the +bloom filter map and ``bpf_map_peek_elem`` to query the map. 
These +operations are exposed to userspace applications using the existing +``bpf`` syscall in the following way: + +- ``BPF_MAP_UPDATE_ELEM`` -> push +- ``BPF_MAP_LOOKUP_ELEM`` -> peek + +The ``max_entries`` size that is specified at map creation time is used +to approximate a reasonable bitmap size for the bloom filter, and is not +otherwise strictly enforced. If the user wishes to insert more entries +into the bloom filter than ``max_entries``, this may lead to a higher +false positive rate. + +The number of hashes to use for the bloom filter is configurable using +the lower 4 bits of ``map_extra`` in ``union bpf_attr`` at map creation +time. If no number is specified, the default used will be 5 hash +functions. In general, using more hashes decreases both the false +positive rate and the speed of a lookup. + +It is not possible to delete elements from a bloom filter map. A bloom +filter map may be used as an inner map. The user is responsible for +synchronising concurrent updates and lookups to ensure no false negative +lookups occur. + +Usage +===== + +Kernel BPF +---------- + +bpf_map_push_elem() +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + +A ``value`` can be added to a bloom filter using the +``bpf_map_push_elem()`` helper. The ``flags`` parameter must be set to +``BPF_ANY`` when adding an entry to the bloom filter. This helper +returns ``0`` on success, or negative error in case of failure. + +bpf_map_peek_elem() +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + long bpf_map_peek_elem(struct bpf_map *map, void *value) + +The ``bpf_map_peek_elem()`` helper is used to determine whether +``value`` is present in the bloom filter map. This helper returns ``0`` +if ``value`` is probably present in the map, or ``-ENOENT`` if ``value`` +is definitely not present in the map. + +Userspace +--------- + +bpf_map_update_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags) + +A userspace program can add a ``value`` to a bloom filter using libbpf's +``bpf_map_update_elem`` function. The ``key`` parameter must be set to +``NULL`` and ``flags`` must be set to ``BPF_ANY``. Returns ``0`` on +success, or negative error in case of failure. + +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + + int bpf_map_lookup_elem (int fd, const void *key, void *value) + +A userspace program can determine the presence of ``value`` in a bloom +filter using libbpf's ``bpf_map_lookup_elem`` function. The ``key`` +parameter must be set to ``NULL``. Returns ``0`` if ``value`` is +probably present in the map, or ``-ENOENT`` if ``value`` is definitely +not present in the map. + +Examples +======== + +Kernel BPF +---------- + +This snippet shows how to declare a bloom filter in a BPF program: + +.. code-block:: c + + struct { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + __type(value, __u32); + __uint(max_entries, 1000); + __uint(map_extra, 3); + } bloom_filter SEC(".maps"); + +This snippet shows how to determine presence of a value in a bloom +filter in a BPF program: + +.. code-block:: c + + void *lookup(__u32 key) + { + if (bpf_map_peek_elem(&bloom_filter, &key) == 0) { + /* Verify not a false positive and fetch an associated + * value using a secondary lookup, e.g. 
in a hash table + */ + return bpf_map_lookup_elem(&hash_table, &key); + } + return 0; + } + +Userspace +--------- + +This snippet shows how to use libbpf to create a bloom filter map from +userspace: + +.. code-block:: c + + int create_bloom() + { + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_extra = 3); /* number of hashes */ + + return bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, + "ipv6_bloom", /* name */ + 0, /* key size, must be zero */ + sizeof(ipv6_addr), /* value size */ + 10000, /* max entries */ + &opts); /* create options */ + } + +This snippet shows how to add an element to a bloom filter from +userspace: + +.. code-block:: c + + int add_element(struct bpf_map *bloom_map, __u32 value) + { + int bloom_fd = bpf_map__fd(bloom_map); + return bpf_map_update_elem(bloom_fd, NULL, &value, BPF_ANY); + } + +References +========== + +https://lwn.net/ml/bpf/[email protected]/ diff --git a/Documentation/bpf/map_cpumap.rst b/Documentation/bpf/map_cpumap.rst index eaf57b38cafd..923cfc8ab51f 100644 --- a/Documentation/bpf/map_cpumap.rst +++ b/Documentation/bpf/map_cpumap.rst @@ -30,30 +30,35 @@ Usage Kernel BPF ---------- -.. c:function:: +bpf_redirect_map() +^^^^^^^^^^^^^^^^^^ +.. code-block:: c + long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) - Redirect the packet to the endpoint referenced by ``map`` at index ``key``. - For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs. +Redirect the packet to the endpoint referenced by ``map`` at index ``key``. +For ``BPF_MAP_TYPE_CPUMAP`` this map contains references to CPUs. - The lower two bits of ``flags`` are used as the return code if the map lookup - fails. This is so that the return value can be one of the XDP program return - codes up to ``XDP_TX``, as chosen by the caller. +The lower two bits of ``flags`` are used as the return code if the map lookup +fails. This is so that the return value can be one of the XDP program return +codes up to ``XDP_TX``, as chosen by the caller. -Userspace ---------- +User space +---------- .. note:: CPUMAP entries can only be updated/looked up/deleted from user space and not from an eBPF program. Trying to call these functions from a kernel eBPF program will result in the program failing to load and a verifier warning. -.. c:function:: - int bpf_map_update_elem(int fd, const void *key, const void *value, - __u64 flags); +bpf_map_update_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); - CPU entries can be added or updated using the ``bpf_map_update_elem()`` - helper. This helper replaces existing elements atomically. The ``value`` parameter - can be ``struct bpf_cpumap_val``. +CPU entries can be added or updated using the ``bpf_map_update_elem()`` +helper. This helper replaces existing elements atomically. The ``value`` parameter +can be ``struct bpf_cpumap_val``. .. code-block:: c @@ -65,23 +70,29 @@ Userspace } bpf_prog; }; - The flags argument can be one of the following: +The flags argument can be one of the following: - BPF_ANY: Create a new element or update an existing element. - BPF_NOEXIST: Create a new element only if it did not exist. - BPF_EXIST: Update an existing element. -.. c:function:: +bpf_map_lookup_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + int bpf_map_lookup_elem(int fd, const void *key, void *value); - CPU entries can be retrieved using the ``bpf_map_lookup_elem()`` - helper. +CPU entries can be retrieved using the ``bpf_map_lookup_elem()`` +helper. 
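
For instance, a user space reader might fetch an entry back to inspect its
queue size and attached program; a minimal sketch (the function name is
illustrative) could look like the following:

.. code-block:: c

    #include <stdio.h>
    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    /* Sketch: read back the bpf_cpumap_val stored for one CPU index. */
    int dump_cpumap_entry(int cpu_map_fd, __u32 cpu)
    {
        struct bpf_cpumap_val val;

        if (bpf_map_lookup_elem(cpu_map_fd, &cpu, &val) < 0)
            return -1;

        printf("cpu %u: qsize=%u prog_id=%u\n", cpu, val.qsize, val.bpf_prog.id);
        return 0;
    }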
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
 
-.. c:function::
 	int bpf_map_delete_elem(int fd, const void *key);
 
-	CPU entries can be deleted using the ``bpf_map_delete_elem()``
-	helper. This helper will return 0 on success, or negative error in case of
-	failure.
+CPU entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
 
 Examples
 ========
@@ -142,8 +153,8 @@ The following code snippet shows how to declare a ``BPF_MAP_TYPE_CPUMAP`` called
         return bpf_redirect_map(&cpu_map, cpu_dest, 0);
     }
 
-Userspace
----------
+User space
+----------
 
 The following code snippet shows how to dynamically set the max_entries for a
 CPUMAP to the max number of cpus available on the system.
diff --git a/Documentation/bpf/map_devmap.rst b/Documentation/bpf/map_devmap.rst
new file mode 100644
index 000000000000..927312c7b8c8
--- /dev/null
+++ b/Documentation/bpf/map_devmap.rst
@@ -0,0 +1,238 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+=================================================
+BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_DEVMAP_HASH
+=================================================
+
+.. note::
+   - ``BPF_MAP_TYPE_DEVMAP`` was introduced in kernel version 4.14
+   - ``BPF_MAP_TYPE_DEVMAP_HASH`` was introduced in kernel version 5.4
+
+``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` are BPF maps primarily
+used as backend maps for the XDP BPF helper call ``bpf_redirect_map()``.
+``BPF_MAP_TYPE_DEVMAP`` is backed by an array that uses the key as
+the index to look up a reference to a net device, while ``BPF_MAP_TYPE_DEVMAP_HASH``
+is backed by a hash table that uses a key to look up a reference to a net device.
+The user provides either <``key``/ ``ifindex``> or <``key``/ ``struct bpf_devmap_val``>
+pairs to update the maps with new net devices.
+
+.. note::
+   - The key to a hash map doesn't have to be an ``ifindex``.
+   - While ``BPF_MAP_TYPE_DEVMAP_HASH`` allows for densely packing the net
+     devices, it comes at the cost of a hash of the key when performing a lookup.
+
+The setup and packet enqueue/send code is shared between the two types of
+devmap; only the lookup and insertion differ.
+
+Usage
+=====
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_DEVMAP`` and ``BPF_MAP_TYPE_DEVMAP_HASH`` this map contains
+references to net devices (for forwarding packets through other ports).
+
+The lower two bits of *flags* are used as the return code if the map lookup
+fails. This is so that the return value can be one of the XDP program return
+codes up to ``XDP_TX``, as chosen by the caller. The higher bits of ``flags``
+can be set to ``BPF_F_BROADCAST`` or ``BPF_F_EXCLUDE_INGRESS`` as defined
+below.
+
+With ``BPF_F_BROADCAST`` the packet will be broadcast to all the interfaces
+in the map, with ``BPF_F_EXCLUDE_INGRESS`` the ingress interface will be excluded
+from the broadcast.
+
+.. note::
+   - The key is ignored if BPF_F_BROADCAST is set.
+   - The broadcast feature can also be used to implement multicast forwarding:
+     simply create multiple DEVMAPs, each one corresponding to a single multicast group.
+
+This helper will return ``XDP_REDIRECT`` on success, or the value of the two
+lower bits of the ``flags`` argument if the map lookup fails.
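
To make that fallback behaviour concrete, a minimal sketch (assuming a DEVMAP
named ``tx_port`` as declared in the examples further below) can encode
``XDP_PASS`` in the lower two bits of ``flags``, so that a failed lookup lets
the packet continue up the stack instead of being dropped:

.. code-block:: c

    SEC("xdp")
    int redirect_or_pass(struct xdp_md *ctx)
    {
        /* If the lookup of index 0 in tx_port fails, return XDP_PASS,
         * which fits in the lower two bits of the flags argument.
         */
        return bpf_redirect_map(&tx_port, 0, XDP_PASS);
    }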
+
+More information about redirection can be found in :doc:`redirect`.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+User space
+----------
+.. note::
+   DEVMAP entries can only be updated/deleted from user space and not
+   from an eBPF program. Trying to call these functions from a kernel eBPF
+   program will result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags);
+
+Net device entries can be added or updated using the ``bpf_map_update_elem()``
+helper. This helper replaces existing elements atomically. The ``value`` parameter
+can be ``struct bpf_devmap_val`` or a simple ``int ifindex`` for backwards
+compatibility.
+
+ .. code-block:: c
+
+    struct bpf_devmap_val {
+        __u32 ifindex;   /* device index */
+        union {
+            int   fd;  /* prog fd on map write */
+            __u32 id;  /* prog id on map read */
+        } bpf_prog;
+    };
+
+The ``flags`` argument can be one of the following:
+  - ``BPF_ANY``: Create a new element or update an existing element.
+  - ``BPF_NOEXIST``: Create a new element only if it did not exist.
+  - ``BPF_EXIST``: Update an existing element.
+
+DEVMAPs can associate a program with a device entry by adding a ``bpf_prog.fd``
+to ``struct bpf_devmap_val``. Programs are run after ``XDP_REDIRECT`` and have
+access to both Rx device and Tx device. The program associated with the ``fd``
+must have type XDP with expected attach type ``xdp_devmap``.
+When a program is associated with a device index, the program is run on an
+``XDP_REDIRECT`` and before the buffer is added to the per-cpu queue. Examples
+of how to attach/use xdp_devmap progs can be found in the kernel selftests:
+
+- ``tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c``
+- ``tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c``
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_lookup_elem(int fd, const void *key, void *value);
+
+Net device entries can be retrieved using the ``bpf_map_lookup_elem()``
+helper.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_delete_elem(int fd, const void *key);
+
+Net device entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+Examples
+========
+
+Kernel BPF
+----------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP``
+called tx_port.
+
+.. code-block:: c
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_DEVMAP);
+        __type(key, __u32);
+        __type(value, __u32);
+        __uint(max_entries, 256);
+    } tx_port SEC(".maps");
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_DEVMAP_HASH``
+called forward_map.
+
+.. code-block:: c
+
+    struct {
+        __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+        __type(key, __u32);
+        __type(value, struct bpf_devmap_val);
+        __uint(max_entries, 32);
+    } forward_map SEC(".maps");
+
+.. note::
+
+    The value type in the DEVMAP above is a ``struct bpf_devmap_val``
+
+The following code snippet shows a simple xdp_redirect_map program. This program
+would work with a user space program that populates the devmap ``forward_map`` based
+on ingress ifindexes.
The BPF program (below) is redirecting packets using the +ingress ``ifindex`` as the ``key``. + +.. code-block:: c + + SEC("xdp") + int xdp_redirect_map_func(struct xdp_md *ctx) + { + int index = ctx->ingress_ifindex; + + return bpf_redirect_map(&forward_map, index, 0); + } + +The following code snippet shows a BPF program that is broadcasting packets to +all the interfaces in the ``tx_port`` devmap. + +.. code-block:: c + + SEC("xdp") + int xdp_redirect_map_func(struct xdp_md *ctx) + { + return bpf_redirect_map(&tx_port, 0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + } + +User space +---------- + +The following code snippet shows how to update a devmap called ``tx_port``. + +.. code-block:: c + + int update_devmap(int ifindex, int redirect_ifindex) + { + int ret; + + ret = bpf_map_update_elem(bpf_map__fd(tx_port), &ifindex, &redirect_ifindex, 0); + if (ret < 0) { + fprintf(stderr, "Failed to update devmap_ value: %s\n", + strerror(errno)); + } + + return ret; + } + +The following code snippet shows how to update a hash_devmap called ``forward_map``. + +.. code-block:: c + + int update_devmap(int ifindex, int redirect_ifindex) + { + struct bpf_devmap_val devmap_val = { .ifindex = redirect_ifindex }; + int ret; + + ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0); + if (ret < 0) { + fprintf(stderr, "Failed to update devmap_ value: %s\n", + strerror(errno)); + } + return ret; + } + +References +=========== + +- https://lwn.net/Articles/728146/ +- https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=6f9d451ab1a33728adb72d7ff66a7b374d665176 +- https://elixir.bootlin.com/linux/latest/source/net/core/filter.c#L4106 diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst index e85120878b27..8669426264c6 100644 --- a/Documentation/bpf/map_hash.rst +++ b/Documentation/bpf/map_hash.rst @@ -34,7 +34,14 @@ the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``. Usage ===== -.. c:function:: +Kernel BPF +---------- + +bpf_map_update_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) Hash entries can be added or updated using the ``bpf_map_update_elem()`` @@ -49,14 +56,22 @@ parameter can be used to control the update behaviour: ``bpf_map_update_elem()`` returns 0 on success, or negative error in case of failure. -.. c:function:: +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) Hash entries can be retrieved using the ``bpf_map_lookup_elem()`` helper. This helper returns a pointer to the value associated with ``key``, or ``NULL`` if no entry was found. -.. c:function:: +bpf_map_delete_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_delete_elem(struct bpf_map *map, const void *key) Hash entries can be deleted using the ``bpf_map_delete_elem()`` @@ -70,7 +85,11 @@ For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH`` the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers automatically access the hash slot for the current CPU. -.. c:function:: +bpf_map_lookup_percpu_elem() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the @@ -89,7 +108,11 @@ See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``. Userspace --------- -.. 
c:function:: +bpf_map_get_next_key() +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key) In userspace, it is possible to iterate through the keys of a hash using diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst index 31be1aa7ba2c..74d64a30f500 100644 --- a/Documentation/bpf/map_lpm_trie.rst +++ b/Documentation/bpf/map_lpm_trie.rst @@ -35,7 +35,11 @@ Usage Kernel BPF ---------- -.. c:function:: +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) The longest prefix entry for a given data value can be found using the @@ -48,7 +52,11 @@ performing longest prefix lookups. For example, when searching for the longest prefix match for an IPv4 address, ``prefixlen`` should be set to ``32``. -.. c:function:: +bpf_map_update_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) Prefix entries can be added or updated using the ``bpf_map_update_elem()`` @@ -61,7 +69,11 @@ case of failure. The flags parameter must be one of BPF_ANY, BPF_NOEXIST or BPF_EXIST, but the value is ignored, giving BPF_ANY semantics. -.. c:function:: +bpf_map_delete_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_delete_elem(struct bpf_map *map, const void *key) Prefix entries can be deleted using the ``bpf_map_delete_elem()`` @@ -74,7 +86,11 @@ Userspace Access from userspace uses libbpf APIs with the same names as above, with the map identified by ``fd``. -.. c:function:: +bpf_map_get_next_key() +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + int bpf_map_get_next_key (int fd, const void *cur_key, void *next_key) A userspace program can iterate through the entries in an LPM trie using diff --git a/Documentation/bpf/map_of_maps.rst b/Documentation/bpf/map_of_maps.rst index 07212b9227a9..7b5617c2d017 100644 --- a/Documentation/bpf/map_of_maps.rst +++ b/Documentation/bpf/map_of_maps.rst @@ -45,7 +45,11 @@ Usage Kernel BPF Helper ----------------- -.. c:function:: +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) Inner maps can be retrieved using the ``bpf_map_lookup_elem()`` helper. This diff --git a/Documentation/bpf/map_queue_stack.rst b/Documentation/bpf/map_queue_stack.rst index f20e31a647b9..8d14ed49d6e1 100644 --- a/Documentation/bpf/map_queue_stack.rst +++ b/Documentation/bpf/map_queue_stack.rst @@ -28,7 +28,11 @@ Usage Kernel BPF ---------- -.. c:function:: +bpf_map_push_elem() +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) An element ``value`` can be added to a queue or stack using the @@ -38,14 +42,22 @@ when the queue or stack is full, the oldest element will be removed to make room for ``value`` to be added. Returns ``0`` on success, or negative error in case of failure. -.. c:function:: +bpf_map_peek_elem() +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_peek_elem(struct bpf_map *map, void *value) This helper fetches an element ``value`` from a queue or stack without removing it. Returns ``0`` on success, or negative error in case of failure. -.. c:function:: +bpf_map_pop_elem() +~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + long bpf_map_pop_elem(struct bpf_map *map, void *value) This helper removes an element into ``value`` from a queue or @@ -55,7 +67,11 @@ stack. 
Returns ``0`` on success, or negative error in case of failure. Userspace --------- -.. c:function:: +bpf_map_update_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + int bpf_map_update_elem (int fd, const void *key, const void *value, __u64 flags) A userspace program can push ``value`` onto a queue or stack using libbpf's @@ -64,7 +80,11 @@ A userspace program can push ``value`` onto a queue or stack using libbpf's same semantics as the ``bpf_map_push_elem`` kernel helper. Returns ``0`` on success, or negative error in case of failure. -.. c:function:: +bpf_map_lookup_elem() +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + int bpf_map_lookup_elem (int fd, const void *key, void *value) A userspace program can peek at the ``value`` at the head of a queue or stack @@ -72,7 +92,11 @@ using the libbpf ``bpf_map_lookup_elem`` function. The ``key`` parameter must be set to ``NULL``. Returns ``0`` on success, or negative error in case of failure. -.. c:function:: +bpf_map_lookup_and_delete_elem() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: c + int bpf_map_lookup_and_delete_elem (int fd, const void *key, void *value) A userspace program can pop a ``value`` from the head of a queue or stack using diff --git a/Documentation/bpf/map_xskmap.rst b/Documentation/bpf/map_xskmap.rst new file mode 100644 index 000000000000..7093b8208451 --- /dev/null +++ b/Documentation/bpf/map_xskmap.rst @@ -0,0 +1,192 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright (C) 2022 Red Hat, Inc. + +=================== +BPF_MAP_TYPE_XSKMAP +=================== + +.. note:: + - ``BPF_MAP_TYPE_XSKMAP`` was introduced in kernel version 4.18 + +The ``BPF_MAP_TYPE_XSKMAP`` is used as a backend map for XDP BPF helper +call ``bpf_redirect_map()`` and ``XDP_REDIRECT`` action, like 'devmap' and 'cpumap'. +This map type redirects raw XDP frames to `AF_XDP`_ sockets (XSKs), a new type of +address family in the kernel that allows redirection of frames from a driver to +user space without having to traverse the full network stack. An AF_XDP socket +binds to a single netdev queue. A mapping of XSKs to queues is shown below: + +.. code-block:: none + + +---------------------------------------------------+ + | xsk A | xsk B | xsk C |<---+ User space + =========================================================|========== + | Queue 0 | Queue 1 | Queue 2 | | Kernel + +---------------------------------------------------+ | + | Netdev eth0 | | + +---------------------------------------------------+ | + | +=============+ | | + | | key | xsk | | | + | +---------+ +=============+ | | + | | | | 0 | xsk A | | | + | | | +-------------+ | | + | | | | 1 | xsk B | | | + | | BPF |-- redirect -->+-------------+-------------+ + | | prog | | 2 | xsk C | | + | | | +-------------+ | + | | | | + | | | | + | +---------+ | + | | + +---------------------------------------------------+ + +.. note:: + An AF_XDP socket that is bound to a certain <netdev/queue_id> will *only* + accept XDP frames from that <netdev/queue_id>. If an XDP program tries to redirect + from a <netdev/queue_id> other than what the socket is bound to, the frame will + not be received on the socket. + +Typically an XSKMAP is created per netdev. This map contains an array of XSK File +Descriptors (FDs). The number of array elements is typically set or adjusted using +the ``max_entries`` map parameter. For AF_XDP ``max_entries`` is equal to the number +of queues supported by the netdev. + +.. note:: + Both the map key and map value size must be 4 bytes. 
+
+Usage
+=====
+
+Kernel BPF
+----------
+bpf_redirect_map()
+^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+
+Redirect the packet to the endpoint referenced by ``map`` at index ``key``.
+For ``BPF_MAP_TYPE_XSKMAP`` this map contains references to XSK FDs
+for sockets attached to a netdev's queues.
+
+.. note::
+    If the map is empty at an index, the packet is dropped. This means that it is
+    necessary to have an XDP program loaded with at least one XSK in the
+    XSKMAP to be able to get any traffic to user space through the socket.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+
+XSK entry references of type ``struct xdp_sock *`` can be retrieved using the
+``bpf_map_lookup_elem()`` helper.
+
+User space
+----------
+.. note::
+    XSK entries can only be updated/deleted from user space and not from
+    a BPF program. Trying to call these functions from a kernel BPF program will
+    result in the program failing to load and a verifier warning.
+
+bpf_map_update_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags)
+
+XSK entries can be added or updated using the ``bpf_map_update_elem()``
+helper. The ``key`` parameter is equal to the queue_id of the queue the XSK
+is attaching to, and the ``value`` parameter is the FD of that socket.
+
+Under the hood, the XSKMAP update function uses the XSK FD value to retrieve the
+associated ``struct xdp_sock`` instance.
+
+The flags argument can be one of the following:
+
+- BPF_ANY: Create a new element or update an existing element.
+- BPF_NOEXIST: Create a new element only if it did not exist.
+- BPF_EXIST: Update an existing element.
+
+bpf_map_lookup_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_lookup_elem(int fd, const void *key, void *value)
+
+Returns ``struct xdp_sock *`` or negative error in case of failure.
+
+bpf_map_delete_elem()
+^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: c
+
+   int bpf_map_delete_elem(int fd, const void *key)
+
+XSK entries can be deleted using the ``bpf_map_delete_elem()``
+helper. This helper will return 0 on success, or negative error in case of
+failure.
+
+.. note::
+    When `libxdp`_ deletes an XSK it also removes the associated socket
+    entry from the XSKMAP.
+
+Examples
+========
+Kernel
+------
+
+The following code snippet shows how to declare a ``BPF_MAP_TYPE_XSKMAP`` called
+``xsks_map`` and how to redirect packets to an XSK.
+
+.. code-block:: c
+
+   struct {
+        __uint(type, BPF_MAP_TYPE_XSKMAP);
+        __type(key, __u32);
+        __type(value, __u32);
+        __uint(max_entries, 64);
+   } xsks_map SEC(".maps");
+
+
+   SEC("xdp")
+   int xsk_redir_prog(struct xdp_md *ctx)
+   {
+        __u32 index = ctx->rx_queue_index;
+
+        if (bpf_map_lookup_elem(&xsks_map, &index))
+            return bpf_redirect_map(&xsks_map, index, 0);
+        return XDP_PASS;
+   }
+
+User space
+----------
+
+The following code snippet shows how to update an XSKMAP with an XSK entry.
+
+.. code-block:: c
+
+   int update_xsks_map(struct bpf_map *xsks_map, int queue_id, int xsk_fd)
+   {
+       int ret;
+
+       ret = bpf_map_update_elem(bpf_map__fd(xsks_map), &queue_id, &xsk_fd, 0);
+       if (ret < 0)
+           fprintf(stderr, "Failed to update xsks_map: %s\n", strerror(errno));
+
+       return ret;
+   }
+
+For an example of how to create AF_XDP sockets, please see the AF_XDP-example and
+AF_XDP-forwarding programs in the `bpf-examples`_ directory in the `libxdp`_ repository.
+For a detailed explanation of the AF_XDP interface, please see:
+
+- `libxdp-readme`_.
+- `AF_XDP`_ kernel documentation.
+
+.. note::
+    The most comprehensive resource for using XSKMAPs and AF_XDP is `libxdp`_.
+
+.. _libxdp: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp
+.. _AF_XDP: https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+.. _bpf-examples: https://github.com/xdp-project/bpf-examples
+.. _libxdp-readme: https://github.com/xdp-project/xdp-tools/tree/master/lib/libxdp#using-af_xdp-sockets
diff --git a/Documentation/bpf/programs.rst b/Documentation/bpf/programs.rst
index 620eb667ac7a..c99000ab6d9b 100644
--- a/Documentation/bpf/programs.rst
+++ b/Documentation/bpf/programs.rst
@@ -7,3 +7,6 @@ Program Types
    :glob:

    prog_*
+
+For a list of all program types, see :ref:`program_types_and_elf` in
+the :ref:`libbpf` documentation.
diff --git a/Documentation/bpf/redirect.rst b/Documentation/bpf/redirect.rst
new file mode 100644
index 000000000000..2fa2b0b05004
--- /dev/null
+++ b/Documentation/bpf/redirect.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+.. Copyright (C) 2022 Red Hat, Inc.
+
+========
+Redirect
+========
+XDP_REDIRECT
+############
+Supported maps
+--------------
+
+XDP_REDIRECT works with the following map types:
+
+- ``BPF_MAP_TYPE_DEVMAP``
+- ``BPF_MAP_TYPE_DEVMAP_HASH``
+- ``BPF_MAP_TYPE_CPUMAP``
+- ``BPF_MAP_TYPE_XSKMAP``
+
+For more information on these maps, please see the specific map documentation.
+
+Process
+-------
+
+.. kernel-doc:: net/core/filter.c
+   :doc: xdp redirect
+
+.. note::
+    Not all drivers support transmitting frames after a redirect, and for
+    those that do, not all of them support non-linear frames. Non-linear xdp
+    bufs/frames are bufs/frames that contain more than one fragment.
+
+Debugging packet drops
+----------------------
+Silent packet drops for XDP_REDIRECT can be debugged using:
+
+- bpf_trace
+- perf record
+
+bpf_trace
+^^^^^^^^^
+The following bpftrace command can be used to capture and count all XDP tracepoints:
+
+.. code-block:: none
+
+    sudo bpftrace -e 'tracepoint:xdp:* { @cnt[probe] = count(); }'
+    Attaching 12 probes...
+    ^C
+
+    @cnt[tracepoint:xdp:mem_connect]: 18
+    @cnt[tracepoint:xdp:mem_disconnect]: 18
+    @cnt[tracepoint:xdp:xdp_exception]: 19605
+    @cnt[tracepoint:xdp:xdp_devmap_xmit]: 1393604
+    @cnt[tracepoint:xdp:xdp_redirect]: 22292200
+
+.. note::
+    The various xdp tracepoints can be found in ``source/include/trace/events/xdp.h``
+
+The following bpftrace command can be used to extract the ``ERRNO`` being returned as
+part of the err parameter:
+
+.. code-block:: none
+
+    sudo bpftrace -e \
+    'tracepoint:xdp:xdp_redirect*_err {@redir_errno[-args->err] = count();}
+    tracepoint:xdp:xdp_devmap_xmit {@devmap_errno[-args->err] = count();}'
+
+perf record
+^^^^^^^^^^^
+The perf tool also supports recording tracepoints:
+
+..
code-block:: none + + perf record -a -e xdp:xdp_redirect_err \ + -e xdp:xdp_redirect_map_err \ + -e xdp:xdp_exception \ + -e xdp:xdp_devmap_xmit + +References +=========== + +- https://github.com/xdp-project/xdp-tutorial/tree/master/tracing02-xdp-monitor diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml index b283c8ca2bbf..5c08d8b6e995 100644 --- a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml @@ -62,13 +62,6 @@ properties: description: Inform the driver that last channel will be used to sensor battery. - aspeed,trim-data-valid: - type: boolean - description: | - The ADC reference voltage can be calibrated to obtain the trimming - data which will be stored in otp. This property informs the driver that - the data store in the otp is valid. - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml b/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml index fe1c5016f7f3..1c191bc5a178 100644 --- a/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml +++ b/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml @@ -16,8 +16,11 @@ description: properties: compatible: - items: + oneOf: - const: goodix,gt7375p + - items: + - const: goodix,gt7986u + - const: goodix,gt7375p reg: enum: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml index 2684562df4d9..be29e0b80995 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml @@ -24,7 +24,7 @@ properties: oneOf: - items: - enum: - - qcom,sc7280-bwmon + - qcom,sc7280-cpu-bwmon - qcom,sdm845-bwmon - const: qcom,msm8998-bwmon - const: qcom,msm8998-bwmon # BWMON v4 diff --git a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml index 24d7bf21499e..9d44236f2deb 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml @@ -36,6 +36,9 @@ properties: resets: maxItems: 1 + iommus: + maxItems: 1 + required: - compatible - reg @@ -43,6 +46,7 @@ required: - clocks - clock-names - resets + - iommus additionalProperties: false @@ -59,6 +63,7 @@ examples: clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; ... 
diff --git a/Documentation/devicetree/bindings/net/marvell,dfx-server.yaml b/Documentation/devicetree/bindings/net/marvell,dfx-server.yaml new file mode 100644 index 000000000000..8a14c919e3f7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell,dfx-server.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/marvell,dfx-server.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Prestera DFX server + +maintainers: + - Miquel Raynal <[email protected]> + +select: + properties: + compatible: + contains: + const: marvell,dfx-server + required: + - compatible + +properties: + compatible: + items: + - const: marvell,dfx-server + - const: simple-bus + + reg: + maxItems: 1 + + ranges: true + + '#address-cells': + const: 1 + + '#size-cells': + const: 1 + +required: + - compatible + - reg + - ranges + +# The DFX server may expose clocks described as subnodes +additionalProperties: + type: object + +examples: + - | + + #define MBUS_ID(target,attributes) (((target) << 24) | ((attributes) << 16)) + bus@0 { + reg = <0 0>; + #address-cells = <2>; + #size-cells = <1>; + + dfx-bus@ac000000 { + compatible = "marvell,dfx-server", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 MBUS_ID(0x08, 0x00) 0 0x100000>; + reg = <MBUS_ID(0x08, 0x00) 0 0x100000>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.txt b/Documentation/devicetree/bindings/net/marvell,prestera.txt deleted file mode 100644 index e28938ddfdf5..000000000000 --- a/Documentation/devicetree/bindings/net/marvell,prestera.txt +++ /dev/null @@ -1,81 +0,0 @@ -Marvell Prestera Switch Chip bindings -------------------------------------- - -Required properties: -- compatible: must be "marvell,prestera" and one of the following - "marvell,prestera-98dx3236", - "marvell,prestera-98dx3336", - "marvell,prestera-98dx4251", -- reg: address and length of the register set for the device. -- interrupts: interrupt for the device - -Optional properties: -- dfx: phandle reference to the "DFX Server" node - -Example: - -switch { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 MBUS_ID(0x03, 0x00) 0 0x100000>; - - packet-processor@0 { - compatible = "marvell,prestera-98dx3236", "marvell,prestera"; - reg = <0 0x4000000>; - interrupts = <33>, <34>, <35>; - dfx = <&dfx>; - }; -}; - -DFX Server bindings -------------------- - -Required properties: -- compatible: must be "marvell,dfx-server", "simple-bus" -- ranges: describes the address mapping of a memory-mapped bus. -- reg: address and length of the register set for the device. - -Example: - -dfx-server { - compatible = "marvell,dfx-server", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 MBUS_ID(0x08, 0x00) 0 0x100000>; - reg = <MBUS_ID(0x08, 0x00) 0 0x100000>; -}; - -Marvell Prestera SwitchDev bindings ------------------------------------ -Optional properties: -- compatible: must be "marvell,prestera" -- base-mac-provider: describes handle to node which provides base mac address, - might be a static base mac address or nvme cell provider. 
- -Example: - -eeprom_mac_addr: eeprom-mac-addr { - compatible = "eeprom,mac-addr-cell"; - status = "okay"; - - nvmem = <&eeprom_at24>; -}; - -prestera { - compatible = "marvell,prestera"; - status = "okay"; - - base-mac-provider = <&eeprom_mac_addr>; -}; - -The current implementation of Prestera Switchdev PCI interface driver requires -that BAR2 is assigned to 0xf6000000 as base address from the PCI IO range: - -&cp0_pcie0 { - ranges = <0x81000000 0x0 0xfb000000 0x0 0xfb000000 0x0 0xf0000 - 0x82000000 0x0 0xf6000000 0x0 0xf6000000 0x0 0x2000000 - 0x82000000 0x0 0xf9000000 0x0 0xf9000000 0x0 0x100000>; - phys = <&cp0_comphy0 0>; - status = "okay"; -}; diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.yaml b/Documentation/devicetree/bindings/net/marvell,prestera.yaml new file mode 100644 index 000000000000..5ea8b73663a5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell,prestera.yaml @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/marvell,prestera.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Prestera switch family + +maintainers: + - Miquel Raynal <[email protected]> + +properties: + compatible: + oneOf: + - items: + - enum: + - marvell,prestera-98dx3236 + - marvell,prestera-98dx3336 + - marvell,prestera-98dx4251 + - const: marvell,prestera + - enum: + - pci11ab,c804 + - pci11ab,c80c + - pci11ab,cc1e + + reg: + maxItems: 1 + + interrupts: + maxItems: 3 + + dfx: + description: Reference to the DFX Server bus node. + $ref: /schemas/types.yaml#/definitions/phandle + + nvmem-cells: true + + nvmem-cell-names: true + +if: + properties: + compatible: + contains: + const: marvell,prestera + +# Memory mapped AlleyCat3 family +then: + properties: + nvmem-cells: false + nvmem-cell-names: false + required: + - interrupts + +# PCI Aldrin family +else: + properties: + interrupts: false + dfx: false + +required: + - compatible + - reg + +# Ports can also be described +additionalProperties: + type: object + +examples: + - | + packet-processor@0 { + compatible = "marvell,prestera-98dx3236", "marvell,prestera"; + reg = <0 0x4000000>; + interrupts = <33>, <34>, <35>; + dfx = <&dfx>; + }; + + - | + pcie@0 { + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x0 0x0 0x0>; + reg = <0x0 0x0 0x0 0x0 0x0 0x0>; + device_type = "pci"; + + switch@0,0 { + reg = <0x0 0x0 0x0 0x0 0x0>; + compatible = "pci11ab,c80c"; + nvmem-cells = <&mac_address 0>; + nvmem-cell-names = "mac-address"; + }; + }; diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml index b2558421268a..6924aff0b2c5 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml @@ -14,7 +14,9 @@ properties: oneOf: - const: nxp,nxp-nci-i2c - items: - - const: nxp,pn547 + - enum: + - nxp,nq310 + - nxp,pn547 - const: nxp,nxp-nci-i2c enable-gpios: diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.yaml b/Documentation/devicetree/bindings/net/qca,ar71xx.yaml index 1ebf9e8c8a1d..89f94b31b546 100644 --- a/Documentation/devicetree/bindings/net/qca,ar71xx.yaml +++ b/Documentation/devicetree/bindings/net/qca,ar71xx.yaml @@ -123,7 +123,6 @@ examples: switch_port0: port@0 { reg = <0x0>; - label = "cpu"; ethernet = <ð1>; phy-mode = "gmii"; diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt 
b/Documentation/devicetree/bindings/net/xilinx_axienet.txt index 1aa4c6006cd0..80e505a2fda1 100644 --- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -68,6 +68,8 @@ Optional properties: - mdio : Child node for MDIO bus. Must be defined if PHY access is required through the core's MDIO interface (i.e. always, unless the PHY is accessed through a different bus). + Non-standard MDIO bus frequency is supported via + "clock-frequency", see mdio.yaml. - pcs-handle: Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X modes, where "pcs-handle" should be used to point diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml index c3e9f3485449..dea293f403d9 100644 --- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml +++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -8,7 +8,7 @@ title: Audio codec controlled by ChromeOS EC maintainers: - Cheng-Yi Chiang <[email protected]> - - Tzung-Bi Shih <[email protected]> + - Tzung-Bi Shih <[email protected]> description: | Google's ChromeOS EC codec is a digital mic codec provided by the diff --git a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml index 1d73204451b1..ea7d4900ee4a 100644 --- a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml +++ b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Realtek rt1015p codec devicetree bindings maintainers: - - Tzung-Bi Shih <[email protected]> + - Tzung-Bi Shih <[email protected]> description: | Rt1015p is a rt1015 variant which does not support I2C and diff --git a/Documentation/driver-api/miscellaneous.rst b/Documentation/driver-api/miscellaneous.rst index 304ffb146cf9..4a5104a368ac 100644 --- a/Documentation/driver-api/miscellaneous.rst +++ b/Documentation/driver-api/miscellaneous.rst @@ -16,12 +16,11 @@ Parallel Port Devices 16x50 UART Driver ================= -.. kernel-doc:: drivers/tty/serial/serial_core.c - :export: - .. kernel-doc:: drivers/tty/serial/8250/8250_core.c :export: +See serial/driver.rst for related APIs. + Pulse-Width Modulation (PWM) ============================ diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 96cd7a26f3d9..adc4bf4f3c50 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -566,7 +566,8 @@ miimon link monitoring. A value of 100 is a good starting point. The use_carrier option, below, affects how the link state is determined. See the High Availability section for additional - information. The default value is 0. + information. The default value is 100 if arp_interval is not + set. min_links @@ -956,6 +957,7 @@ xmit_hash_policy hash = hash XOR source IP XOR destination IP hash = hash XOR (hash RSHIFT 16) hash = hash XOR (hash RSHIFT 8) + hash = hash RSHIFT 1 And then hash is reduced modulo slave count. 
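As a rough illustration of the arithmetic above (a sketch only, not the kernel's actual ``bond_xmit_hash()`` implementation), the fold and reduction can be written as:

.. code-block:: c

    #include <stdint.h>

    /* Illustrative fold of the intermediate hash described above. */
    static unsigned int pick_slave(uint32_t hash, uint32_t src_ip,
                                   uint32_t dst_ip, unsigned int slave_cnt)
    {
            hash ^= src_ip ^ dst_ip;  /* hash XOR source IP XOR destination IP */
            hash ^= hash >> 16;       /* hash XOR (hash RSHIFT 16) */
            hash ^= hash >> 8;        /* hash XOR (hash RSHIFT 8) */
            hash >>= 1;               /* hash RSHIFT 1 */

            return hash % slave_cnt;  /* reduce modulo slave count */
    }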
If the protocol is IPv6 then the source and destination
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst
index 51e6624fb774..1d2f55feca24 100644
--- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst
+++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support.rst
@@ -181,10 +181,13 @@ when necessary using the below listed API::
 - int dpaa2_mac_connect(struct dpaa2_mac *mac);
 - void dpaa2_mac_disconnect(struct dpaa2_mac *mac);

-A phylink integration is necessary only when the partner DPMAC is not of TYPE_FIXED.
-One can check for this condition using the below API::
+A phylink integration is necessary only when the partner DPMAC is not of
+``TYPE_FIXED``. This means it is either of ``TYPE_PHY``, or of
+``TYPE_BACKPLANE`` (the difference between the two being that in the
+``TYPE_BACKPLANE`` mode, the MC firmware does not access the PCS registers).
+One can check for this condition using the following helper::

- - bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,struct fsl_mc_io *mc_io);
+ - static inline bool dpaa2_mac_is_type_phy(struct dpaa2_mac *mac);

 Before connection to a MAC, the caller must allocate and populate the
 dpaa2_mac structure with the associated net_device, a pointer to the MC portal
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index 5edf50d7dbd5..e8fa7ac9e6b1 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -25,7 +25,7 @@ Enabling the driver and kconfig options
 | at build time via kernel Kconfig flags.
 | Basic features, ethernet net device rx/tx offloads and XDP, are available with the most basic flags
 | CONFIG_MLX5_CORE=y/m and CONFIG_MLX5_CORE_EN=y.
-| For the list of advanced features please see below.
+| For the list of advanced features, please see below.

 **CONFIG_MLX5_CORE=(y/m/n)** (module mlx5_core.ko)

@@ -89,11 +89,11 @@ Enabling the driver and kconfig options

 **CONFIG_MLX5_EN_IPSEC=(y/n)**

-| Enables `IPSec XFRM cryptography-offload accelaration <http://www.mellanox.com/related-docs/prod_software/Mellanox_Innova_IPsec_Ethernet_Adapter_Card_User_Manual.pdf>`_.
+| Enables `IPSec XFRM cryptography-offload acceleration <http://www.mellanox.com/related-docs/prod_software/Mellanox_Innova_IPsec_Ethernet_Adapter_Card_User_Manual.pdf>`_.

 **CONFIG_MLX5_EN_TLS=(y/n)**

-| TLS cryptography-offload accelaration.
+| TLS cryptography-offload acceleration.

 **CONFIG_MLX5_INFINIBAND=(y/n/m)** (module mlx5_ib.ko)

@@ -139,14 +139,14 @@ flow_steering_mode: Device flow steering mode
 The flow steering mode parameter controls the flow steering mode of the driver.
 Two modes are supported:
 1. 'dmfs' - Device managed flow steering.
-2. 'smfs - Software/Driver managed flow steering.
+2. 'smfs' - Software/Driver managed flow steering.

 In DMFS mode, the HW steering entities are created and managed through the
 Firmware.
 In SMFS mode, the HW steering entities are created and managed though by
-the driver directly into Hardware without firmware intervention.
+the driver directly into hardware without firmware intervention.

-SMFS mode is faster and provides better rule inserstion rate compared to default DMFS mode.
+SMFS mode is faster and provides better rule insertion rate compared to default DMFS mode. User command examples: @@ -165,9 +165,9 @@ User command examples: enable_roce: RoCE enablement state ---------------------------------- RoCE enablement state controls driver support for RoCE traffic. -When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well known UDP RoCE port is handled as raw ethernet traffic. +When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well-known UDP RoCE port is handled as raw ethernet traffic. -To change RoCE enablement state a user must change the driverinit cmode value and run devlink reload. +To change RoCE enablement state, a user must change the driverinit cmode value and run devlink reload. User command examples: @@ -186,7 +186,7 @@ User command examples: esw_port_metadata: Eswitch port metadata state ---------------------------------------------- -When applicable, disabling Eswitch metadata can increase packet rate +When applicable, disabling eswitch metadata can increase packet rate up to 20% depending on the use case and packet sizes. Eswitch port metadata state controls whether to internally tag packets with @@ -253,26 +253,26 @@ mlx5 subfunction ================ mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface. -A Subfunction has its own function capabilities and its own resources. This +A subfunction has its own function capabilities and its own resources. This means a subfunction has its own dedicated queues (txq, rxq, cq, eq). These queues are neither shared nor stolen from the parent PCI function. -When a subfunction is RDMA capable, it has its own QP1, GID table and rdma +When a subfunction is RDMA capable, it has its own QP1, GID table, and RDMA resources neither shared nor stolen from the parent PCI function. A subfunction has a dedicated window in PCI BAR space that is not shared -with ther other subfunctions or the parent PCI function. This ensures that all -devices (netdev, rdma, vdpa etc.) of the subfunction accesses only assigned +with the other subfunctions or the parent PCI function. This ensures that all +devices (netdev, rdma, vdpa, etc.) of the subfunction accesses only assigned PCI BAR space. -A Subfunction supports eswitch representation through which it supports tc +A subfunction supports eswitch representation through which it supports tc offloads. The user configures eswitch to send/receive packets from/to the subfunction port. Subfunctions share PCI level resources such as PCI MSI-X IRQs with other subfunctions and/or with its parent PCI function. -Example mlx5 software, system and device view:: +Example mlx5 software, system, and device view:: _______ | admin | @@ -310,7 +310,7 @@ Example mlx5 software, system and device view:: | (device add/del) _____|____ ____|________ | | | subfunction | - | PCI NIC |---- activate/deactive events---->| host driver | + | PCI NIC |--- activate/deactivate events--->| host driver | |__________| | (mlx5_core) | |_____________| @@ -320,7 +320,7 @@ Subfunction is created using devlink port interface. 
$ devlink dev eswitch set pci/0000:06:00.0 mode switchdev

-- Add a devlink port of subfunction flaovur::
+- Add a devlink port of subfunction flavour::

 $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88
 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false

@@ -379,18 +379,18 @@ device created for the PCI VF/SF.
 function: hw_addr 00:00:00:00:00:00

-- Set the MAC address of the VF identified by its unique devlink port index::
+- Set the MAC address of the SF identified by its unique devlink port index::

 $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88

 $ devlink port show pci/0000:06:00.0/32768
- pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcivf pfnum 0 sfnum 88
+ pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88
 function: hw_addr 00:00:00:00:88:88

 SF state setup
 --------------

-To use the SF, the user must active the SF using the SF function state
+To use the SF, the user must activate the SF using the SF function state
 attribute.

 - Get the state of the SF identified by its unique devlink port index::

@@ -447,7 +447,7 @@ for it.

 Additionally, the SF port also gets the event when the driver attaches to the
 auxiliary device of the subfunction. This results in changing the operational
-state of the function. This provides visiblity to the user to decide when is it
+state of the function. This provides visibility to the user to decide when it is
 safe to delete the SF port for graceful termination of the subfunction.

 - Show the SF port operational state::

@@ -464,14 +464,14 @@ tx reporter
 -----------
 The tx reporter is responsible for reporting and recovering of the following two error scenarios:
-- TX timeout
+- tx timeout
   Report on kernel tx timeout detection.
   Recover by searching lost interrupts.
-- TX error completion
+- tx error completion
   Report on error tx completion.
-  Recover by flushing the TX queue and reset it.
+  Recover by flushing the tx queue and resetting it.

-TX reporter also support on demand diagnose callback, on which it provides
+tx reporter also supports an on-demand diagnose callback, on which it provides
 real time information of its send queues status.

 User commands examples:

@@ -491,32 +491,32 @@ rx reporter
 -----------
 The rx reporter is responsible for reporting and recovering of the following two error scenarios:
-- RX queues initialization (population) timeout
-  RX queues descriptors population on ring initialization is done in
-  napi context via triggering an irq, in case of a failure to get
-  the minimum amount of descriptors, a timeout would occur and it
-  could be recoverable by polling the EQ (Event Queue).
-- RX completions with errors (reported by HW on interrupt context)
+- rx queues' initialization (population) timeout
+  Population of rx queues' descriptors on ring initialization is done
+  in napi context via triggering an irq. In case of a failure to get
+  the minimum amount of descriptors, a timeout would occur, and
+  descriptors could be recovered by polling the EQ (Event Queue).
+- rx completions with errors (reported by HW on interrupt context)
   Report on rx completion error.
   Recover (if needed) by flushing the related queue and resetting it.

-RX reporter also supports on demand diagnose callback, on which it
-provides real time information of its receive queues status.
+rx reporter also supports an on-demand diagnose callback, on which it
+provides real time information of its receive queues' status.
-- Diagnose rx queues status, and corresponding completion queue::
+- Diagnose rx queues' status and corresponding completion queue::

 $ devlink health diagnose pci/0000:82:00.0 reporter rx

-NOTE: This command has valid output only when interface is up, otherwise the command has empty output.
+NOTE: This command has valid output only when the interface is up. Otherwise, the command has empty output.

 - Show number of rx errors indicated, number of recover flows ended successfully,
-  is autorecover enabled and graceful period from last recover::
+  is autorecover enabled, and graceful period from last recover::

 $ devlink health show pci/0000:82:00.0 reporter rx

 fw reporter
 -----------
-The fw reporter implements diagnose and dump callbacks.
+The fw reporter implements `diagnose` and `dump` callbacks.
 It follows symptoms of fw error such as fw syndrome by triggering
 fw core dump and storing it into the dump buffer.
 The fw reporter diagnose command can be triggered any time by the user to check
@@ -537,7 +537,7 @@ running it on other PF or any VF will return "Operation not permitted".

 fw fatal reporter
 -----------------
-The fw fatal reporter implements dump and recover callbacks.
+The fw fatal reporter implements `dump` and `recover` callbacks.
 It follows fatal errors indications by CR-space dump and recover flow.
 The CR-space dump uses vsc interface which is valid even if the FW command
 interface is not functional, which is the case in most FW fatal errors.
@@ -552,7 +552,7 @@ User commands examples:

 $ devlink health recover pci/0000:82:00.0 reporter fw_fatal

-- Read FW CR-space dump if already strored or trigger new one::
+- Read FW CR-space dump if already stored or trigger new one::

 $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal

@@ -561,10 +561,10 @@ NOTE: This command can run only on PF.

 mlx5 tracepoints
 ================
-mlx5 driver provides internal trace points for tracking and debugging using
+mlx5 driver provides internal tracepoints for tracking and debugging using
 kernel tracepoints interfaces (refer to Documentation/trace/ftrace.rst).

-For the list of support mlx5 events check /sys/kernel/debug/tracing/events/mlx5/
+For the list of supported mlx5 events, check `/sys/kernel/debug/tracing/events/mlx5/`.

 tc and eswitch offloads tracepoints:
diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 643f5903d1d8..98557c2ab1c1 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -130,10 +130,11 @@ it is deployed. Subfunction is created and deployed in unit of 1. Unlike
 SRIOV VFs, a subfunction doesn't require its own PCI virtual function.
 A subfunction communicates with the hardware through the parent PCI function.

-To use a subfunction, 3 steps setup sequence is followed.
-(1) create - create a subfunction;
-(2) configure - configure subfunction attributes;
-(3) deploy - deploy the subfunction;
+To use a subfunction, a 3-step setup sequence is followed:
+
+1) create - create a subfunction;
+2) configure - configure subfunction attributes;
+3) deploy - deploy the subfunction;

 Subfunction management is done using devlink port user interface.
 User performs setup on the subfunction management device.

@@ -216,13 +217,17 @@ nodes with the same priority form a WFQ subgroup in the sibling group
 and arbitration among them is based on assigned weights.

 Arbitration flow from the high level:
+
 #.
Choose a node, or group of nodes with the highest priority that stays within the BW limit and are not blocked. Use ``tx_priority`` as a parameter for this arbitration. + #. If group of nodes have the same priority perform WFQ arbitration on that subgroup. Use ``tx_weight`` as a parameter for this arbitration. + #. Select the winner node, and continue arbitration flow among it's children, until leaf node is reached, and the winner is established. + #. If all the nodes from the highest priority sub-group are satisfied, or overused their assigned BW, move to the lower priority nodes. diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index f06dca9a1eb6..9232cd7da301 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -31,6 +31,15 @@ in its ``devlink_region_ops`` structure. If snapshot id is not set in the ``DEVLINK_CMD_REGION_NEW`` request kernel will allocate one and send the snapshot information to user space. +Regions may optionally allow directly reading from their contents without a +snapshot. Direct read requests are not atomic. In particular a read request +of size 256 bytes or larger will be split into multiple chunks. If atomic +access is required, use a snapshot. A driver wishing to enable this for a +region should implement the ``.read`` callback in the ``devlink_region_ops`` +structure. User space can request a direct read by using the +``DEVLINK_ATTR_REGION_DIRECT`` attribute instead of specifying a snapshot +id. + example usage ------------- @@ -65,6 +74,10 @@ example usage $ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0 length 16 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 + # Read from the region without a snapshot + $ devlink region read pci/0000:00:05.0/fw-health address 16 length 16 + 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8 + As regions are likely very device or driver specific, no generic regions are defined. See the driver-specific documentation files for information on the specific regions a driver supports. diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index 890062da7820..625efb3777d5 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -189,12 +189,21 @@ device data. * - ``nvm-flash`` - The contents of the entire flash chip, sometimes referred to as the device's Non Volatile Memory. + * - ``shadow-ram`` + - The contents of the Shadow RAM, which is loaded from the beginning + of the flash. Although the contents are primarily from the flash, + this area also contains data generated during device boot which is + not stored in flash. * - ``device-caps`` - The contents of the device firmware's capabilities buffer. Useful to determine the current state and configuration of the device. -Users can request an immediate capture of a snapshot via the -``DEVLINK_CMD_REGION_NEW`` +Both the ``nvm-flash`` and ``shadow-ram`` regions can be accessed without a +snapshot. The ``device-caps`` region requires a snapshot as the contents are +sent by firmware and can't be split into separate reads. + +Users can request an immediate capture of a snapshot for all three regions +via the ``DEVLINK_CMD_REGION_NEW`` command. .. 
code:: shell diff --git a/Documentation/networking/generic_netlink.rst b/Documentation/networking/generic_netlink.rst index 59e04ccf80c1..d960dbd7e80e 100644 --- a/Documentation/networking/generic_netlink.rst +++ b/Documentation/networking/generic_netlink.rst @@ -6,4 +6,4 @@ Generic Netlink A wiki document on how to use Generic Netlink can be found here: - * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto + * https://wiki.linuxfoundation.org/networking/generic_netlink_howto diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst index 922e0b547bc3..66b07f14714c 100644 --- a/Documentation/process/code-of-conduct-interpretation.rst +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're uncertain how to handle situations that come up. It will not be considered a violation report unless you want it to be. If you are uncertain about approaching the TAB or any other maintainers, please -reach out to our conflict mediator, Joanna Lee <[email protected]>. +reach out to our conflict mediator, Joanna Lee <[email protected]>. In the end, "be kind to each other" is really what the end goal is for everybody. We know everyone is human and we all fail at times, but the diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst index 128878f5bb70..f3ec25b163d7 100644 --- a/Documentation/translations/zh_CN/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/loongarch/introduction.rst @@ -70,8 +70,8 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其� ================= ================== =================== ========== .. 
note:: - 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 - ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。 + 注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是 + ``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。 向量寄存器 diff --git a/MAINTAINERS b/MAINTAINERS index 61fe86968111..955c1be1efb2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2197,7 +2197,7 @@ M: Wei Xu <[email protected]> L: [email protected] (moderated for non-subscribers) S: Supported W: http://www.hisilicon.com -T: git git://github.com/hisilicon/linux-hisi.git +T: git https://github.com/hisilicon/linux-hisi.git F: arch/arm/boot/dts/hi3* F: arch/arm/boot/dts/hip* F: arch/arm/boot/dts/hisi* @@ -4810,7 +4810,7 @@ R: Jeff Layton <[email protected]> S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: include/linux/ceph/ F: include/linux/crush/ F: net/ceph/ @@ -4822,7 +4822,7 @@ R: Jeff Layton <[email protected]> S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: Documentation/filesystems/ceph.rst F: fs/ceph/ @@ -4912,7 +4912,7 @@ F: drivers/platform/chrome/ CHROMEOS EC CODEC DRIVER M: Cheng-Yi Chiang <[email protected]> -M: Tzung-Bi Shih <[email protected]> +M: Tzung-Bi Shih <[email protected]> R: Guenter Roeck <[email protected]> S: Maintained @@ -5586,8 +5586,6 @@ F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) M: Ayush Sawal <[email protected]> -M: Vinay Kumar Yadav <[email protected]> -M: Rohit Maheshwari <[email protected]> S: Supported W: http://www.chelsio.com @@ -5595,8 +5593,6 @@ F: drivers/crypto/chelsio CXGB4 INLINE CRYPTO DRIVER M: Ayush Sawal <[email protected]> -M: Vinay Kumar Yadav <[email protected]> -M: Rohit Maheshwari <[email protected]> S: Supported W: http://www.chelsio.com @@ -10291,7 +10287,7 @@ T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ INTEL HID EVENT DRIVER -M: Alex Hung <[email protected]> +M: Alex Hung <[email protected]> S: Maintained F: drivers/platform/x86/intel/hid.c @@ -11039,6 +11035,7 @@ KCONFIG M: Masahiro Yamada <[email protected]> S: Maintained +Q: https://patchwork.kernel.org/project/linux-kbuild/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig F: Documentation/kbuild/kconfig* F: scripts/Kconfig.include @@ -11096,10 +11093,12 @@ F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) M: Masahiro Yamada <[email protected]> -M: Michal Marek <[email protected]> +R: Nathan Chancellor <[email protected]> R: Nick Desaulniers <[email protected]> +R: Nicolas Schier <[email protected]> S: Maintained +Q: https://patchwork.kernel.org/project/linux-kbuild/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git F: Documentation/kbuild/ F: Makefile @@ -13630,6 +13629,12 @@ S: Supported F: drivers/misc/atmel-ssc.c F: include/linux/atmel-ssc.h +MICROCHIP SOC DRIVERS +M: Conor Dooley <[email protected]> +S: Supported +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: drivers/soc/microchip/ + MICROCHIP USB251XB DRIVER M: Richard Leitner <[email protected]> @@ -15949,6 +15954,7 @@ Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +F: Documentation/devicetree/bindings/pci/ F: drivers/pci/controller/ F: drivers/pci/pci-bridge-emul.c F: drivers/pci/pci-bridge-emul.h @@ -16055,7 +16061,7 @@ F: 
Documentation/devicetree/bindings/pci/microchip* F: drivers/pci/controller/*microchip* PCIE DRIVER FOR QUALCOMM MSM -M: Stanimir Varbanov <[email protected]> +M: Manivannan Sadhasivam <[email protected]> S: Maintained @@ -16145,7 +16151,8 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson <[email protected]> +M: Shannon Nelson <[email protected]> +M: Brett Creeley <[email protected]> S: Supported @@ -17228,7 +17235,7 @@ R: Dongsheng Yang <[email protected]> S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h @@ -17481,10 +17488,8 @@ S: Maintained F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER -M: Amitkumar Karwar <[email protected]> -M: Siva Rebbagondla <[email protected]> -S: Maintained +S: Orphan F: drivers/net/wireless/rsi/ REGISTER MAP ABSTRACTION @@ -17729,7 +17734,7 @@ F: arch/riscv/ N: riscv K: riscv -RISC-V/MICROCHIP POLARFIRE SOC SUPPORT +RISC-V MICROCHIP FPGA SUPPORT M: Conor Dooley <[email protected]> M: Daire McNamara <[email protected]> @@ -17747,17 +17752,26 @@ F: Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml F: arch/riscv/boot/dts/microchip/ F: drivers/char/hw_random/mpfs-rng.c F: drivers/clk/microchip/clk-mpfs.c -F: drivers/i2c/busses/i2c-microchip-core.c +F: drivers/i2c/busses/i2c-microchip-corei2c.c F: drivers/mailbox/mailbox-mpfs.c F: drivers/pci/controller/pcie-microchip-host.c F: drivers/reset/reset-mpfs.c F: drivers/rtc/rtc-mpfs.c -F: drivers/soc/microchip/ +F: drivers/soc/microchip/mpfs-sys-controller.c F: drivers/spi/spi-microchip-core-qspi.c F: drivers/spi/spi-microchip-core.c F: drivers/usb/musb/mpfs.c F: include/soc/microchip/mpfs.h +RISC-V MISC SOC SUPPORT +M: Conor Dooley <[email protected]> +S: Maintained +Q: https://patchwork.kernel.org/project/linux-riscv/list/ +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: Documentation/devicetree/bindings/riscv/ +F: arch/riscv/boot/dts/ + RNBD BLOCK DRIVERS M: Md. 
Haris Iqbal <[email protected]> M: Jack Wang <[email protected]> @@ -17998,7 +18012,7 @@ L: [email protected] S: Maintained F: drivers/video/fbdev/savage/ -S390 +S390 ARCHITECTURE M: Heiko Carstens <[email protected]> M: Vasily Gorbik <[email protected]> M: Alexander Gordeev <[email protected]> @@ -18053,6 +18067,15 @@ L: [email protected] S: Supported F: drivers/s390/net/ +S390 MM +M: Alexander Gordeev <[email protected]> +M: Gerald Schaefer <[email protected]> +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git +F: arch/s390/include/asm/pgtable.h +F: arch/s390/mm + S390 PCI SUBSYSTEM M: Niklas Schnelle <[email protected]> M: Gerald Schaefer <[email protected]> @@ -18784,7 +18807,6 @@ M: Palmer Dabbelt <[email protected]> M: Paul Walmsley <[email protected]> S: Supported -T: git https://github.com/sifive/riscv-linux.git N: sifive K: [^@]sifive @@ -18803,6 +18825,13 @@ S: Maintained F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml F: drivers/dma/sf-pdma/ +SIFIVE SOC DRIVERS +M: Conor Dooley <[email protected]> +S: Maintained +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: drivers/soc/sifive/ + SILEAD TOUCHSCREEN DRIVER M: Hans de Goede <[email protected]> @@ -19604,6 +19633,11 @@ M: Ion Badulescu <[email protected]> S: Odd Fixes F: drivers/net/ethernet/adaptec/starfire* +STARFIVE DEVICETREES +M: Emil Renner Berthing <[email protected]> +S: Maintained +F: arch/riscv/boot/dts/starfive/ + STARFIVE JH7100 CLOCK DRIVERS M: Emil Renner Berthing <[email protected]> S: Maintained @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index dae448040a97..947497413977 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = "vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 60d61291f344..024af2db638e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git 
a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 0fc9e6b8b05d..03d2e8544a4e 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1270,10 +1270,10 @@ clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/lan966x-pcb8291.dts b/arch/arm/boot/dts/lan966x-pcb8291.dts index f4f054cdf2a8..3a3d76af8612 100644 --- a/arch/arm/boot/dts/lan966x-pcb8291.dts +++ b/arch/arm/boot/dts/lan966x-pcb8291.dts @@ -69,6 +69,12 @@ pins = "GPIO_35", "GPIO_36"; function = "can0_b"; }; + + sgpio_a_pins: sgpio-a-pins { + /* SCK, D0, D1, LD */ + pins = "GPIO_32", "GPIO_33", "GPIO_34", "GPIO_35"; + function = "sgpio_a"; + }; }; &can0 { @@ -118,6 +124,20 @@ status = "okay"; }; +&sgpio { + pinctrl-0 = <&sgpio_a_pins>; + pinctrl-names = "default"; + microchip,sgpio-port-ranges = <0 3>, <8 11>; + status = "okay"; + + gpio@0 { + ngpios = <64>; + }; + gpio@1 { + ngpios = <64>; + }; +}; + &switch { status = "okay"; }; diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 9fd4d9db9f8f..becdc0b664bf 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -35,11 +35,10 @@ &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; }; diff --git a/arch/arm/boot/dts/rk3066a-mk808.dts b/arch/arm/boot/dts/rk3066a-mk808.dts index cfa318a506eb..2db5ba706208 100644 --- a/arch/arm/boot/dts/rk3066a-mk808.dts +++ b/arch/arm/boot/dts/rk3066a-mk808.dts @@ -32,7 +32,7 @@ keyup-threshold-microvolt = <2500000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index e7cf18823558..118deacd38c4 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -71,7 +71,7 @@ #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..44b54af0bbf9 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -379,7 +379,7 @@ rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, @@ -607,7 +607,6 @@ &global_timer { interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; - status = 
"disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 399d6b9c5fd4..382d2839cf47 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -28,19 +28,19 @@ press-threshold-microvolt = <300000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <640000>; }; - esc { + button-esc { label = "Esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1000000>; }; - home { + button-home { label = "Home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <1300000>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 052afe5543e2..3836c61cfb76 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,11 +233,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio7>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index 713f55e143c6..db1eb648e0e1 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -162,11 +162,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 80e0f07c8e87..13cfdaa95cc7 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,11 +165,10 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio0>; interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi index 0ae2bd150e37..793951655b73 100644 --- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi @@ -241,7 +241,6 @@ interrupt-parent = <&gpio5>; interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index bf285091a9eb..cb4e42ede56a 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -76,6 +76,13 @@ reg = <0x1013c200 0x20>; interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. 
+ */ }; local_timer: local-timer@1013c600 { diff --git a/arch/arm/boot/dts/sama7g5-pinfunc.h b/arch/arm/boot/dts/sama7g5-pinfunc.h index 4eb30445d205..6e87f0d4b8fc 100644 --- a/arch/arm/boot/dts/sama7g5-pinfunc.h +++ b/arch/arm/boot/dts/sama7g5-pinfunc.h @@ -261,7 +261,7 @@ #define PIN_PB2__FLEXCOM6_IO0 PINMUX_PIN(PIN_PB2, 2, 1) #define PIN_PB2__ADTRG PINMUX_PIN(PIN_PB2, 3, 1) #define PIN_PB2__A20 PINMUX_PIN(PIN_PB2, 4, 1) -#define PIN_PB2__FLEXCOM11_IO0 PINMUX_PIN(PIN_PB2, 6, 3) +#define PIN_PB2__FLEXCOM11_IO1 PINMUX_PIN(PIN_PB2, 6, 3) #define PIN_PB3 35 #define PIN_PB3__GPIO PINMUX_PIN(PIN_PB3, 0, 0) #define PIN_PB3__RF1 PINMUX_PIN(PIN_PB3, 1, 1) diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -45,12 +45,6 @@ typedef pte_t *pte_addr_t; /* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - -/* * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) (prot) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..ef48a55e9af8 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include <linux/const.h> #include <asm/proc-fns.h> +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include <asm-generic/pgtable-nopud.h> @@ -139,13 +148,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, */ #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index ffed4d949042..e4904faf1753 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -169,10 +169,15 @@ sr_ena_2: cmp tmp1, #UDDRC_STAT_SELFREF_TYPE_SW bne sr_ena_2 - /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ + /* Disable DX DLLs for non-backup modes. */ cmp r7, #AT91_PM_BACKUP beq sr_ena_3 + /* Do not soft reset the AC DLL. */ + ldr tmp1, [r3, DDR3PHY_ACDLLCR] + bic tmp1, tmp1, DDR3PHY_ACDLLCR_DLLSRST + str tmp1, [r3, DDR3PHY_ACDLLCR] + /* Disable DX DLLs. 
*/ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 25c9d184fa4c..1c57ac401649 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c42debaded95..c1494a4dee25 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. */ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 53f6660656ac..ca1d287a0a01 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -161,6 +161,7 @@ clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; video-codec@1c0e000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts index 7e0aeb2db305..a0aeac619929 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts @@ -34,11 +34,25 @@ off-on-delay-us = <12000>; }; - extcon_usbotg1: extcon-usbotg1 { - compatible = "linux,extcon-usb-gpio"; + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + type = "micro"; + label = "X19"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usb1_extcon>; - id-gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&pinctrl_usb1_connector>; + id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + usb_dr_connector: endpoint { + remote-endpoint = <&usb1_drd_sw>; + }; + }; + }; }; }; @@ -105,13 +119,19 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; - extcon = <&extcon_usbotg1>; srp-disable; hnp-disable; adp-disable; power-active-high; over-current-active-low; + usb-role-switch; status = "okay"; + + port { + usb1_drd_sw: endpoint { + remote-endpoint = <&usb_dr_connector>; + }; + }; }; &usbotg2 { @@ -231,7 +251,7 @@ <MX8MM_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x84>; }; - pinctrl_usb1_extcon: usb1-extcongrp { + pinctrl_usb1_connector: usb1-connectorgrp { fsl,pins = <MX8MM_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x1c0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index dabd94dc30c4..50ef92915c67 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi 
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -1244,10 +1244,10 @@ clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ad0b99adf691..67b554ba690c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1102,7 +1102,7 @@ gpmi: nand-controller@33002000 { compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 9f1469db554d..b4c1ef2559f2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -544,14 +544,14 @@ pinctrl_pcie0: pcie0grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x61 /* open drain, pull up */ - MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x41 + MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x60 /* open drain, pull up */ + MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x40 >; }; pinctrl_pcie0_reg: pcie0reggrp { fsl,pins = < - MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x41 + MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x40 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx93-pinfunc.h b/arch/arm64/boot/dts/freescale/imx93-pinfunc.h index 4298a145f8a9..4298a145f8a9 100755..100644 --- a/arch/arm64/boot/dts/freescale/imx93-pinfunc.h +++ b/arch/arm64/boot/dts/freescale/imx93-pinfunc.h diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index a47acf9bdf24..a721cdd80489 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -668,7 +668,7 @@ apcs_glb: mailbox@b111000 { compatible = "qcom,ipq8074-apcs-apps-global"; - reg = <0x0b111000 0x6000>; + reg = <0x0b111000 0x1000>; #clock-cells = <1>; #mbox-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index c0a2baffa49d..aba717644391 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -3504,7 +3504,7 @@ }; saw3: syscon@9a10000 { - compatible = "qcom,tcsr-msm8996", "syscon"; + compatible = "syscon"; reg = <0x09a10000 0x1000>; }; diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 87ab0e1ecd16..4dee790f1049 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -43,7 +43,6 @@ regulator-always-on; regulator-boot-on; - regulator-allow-set-load; vin-supply = <&vreg_3p3>; }; @@ -137,6 +136,9 @@ regulator-max-microvolt = <880000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7a_1p8: ldo7 { @@ -152,6 +154,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l11a_0p8: ldo11 { @@ -258,6 +263,9 @@ 
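The gpmi hunks above correct #size-cells to 0 for the NAND controller: child chips are addressed by chip-select number alone, so their reg is a single cell with no length. A hedged sketch of how a controller driver reads such children:

#include <linux/of.h>
#include <linux/printk.h>

/* Sketch: with #address-cells = <1> and #size-cells = <0>, a child's
 * reg is one cell naming the chip select; there is no length cell. */
static void example_scan_nand_chips(struct device_node *controller)
{
	struct device_node *child;
	u32 cs;

	for_each_child_of_node(controller, child) {
		if (of_property_read_u32(child, "reg", &cs))
			continue;
		pr_info("NAND chip on CS%u\n", cs);
	}
}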
regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_1p8: ldo7 { @@ -273,6 +281,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts index b608b82dff03..2c62ba6a49c5 100644 --- a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts @@ -83,6 +83,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l4c: ldo4 { @@ -98,6 +101,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c: ldo7 { @@ -113,6 +119,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l17c: ldo17 { @@ -121,6 +130,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 212580316d3e..4cdc88d33944 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2296,7 +2296,8 @@ lpass_audiocc: clock-controller@3300000 { compatible = "qcom,sc7280-lpassaudiocc"; - reg = <0 0x03300000 0 0x30000>; + reg = <0 0x03300000 0 0x30000>, + <0 0x032a9000 0 0x1000>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&lpass_aon LPASS_AON_CC_MAIN_RCG_CLK_SRC>; clock-names = "bi_tcxo", "lpass_aon_cc_main_rcg_clk_src"; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index fea7d8273ccd..5e30349efd20 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -124,6 +124,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l13c: ldo13 { @@ -146,6 +149,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l4d: ldo4 { diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c32bcded2aef..212d63d5cbf2 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -885,13 +885,13 @@ ufs_mem_phy: phy@1d87000 { compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01d87000 0 0xe10>; + reg = <0 0x01d87000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; clock-names = "ref", "ref_aux"; - clocks = <&rpmhcc RPMH_CXO_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>; resets = <&ufs_mem_hc 0>; @@ -953,13 +953,13 @@ ufs_card_phy: phy@1da7000 { compatible = 
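The regulator hunks in these ADP/CRD board files pair regulator-allow-set-load with an explicit regulator-allowed-modes list; without the list the RPMh regulator driver has no modes to choose between when a consumer's load changes. A consumer-side sketch (the "vdd" supply name and the 300 mA figure are made up):

#include <linux/regulator/consumer.h>

/* Sketch of the consumer side: with regulator-allow-set-load and
 * regulator-allowed-modes in the DT, this load hint lets the core
 * switch the LDO between LPM and HPM. */
static int example_power_on(struct device *dev)
{
	struct regulator *vdd;
	int ret;

	vdd = devm_regulator_get(dev, "vdd");
	if (IS_ERR(vdd))
		return PTR_ERR(vdd);

	ret = regulator_set_load(vdd, 300000);	/* expected draw in uA */
	if (ret)
		return ret;

	return regulator_enable(vdd);
}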
"qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01da7000 0 0xe10>; + reg = <0 0x01da7000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; clock-names = "ref", "ref_aux"; - clocks = <&gcc GCC_UFS_1_CARD_CLKREF_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_CARD_PHY_AUX_CLK>; resets = <&ufs_card_hc 0>; @@ -1181,26 +1181,16 @@ usb_0_ssphy: usb3-phy@88eb400 { reg = <0 0x088eb400 0 0x100>, <0 0x088eb600 0 0x3ec>, - <0 0x088ec400 0 0x1f0>, + <0 0x088ec400 0 0x364>, <0 0x088eba00 0 0x100>, <0 0x088ebc00 0 0x3ec>, - <0 0x088ec700 0 0x64>; + <0 0x088ec200 0 0x18>; #phy-cells = <0>; #clock-cells = <0>; clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; clock-names = "pipe0"; clock-output-names = "usb0_phy_pipe_clk_src"; }; - - usb_0_dpphy: dp-phy@88ed200 { - reg = <0 0x088ed200 0 0x200>, - <0 0x088ed400 0 0x200>, - <0 0x088eda00 0 0x200>, - <0 0x088ea600 0 0x200>, - <0 0x088ea800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; usb_1_hsphy: phy@8902000 { @@ -1242,8 +1232,8 @@ usb_1_ssphy: usb3-phy@8903400 { reg = <0 0x08903400 0 0x100>, - <0 0x08903c00 0 0x3ec>, - <0 0x08904400 0 0x1f0>, + <0 0x08903600 0 0x3ec>, + <0 0x08904400 0 0x364>, <0 0x08903a00 0 0x100>, <0 0x08903c00 0 0x3ec>, <0 0x08904200 0 0x18>; @@ -1253,16 +1243,6 @@ clock-names = "pipe0"; clock-output-names = "usb1_phy_pipe_clk_src"; }; - - usb_1_dpphy: dp-phy@8904200 { - reg = <0 0x08904200 0 0x200>, - <0 0x08904400 0 0x200>, - <0 0x08904a00 0 0x200>, - <0 0x08904600 0 0x200>, - <0 0x08904800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; system-cache-controller@9200000 { diff --git a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi index 014fe3a31548..fb6e5a140c9f 100644 --- a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi @@ -348,6 +348,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_3p0: ldo7 { @@ -367,6 +370,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi index 549e0a2aa9fe..5428aab3058d 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi @@ -317,6 +317,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7c_2p85: ldo7 { @@ -339,6 +342,9 @@ regulator-max-microvolt = <2960000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index a5b62cadb129..e276eed1f8e2 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -334,6 +334,7 @@ exit-latency-us = <6562>; min-residency-us = <9987>; local-timer-stop; + status = "disabled"; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts 
b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts index 0fcf5bd88fc7..69ae6503c2f6 100644 --- a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts +++ b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts @@ -107,6 +107,9 @@ regulator-max-microvolt = <888000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l6b_1p2: ldo6 { @@ -115,6 +118,9 @@ regulator-max-microvolt = <1208000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l7b_2p96: ldo7 { @@ -123,6 +129,9 @@ regulator-max-microvolt = <2504000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; vreg_l9b_1p2: ldo9 { @@ -131,6 +140,9 @@ regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; regulator-allow-set-load; + regulator-allowed-modes = + <RPMH_REGULATOR_MODE_LPM + RPMH_REGULATOR_MODE_HPM>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts index 07008d84434c..c1bbd555f5f5 100644 --- a/arch/arm64/boot/dts/rockchip/px30-evb.dts +++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts @@ -30,31 +30,31 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - esc-key { + button-esc { label = "esc"; linux,code = <KEY_ESC>; press-threshold-microvolt = <1310000>; }; - home-key { + button-home { label = "home"; linux,code = <KEY_HOME>; press-threshold-microvolt = <624000>; }; - menu-key { + button-menu { label = "menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <987000>; }; - vol-down-key { + button-down { label = "volume down"; linux,code = <KEY_VOLUMEDOWN>; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { label = "volume up"; linux,code = <KEY_VOLUMEUP>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts index 9fe9b0d11003..184b84fdde07 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts @@ -23,7 +23,7 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - func-key { + button-func { linux,code = <KEY_FN>; label = "function"; press-threshold-microvolt = <18000>; @@ -37,31 +37,31 @@ poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - esc-key { + button-esc { linux,code = <KEY_MICMUTE>; label = "micmute"; press-threshold-microvolt = <1130000>; }; - home-key { + button-home { linux,code = <KEY_MODE>; label = "mode"; press-threshold-microvolt = <901000>; }; - menu-key { + button-menu { linux,code = <KEY_PLAY>; label = "play"; press-threshold-microvolt = <624000>; }; - vol-down-key { + button-down { linux,code = <KEY_VOLUMEDOWN>; label = "volume down"; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { linux,code = <KEY_VOLUMEUP>; label = "volume up"; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index ea6820902ede..7ea48167747c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -19,7 +19,7 @@ stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; diff 
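The Rockchip adc-keys hunks around here only rename child nodes to the button-* pattern the binding schema now expects; thresholds and keycodes are untouched. For context, a freestanding model of threshold matching (assumption, not lifted from the driver: the smallest press-threshold-microvolt at or above the reading wins, and readings at or above keyup-threshold-microvolt mean no key):

#include <stdint.h>
#include <stddef.h>

struct adc_button {
	uint32_t threshold_uv;	/* press-threshold-microvolt */
	int keycode;
};

static int adc_keys_decode(const struct adc_button *b, size_t n,
			   uint32_t reading_uv, uint32_t keyup_uv)
{
	uint32_t best = keyup_uv;
	int code = 0;	/* 0 means no key pressed */
	size_t i;

	if (reading_uv >= keyup_uv)
		return 0;
	for (i = 0; i < n; i++)
		if (b[i].threshold_uv >= reading_uv &&
		    b[i].threshold_uv < best) {
			best = b[i].threshold_uv;
			code = b[i].keycode;
		}
	return code;
}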
--git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 43c928ac98f0..1deef53a4c94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -25,7 +25,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 7f5bba0c6001..81d1064fdb21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -208,11 +208,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index 38d757c00548..5589f3db6b36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -192,11 +192,10 @@ vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index ed3348b558f8..a47d9f758611 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -734,10 +734,6 @@ camera: &i2c7 { }; /* PINCTRL OVERRIDES */ -&ec_ap_int_l { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; -}; - &ap_fw_wp { rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 2a332763c35c..9d9297bc5f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -123,7 +123,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 452728b82e42..3bf8f959e42c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -39,7 +39,7 @@ keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts index 72182c58cc46..65cb21837b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts @@ -19,7 +19,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index 278123b4f911..b6e082f1f6d9 100644 --- 
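The hym8563 hunks here rename the node to the generic rtc@51 and drop clock-frequency, which the binding does not define; the chip's clock output used as "xin32k" is fixed at 32768 Hz regardless. A consumer-side sketch (clock name taken from the DT above, the rest hypothetical):

#include <linux/clk.h>
#include <linux/device.h>

/* Sketch: a consumer of the RTC's 32 kHz output; the rate comes from
 * the clock provider, not from a clock-frequency property. */
static int example_get_xin32k(struct device *dev)
{
	struct clk *xin32k = devm_clk_get(dev, "xin32k");

	if (IS_ERR(xin32k))
		return PTR_ERR(xin32k);
	dev_info(dev, "xin32k runs at %lu Hz\n", clk_get_rate(xin32k));
	return clk_prepare_enable(xin32k);
}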
a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -167,6 +167,7 @@ }; &emmc_phy { + rockchip,enable-strobe-pulldown; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 9e2e246e0bab..dba4d03bfc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -52,13 +52,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 04c752f49be9..115c14c0a3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -207,7 +207,7 @@ cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 5a2661ae0131..7ba1c28f70a9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -98,13 +98,12 @@ }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; interrupt-parent = <&gpio0>; interrupts = <RK_PA5 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index 2f4b1b2e3ac7..bbf1e3f24585 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -41,7 +41,7 @@ keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 645ced6617a6..1f76d3501bda 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -509,7 +509,6 @@ &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 13927e7d0724..dbec2b7173a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -33,13 +33,13 @@ press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = <KEY_BACK>; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = <KEY_MENU>; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 935b8c68a71d..bf9eb0405b62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ 
b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -297,11 +297,10 @@ clock-frequency = <400000>; status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 0d45868132b9..8d61f824c12d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -23,7 +23,7 @@ io-channel-names = "buttons"; keyup-threshold-microvolt = <1750000>; - recovery { + button-recovery { label = "recovery"; linux,code = <KEY_VENDOR>; press-threshold-microvolt = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index a05460b92415..25a8c781f4e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -740,7 +740,7 @@ &uart1 { pinctrl-names = "default"; - pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn>; + pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn &uart1m0_rtsn>; status = "okay"; uart-has-rtscts; @@ -748,13 +748,14 @@ compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; vbat-supply = <&vcc_sys>; vddio-supply = <&vcca1v8_pmu>; + max-speed = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 77b179cd20e7..b276eb0810c7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -246,7 +246,7 @@ compatible = "rockchip,rk809"; reg = <0x20>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA7 IRQ_TYPE_LEVEL_LOW>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_LOW>; assigned-clocks = <&cru I2S1_MCLKOUT_TX>; assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index dba648c2f57e..9fd262334d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -142,7 +142,7 @@ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m0_miim @@ -432,11 +432,7 @@ &i2c3 { pinctrl-names = "default"; - pinctrl-0 = <&i2c3m1_xfer>; - status = "okay"; -}; - -&i2c5 { + pinctrl-0 = <&i2c3m0_xfer>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index c282f6e79960..26d7fda275ed 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -500,7 +500,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_EDGE_FALLING>; #clock-cells = <0>; 
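The quartz64-a hunk above swaps the host/device wakeup GPIOs into their correct roles and adds max-speed = <3000000> so the Bluetooth UART can run at 3 Mbaud. Sketch of how a serdev driver would consume that property (the fallback value is an assumption):

#include <linux/property.h>

/* Sketch: pick the operating baud rate from max-speed when present. */
static u32 example_bt_oper_speed(struct device *dev)
{
	u32 speed;

	if (device_property_read_u32(dev, "max-speed", &speed))
		speed = 115200;	/* fallback when the property is absent */
	return speed;
}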
- clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index fb87a168fe96..539ef8cc7792 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -509,7 +509,6 @@ interrupt-parent = <&gpio0>; interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 71a1af42f0e8..edf6625ce965 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -863,12 +863,12 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_present(pud) && pud_user(pud); + return pud_leaf(pud) && pud_user(pud); } #endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 795344ab4ec4..322a831f8ede 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -299,11 +299,11 @@ SYM_TYPED_FUNC_START(ftrace_stub) ret SYM_FUNC_END(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f4cb54d5afd6..01b57b726322 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -97,7 +97,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..5332b1433f38 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,7 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); +extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); #include <asm-generic/irq.h> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..aa0e0e0d4ee5 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,13 +349,17 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -455,7 +459,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + 
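The LoongArch pgtable hunks in progress here decouple the two dirty bits: _PAGE_MODIFIED always records that dirtying happened, while _PAGE_DIRTY (the hardware writable-dirty bit) is set only when _PAGE_WRITE is present too, so marking a write-protected page dirty no longer makes it writable. A freestanding model showing the invariant is order-independent (bit values made up):

#include <assert.h>

#define _PAGE_WRITE	0x1
#define _PAGE_DIRTY	0x2	/* hardware writable-dirty */
#define _PAGE_MODIFIED	0x4	/* software dirty */

static unsigned long pte_mkdirty(unsigned long pte)
{
	pte |= _PAGE_MODIFIED;
	if (pte & _PAGE_WRITE)
		pte |= _PAGE_DIRTY;
	return pte;
}

static unsigned long pte_mkwrite(unsigned long pte)
{
	pte |= _PAGE_WRITE;
	if (pte & _PAGE_MODIFIED)
		pte |= _PAGE_DIRTY;
	return pte;
}

int main(void)
{
	/* Both orders converge, and a read-only dirty PTE stays clean
	 * in the hardware sense. */
	assert(pte_mkwrite(pte_mkdirty(0)) == pte_mkdirty(pte_mkwrite(0)));
	assert(!(pte_mkdirty(0) & _PAGE_DIRTY));
	return 0;
}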
pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } @@ -478,7 +484,9 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 71189b28bfb2..3dd172d9ffea 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -19,21 +19,21 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -void loongson3_smp_setup(void); -void loongson3_prepare_cpus(unsigned int max_cpus); -void loongson3_boot_secondary(int cpu, struct task_struct *idle); -void loongson3_init_secondary(void); -void loongson3_smp_finish(void); -void loongson3_send_ipi_single(int cpu, unsigned int action); -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action); +void loongson_smp_setup(void); +void loongson_prepare_cpus(unsigned int max_cpus); +void loongson_boot_secondary(int cpu, struct task_struct *idle); +void loongson_init_secondary(void); +void loongson_smp_finish(void); +void loongson_send_ipi_single(int cpu, unsigned int action); +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void); -void loongson3_cpu_die(unsigned int cpu); +int loongson_cpu_disable(void); +void loongson_cpu_die(unsigned int cpu); #endif static inline void plat_smp_setup(void) { - loongson3_smp_setup(); + loongson_smp_setup(); } static inline int raw_smp_processor_id(void) @@ -85,28 +85,28 @@ extern void show_ipi_list(struct seq_file *p, int prec); */ static inline void smp_send_reschedule(int cpu) { - loongson3_send_ipi_single(cpu, SMP_RESCHEDULE); + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); } static inline void arch_send_call_function_single_ipi(int cpu) { - loongson3_send_ipi_single(cpu, SMP_CALL_FUNCTION); + loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson3_send_ipi_mask(mask, SMP_CALL_FUNCTION); + loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { - return loongson3_cpu_disable(); + return loongson_cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { - loongson3_cpu_die(cpu); + loongson_cpu_die(cpu); } extern void play_dead(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 335398482038..8319cc409009 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -56,23 +56,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - #ifdef CONFIG_SMP static int set_processor_mask(u32 id, u32 flags) { @@ -156,13 +139,21 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. 
+ */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -173,8 +164,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e..0524bf1169b7 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -117,7 +117,7 @@ void __init init_IRQ(void) if (ipi_irq < 0) panic("IPI IRQ mapping failed\n"); irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); if (r < 0) panic("IPI IRQ request failed\n"); #endif diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2526b68f1c0f..ddb8ba4eb399 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1eb63fa9bc81..ae436def7ee9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -257,7 +257,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 781a4d4bdddc..6ed72f7ff278 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -136,12 +136,12 @@ static void ipi_write_action(int cpu, u32 action) } } -void loongson3_send_ipi_single(int cpu, unsigned int action) +void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -149,7 +149,7 @@ void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } -irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -169,7 +169,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } -void __init loongson3_smp_setup(void) +void __init loongson_smp_setup(void) { cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -178,7 +178,7 @@ void __init loongson3_smp_setup(void) pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } -void __init loongson3_prepare_cpus(unsigned int 
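The copy_thread() hunk above replaces the kernel-thread early return with goto out, so the TIF flag clearing in the shared epilogue now runs for kernel threads as well, and the CLONE_SETTLS handling stays on the user-thread path. Condensed model of the control flow:

#include <stdbool.h>

/* Condensed model of copy_thread() after the fix: the kernel-thread
 * path no longer returns early, so the shared epilogue (the TIF flag
 * clearing in the real code) runs for every new task. */
static int example_copy_thread(bool kernel_thread, bool set_tls)
{
	if (kernel_thread)
		goto out;	/* was "return 0", skipping the epilogue */

	/* ... user-thread register setup ... */
	if (set_tls) {
		/* install the TLS pointer in the child's registers */
	}
out:
	/* shared epilogue: clear TIF_USEDFPU and friends */
	return 0;
}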
max_cpus) +void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; @@ -193,7 +193,7 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) /* * Setup the PC, SP, and TP of a secondary processor and start it running! */ -void loongson3_boot_secondary(int cpu, struct task_struct *idle) +void loongson_boot_secondary(int cpu, struct task_struct *idle) { unsigned long entry; @@ -205,13 +205,13 @@ void loongson3_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, SMP_BOOT_CPU); } /* * SMP init and finish on secondary CPUs */ -void loongson3_init_secondary(void) +void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | @@ -231,7 +231,7 @@ void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; } -void loongson3_smp_finish(void) +void loongson_smp_finish(void) { local_irq_enable(); iocsr_write64(0, LOONGARCH_IOCSR_MBUF0); @@ -240,7 +240,7 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void) +int loongson_cpu_disable(void) { unsigned long flags; unsigned int cpu = smp_processor_id(); @@ -262,7 +262,7 @@ int loongson3_cpu_disable(void) return 0; } -void loongson3_cpu_die(unsigned int cpu) +void loongson_cpu_die(unsigned int cpu) { while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); @@ -300,19 +300,19 @@ void play_dead(void) */ #ifdef CONFIG_PM -static int loongson3_ipi_suspend(void) +static int loongson_ipi_suspend(void) { return 0; } -static void loongson3_ipi_resume(void) +static void loongson_ipi_resume(void) { iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); } -static struct syscore_ops loongson3_ipi_syscore_ops = { - .resume = loongson3_ipi_resume, - .suspend = loongson3_ipi_suspend, +static struct syscore_ops loongson_ipi_syscore_ops = { + .resume = loongson_ipi_resume, + .suspend = loongson_ipi_suspend, }; /* @@ -321,7 +321,7 @@ static struct syscore_ops loongson3_ipi_syscore_ops = { */ static int __init ipi_pm_init(void) { - register_syscore_ops(&loongson3_ipi_syscore_ops); + register_syscore_ops(&loongson_ipi_syscore_ops); return 0; } @@ -425,7 +425,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { init_new_context(current, &init_mm); current_thread_info()->cpu = 0; - loongson3_prepare_cpus(max_cpus); + loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); @@ -436,7 +436,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - loongson3_boot_secondary(cpu, tidle); + loongson_boot_secondary(cpu, tidle); /* Wait for CPU to start and be ready to sync counters */ if (!wait_for_completion_timeout(&cpu_starting, @@ -465,7 +465,7 @@ asmlinkage void start_secondary(void) cpu_probe(); constant_clockevent_init(); - loongson3_init_secondary(); + loongson_init_secondary(); set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); @@ -487,11 +487,11 @@ asmlinkage void start_secondary(void) complete(&cpu_running); /* - * irq will be enabled in loongson3_smp_finish(), enabling it too + * irq will be enabled in loongson_smp_finish(), enabling it too * early is dangerous. 
*/ WARN_ON_ONCE(!irqs_disabled()); - loongson3_smp_finish(); + loongson_smp_finish(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index b206d9159205..4571c3c87cd4 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -43,7 +43,8 @@ static bool unwind_by_prologue(struct unwind_state *state) { struct stack_info *info = &state->stack_info; union loongarch_instruction *ip, *ip_end; - unsigned long frame_size = 0, frame_ra = -1; + long frame_ra = -1; + unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; if (state->sp >= info->end || state->sp < info->begin) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 3f8a86c4336a..02e6be9c5b0d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -67,12 +67,12 @@ linux.bin.ub linux.bin.gz: linux.bin linux.bin: vmlinux linux.bin linux.bin.gz linux.bin.ub: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' PHONY += simpleImage.$(DTB) simpleImage.$(DTB): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(addprefix $(boot)/$@., ub unstrip strip) - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 8c3ad76602f3..29c11a06b750 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7786e3ac7611..8c3862b4c259 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS #endif .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) { - *(.data.rel.ro*) + *(.data.rel.ro .data.rel.ro.*) } .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 87be3e855bf7..c907f747d2a0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -199,7 +199,16 @@ unsigned long __get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? 
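The unwind_prologue hunk above makes frame_ra a signed long, presumably because -1 serves as a not-found sentinel; with an unsigned type, a frame_ra < 0 style test can never fire. Small demonstration:

#include <stdio.h>

int main(void)
{
	unsigned long frame_ra_unsigned = -1;	/* wraps to ULONG_MAX */
	long frame_ra_signed = -1;

	if (frame_ra_signed < 0)
		printf("signed sentinel detected\n");	/* prints */
	if (frame_ra_unsigned < 0)
		printf("unreachable: unsigned < 0 is always false\n");
	return 0;
}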
*/ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -register unsigned long current_stack_pointer asm("r15"); +/* avoid using global register due to gcc bug in versions < 8.4 */ +#define current_stack_pointer (__current_stack_pointer()) + +static __always_inline unsigned long __current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("lgr %0,15" : "=d" (sp)); + return sp; +} static __always_inline unsigned short stap(void) { diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..e4ef67e4da0a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9860ca5979f8..9e38ffaadb5d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')' + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8b70237c33f7..d6f3703e4119 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -861,8 +861,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* stop everything (includes BRS) */ - amd_pmu_disable_all(); + amd_brs_disable_all(); /* Drain BRS is in use (could be inactive) */ if (cpuc->lbr_users) @@ -873,7 +872,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) cpuc->enabled = pmu_enabled; if (pmu_enabled) - amd_pmu_enable_all(0); + amd_brs_enable_all(); return amd_pmu_adjust_nmi_window(handled); } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index d568afc705d2..83f15fe411b3 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -553,6 +553,7 @@ static void uncore_clean_online(void) hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { hlist_del(&uncore->node); + kfree(uncore->events); kfree(uncore); } } diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 82ef87e9a897..42a55794004a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1263,6 +1263,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) if (1 << order != nr_pages) goto out; + /* + * Some processors cannot always support single range for more than + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might + * also be affected, so for now rather than trying to keep track of + * which ones, just disable it for all. 
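The s390 hunk above swaps a global register variable for an inline-asm accessor to sidestep a gcc < 8.4 bug while generating the same single register move. A hypothetical x86-64 analogue, just to show the shape of such an accessor:

/* Hypothetical x86-64 analogue of the s390 accessor: one register
 * move via inline asm instead of a global register variable. */
static inline unsigned long current_sp(void)
{
	unsigned long sp;

	asm volatile("movq %%rsp, %0" : "=r" (sp));
	return sp;
}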
+ */ + if (nr_pages > 1) + goto out; + buf->single = true; buf->nr_pages = nr_pages; ret = 0; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f49bc3ec76e6..a269049a43ce 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; int ret; ret = hv_common_cpu_init(cpu); @@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) { - if (hv_root_partition) { - /* - * For root partition we get the hypervisor provided VP assist - * page, instead of allocating a new page. - */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - *hvp = memremap(msr.pfn << - HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - } else { - /* - * The VP assist page is an "overlay" page (see Hyper-V TLFS's - * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed - * out to make sure we always write the EOI MSR in - * hv_apic_eoi_write() *after* the EOI optimization is disabled - * in hv_cpu_die(), otherwise a CPU may not be stopped in the - * case of CPU offlining and the VM will hang. - */ + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. 
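The hv_init hunk in progress indexes hv_vp_assist_page with the cpu argument rather than smp_processor_id(); hotplug callbacks carry the target CPU explicitly and, depending on the state they are registered for, may not execute on that CPU. Sketch of the registration shape (state and names illustrative):

#include <linux/cpuhotplug.h>
#include <linux/init.h>

/* Sketch: set up state for "cpu", not for whichever CPU happens to be
 * running the callback. */
static int example_cpu_online(unsigned int cpu)
{
	/* per_cpu(example_state, cpu) = ...; */
	return 0;
}

static int __init example_init(void)
{
	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				    example_cpu_online, NULL);

	return ret < 0 ? ret : 0;
}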
+ */ + if (!*hvp) *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - if (*hvp) - msr.pfn = vmalloc_to_pfn(*hvp); - } - WARN_ON(!(*hvp)); - if (*hvp) { - msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - } + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..b2da7cb64b31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,6 +305,9 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 60ece592b220..dbb38a6b4dfb 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -37,7 +37,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .spinlock.text;" +asm (".pushsection .spinlock.text, \"ax\";" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index ebe79d60619f..da8b8ea6b063 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl, if (!length || !IS_ALIGNED(length, PAGE_SIZE)) return -EINVAL; + if (offset + length < offset) + return -EINVAL; + if (offset + length - PAGE_SIZE >= encl->size) return -EINVAL; diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec7bbac3a9f2..8009c8346d8f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ static void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. 
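The sgx_validate_offset_length() hunk above adds the classic unsigned wraparound guard, offset + length < offset. Without it, a huge length can wrap the sum to a small value that slips past the upper-bound test. Freestanding demonstration:

#include <stdio.h>

int main(void)
{
	const unsigned long PAGE = 4096;
	unsigned long encl_size = 0x8000000;	/* 128 MiB enclave */
	unsigned long offset = 0x100000;
	/* chosen so that offset + length wraps around to 0x2000 */
	unsigned long length = 0UL - 0x100000 + 0x2000;

	if (offset + length < offset)
		printf("guard: rejected, sum wrapped\n");
	else if (offset + length - PAGE >= encl_size)
		printf("bound: rejected\n");
	else
		printf("accepted\n");	/* reached if the guard is removed */
	return 0;
}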
- */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); - } else if (tsx_ctrl_is_supported()) { + } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { rdmsrl(MSR_IA32_TSX_CTRL, msr); msr |= TSX_CTRL_CPUID_CLEAR; wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) u64 mcu_opt_ctrl; /* Check if RTM_ALLOW exists */ - if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + if (!boot_cpu_has_bug(X86_BUG_TAA) || + !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) return; @@ -191,7 +174,20 @@ void __init tsx_init(void) return; } - if (!tsx_ctrl_is_supported()) { + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + } else { tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; return; } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3b28c5b25e12..d00db56a8868 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -605,9 +605,9 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal) if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); save_fpregs_to_fpstate(dst_fpu); + fpregs_unlock(); if (!(clone_flags & CLONE_THREAD)) fpu_inherit_perms(dst_fpu); - fpregs_unlock(); /* * Children never inherit PASID state. 
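The fpu_clone() hunk above moves fpregs_unlock() before fpu_inherit_perms() because the latter can take a lock that sleeps on PREEMPT_RT, while fpregs_lock() disables preemption. Sketch of the underlying rule:

#include <linux/mutex.h>
#include <linux/preempt.h>

static DEFINE_MUTEX(example_lock);	/* hypothetical sleeping lock */

/* Sketch: work that may sleep has to run outside the
 * preemption-disabled region. */
static void example_ordering(void)
{
	preempt_disable();
	/* only non-sleeping work is legal here */
	preempt_enable();

	mutex_lock(&example_lock);	/* sleeping is fine out here */
	mutex_unlock(&example_lock);
}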
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1ccb769f62af..b6f96d47e596 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, { bool list_unstable, zapped_root = false; + lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); @@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - - if (is_tdp_mmu_fault) + if (is_tdp_mmu_fault) { r = kvm_tdp_mmu_map(vcpu, fault); - else + } else { + r = make_mmu_pages_available(vcpu); + if (r) + goto out_unlock; r = __direct_map(vcpu, fault); + } out_unlock: if (is_tdp_mmu_fault) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..995bc0f90759 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) + return; + + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); } @@ -1125,6 +1131,9 @@ void svm_free_nested(struct vcpu_svm *svm) if (!svm->nested.initialized) return; + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) + svm_switch_vmcb(svm, &svm->vmcb01); + svm_vcpu_free_msrpm(svm->nested.msrpm); svm->nested.msrpm = NULL; @@ -1143,9 +1152,6 @@ void svm_free_nested(struct vcpu_svm *svm) svm->nested.initialized = false; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
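The __kvm_mmu_prepare_zap_page() hunk above adds a lockdep assertion that mmu_lock is held for write. Sketch of the general pattern (lock name hypothetical):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

static DEFINE_RWLOCK(example_lock);

/* The assertion documents the caller's locking contract; with
 * CONFIG_LOCKDEP it is checked at runtime, otherwise it compiles away. */
static void example_modify_protected_state(void)
{
	lockdep_assert_held_write(&example_lock);
	/* ... mutate structures guarded by example_lock ... */
}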
- */ void svm_leave_nested(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4b6d2b050e57..ce362e88a567 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1438,6 +1432,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); sev_free_vcpu(vcpu); @@ -3425,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..5b0d4859e4b7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) { + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); } @@ -6440,9 +6441,6 @@ out: return kvm_state.size; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
- */ void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 490ec23c8450..2835bd796639 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SMM) { if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { - kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); } @@ -9805,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) int kvm_check_nested_events(struct kvm_vcpu *vcpu) { - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { kvm_x86_ops.nested_ops->triple_fault(vcpu); return 1; } @@ -10560,15 +10566,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { - if (is_guest_mode(vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (is_guest_mode(vcpu)) kvm_x86_ops.nested_ops->triple_fault(vcpu); - } else { + + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; - goto out; } + goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ @@ -11997,8 +12004,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) WARN_ON_ONCE(!init_event && (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); + /* + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's + * possible to INIT the vCPU while L2 is active. Force the vCPU back + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER + * bits), i.e. virtualization is disabled. 
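The x86.c hunks here switch TRIPLE_FAULT handling from kvm_check_request() to kvm_test_request() where the request must survive to be consumed later: check is test-and-clear, test only observes. Freestanding model of the distinction:

#include <stdio.h>

/* Model of the two request helpers: "check" consumes the bit,
 * "test" merely observes it. */
static unsigned long requests;

static int test_request(int bit)
{
	return !!(requests & (1UL << bit));
}

static int check_request(int bit)
{
	int set = test_request(bit);

	if (set)
		requests &= ~(1UL << bit);	/* test-and-clear */
	return set;
}

int main(void)
{
	requests = 1UL << 2;
	printf("test:  %d, still pending: %d\n", test_request(2), test_request(2));
	printf("check: %d, still pending: %d\n", check_request(2), test_request(2));
	return 0;
}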
+ */ + if (is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + kvm_lapic_reset(vcpu, init_event); + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2dae413bd62a..f3098c0e386a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } +static inline int max_evtchn_port(struct kvm *kvm) +{ + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) + return EVTCHN_2L_NR_CHANNELS; + else + return COMPAT_EVTCHN_2L_NR_CHANNELS; +} + static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { @@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, *r = -EFAULT; goto out; } + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { + *r = -EINVAL; + goto out; + } } if (sched_poll.nr_ports == 1) @@ -1215,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; + u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -1242,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif + cpl = static_call(kvm_x86_get_cpl)(vcpu); trace_kvm_xen_hypercall(input, params[0], params[1], params[2], params[3], params[4], params[5]); + /* + * Only allow hypercall acceleration for CPL0. The rare hypercalls that + * are permitted in guest userspace can be handled by the VMM. + */ + if (unlikely(cpl > 0)) + goto handle_in_userspace; + switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { @@ -1279,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) if (handled) return kvm_xen_hypercall_set_result(vcpu, r); +handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); + vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; @@ -1297,14 +1319,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) return 0; } -static inline int max_evtchn_port(struct kvm *kvm) -{ - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) - return EVTCHN_2L_NR_CHANNELS; - else - return COMPAT_EVTCHN_2L_NR_CHANNELS; -} - static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78c5bc654cff..6453fbaedb08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. 
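+ *
+ * Note the ordering: 'size' is computed above while phys_addr and
+ * last_addr still carry the same upper bits, so stripping those
+ * bits here cannot skew the PAGE_ALIGN() subtraction.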
+ */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4cd39f304e20..93ae33248f42 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, - MSR_AMD64_DE_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6a5c849ee061..ed761c62ad0a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1213,7 +1213,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * parent so that offline always happens towards the root. */ if (parent) - blkcg_pin_online(css); + blkcg_pin_online(&parent->css); return 0; } diff --git a/block/blk-core.c b/block/blk-core.c index 17667159482e..5487912befe8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -425,7 +425,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) goto fail_stats; - blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); q->nr_requests = BLKDEV_DEFAULT_RQ; diff --git a/block/blk-mq.c b/block/blk-mq.c index 6a789cda68a5..228a6696d835 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4045,9 +4045,14 @@ EXPORT_SYMBOL(__blk_mq_alloc_disk); struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass) { + struct gendisk *disk; + if (!blk_get_queue(q)) return NULL; - return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + if (!disk) + blk_put_queue(q); + return disk; } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); diff --git a/block/blk-settings.c b/block/blk-settings.c index 8bb9eef5310e..8ac1038d0c79 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -57,8 +57,8 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->dma_alignment = 511; } -EXPORT_SYMBOL(blk_set_default_limits); /** * blk_set_stacking_limits - set default limits for stacking devices @@ -600,6 +600,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); + t->dma_alignment = max(t->dma_alignment, b->dma_alignment); /* Set non-power-of-2 compatible chunk_sectors boundary */ if (b->chunk_sectors) @@ -773,7 +774,7 @@ EXPORT_SYMBOL(blk_queue_virt_boundary); **/ void blk_queue_dma_alignment(struct request_queue *q, int mask) { - q->dma_alignment = mask; + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_dma_alignment); @@ -795,8 +796,8 @@ void blk_queue_update_dma_alignment(struct 
request_queue *q, int mask) { BUG_ON(mask > PAGE_SIZE); - if (mask > q->dma_alignment) - q->dma_alignment = mask; + if (mask > q->limits.dma_alignment) + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_update_dma_alignment); diff --git a/block/blk.h b/block/blk.h index d6ea0d1a6db0..a186ea20f39d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -331,6 +331,7 @@ void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); +void blk_set_default_limits(struct queue_limits *lim); int blk_dev_init(void); /* diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index f52265293482..73db0cb44fc7 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -1778,7 +1778,7 @@ static void speakup_con_update(struct vc_data *vc) { unsigned long flags; - if (!speakup_console[vc->vc_num] || spk_parked) + if (!speakup_console[vc->vc_num] || spk_parked || !synth) return; if (!spin_trylock_irqsave(&speakup_info.spinlock, flags)) /* Speakup output, discard */ diff --git a/drivers/accessibility/speakup/utils.h b/drivers/accessibility/speakup/utils.h index 4bf2ee8ac246..4ce9a12f7664 100644 --- a/drivers/accessibility/speakup/utils.h +++ b/drivers/accessibility/speakup/utils.h @@ -54,7 +54,7 @@ static inline int oops(const char *msg, const char *info) static inline struct st_key *hash_name(char *name) { - u_char *pn = (u_char *)name; + unsigned char *pn = (unsigned char *)name; int hash = 0; while (*pn) { diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1c39cfce32fa..4ad42b0f75cd 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -739,6 +739,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, const char *failure_string; struct binder_buffer *buffer; + if (unlikely(vma->vm_mm != alloc->mm)) { + ret = -EINVAL; + failure_string = "invalid vma->vm_mm"; + goto err_invalid_mm; + } + mutex_lock(&binder_alloc_mmap_lock); if (alloc->buffer_size) { ret = -EBUSY; @@ -785,6 +791,7 @@ err_alloc_pages_failed: alloc->buffer_size = 0; err_already_mapped: mutex_unlock(&binder_alloc_mmap_lock); +err_invalid_mm: binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%s: %d %lx-%lx %s failed %d\n", __func__, alloc->pid, vma->vm_start, vma->vm_end, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f3e4db16fd07..8532b839a343 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2672,7 +2672,7 @@ static int init_submitter(struct drbd_device *device) enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) { struct drbd_resource *resource = adm_ctx->resource; - struct drbd_connection *connection; + struct drbd_connection *connection, *n; struct drbd_device *device; struct drbd_peer_device *peer_device, *tmp_peer_device; struct gendisk *disk; @@ -2789,7 +2789,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig return NO_ERROR; out_idr_remove_from_resource: - for_each_connection(connection, resource) { + for_each_connection_safe(connection, n, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); if (peer_device) kref_put(&connection->kref, drbd_destroy_connection); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f96cb01e9604..e9de9d846b73 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ 
-57,10 +57,8 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - union { - struct callback_head work; - struct llist_node node; - }; + struct llist_node node; + struct callback_head work; }; struct ublk_uring_cmd_pdu { @@ -766,15 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req) ubq_complete_io_cmd(io, UBLK_IO_RES_OK); } +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + io_cmds = llist_reverse_order(io_cmds); + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); +} + +static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); +} + static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - llist_for_each_entry(data, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + ublk_forward_io_cmds(ubq); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -782,14 +796,20 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data *data = container_of(work, struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); + struct ublk_queue *ubq = req->mq_hctx->driver_data; - __ublk_rq_task_work(req); + ublk_forward_io_cmds(ubq); } -static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + struct ublk_io *io; + if (!llist_add(&data->node, &ubq->io_cmds)) + return; + + io = &ubq->ios[rq->tag]; /* * If the check pass, we know that this is a re-issued request aborted * previously in monitor_work because the ubq_daemon(cmd's task) is @@ -803,11 +823,11 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) * guarantees that here is a re-issued request aborted previously. */ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - - llist_for_each_entry(data, io_cmds, node) - __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + ublk_abort_io_cmds(ubq); + } else if (ublk_can_use_task_work(ubq)) { + if (task_work_add(ubq->ubq_daemon, &data->work, + TWA_SIGNAL_NO_IPI)) + ublk_abort_io_cmds(ubq); } else { struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); @@ -817,23 +837,6 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } -static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, - bool last) -{ - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); - - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = last ? 
- TWA_SIGNAL_NO_IPI : TWA_NONE; - - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - __ublk_abort_rq(ubq, rq); - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } -} - static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -865,19 +868,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - ublk_queue_cmd(ubq, rq, bd->last); + ublk_queue_cmd(ubq, rq); return BLK_STS_OK; } -static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - struct ublk_queue *ubq = hctx->driver_data; - - if (ublk_can_use_task_work(ubq)) - __set_notify_signal(ubq->ubq_daemon); -} - static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -899,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, - .commit_rqs = ublk_commit_rqs, .init_hctx = ublk_init_hctx, .init_request = ublk_init_rq, }; @@ -1197,7 +1191,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - ublk_queue_cmd(ubq, req, true); + ublk_queue_cmd(ubq, req); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c index a4388440aca7..91db001eb69a 100644 --- a/drivers/bus/intel-ixp4xx-eb.c +++ b/drivers/bus/intel-ixp4xx-eb.c @@ -49,7 +49,7 @@ #define IXP4XX_EXP_SIZE_SHIFT 10 #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */ #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */ -#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */ +#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */ #define IXP4XX_EXP_BYTE_RD16 BIT(6) #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */ #define IXP4XX_EXP_MUX_EN BIT(4) @@ -57,8 +57,6 @@ #define IXP4XX_EXP_WORD BIT(2) /* Always zero */ #define IXP4XX_EXP_WR_EN BIT(1) #define IXP4XX_EXP_BYTE_EN BIT(0) -#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD) -#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD) #define IXP4XX_EXP_CNFG0 0x20 #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31) @@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb, cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT; } - if (eb->is_42x) - cs_cfg &= ~IXP42X_RESERVED; if (eb->is_43x) { - cs_cfg &= ~IXP43X_RESERVED; + /* Should always be zero */ + cs_cfg &= ~IXP4XX_EXP_WORD; /* * This bit for Intel strata flash is currently unused, but let's * report it if we find one. 
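The ublk rework above funnels every request through a single lockless llist per queue: each producer pushes onto ubq->io_cmds, but only the producer whose llist_add() reports the list was previously empty kicks the daemon, and the consumer then drains the whole batch in one step. Below is a minimal sketch of that pattern, assuming a workqueue as the wakeup mechanism for brevity (the driver itself kicks task work or an io_uring task-completion instead); the demo_* names are illustrative, not part of the driver:

#include <linux/llist.h>
#include <linux/workqueue.h>

struct demo_cmd {
	struct llist_node node;
	/* request payload would live here */
};

struct demo_queue {
	/* init_llist_head() and INIT_WORK() at setup time, elided */
	struct llist_head io_cmds;
	struct work_struct work;
};

static void demo_handle(struct demo_cmd *cmd)
{
	/* process one command */
}

/*
 * Producer side: llist_add() returns true only when the list was
 * empty, so exactly one producer per batch kicks the consumer and
 * the rest piggyback on the notification already in flight.
 */
static void demo_queue_cmd(struct demo_queue *q, struct demo_cmd *cmd)
{
	if (llist_add(&cmd->node, &q->io_cmds))
		schedule_work(&q->work);
}

/*
 * Consumer side: detach the entire batch atomically. The nodes come
 * back newest-first, so reverse them to preserve submission order
 * before walking the list.
 */
static void demo_work_fn(struct work_struct *work)
{
	struct demo_queue *q = container_of(work, struct demo_queue, work);
	struct llist_node *batch = llist_del_all(&q->io_cmds);
	struct demo_cmd *cmd, *tmp;

	batch = llist_reverse_order(batch);
	llist_for_each_entry_safe(cmd, tmp, batch, node)
		demo_handle(cmd);
}

Draining with llist_del_all() also keeps the abort path cheap: ublk_abort_io_cmds() walks the same kind of detached batch with a different handler, without taking any lock.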
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4cd2e127946e..3aa91aed3bf7 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -267,6 +267,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); /* common code that starts a transfer */ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) { + u32 int_mask, status; + bool timeout; + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { dev_dbg(rsb->dev, "RSB transfer still in progress\n"); return -EBUSY; @@ -274,13 +277,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) reinit_completion(&rsb->complete); - writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, - rsb->regs + RSB_INTE); + int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; + writel(int_mask, rsb->regs + RSB_INTE); writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, rsb->regs + RSB_CTRL); - if (!wait_for_completion_io_timeout(&rsb->complete, - msecs_to_jiffies(100))) { + if (irqs_disabled()) { + timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, + status, (status & int_mask), + 10, 100000); + writel(status, rsb->regs + RSB_INTS); + } else { + timeout = !wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100)); + status = rsb->status; + } + + if (timeout) { dev_dbg(rsb->dev, "RSB timeout\n"); /* abort the transfer */ @@ -292,18 +305,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) return -ETIMEDOUT; } - if (rsb->status & RSB_INTS_LOAD_BSY) { + if (status & RSB_INTS_LOAD_BSY) { dev_dbg(rsb->dev, "RSB busy\n"); return -EBUSY; } - if (rsb->status & RSB_INTS_TRANS_ERR) { - if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + if (status & RSB_INTS_TRANS_ERR) { + if (status & RSB_INTS_TRANS_ERR_ACK) { dev_dbg(rsb->dev, "RSB slave nack\n"); return -EINVAL; } - if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + if (status & RSB_INTS_TRANS_ERR_DATA) { dev_dbg(rsb->dev, "RSB transfer data error\n"); return -EIO; } @@ -812,14 +825,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev) return 0; } -static void sunxi_rsb_shutdown(struct platform_device *pdev) -{ - struct sunxi_rsb *rsb = platform_get_drvdata(pdev); - - pm_runtime_disable(&pdev->dev); - sunxi_rsb_hw_exit(rsb); -} - static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = { SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend, sunxi_rsb_runtime_resume, NULL) @@ -835,7 +840,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); static struct platform_driver sunxi_rsb_driver = { .probe = sunxi_rsb_probe, .remove = sunxi_rsb_remove, - .shutdown = sunxi_rsb_shutdown, .driver = { .name = RSB_CTRL_NAME, .of_match_table = sunxi_rsb_of_match_table, diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a7ff77550e17..933bb960490d 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -806,6 +806,9 @@ static u64 __arch_timer_check_delta(void) /* * XGene-1 implements CVAL in terms of TVAL, meaning * that the maximum timer range is 32bit. Shame on them. + * + * Note that TVAL is signed, thus has only 31 of its + * 32 bits to express magnitude. 
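+ *
+ * A delta of BIT(31) would already be interpreted as a negative
+ * TVAL, which is why the fallback mask below is 31 bits wide
+ * rather than 32.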
*/ MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, APM_CPU_PART_POTENZA)), @@ -813,8 +816,8 @@ static u64 __arch_timer_check_delta(void) }; if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) { - pr_warn_once("Broken CNTx_CVAL_EL1, limiting width to 32bits"); - return CLOCKSOURCE_MASK(32); + pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n"); + return CLOCKSOURCE_MASK(31); } #endif return CLOCKSOURCE_MASK(arch_counter_get_width()); diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 310779b07daf..00476e94db90 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ If in doubt, say N. config X86_AMD_PSTATE - tristate "AMD Processor P-State driver" + bool "AMD Processor P-State driver" depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ace7d50cf2ac..204e39006dda 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,12 +59,8 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ -static bool shared_mem = false; -module_param(shared_mem, bool, 0444); -MODULE_PARM_DESC(shared_mem, - "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); - static struct cpufreq_driver amd_pstate_driver; +static int cppc_load __initdata; static inline int pstate_enable(bool enable) { @@ -424,12 +420,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) amd_pstate_driver.boost_enabled = true; } +static void amd_perf_ctl_reset(unsigned int cpu) +{ + wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; struct device *dev; struct amd_cpudata *cpudata; + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. 
+ */ + amd_perf_ctl_reset(policy->cpu); dev = get_cpu_device(policy->cpu); if (!dev) return -ENODEV; @@ -616,6 +622,15 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* + * by default the pstate driver is disabled to load + * enable the amd_pstate passive mode driver explicitly + * with amd_pstate=passive in kernel command line + */ + if (!cppc_load) { + pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + return -ENODEV; + } if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); @@ -630,13 +645,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; - } else if (shared_mem) { + } else { + pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_init_perf, cppc_init_perf); static_call_update(amd_pstate_update_perf, cppc_update_perf); - } else { - pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); - return -ENODEV; } /* enable amd pstate feature */ @@ -653,16 +666,22 @@ static int __init amd_pstate_init(void) return ret; } +device_initcall(amd_pstate_init); -static void __exit amd_pstate_exit(void) +static int __init amd_pstate_param(char *str) { - cpufreq_unregister_driver(&amd_pstate_driver); + if (!str) + return -EINVAL; - amd_pstate_enable(false); -} + if (!strcmp(str, "disable")) { + cppc_load = 0; + pr_info("driver is explicitly disabled\n"); + } else if (!strcmp(str, "passive")) + cppc_load = 1; -module_init(amd_pstate_init); -module_exit(amd_pstate_exit); + return 0; +} +early_param("amd_pstate", amd_pstate_param); MODULE_AUTHOR("Huang Rui <[email protected]>"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c index 7503f6b18ac5..a2aba0b0d68a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -76,10 +76,6 @@ static int otx2_cpt_devlink_info_get(struct devlink *dl, struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; int err; - err = devlink_info_driver_name_put(req, "rvu_cptpf"); - if (err) - return err; - err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, "fw.ae", OTX2_CPT_AE_TYPES); if (err) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index dd0f83ee505b..e6f36c014c4c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/dma-fence.h> +#include <linux/dma-fence-unwrap.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> @@ -391,8 +392,10 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, const void __user *user_data) { struct dma_buf_import_sync_file arg; - struct dma_fence *fence; + struct dma_fence *fence, *f; enum dma_resv_usage usage; + struct dma_fence_unwrap iter; + unsigned int num_fences; int ret = 0; if (copy_from_user(&arg, user_data, sizeof(arg))) @@ -411,13 +414,21 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? 
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; - dma_resv_lock(dmabuf->resv, NULL); + num_fences = 0; + dma_fence_unwrap_for_each(f, &iter, fence) + ++num_fences; - ret = dma_resv_reserve_fences(dmabuf->resv, 1); - if (!ret) - dma_resv_add_fence(dmabuf->resv, fence, usage); + if (num_fences > 0) { + dma_resv_lock(dmabuf->resv, NULL); - dma_resv_unlock(dmabuf->resv); + ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); + if (!ret) { + dma_fence_unwrap_for_each(f, &iter, fence) + dma_resv_add_fence(dmabuf->resv, f, usage); + } + + dma_resv_unlock(dmabuf->resv); + } dma_fence_put(fence); diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 8f5848aa144f..59d158873f4c 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -233,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) return ERR_PTR(-EINVAL); } - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); @@ -283,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) err_ret = ERR_CAST(dev_ret); goto err2; } - /* Add heap to the list */ + mutex_lock(&heap_list_lock); + /* check the name is unique */ + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + err_ret = ERR_PTR(-EINVAL); + goto err3; + } + } + + /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; +err3: + device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 41041ff0fadb..2a120d8d3c27 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -327,7 +327,13 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) return IRQ_NONE; tusb320_extcon_irq_handler(priv, reg); - tusb320_typec_irq_handler(priv, reg); + + /* + * Type-C support is optional. Only call the Type-C handler if a + * port had been registered previously. 
+ */ + if (priv->port) + tusb320_typec_irq_handler(priv, reg); regmap_write(priv->regmap, TUSB320_REG9, reg); diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index c52bcaa9def6..9ca21feb9d45 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -149,12 +149,8 @@ static int coreboot_table_probe(struct platform_device *pdev) if (!ptr) return -ENOMEM; - ret = bus_register(&coreboot_bus_type); - if (!ret) { - ret = coreboot_table_populate(dev, ptr); - if (ret) - bus_unregister(&coreboot_bus_type); - } + ret = coreboot_table_populate(dev, ptr); + memunmap(ptr); return ret; @@ -169,7 +165,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy) static int coreboot_table_remove(struct platform_device *pdev) { bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister); - bus_unregister(&coreboot_bus_type); return 0; } @@ -199,6 +194,32 @@ static struct platform_driver coreboot_table_driver = { .of_match_table = of_match_ptr(coreboot_of_match), }, }; -module_platform_driver(coreboot_table_driver); + +static int __init coreboot_table_driver_init(void) +{ + int ret; + + ret = bus_register(&coreboot_bus_type); + if (ret) + return ret; + + ret = platform_driver_register(&coreboot_table_driver); + if (ret) { + bus_unregister(&coreboot_bus_type); + return ret; + } + + return 0; +} + +static void __exit coreboot_table_driver_exit(void) +{ + platform_driver_unregister(&coreboot_table_driver); + bus_unregister(&coreboot_bus_type); +} + +module_init(coreboot_table_driver_init); +module_exit(coreboot_table_driver_exit); + MODULE_AUTHOR("Google, Inc."); MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 6c416955da53..bbe0a7cabb75 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -246,7 +246,9 @@ config FPGA_MGR_VERSAL_FPGA config FPGA_M10_BMC_SEC_UPDATE tristate "Intel MAX10 BMC Secure Update driver" - depends on MFD_INTEL_M10_BMC && FW_UPLOAD + depends on MFD_INTEL_M10_BMC + select FW_LOADER + select FW_UPLOAD help Secure update support for the Intel MAX10 board management controller. 
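Moving bus_register() out of probe() and into module init, as the coreboot change above does, is the usual pattern when the device can be probed and removed repeatedly but the bus must outlive any single binding. A sketch of that init/exit ordering under the same structure (demo_* names and the "demo-table" driver name are illustrative stand-ins):

#include <linux/device.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static struct bus_type demo_bus_type = {
	.name = "demo",
};

static int demo_probe(struct platform_device *pdev)
{
	return 0; /* device setup elided */
}

static struct platform_driver demo_driver = {
	.probe = demo_probe,
	.driver = {
		.name = "demo-table",
	},
};

static int __init demo_init(void)
{
	int ret;

	/* The bus exists for the whole lifetime of the module... */
	ret = bus_register(&demo_bus_type);
	if (ret)
		return ret;

	/* ...and the driver is registered only once the bus is up. */
	ret = platform_driver_register(&demo_driver);
	if (ret)
		bus_unregister(&demo_bus_type);
	return ret;
}

static void __exit demo_exit(void)
{
	/* Tear down in reverse order of registration. */
	platform_driver_unregister(&demo_driver);
	bus_unregister(&demo_bus_type);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

With this split, remove() only has to unregister the child devices; the bus itself stays registered until module exit, which is exactly what the new coreboot_table_remove() relies on.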
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8639a4f9c6e8..2eca58220550 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1293,6 +1293,7 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v); struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang); +bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index c8935d718207..4485bb29bec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 84f44f7e4111..1f76e27f1a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -171,9 +171,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt)) { + adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -988,6 +986,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; + struct hmm_range *range; int ret = 0; mutex_lock(&process_info->lock); @@ -1017,7 +1016,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; @@ -1035,7 +1034,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -2372,6 +2371,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, validate_list.head) { + struct hmm_range *range; + invalid = atomic_read(&mem->invalid); if (!invalid) /* BO hasn't been invalidated since the last @@ -2382,7 +2383,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2401,7 +2403,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * FIXME: 
Cannot ignore the return code, must hold * notifier_lock */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); } /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 2168163aad2d..252a876b0725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -209,6 +209,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&e->tv.head, &bucket[priority]); e->user_pages = NULL; + e->range = NULL; } /* Connect the sorted buckets in the output list. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 9caea1688fc3..e4d78491bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -26,6 +26,8 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/amdgpu_drm.h> +struct hmm_range; + struct amdgpu_device; struct amdgpu_bo; struct amdgpu_bo_va; @@ -36,6 +38,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; + struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 491d4846fc02..cfb262911bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d371000a5727..365e3fb6a9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, return r; ++(num_ibs[r]); + p->gang_leader_idx = r; return 0; } @@ -287,8 +288,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - if (!p->gang_size) - return -EINVAL; + if (!p->gang_size) { + ret = -EINVAL; + goto free_partial_kdata; + } for (i = 0; i < p->gang_size; ++i) { ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); @@ -300,7 +303,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (ret) goto free_all_kdata; } - p->gang_leader = p->jobs[p->gang_size - 1]; + p->gang_leader = p->jobs[p->gang_leader_idx]; if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { ret = -ECANCELED; @@ -910,7 +913,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); if (r) { kvfree(e->user_pages); e->user_pages = NULL; @@ -988,9 +991,10 @@ out_free_user_pages: if (!e->user_pages) continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); kvfree(e->user_pages); e->user_pages = NULL; + e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); return r; @@ -1195,16 +1199,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - for (i = 0; i < p->gang_size - 1; ++i) { + for (i = 0; i < p->gang_size; ++i) { + if (p->jobs[i] == leader) + continue; + r = 
amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); if (r) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); if (r && r != -ERESTARTSYS) DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; } @@ -1238,9 +1244,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, for (i = 0; i < p->gang_size; ++i) drm_sched_job_arm(&p->jobs[i]->base); - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { struct dma_fence *fence; + if (p->jobs[i] == leader) + continue; + fence = &p->jobs[i]->base.s_fence->scheduled; r = amdgpu_sync_fence(&leader->sync, fence); if (r) @@ -1265,7 +1274,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + e->range = NULL; } if (r) { r = -EAGAIN; @@ -1276,7 +1286,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, list_for_each_entry(e, &p->validated, tv.head) { /* Everybody except for the gang leader uses READ */ - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { + if (p->jobs[i] == leader) + continue; + dma_resv_add_fence(e->tv.bo->base.resv, &p->jobs[i]->base.s_fence->finished, DMA_RESV_USAGE_READ); @@ -1286,7 +1299,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, e->tv.num_shared = 0; } - seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], + seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], p->fence); amdgpu_cs_post_dependencies(p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index cbaa19b2b8a3..f80adf9069ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -54,6 +54,7 @@ struct amdgpu_cs_parser { /* scheduler job objects */ unsigned int gang_size; + unsigned int gang_leader_idx; struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *gang_leader; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 64510898eedd..f1e9663b4051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6044,3 +6044,44 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, dma_fence_put(old); return NULL; } + +bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#endif + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_CARRIZO: + case CHIP_STONEY: + /* chips with display hardware */ + return true; + default: + /* IP discovery */ + if (!adev->ip_versions[DCE_HWIP][0] || + (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) + return false; + return true; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ef31d687ef3..91571b1324f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -378,6 +378,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; + struct hmm_range *range; struct amdgpu_bo *bo; uint32_t handle; int r; @@ -413,14 +414,13 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { - r = amdgpu_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = amdgpu_mn_register(bo, args->addr); + if (r) + goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (r) goto release_object; @@ -443,7 +443,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, user_pages_done: if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); release_object: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 34233a74248c..28612e56d0d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned i; unsigned vmhub, inv_eng; + if (adev->enable_mes) { + /* reserve engine 5 for firmware */ + for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) + vm_inv_engs[vmhub] &= ~(1 << 5); + } + for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->funcs->vmhub; @@ -656,7 +662,7 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev) || - !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { + !amdgpu_device_has_display_hardware(adev)) { size = 0; } else { size = amdgpu_gmc_get_vbios_fb_size(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cd968e781077..adac650cf544 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -169,7 +169,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - dma_fence_put(&job->hw_fence); + /* only put the hw fence if has embedded fence */ + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -254,6 +258,9 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, DRM_ERROR("Error adding fence (%d)\n", r); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + while (fence == NULL && vm && !job->vmid) { r = amdgpu_vmid_grab(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -264,9 +271,6 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync); } - if (!fence && job->gang_submit) - fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index effa7df3ddbf..7978307e1d6d 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -172,6 +172,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); + mem_ctx->shared_bo = NULL; } static void psp_free_shared_bufs(struct psp_context *psp) @@ -182,6 +183,7 @@ static void psp_free_shared_bufs(struct psp_context *psp) /* free TMR memory buffer */ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + psp->tmr_bo = NULL; /* free xgmi shared memory */ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); @@ -743,7 +745,7 @@ static int psp_load_toc(struct psp_context *psp, /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { - int ret; + int ret = 0; int tmr_size; void *tmr_buf; void **pptr; @@ -770,10 +772,12 @@ static int psp_tmr_init(struct psp_context *psp) } } - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + if (!psp->tmr_bo) { + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + } return ret; } @@ -2732,8 +2736,6 @@ static int psp_suspend(void *handle) } out: - psp_free_shared_bufs(psp); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 57277b1cf183..b64938ed8cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -643,9 +643,6 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range *range; -#endif }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -658,7 +655,8 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -668,16 +666,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) bool readonly; int r = 0; + /* Make sure get_user_pages_done() can cleanup gracefully */ + *range = NULL; + mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); return -EFAULT; } - /* Another get_user_pages is running at the same time?? 
*/ - if (WARN_ON(gtt->range)) - return -EFAULT; - if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; @@ -695,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, - ttm->num_pages, &gtt->range, readonly, + ttm->num_pages, range, readonly, true, NULL); out_unlock: mmap_read_unlock(mm); @@ -713,30 +710,24 @@ out_unlock: * * Returns: true if pages are still valid */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - bool r = false; - if (!gtt || !gtt->userptr) + if (!gtt || !gtt->userptr || !range) return false; DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); - WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, - "No user pages to check\n"); + WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - if (gtt->range) { - /* - * FIXME: Must always hold notifier_lock for this, and must - * not ignore the return code. - */ - r = amdgpu_hmm_range_get_pages_done(gtt->range); - gtt->range = NULL; - } - - return !r; + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + return !amdgpu_hmm_range_get_pages_done(range); } #endif @@ -813,20 +804,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, /* unmap the pages mapped to the device */ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); - -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range) { - unsigned long i; - - for (i = 0; i < ttm->num_pages; i++) { - if (ttm->pages[i] != - hmm_pfn_to_page(gtt->range->hmm_pfns[i])) - break; - } - - WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); - } -#endif } static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..a37207011a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,6 +39,8 @@ #define AMDGPU_POISON 0xd0bed0be +struct hmm_range; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; @@ -149,15 +151,19 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages) + struct page **pages, + struct hmm_range **range) { return -EPERM; } -static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index f772bb499f3e..0312c71c3af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -32,7 +32,6 @@ #define RB_ENABLED (1 << 0) #define
RB4_ENABLED (1 << 1) -#define MMSCH_DOORBELL_OFFSET 0x8 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 21d822b1d589..88f9b327183a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -45,6 +45,7 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 897a5ce9c9da..dcc49b01bd59 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,7 +100,6 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -112,12 +111,6 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - if (amdgpu_sriov_vf(adev)) { - vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; - /* get DWORD offset */ - vcn_doorbell_index = vcn_doorbell_index << 1; - } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -135,7 +128,7 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 509739d83b5a..512c32327eb1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -147,6 +147,14 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); /* Number of bytes in PSP footer for firmware. 
*/ #define PSP_FOOTER_BYTES 0x100 +/* + * DMUB Async to Sync Mechanism Status + */ +#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 +#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 +#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 +#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 + /** * DOC: overview * @@ -1364,7 +1372,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, {} + /* TODO: refactor this from a fixed table to a dynamic option */ }; static void retrieve_dmi_info(struct amdgpu_display_manager *dm) @@ -1637,12 +1682,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } } - if (amdgpu_dm_initialize_drm_device(adev)) { - DRM_ERROR( - "amdgpu: failed to initialize sw for display support.\n"); - goto error; - } - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. * It is expected that DMUB will resend any pending notifications at this point, for * example HPD from DPIA. @@ -1650,6 +1689,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) dc_enable_dmub_outbox(adev->dm.dc); + if (amdgpu_dm_initialize_drm_device(adev)) { + DRM_ERROR( + "amdgpu: failed to initialize sw for display support.\n"); + goto error; + } + /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); @@ -6467,7 +6512,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j; + int i, j, ret; int vcpi, pbn_div, pbn, slot_num = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6514,8 +6559,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, dm_conn_state->pbn = pbn; dm_conn_state->vcpi_slots = slot_num; - drm_dp_mst_atomic_enable_dsc(state, aconnector->port, dm_conn_state->pbn, - false); + ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, + dm_conn_state->pbn, false); + if (ret < 0) + return ret; + continue; } @@ -9529,10 +9577,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, #if defined(CONFIG_DRM_AMD_DC_DCN) if (dc_resource_is_dsc_encoding_supported(dc)) { - if (!pre_validate_dsc(state, &dm_state, vars)) { - ret = -EINVAL; + ret = pre_validate_dsc(state, &dm_state, vars); + if (ret != 0) goto fail; - } } #endif @@ -9627,9 +9674,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto 
fail; } @@ -10109,6 +10156,8 @@ static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, *operation_result = AUX_RET_ERROR_TIMEOUT; } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; + } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { + *operation_result = AUX_RET_ERROR_INVALID_REPLY; } else { *operation_result = AUX_RET_ERROR_UNKNOWN; } @@ -10156,6 +10205,16 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && adev->dm.dmub_notify->aux_reply.length && payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { + + if (payload->length != adev->dm.dmub_notify->aux_reply.length) { + DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", + payload->address, payload->length, + adev->dm.dmub_notify->aux_reply.length); + return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, + DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, + (uint32_t *)operation_result); + } + memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, adev->dm.dmub_notify->aux_reply.length); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b5ce15c43bcc..635c398fcefe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -51,12 +51,6 @@ #define AMDGPU_DMUB_NOTIFICATION_MAX 5 /* - * DMUB Async to Sync Mechanism Status - */ -#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 -#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 -#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 -/* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 594fe8a4d02b..64dd02970292 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -412,7 +412,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, { struct amdgpu_crtc *acrtc = NULL; struct drm_plane *cursor_plane; - + bool is_dcn; int res = -ENOMEM; cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); @@ -450,8 +450,14 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, acrtc->otg_inst = -1; dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + + /* Don't enable DRM CRTC degamma property for DCE since it doesn't + * support programmable degamma anywhere. + */ + is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; + drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? 
MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6ff96b4bdda5..6483ba266893 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -703,13 +703,13 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) return dsc_config.bits_per_pixel; } -static bool increase_dsc_bpp(struct drm_atomic_state *state, - struct drm_dp_mst_topology_state *mst_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int increase_dsc_bpp(struct drm_atomic_state *state, + struct drm_dp_mst_topology_state *mst_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool bpp_increased[MAX_PIPES]; @@ -719,6 +719,7 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, int remaining_to_increase = 0; int link_timeslots_used; int fair_pbn_alloc; + int ret = 0; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { @@ -757,52 +758,60 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn); } else { vars[next_index].pbn -= fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } else { vars[next_index].pbn += initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16; } else { vars[next_index].pbn -= initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } bpp_increased[next_index] = true; remaining_to_increase--; } - return true; + return 0; } -static bool try_disable_dsc(struct drm_atomic_state *state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int 
try_disable_dsc(struct drm_atomic_state *state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool tried[MAX_PIPES]; @@ -810,6 +819,7 @@ static bool try_disable_dsc(struct drm_atomic_state *state, int max_kbps_increase; int next_index; int remaining_to_try = 0; + int ret; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -840,49 +850,52 @@ static bool try_disable_dsc(struct drm_atomic_state *state, break; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } tried[next_index] = true; remaining_to_try--; } - return true; + return 0; } -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_vars *vars, - struct drm_dp_mst_topology_mgr *mgr, - int *link_vars_start_index) +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars, + struct drm_dp_mst_topology_mgr *mgr, + int *link_vars_start_index) { struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr); int count = 0; - int i, k; + int i, k, ret; bool debugfs_overwrite = false; memset(params, 0, sizeof(params)); if (IS_ERR(mst_state)) - return false; + return PTR_ERR(mst_state); mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link); #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +946,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (count == 0) { ASSERT(0); - return true; + return 0; } /* k is start index of vars for current phy link used by mst hub */ @@ -947,13 +960,17 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, - vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, + vars[i + k].pbn); + if (ret < 0) + return ret; } - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0 && !debugfs_overwrite) { set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; + } else if (ret != -ENOSPC) { + return ret; } /* Try max compression */ @@ -962,31 +979,36 @@ static bool 
compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } else { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } } - if (drm_dp_mst_atomic_check(state)) - return false; + ret = drm_dp_mst_atomic_check(state); + if (ret != 0) + return ret; /* Optimize degree of compression */ - if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k)) - return false; + ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; - if (!try_disable_dsc(state, dc_link, params, vars, count, k)) - return false; + ret = try_disable_dsc(state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; } static bool is_dsc_need_re_compute( @@ -1087,15 +1109,17 @@ static bool is_dsc_need_re_compute( return is_dsc_need_re_compute; } -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1108,7 +1132,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1118,19 +1142,16 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1143,22 +1164,23 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (stream->timing.flags.DSC == 1) if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, 
stream) != DC_OK) - return false; + return -EINVAL; } - return true; + return ret; } -static bool - pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1171,7 +1193,7 @@ static bool aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1183,14 +1205,11 @@ static bool if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { - mutex_unlock(&aconnector->mst_mgr.lock); - return false; - } - mutex_unlock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, + &link_vars_start_index); + if (ret != 0) + return ret; for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1198,7 +1217,7 @@ static bool } } - return true; + return ret; } static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state, @@ -1253,9 +1272,9 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state) return ret; } -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars) +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars) { int i; struct dm_atomic_state *dm_state; @@ -1264,11 +1283,12 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (!is_dsc_precompute_needed(state)) { DRM_INFO_ONCE("DSC precompute is not needed.\n"); - return true; + return 0; } - if (dm_atomic_get_state(state, dm_state_ptr)) { + ret = dm_atomic_get_state(state, dm_state_ptr); + if (ret != 0) { DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); - return false; + return ret; } dm_state = *dm_state_ptr; @@ -1280,7 +1300,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state, local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL); if (!local_dc_state) - return false; + return -ENOMEM; for (i = 0; i < local_dc_state->stream_count; i++) { struct dc_stream_state *stream = dm_state->context->streams[i]; @@ -1316,9 +1336,9 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (ret != 0) goto clean_exit; - if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) { + ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); + if (ret != 0) { DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto clean_exit; } @@ -1349,7 +1369,7 @@ clean_exit: kfree(local_dc_state); - return (ret == 0); + return ret; } static unsigned int kbps_from_pbn(unsigned int pbn) @@ -1392,6 +1412,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int 
upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; + struct drm_dp_mst_topology_mgr *mst_mgr; /* * check if the mode could be supported if DSC pass-through is supported @@ -1400,7 +1421,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( */ if (is_dsc_common_config_possible(stream, &bw_range) && aconnector->port->passthrough_aux) { - mutex_lock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); cur_link_settings = stream->link->verified_link_cap; @@ -1413,7 +1435,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); /* * use the maximum dsc compression bandwidth as the required diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b92a7c5671aa..97fd70df531b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars { struct amdgpu_dm_connector *aconnector; }; -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars); +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); bool needs_dsc_aux_workaround(struct dc_link *link); -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars); +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars); enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index ee0456b5e14e..e0c8d6f09bb4 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2393,6 +2393,26 @@ static enum bp_result get_vram_info_v25( return result; } +static enum bp_result get_vram_info_v30( + struct bios_parser *bp, + struct dc_vram_info *info) +{ + struct atom_vram_info_header_v3_0 *info_v30; + enum bp_result result = BP_RESULT_OK; + + info_v30 = GET_IMAGE(struct atom_vram_info_header_v3_0, + DATA_TABLES(vram_info)); + + if (info_v30 == NULL) + return BP_RESULT_BADBIOSTABLE; + + info->num_chans = info_v30->channel_num; + info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + + return result; +} + + /* * get_integrated_info_v11 * @@ -3060,6 +3080,16 @@ static enum bp_result bios_parser_get_vram_info( } break; + case 3: + switch (revision.minor) { + case 0: + result = get_vram_info_v30(bp, info); + break; + default: + break; + } + break; + default: return result; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index ef0795b14a1f..2db595672a46 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -123,9 +123,10 @@ static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t result; result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000); - ASSERT(result == 
VBIOSSMC_Result_OK); - smu_print("SMU response after wait: %d\n", result); + if (result != VBIOSSMC_Result_OK) + smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", + result); if (result == VBIOSSMC_Status_BUSY) return -1; @@ -216,6 +217,12 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request VBIOSSMC_MSG_SetHardMinDcfclkByFreq, khz_to_mhz_ceil(requested_dcfclk_khz)); +#ifdef DBG + smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", + actual_dcfclk_set_mhz, + actual_dcfclk_set_mhz * 1000); +#endif + return actual_dcfclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 1b70b78e2fa1..af631085e88c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index b9765b3899e1..ef52e6b6eccf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -436,34 +436,48 @@ void dpp1_set_cursor_position( uint32_t height) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; uint32_t cur_en = pos->enable ? 
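/*
 * Worked example for the transform below: a 64x64 cursor with hotspot
 * (x=10, y=20) rotated 90 degrees first swaps width/height and the hotspot
 * components, after which src_x_offset = x_pos - (64 - 20) and
 * src_y_offset = y_pos - 10, i.e. an effective hotspot of (44, 10).
 */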
1 : 0; - // Cursor width/height and hotspots need to be rotated for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { - swap(width, height); + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } if (src_x_offset >= (int)param->viewport.width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width <= 0) + if (src_x_offset + cursor_width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset + (int)height <= 0) + if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 52e201e9b091..a142a00bc432 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + 
src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 938dba5249d4..4566bc7abf17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -973,10 +973,12 @@ void hubp2_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -994,18 +996,26 @@ void hubp2_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? 
src_x_offset : 0; @@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); @@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position( hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; hubp->pos.position.bits.y_pos = pos->y; - hubp->pos.hot_spot.bits.x_hot = x_hotspot; - hubp->pos.hot_spot.bits.y_hot = y_hotspot; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; /* Cursor Rectangle Cache * Cursor bitmaps have different hotspot values diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 84e1486f3d51..39a57bcd7866 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -87,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_init = hubp3_init, .set_unbounded_requesting = hubp31_set_unbounded_requesting, .hubp_soft_reset = hubp31_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 1bd7e0f327d8..389a8938ee45 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + // Don't program 0xF into the register field. 
Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); + return; + } + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 588c1c71241f..a0741794db62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 47eb162f1a75..7dd36e402bac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -237,7 +237,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .setup_global_swap_lock = NULL, .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, + .configure_crc = optc1_configure_crc, .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4daed44ef5f..df4f25119142 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div( // Don't program 0xF into the register field. 
Not valid since // K1 / K2 field is only 1 / 2 bits wide - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); return; + } dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index cf5bd9713f54..d0b46a3e0155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -283,8 +283,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c using the max for calculation */ if (hubp->curs_attr.width > 0) { - // Round cursor width to next multiple of 64 - cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height; + cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height; switch (pipe->stream->cursor_attributes.color_format) { case CURSOR_MODE_MONO: @@ -309,9 +308,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c cursor_size > 16384) { /* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1) */ - cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp + - DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) * - DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2; + cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / + DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) / + dc->caps.cache_line_size + 2; } break; } @@ -727,10 +726,7 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { - //Round cursor width up to next multiple of 64 - int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64; - int cursor_height = hubp->curs_attr.height; - int cursor_size = cursor_width * cursor_height; + int cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height; switch (hubp->curs_attr.color_format) { case CURSOR_MODE_MONO: @@ -1175,10 +1171,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b03a7814e96d..fa3778849db1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -111,7 +111,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) / mblk_height * 
mblk_height + mblk_height; /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 659323ebd79d..2abe3967f7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) @@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; @@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; @@ -1803,6 +1803,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, */ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so + * prefetch is scheduled correctly to account for dummy pstate. 
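+ * The override is temporary: after the firmware-based mclk-switch setup
+ * further down, fclk_change_latency_us is put back from the WM_A table
+ * entry, so only validation sees the inflated dummy latency.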
+ */ + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; @@ -1904,7 +1910,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == dm_dram_clock_change_unsupported) { - int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; @@ -1990,6 +1996,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); if (!pstate_en) @@ -1997,8 +2007,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; + } } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 244fd15d24b4..9afd9ba23fb2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -718,6 +718,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman do { MaxTotalRDBandwidth = 0; + DestinationLineTimesForPrefetchLessThan2 = false; + VRatioPrefetchMoreThanMax = false; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h index f82e14cd9d8a..c8b28c83ddf4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h @@ -46,6 +46,8 @@ // Prefetch schedule max vratio #define __DML_MAX_VRATIO_PRE__ 4.0 +#define __DML_VBA_MAX_DST_Y_PRE__ 63.75 + #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 635fc54338fa..debe46b24a3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3475,7 +3475,6 @@ bool dml32_CalculatePrefetchSchedule( double min_Lsw; double Tsw_est1 = 0; double Tsw_est3 = 0; - double TPreMargin = 0; if (v->GPUVMEnable == true && v->HostVMEnable == true) HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; @@ -3669,6 +3668,7 @@ bool dml32_CalculatePrefetchSchedule( dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); @@ -3701,8 +3701,6 @@ bool dml32_CalculatePrefetchSchedule( dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; - - TPreMargin = Tpre_rounded - TPreReq; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); @@ -3730,7 +3728,8 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1 && TPreMargin > 0.0) { + if (dst_y_prefetch_equ > 1 && + (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 432b4ecd01a7..f4b176599be7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 9.35, - .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, - .urgent_latency_vm_data_only_us = 9.35, + .urgent_latency_pixel_data_only_us = 4, + .urgent_latency_pixel_mixed_with_vm_data_us = 4, + .urgent_latency_vm_data_only_us = 4, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, @@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 4fe75dd2b329..b880f4d7d67e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1156,22 +1156,21 @@ static int smu_smc_hw_setup(struct smu_context *smu) uint64_t features_supported; int ret = 0; - if (adev->in_suspend && smu_is_dpm_running(smu)) { - dev_info(adev->dev, "dpm has been enabled\n"); - /* this is needed specifically */ - switch (adev->ip_versions[MP1_HWIP][0]) { - case IP_VERSION(11, 0, 7): - case IP_VERSION(11, 0, 11): - case IP_VERSION(11, 5, 0): - case IP_VERSION(11, 0, 12): + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + case 
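/*
 * The suspend-time fast path ("dpm has been enabled") applies only to the
 * MP1 versions listed in this switch (11.0.7, 11.0.11, 11.5.0 and 11.0.12,
 * i.e. Sienna Cichlid, Navy Flounder, Van Gogh and Dimgrey Cavefish); any
 * other ASIC falls through to the full hardware setup below even when DPM
 * is already running after suspend.
 */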
IP_VERSION(11, 0, 11): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 0, 12): + if (adev->in_suspend && smu_is_dpm_running(smu)) { + dev_info(adev->dev, "dpm has been enabled\n"); ret = smu_system_features_control(smu, true); if (ret) dev_err(adev->dev, "Failed system features control!\n"); - break; - default: - break; + return ret; } - return ret; + break; + default: + break; } ret = smu_init_display_count(smu, 0); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index e2fa3b066b96..f816b1dd110e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1388,6 +1388,14 @@ enum smu_cmn2asic_mapping_type { CMN2ASIC_MAPPING_WORKLOAD, }; +enum smu_baco_seq { + BACO_SEQ_BACO = 0, + BACO_SEQ_MSR, + BACO_SEQ_BAMACO, + BACO_SEQ_ULPS, + BACO_SEQ_COUNT, +}; + #define MSG_MAP(msg, index, valid_in_vf) \ [SMU_MSG_##msg] = {1, (index), (valid_in_vf)} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 25c08f963f49..d6b13933a98f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2C +#define SMU13_DRIVER_IF_VERSION 0x35 //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x20 +#define PPTABLE_VERSION 0x27 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -96,7 +96,7 @@ #define FEATURE_MEM_TEMP_READ_BIT 47 #define FEATURE_ATHUB_MMHUB_PG_BIT 48 #define FEATURE_SOC_PCC_BIT 49 -#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_EDC_PWRBRK_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 #define FEATURE_SPARE_53_BIT 53 @@ -282,15 +282,15 @@ typedef enum { } I2cControllerPort_e; typedef enum { - I2C_CONTROLLER_NAME_VR_GFX = 0, - I2C_CONTROLLER_NAME_VR_SOC, - I2C_CONTROLLER_NAME_VR_VMEMP, - I2C_CONTROLLER_NAME_VR_VDDIO, - I2C_CONTROLLER_NAME_LIQUID0, - I2C_CONTROLLER_NAME_LIQUID1, - I2C_CONTROLLER_NAME_PLX, - I2C_CONTROLLER_NAME_OTHER, - I2C_CONTROLLER_NAME_COUNT, + I2C_CONTROLLER_NAME_VR_GFX = 0, + I2C_CONTROLLER_NAME_VR_SOC, + I2C_CONTROLLER_NAME_VR_VMEMP, + I2C_CONTROLLER_NAME_VR_VDDIO, + I2C_CONTROLLER_NAME_LIQUID0, + I2C_CONTROLLER_NAME_LIQUID1, + I2C_CONTROLLER_NAME_PLX, + I2C_CONTROLLER_NAME_FAN_INTAKE, + I2C_CONTROLLER_NAME_COUNT, } I2cControllerName_e; typedef enum { @@ -302,6 +302,7 @@ typedef enum { I2C_CONTROLLER_THROTTLER_LIQUID0, I2C_CONTROLLER_THROTTLER_LIQUID1, I2C_CONTROLLER_THROTTLER_PLX, + I2C_CONTROLLER_THROTTLER_FAN_INTAKE, I2C_CONTROLLER_THROTTLER_INA3221, I2C_CONTROLLER_THROTTLER_COUNT, } I2cControllerThrottler_e; @@ -309,8 +310,9 @@ typedef enum { typedef enum { I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5, I2C_CONTROLLER_PROTOCOL_VR_IR35217, - I2C_CONTROLLER_PROTOCOL_TMP_TMP102A, + I2C_CONTROLLER_PROTOCOL_TMP_MAX31875, I2C_CONTROLLER_PROTOCOL_INA3221, + I2C_CONTROLLER_PROTOCOL_TMP_MAX6604, I2C_CONTROLLER_PROTOCOL_COUNT, } I2cControllerProtocol_e; @@ -690,6 +692,9 @@ typedef struct { #define PP_OD_FEATURE_UCLK_BIT 8 #define PP_OD_FEATURE_ZERO_FAN_BIT 9 #define PP_OD_FEATURE_TEMPERATURE_BIT 10 +#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11 +#define PP_OD_FEATURE_ASIC_TDC_BIT 12 +#define PP_OD_FEATURE_COUNT 13 typedef enum { 
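/*
 * The PP_OD_FEATURE_*_BIT values above are bit positions within the
 * FeatureCtrlMask member of OverDriveTable_t below; a TDC override, for
 * example, would presumably be armed with
 * FeatureCtrlMask |= 1u << PP_OD_FEATURE_ASIC_TDC_BIT before the table is
 * written back to the PMFW.
 */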
PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -697,6 +702,11 @@ typedef enum { PP_OD_POWER_FEATURE_ALWAYS_DISABLED, } PP_OD_POWER_FEATURE_e; +typedef enum { + FAN_MODE_AUTO = 0, + FAN_MODE_MANUAL_LINEAR, +} FanMode_e; + typedef struct { uint32_t FeatureCtrlMask; @@ -708,8 +718,8 @@ typedef struct { uint8_t RuntimePwrSavingFeaturesCtrl; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -730,7 +740,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -748,8 +763,8 @@ typedef struct { uint8_t IdlePwrSavingFeaturesCtrl; uint8_t RuntimePwrSavingFeaturesCtrl; - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -769,7 +784,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; } OverDriveLimits_t; @@ -903,7 +923,8 @@ typedef struct { uint16_t FanStartTempMin; uint16_t FanStartTempMax; - uint32_t Spare[12]; + uint16_t PowerMinPpt0[POWER_SOURCE_COUNT]; + uint32_t Spare[11]; } MsgLimits_t; @@ -1086,11 +1107,13 @@ typedef struct { uint32_t GfxoffSpare[15]; // GFX GPO - float DfllBtcMasterScalerM; + uint32_t DfllBtcMasterScalerM; int32_t DfllBtcMasterScalerB; - float DfllBtcSlaveScalerM; + uint32_t DfllBtcSlaveScalerM; int32_t DfllBtcSlaveScalerB; - uint32_t GfxGpoSpare[12]; + uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg + uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg + uint32_t GfxGpoSpare[10]; // GFX DCS @@ -1106,7 +1129,10 @@ typedef struct { uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin. 
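/*
 * PMFW interface structs must keep their size when fields are carved out of
 * a Spare array: two uint32_t spares (8 bytes) are replaced here by
 * NUM_UCLK_DPM_LEVELS uint16_t shadow-frequency entries, which only balances
 * if NUM_UCLK_DPM_LEVELS == 4. A compile-time guard in this style (placement
 * hypothetical) would catch accidental resizing:
 *
 *   _Static_assert(2 * sizeof(uint32_t) ==
 *                  NUM_UCLK_DPM_LEVELS * sizeof(uint16_t),
 *                  "ShadowFreqTableUclk must exactly fill the freed spares");
 */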
- uint32_t DcsSpare[16]; + uint32_t DcsSpare[14]; + + // UCLK section + uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz // UCLK section uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations @@ -1163,13 +1189,14 @@ typedef struct { uint16_t IntakeTempHighIntakeAcousticLimit; uint16_t IntakeTempAcouticLimitReleaseRate; - uint16_t FanStalledTempLimitOffset; + int16_t FanAbnormalTempLimitOffset; uint16_t FanStalledTriggerRpm; - uint16_t FanAbnormalTriggerRpm; - uint16_t FanPadding; - - uint32_t FanSpare[14]; + uint16_t FanAbnormalTriggerRpmCoeff; + uint16_t FanAbnormalDetectionEnable; + uint8_t FanIntakeSensorSupport; + uint8_t FanIntakePadding[3]; + uint32_t FanSpare[13]; // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; @@ -1193,7 +1220,6 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 uint32_t V2F_vmin_range_low; uint32_t V2F_vmin_range_high; uint32_t V2F_vmax_range_low; @@ -1238,8 +1264,21 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + + QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT]; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[43]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1304,7 +1343,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; // Q4.4 + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1317,11 +1357,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. 
See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1423,8 +1459,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t AvgTemperatureFanIntake; uint8_t PcieRate ; uint8_t PcieWidth ; @@ -1592,5 +1631,7 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 +#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 +#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h index a9215494dcdd..d466db6f0ad4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h @@ -147,14 +147,6 @@ struct smu_11_5_power_context { uint32_t max_fast_ppt_limit; }; -enum smu_v11_0_baco_seq { - BACO_SEQ_BACO = 0, - BACO_SEQ_MSR, - BACO_SEQ_BAMACO, - BACO_SEQ_ULPS, - BACO_SEQ_COUNT, -}; - #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) int smu_v11_0_init_microcode(struct smu_context *smu); @@ -257,7 +249,7 @@ int smu_v11_0_baco_enter(struct smu_context *smu); int smu_v11_0_baco_exit(struct smu_context *smu); int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_v11_0_baco_seq baco_seq); + enum smu_baco_seq baco_seq); int smu_v11_0_mode1_reset(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 80fb583b18d9..865d6358918d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms @@ -124,14 +124,6 @@ struct smu_13_0_power_context { enum smu_13_0_power_state power_state; }; -enum smu_v13_0_baco_seq { - BACO_SEQ_BACO = 0, - BACO_SEQ_MSR, - BACO_SEQ_BAMACO, - BACO_SEQ_ULPS, - BACO_SEQ_COUNT, -}; - #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) int smu_v13_0_init_microcode(struct smu_context *smu); @@ -218,6 +210,9 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); +int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, + enum smu_baco_seq baco_seq); + bool smu_v13_0_baco_is_support(struct smu_context *smu); enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 6212fd270857..697e98a0a20a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -379,6 +379,10 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) 
((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)) || ((adev->pdev->device == 0x7422) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73A3) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73E3) && (adev->pdev->revision == 0x00))) smu_baco->platform_support = false; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index dccbd9f70723..70b560737687 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1576,7 +1576,7 @@ int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu) } int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_v11_0_baco_seq baco_seq) + enum smu_baco_seq baco_seq) { return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq, NULL); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 43fb102a65f5..89f0f6eb19f3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2230,6 +2230,15 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu, return ret; } +int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, + enum smu_baco_seq baco_seq) +{ + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_ArmD3, + baco_seq, + NULL); +} + bool smu_v13_0_baco_is_support(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 29529328152d..f0121d171630 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -120,6 +120,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -1566,6 +1567,31 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, NULL); } +static int smu_v13_0_0_baco_enter(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) + return smu_v13_0_baco_set_armd3_sequence(smu, + smu_baco->maco_support ? 
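/*
 * BACO_SEQ_BACO powers the chip off with the bus kept alive;
 * BACO_SEQ_BAMACO additionally keeps VRAM powered (BACO with memory
 * retained), hence the maco_support check. The ArmD3 message mapped above
 * tells the PMFW which sequence to run while the audio function stays
 * active in runtime suspend.
 */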
BACO_SEQ_BAMACO : BACO_SEQ_BACO); + else + return smu_v13_0_baco_enter(smu); +} + +static int smu_v13_0_0_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + return smu_v13_0_baco_exit(smu); + } +} + static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1827,8 +1853,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_baco_enter, - .baco_exit = smu_v13_0_baco_exit, + .baco_enter = smu_v13_0_0_baco_enter, + .baco_exit = smu_v13_0_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c4102cfb734c..d74debc584f8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -122,6 +122,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -1578,6 +1579,31 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, return ret; } +static int smu_v13_0_7_baco_enter(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) + return smu_v13_0_baco_set_armd3_sequence(smu, + smu_baco->maco_support ? 
BACO_SEQ_BAMACO : BACO_SEQ_BACO); + else + return smu_v13_0_baco_enter(smu); +} + +static int smu_v13_0_7_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + return smu_v13_0_baco_exit(smu); + } +} + static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1655,8 +1681,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_baco_enter, - .baco_exit = smu_v13_0_baco_exit, + .baco_enter = smu_v13_0_7_baco_enter, + .baco_exit = smu_v13_0_7_baco_exit, .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c index 3ea53bb67d3b..bd61e20770a5 100644 --- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c @@ -63,23 +63,45 @@ ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, u8 offset, void *buffer, size_t size) { + u8 zero = 0; + char *tmpbuf = NULL; + /* + * As sub-addressing is not supported by all adaptors, + * always explicitly read from the start and discard + * any bytes that come before the requested offset. + * This way, no matter whether the adaptor supports it + * or not, we'll end up reading the proper data. + */ struct i2c_msg msgs[] = { { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = 0, .len = 1, - .buf = &offset, + .buf = &zero, }, { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = I2C_M_RD, - .len = size, + .len = size + offset, .buf = buffer, }, }; int ret; + if (offset) { + tmpbuf = kmalloc(size + offset, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + msgs[1].buf = tmpbuf; + } + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (tmpbuf) + memcpy(buffer, tmpbuf + offset, size); + + kfree(tmpbuf); + if (ret < 0) return ret; if (ret != ARRAY_SIZE(msgs)) @@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, if (ret) return DRM_DP_DUAL_MODE_UNKNOWN; - /* - * Sigh. Some (maybe all?) type 1 adaptors are broken and ack - * the offset but ignore it, and instead they just always return - * data from the start of the HDMI ID buffer. So for a broken - * type 1 HDMI adaptor a single byte read will always give us - * 0x44, and for a type 1 DVI adaptor it should give 0x00 - * (assuming it implements any registers). Fortunately neither - * of those values will match the type 2 signature of the - * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with - * the type 2 adaptor detection safely even in the presence - * of broken type 1 adaptors. 
- */ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, &adaptor_id, sizeof(adaptor_id)); drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret); @@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, return DRM_DP_DUAL_MODE_TYPE2_DVI; } /* - * If neither a proper type 1 ID nor a broken type 1 adaptor - * as described above, assume type 1, but let the user know - * that we may have misdetected the type. + * If not a proper type 1 ID, still assume type 1, but let + * the user know that we may have misdetected the type. */ - if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0]) + if (!is_type1_adaptor(adaptor_id)) drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id); } @@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output); * @enable: enable (as opposed to disable) the TMDS output buffers * * Set the state of the TMDS output buffers in the adaptor. For - * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As - * some type 1 adaptors have problems with registers (see comments - * in drm_dp_dual_mode_detect()) we avoid touching the register, - * making this function a no-op on type 1 adaptors. + * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. + * Type1 adaptors do not support any register writes. * * Returns: * 0 on success, negative error code on failure diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ecd22c038c8c..51a46689cda7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5186,7 +5186,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm mst_state = drm_atomic_get_mst_topology_state(state, mgr); if (IS_ERR(mst_state)) - return -EINVAL; + return PTR_ERR(mst_state); list_for_each_entry(pos, &mst_state->payloads, next) { diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8214a0b1ab7f..203bf8d6c34c 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -615,7 +615,7 @@ static int drm_dev_init(struct drm_device *dev, mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); - ret = drmm_add_action(dev, drm_dev_init_release, NULL); + ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 7bb98e6a446d..5ea5e260118c 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank) static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank) { - kthread_destroy_worker(vblank->worker); + if (vblank->worker) + kthread_destroy_worker(vblank->worker); } int drm_vblank_worker_init(struct drm_vblank_crtc *vblank); diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 939d621c9ad4..688c8afe0bf1 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -151,9 +151,6 @@ int drm_mode_getresources(struct drm_device *dev, void *data, count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->registration_state != DRM_CONNECTOR_REGISTERED) - continue; - /* only expose writeback connectors if userspace understands them */ if (!file_priv->writeback_connectors && 
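/*
 * On the drm_dp_dual_mode_read() rework above: because some adaptors
 * ignore the register sub-address, every read now starts from offset 0,
 * fetches offset + size bytes, and copies out only the tail. The core of
 * that discard pattern, condensed from the hunk:
 *
 *     msgs[1].len = size + offset;
 *     if (offset)
 *         msgs[1].buf = tmpbuf;            // bounce buffer
 *     ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs));
 *     if (tmpbuf)
 *         memcpy(buffer, tmpbuf + offset, size);
 *     kfree(tmpbuf);
 */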
(connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 1e608b9e5055..1a63da28f330 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2434,7 +2434,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - domains->port_start); @@ -2445,7 +2445,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); @@ -2471,7 +2471,7 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2482,7 +2482,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index a4aa9500fa17..0d6d640225fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -612,6 +612,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; @@ -1013,9 +1017,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - if (i915_ttm_cpu_maps_iomem(bo->resource)) - wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); - if (!i915_ttm_resource_mappable(bo->resource)) { int err = -ENODEV; int i; @@ -1042,6 +1043,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } } + if (i915_ttm_cpu_maps_iomem(bo->resource)) + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + if (drm_dev_enter(dev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, TTM_BO_VM_NUM_PREFAULT); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..714221f9a131 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,8 +664,6 @@ static int intel_vgpu_open_device(struct 
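/*
 * The kvm_get_kvm()/kvm_put_kvm() moves in this file follow the usual
 * refcount discipline: acquire the reference only after the last early
 * return (so no error path can leak it) and drop it as soon as its one
 * user, the page-track notifier, is unregistered. Resulting pairing
 * (illustrative sketch only):
 *
 *     kvm_get_kvm(kvm);
 *     kvm_page_track_register_notifier(kvm, &vgpu->track_node);
 *     ...
 *     kvm_page_track_unregister_notifier(kvm, &vgpu->track_node);
 *     kvm_put_kvm(kvm);
 */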
vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; @@ -676,6 +674,7 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); @@ -715,15 +714,14 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c index 011be7ff51e1..bc8fb4e38d0a 100644 --- a/drivers/gpu/drm/lima/lima_devfreq.c +++ b/drivers/gpu/drm/lima/lima_devfreq.c @@ -112,11 +112,6 @@ int lima_devfreq_init(struct lima_device *ldev) unsigned long cur_freq; int ret; const char *regulator_names[] = { "mali", NULL }; - const char *clk_names[] = { "core", NULL }; - struct dev_pm_opp_config config = { - .regulator_names = regulator_names, - .clk_names = clk_names, - }; if (!device_property_present(dev, "operating-points-v2")) /* Optional, continue without devfreq */ @@ -124,7 +119,15 @@ int lima_devfreq_init(struct lima_device *ldev) spin_lock_init(&ldevfreq->lock); - ret = devm_pm_opp_set_config(dev, &config); + /* + * clkname is set separately so it is not affected by the optional + * regulator setting which may return error. 
+ */ + ret = devm_pm_opp_set_clkname(dev, "core"); + if (ret) + return ret; + + ret = devm_pm_opp_set_regulators(dev, regulator_names); if (ret) { /* Continue if the optional regulator is missing */ if (ret != -ENODEV) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 2944228a8e2c..8a3b685c2fcc 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2500,6 +2500,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = { static const struct panel_desc logictechno_lt161010_2nh = { .timings = &logictechno_lt161010_2nh_timing, .num_timings = 1, + .bpc = 6, .size = { .width = 154, .height = 86, @@ -2529,6 +2530,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = { static const struct panel_desc logictechno_lt170410_2whc = { .timings = &logictechno_lt170410_2whc_timing, .num_timings = 1, + .bpc = 8, .size = { .width = 217, .height = 136, diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 6748ec1e0005..a1f909dac89a 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1093,6 +1093,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) struct host1x *host1x = dev_get_drvdata(dev->dev.parent); struct iommu_domain *domain; + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are * likely to be allocated beyond the 32-bit boundary if sufficient diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 4419e810103d..0a6347c05df4 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -197,8 +197,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } @@ -210,8 +210,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 0cd3f97e7e49..f60ea24db0ec 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -292,6 +292,10 @@ static void host1x_setup_virtualization_tables(struct host1x *host) static bool host1x_wants_iommu(struct host1x *host1x) { + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If we support addressing a maximum of 32 bits of physical memory * and if the host1x firewall is enabled, there's no need to enable diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b120402d405..cc23b90cae02 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. 
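 *
 * (The same driver-core rule drives this hunk and the vmbus_drv.c one
 * below: once a struct device has been initialized/registered, its
 * memory is refcounted and must be released via put_device(), which runs
 * the release() callback; a bare kfree() here would be a double free.
 * In sketch form:
 *     if (device_register(&obj->device)) put_device(&obj->device);
 * rather than kfree(obj).)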
+ * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. */ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..e592c481f7ae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index b4edf10e8fd0..9aa7b9d9a485 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2236,6 +2236,20 @@ int i2c_get_device_id(const struct i2c_client *client, } EXPORT_SYMBOL_GPL(i2c_get_device_id); +/** + * i2c_client_get_device_id - get the driver match table entry of a device + * @client: the device to query. The device must be bound to a driver + * + * Returns a pointer to the matching entry if found, NULL otherwise. + */ +const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client) +{ + const struct i2c_driver *drv = to_i2c_driver(client->dev.driver); + + return i2c_match_id(drv->id_table, client); +} +EXPORT_SYMBOL_GPL(i2c_client_get_device_id); + /* ---------------------------------------------------- * the i2c address scanning function * Will not work for 10-bit addresses! diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index ad8fce3e08cd..6e4d10a7cd32 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -805,8 +805,10 @@ static int bma400_get_steps_reg(struct bma400_data *data, int *val) ret = regmap_bulk_read(data->regmap, BMA400_STEP_CNT0_REG, steps_raw, BMA400_STEP_RAW_LEN); - if (ret) + if (ret) { + kfree(steps_raw); return ret; + } *val = get_unaligned_le24(steps_raw); kfree(steps_raw); return IIO_VAL_INT; @@ -869,18 +871,6 @@ static int bma400_init(struct bma400_data *data) unsigned int val; int ret; - /* Try to read chip_id register. It must return 0x90. */ - ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val); - if (ret) { - dev_err(data->dev, "Failed to read chip id register\n"); - return ret; - } - - if (val != BMA400_ID_REG_VAL) { - dev_err(data->dev, "Chip ID mismatch\n"); - return -ENODEV; - } - data->regulators[BMA400_VDD_REGULATOR].supply = "vdd"; data->regulators[BMA400_VDDIO_REGULATOR].supply = "vddio"; ret = devm_regulator_bulk_get(data->dev, @@ -906,6 +896,18 @@ static int bma400_init(struct bma400_data *data) if (ret) return ret; + /* Try to read chip_id register. It must return 0x90. 
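 * (Note the reordering around this comment: the ID probe only works once
 * the vdd/vddio regulators above have been enabled and the chip has
 * finished its power-on delay, so any bus access, this read included,
 * must come after the power-up sequence rather than before it.)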
*/ + ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val); + if (ret) { + dev_err(data->dev, "Failed to read chip id register\n"); + return ret; + } + + if (val != BMA400_ID_REG_VAL) { + dev_err(data->dev, "Chip ID mismatch\n"); + return -ENODEV; + } + ret = bma400_get_power_mode(data); if (ret) { dev_err(data->dev, "Failed to get the initial power-mode\n"); diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 9341e0e0eb55..998e8bcc06e1 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -202,6 +202,8 @@ static int aspeed_adc_set_trim_data(struct iio_dev *indio_dev) ((scu_otp) & (data->model_data->trim_locate->field)) >> __ffs(data->model_data->trim_locate->field); + if (!trimming_val) + trimming_val = 0x8; } dev_dbg(data->dev, "trimming val = %d, offset = %08x, fields = %08x\n", @@ -563,12 +565,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) if (ret) return ret; - if (of_find_property(data->dev->of_node, "aspeed,trim-data-valid", - NULL)) { - ret = aspeed_adc_set_trim_data(indio_dev); - if (ret) - return ret; - } + ret = aspeed_adc_set_trim_data(indio_dev); + if (ret) + return ret; if (of_find_property(data->dev->of_node, "aspeed,battery-sensing", NULL)) { diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 33e251552214..870f4cb60923 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -2307,11 +2307,9 @@ static int at91_adc_temp_sensor_init(struct at91_adc_state *st, clb->p6 = buf[AT91_ADC_TS_CLB_IDX_P6]; /* - * We prepare here the conversion to milli and also add constant - * factor (5 degrees Celsius) to p1 here to avoid doing it on - * hotpath. + * We prepare here the conversion to milli to avoid doing it on hotpath. 
*/ - clb->p1 = clb->p1 * 1000 + 5000; + clb->p1 = clb->p1 * 1000; free_buf: kfree(buf); diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 532daaa6f943..366e252ebeb0 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -634,8 +634,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev, trig->ops = &at91_adc_trigger_ops; ret = iio_trigger_register(trig); - if (ret) + if (ret) { + iio_trigger_free(trig); return NULL; + } return trig; } diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index 30a31f185d08..88e947f300cf 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -57,7 +57,8 @@ static struct iio_map mp2629_adc_maps[] = { MP2629_MAP(SYSTEM_VOLT, "system-volt"), MP2629_MAP(INPUT_VOLT, "input-volt"), MP2629_MAP(BATT_CURRENT, "batt-current"), - MP2629_MAP(INPUT_CURRENT, "input-current") + MP2629_MAP(INPUT_CURRENT, "input-current"), + { } }; static int mp2629_read_raw(struct iio_dev *indio_dev, @@ -74,7 +75,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - if (chan->address == MP2629_INPUT_VOLT) + if (chan->channel == MP2629_INPUT_VOLT) rval &= GENMASK(6, 0); *val = rval; return IIO_VAL_INT; diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 3bb4028c5d74..df3bc5c3d378 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - unsigned int reg = afe4403_channel_values[chan->address]; - unsigned int field = afe4403_channel_leds[chan->address]; + unsigned int reg, field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + reg = afe4403_channel_values[chan->address]; ret = afe4403_read(afe, reg, val); if (ret) return ret; @@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + field = afe4403_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 8fca787b2524..836da31b7e30 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int value_reg = afe4404_channel_values[chan->address]; - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int value_reg, led_field, offdac_field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + value_reg = afe4404_channel_values[chan->address]; ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; ret = regmap_field_read(afe->fields[offdac_field], val); if (ret) return ret; @@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; @@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { 
struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int led_field, offdac_field; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; return regmap_field_write(afe->fields[led_field], val); } break; diff --git a/drivers/iio/imu/bno055/bno055.c b/drivers/iio/imu/bno055/bno055.c index 307557a609e3..52744dd98e65 100644 --- a/drivers/iio/imu/bno055/bno055.c +++ b/drivers/iio/imu/bno055/bno055.c @@ -632,7 +632,7 @@ static int bno055_set_regmask(struct bno055_priv *priv, int val, int val2, return -EINVAL; } delta = abs(tbl_val - req_val); - if (delta < best_delta || first) { + if (first || delta < best_delta) { best_delta = delta; hwval = i; first = false; diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c index 994f03a71520..d86a3305d9e8 100644 --- a/drivers/iio/industrialio-sw-trigger.c +++ b/drivers/iio/industrialio-sw-trigger.c @@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) t->group = configfs_register_default_group(iio_triggers_group, t->name, &iio_trigger_type_group_type); - if (IS_ERR(t->group)) + if (IS_ERR(t->group)) { + mutex_lock(&iio_trigger_types_lock); + list_del(&t->list); + mutex_unlock(&iio_trigger_types_lock); ret = PTR_ERR(t->group); + } return ret; } diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7cf6e8490123..0d4447df7200 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -293,6 +293,8 @@ config RPR0521 tristate "ROHM RPR0521 ALS and proximity sensor driver" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for ROHM's RPR0521 ambient light and proximity sensor device. 
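A note on the afe4403/afe4404 hunks above: chan->address is only a valid
index into the per-driver lookup tables for the channel/mask pairs that
the switch statements accept, so the table reads are moved inside the
matching case arms instead of being done unconditionally at function
entry. A reduced sketch of the pattern, using the driver's own table
names:

    unsigned int field;

    switch (chan->type) {
    case IIO_CURRENT:
        if (mask == IIO_CHAN_INFO_RAW) {
            /* safe: only validated channels index the table */
            field = afe4404_channel_leds[chan->address];
            return regmap_field_read(afe->fields[field], val);
        }
        break;
    default:
        break;
    }
    return -EINVAL;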
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b62c139baf41..38d4c7644bef 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -54,9 +54,6 @@ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 #define APDS9960_REG_CONFIG_2 0x90 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 - #define APDS9960_REG_ID 0x92 #define APDS9960_REG_STATUS 0x93 @@ -77,6 +74,9 @@ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 #define APDS9960_REG_GCONF_2 0xa3 +#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 +#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 + #define APDS9960_REG_GOFFSET_U 0xa4 #define APDS9960_REG_GOFFSET_D 0xa5 #define APDS9960_REG_GPULSE 0xa6 @@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) } ret = regmap_update_bits(data->regmap, - APDS9960_REG_CONFIG_2, - APDS9960_REG_CONFIG_2_GGAIN_MASK, - idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); + APDS9960_REG_GCONF_2, + APDS9960_REG_GCONF_2_GGAIN_MASK, + idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); if (!ret) data->pxs_gain = idx; mutex_unlock(&data->lock); diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h index cbc9349c342a..550b75b7186f 100644 --- a/drivers/iio/pressure/ms5611.h +++ b/drivers/iio/pressure/ms5611.h @@ -25,13 +25,6 @@ enum { MS5607, }; -struct ms5611_chip_info { - u16 prom[MS5611_PROM_WORDS_NB]; - - int (*temp_and_pressure_compensate)(struct ms5611_chip_info *chip_info, - s32 *temp, s32 *pressure); -}; - /* * OverSampling Rate descriptor. * Warning: cmd MUST be kept aligned on a word boundary (see @@ -50,12 +43,15 @@ struct ms5611_state { const struct ms5611_osr *pressure_osr; const struct ms5611_osr *temp_osr; + u16 prom[MS5611_PROM_WORDS_NB]; + int (*reset)(struct ms5611_state *st); int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word); int (*read_adc_temp_and_pressure)(struct ms5611_state *st, s32 *temp, s32 *pressure); - struct ms5611_chip_info *chip_info; + int (*compensate_temp_and_pressure)(struct ms5611_state *st, s32 *temp, + s32 *pressure); struct regulator *vdd; }; diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 717521de66c4..c564a1d6cafe 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -85,7 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) struct ms5611_state *st = iio_priv(indio_dev); for (i = 0; i < MS5611_PROM_WORDS_NB; i++) { - ret = st->read_prom_word(st, i, &st->chip_info->prom[i]); + ret = st->read_prom_word(st, i, &st->prom[i]); if (ret < 0) { dev_err(&indio_dev->dev, "failed to read prom at %d\n", i); @@ -93,7 +93,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) } } - if (!ms5611_prom_is_valid(st->chip_info->prom, MS5611_PROM_WORDS_NB)) { + if (!ms5611_prom_is_valid(st->prom, MS5611_PROM_WORDS_NB)) { dev_err(&indio_dev->dev, "PROM integrity check failed\n"); return -ENODEV; } @@ -114,21 +114,20 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev, return ret; } - return st->chip_info->temp_and_pressure_compensate(st->chip_info, - temp, pressure); + return st->compensate_temp_and_pressure(st, temp, pressure); } -static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5611_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = 
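/*
 * Context for the ms5611 refactor around this hunk: the shared, writable
 * chip_info table is gone; the PROM words now live in the per-device
 * state, and probe selects one compensation callback per variant, i.e.
 * effectively:
 *
 *     st->compensate_temp_and_pressure = (type == MS5607)
 *         ? ms5607_temp_and_pressure_compensate
 *         : ms5611_temp_and_pressure_compensate;
 */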
((s64)chip_info->prom[2] << 16) + ((chip_info->prom[4] * dt) >> 7); - sens = ((s64)chip_info->prom[1] << 15) + ((chip_info->prom[3] * dt) >> 8); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 16) + ((st->prom[4] * dt) >> 7); + sens = ((s64)st->prom[1] << 15) + ((st->prom[3] * dt) >> 8); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2; @@ -154,17 +153,17 @@ static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_inf return 0; } -static int ms5607_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5607_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = ((s64)chip_info->prom[2] << 17) + ((chip_info->prom[4] * dt) >> 6); - sens = ((s64)chip_info->prom[1] << 16) + ((chip_info->prom[3] * dt) >> 7); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 17) + ((st->prom[4] * dt) >> 6); + sens = ((s64)st->prom[1] << 16) + ((st->prom[3] * dt) >> 7); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2, tmp; @@ -342,15 +341,6 @@ static int ms5611_write_raw(struct iio_dev *indio_dev, static const unsigned long ms5611_scan_masks[] = {0x3, 0}; -static struct ms5611_chip_info chip_info_tbl[] = { - [MS5611] = { - .temp_and_pressure_compensate = ms5611_temp_and_pressure_compensate, - }, - [MS5607] = { - .temp_and_pressure_compensate = ms5607_temp_and_pressure_compensate, - } -}; - static const struct iio_chan_spec ms5611_channels[] = { { .type = IIO_PRESSURE, @@ -433,7 +423,20 @@ int ms5611_probe(struct iio_dev *indio_dev, struct device *dev, struct ms5611_state *st = iio_priv(indio_dev); mutex_init(&st->lock); - st->chip_info = &chip_info_tbl[type]; + + switch (type) { + case MS5611: + st->compensate_temp_and_pressure = + ms5611_temp_and_pressure_compensate; + break; + case MS5607: + st->compensate_temp_and_pressure = + ms5607_temp_and_pressure_compensate; + break; + default: + return -EINVAL; + } + st->temp_osr = &ms5611_avail_temp_osr[ARRAY_SIZE(ms5611_avail_temp_osr) - 1]; st->pressure_osr = diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index 432e912096f4..a0a7205c9c3a 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -91,7 +91,7 @@ static int ms5611_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); spi->mode = SPI_MODE_0; - spi->max_speed_hz = 20000000; + spi->max_speed_hz = min(spi->max_speed_hz, 20000000U); spi->bits_per_word = 8; ret = spi_setup(spi); if (ret < 0) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index d6c5e9644738..6b05eed41612 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -203,9 +203,13 @@ static int iio_sysfs_trigger_remove(int id) static int __init iio_sysfs_trig_init(void) { + int ret; device_initialize(&iio_sysfs_trig_dev); dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger"); - return device_add(&iio_sysfs_trig_dev); + ret = device_add(&iio_sysfs_trig_dev); + if (ret) + put_device(&iio_sysfs_trig_dev); + return ret; } module_init(iio_sysfs_trig_init); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index bc97958818bb..e6e021af6aa9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -230,8 
+230,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, struct ib_umem_odp *umem_odp = container_of(mni, struct ib_umem_odp, notifier); struct mlx5_ib_mr *mr; - const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / - sizeof(struct mlx5_mtt)) - 1; + const u64 umr_block_mask = MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT - 1; u64 idx = 0, blk_start_idx = 0; u64 invalidations = 0; unsigned long start; diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index d5105b5c9979..029e9536ec28 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -418,7 +418,7 @@ int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, } #define MLX5_MAX_UMR_CHUNK \ - ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT) + ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 /* @@ -428,11 +428,11 @@ int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, */ static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) { - const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size; + const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size; size_t size; void *res = NULL; - static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0); /* * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the @@ -666,7 +666,7 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) } final_size = (void *)cur_mtt - (void *)mtt; - sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT); memset(cur_mtt, 0, sg.length - final_size); mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); @@ -690,7 +690,7 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) ? sizeof(struct mlx5_klm) : sizeof(struct mlx5_mtt); - const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; + const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size; struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; const int page_mask = page_align - 1; @@ -711,7 +711,7 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, if (WARN_ON(!mr->umem->is_odp)) return -EINVAL; - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, + /* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ if (idx & page_mask) { @@ -748,7 +748,7 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); + sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT); if (pages_mapped + pages_iter >= pages_to_map) mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index b86de1312512..84b87526b7ba 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -273,22 +273,22 @@ int iforce_init_device(struct device *parent, u16 bustype, * Get device info. 
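 * (On the rewritten conditions below: iforce_get_id_packet() returns 0
 * on success, so a reply may be parsed only when the call succeeded AND
 * enough bytes arrived. The old form "!get_id(...) || len < 3" parsed
 * the buffer for short or even failed replies; the fixed form is
 * "!get_id(...) && len >= 3".)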
*/ - if (!iforce_get_id_packet(iforce, 'M', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'M', buf, &len) && len >= 3) input_dev->id.vendor = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet M\n"); - if (!iforce_get_id_packet(iforce, 'P', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'P', buf, &len) && len >= 3) input_dev->id.product = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet P\n"); - if (!iforce_get_id_packet(iforce, 'B', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'B', buf, &len) && len >= 3) iforce->device_memory.end = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet B\n"); - if (!iforce_get_id_packet(iforce, 'N', buf, &len) || len < 2) + if (!iforce_get_id_packet(iforce, 'N', buf, &len) && len >= 2) ff_effects = buf[1]; else dev_warn(&iforce->dev->dev, "Device does not respond to id packet N\n"); diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 480476121c01..09489380afda 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -18,6 +18,10 @@ #include <linux/gpio.h> #include <linux/platform_device.h> +static bool use_low_level_irq; +module_param(use_low_level_irq, bool, 0444); +MODULE_PARM_DESC(use_low_level_irq, "Use low-level triggered IRQ instead of edge triggered"); + struct soc_button_info { const char *name; int acpi_index; @@ -74,6 +78,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { }, }, { + /* Acer Switch V 10 SW5-017, same issue as Acer Switch 10 SW5-012. */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + }, + { /* * Acer One S1003. _LID method messes with power-button GPIO * IRQ settings, leading to a non working power-button. 
@@ -164,7 +175,8 @@ soc_button_device_create(struct platform_device *pdev, } /* See dmi_use_low_level_irq[] comment */ - if (!autorepeat && dmi_check_system(dmi_use_low_level_irq)) { + if (!autorepeat && (use_low_level_irq || + dmi_check_system(dmi_use_low_level_irq))) { irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); gpio_keys[n_buttons].irq = irq; gpio_keys[n_buttons].gpio = -ENOENT; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index fa021af8506e..b0f776448a1c 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -192,6 +192,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 0778dc03cd9e..46f8a694291e 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -115,18 +115,18 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_NEVER) }, { - /* ASUS ZenBook UX425UA */ + /* ASUS ZenBook UX425UA/QA */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425"), }, .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, { - /* ASUS ZenBook UM325UA */ + /* ASUS ZenBook UM325UA/QA */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325"), }, .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index f9486495baef..6dac7c1853a5 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1543,8 +1543,6 @@ static int i8042_probe(struct platform_device *dev) { int error; - i8042_platform_device = dev; - if (i8042_reset == I8042_RESET_ALWAYS) { error = i8042_controller_selftest(); if (error) @@ -1582,7 +1580,6 @@ static int i8042_probe(struct platform_device *dev) i8042_free_aux_ports(); /* in case KBD failed but AUX not */ i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return error; } @@ -1592,7 +1589,6 @@ static int i8042_remove(struct platform_device *dev) i8042_unregister_ports(); i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return 0; } diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index a33cc7950cf5..c281e49826c2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -1158,6 +1158,7 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); +retry_read_config: /* Read configuration and apply touchscreen parameters */ goodix_read_config(ts); @@ -1165,6 +1166,16 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) touchscreen_parse_properties(ts->input_dev, true, &ts->prop); if (!ts->prop.max_x || !ts->prop.max_y || !ts->max_touch_num) { + if (!ts->reset_controller_at_probe && + ts->irq_pin_access_method != IRQ_PIN_ACCESS_NONE) { + dev_info(&ts->client->dev, "Config not set, resetting controller\n"); + /* Retry after a 
controller reset */ + ts->reset_controller_at_probe = true; + error = goodix_reset(ts); + if (error) + return error; + goto retry_read_config; + } dev_err(&ts->client->dev, "Invalid config (%d, %d, %d), using defaults\n", ts->prop.max_x, ts->prop.max_y, ts->max_touch_num); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 48cdcd0a5cf3..996a8b5ee5ee 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -959,11 +959,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) { - pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; - if (iommu_is_dma_domain(&domain->domain)) - pteval |= DMA_FL_PTE_ACCESS; - } + if (domain_use_first_level(domain)) + pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c30ddac40ee5..e13d7e5273e1 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -642,7 +642,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). */ - if (pasid != PASID_RID2PASID) + if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -685,7 +685,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, * We should set SRE bit as well since the addresses are expected * to be GPAs. 
*/ - pasid_set_sre(pte); + if (ecap_srs(iommu->ecap)) + pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c5ef818ca36..bb786c39545e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1858,6 +1858,8 @@ bad: dm_io_client_destroy(c->dm_io); bad_dm_io: mutex_destroy(&c->lock); + if (c->no_sleep) + static_branch_dec(&no_sleep_enabled); kfree(c); bad_client: return ERR_PTR(r); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 159c6806c19b..2653516bcdef 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3630,6 +3630,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->physical_block_size = max_t(unsigned, limits->physical_block_size, cc->sector_size); limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); + limits->dma_alignment = limits->logical_block_size - 1; } static struct target_type crypt_target = { diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aaf2472df6e5..e97e9f97456d 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -263,6 +263,7 @@ struct dm_integrity_c { struct completion crypto_backoff; + bool wrote_to_journal; bool journal_uptodate; bool just_formatted; bool recalculate_flag; @@ -2375,6 +2376,8 @@ static void integrity_commit(struct work_struct *w) if (!commit_sections) goto release_flush_bios; + ic->wrote_to_journal = true; + i = commit_start; for (n = 0; n < commit_sections; n++) { for (j = 0; j < ic->journal_section_entries; j++) { @@ -2591,10 +2594,6 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; - /* the following test is not needed, but it tests the replay code */ - if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) - return; - spin_lock_irq(&ic->endio_wait.lock); write_start = ic->committed_section; write_sections = ic->n_committed_sections; @@ -3101,10 +3100,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti) drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { - if (ic->meta_dev) - queue_work(ic->writer_wq, &ic->writer_work); + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); + if (ic->wrote_to_journal) { + init_journal(ic, ic->free_section, + ic->journal_sections - ic->free_section, ic->commit_seq); + if (ic->free_section) { + init_journal(ic, 0, ic->free_section, + next_commit_seq(ic->commit_seq)); + } + } } if (ic->mode == 'B') { @@ -3132,6 +3138,8 @@ static void dm_integrity_resume(struct dm_target *ti) DEBUG_print("resume\n"); + ic->wrote_to_journal = false; + if (ic->provided_data_sectors != old_provided_data_sectors) { if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && @@ -3370,6 +3378,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); + limits->dma_alignment = limits->logical_block_size - 1; } } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 6b3f867d0b70..3bfc1583c20a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -655,7 +655,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param) size_t *needed = needed_param; *needed += sizeof(struct dm_target_versions); - *needed += 
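/*
 * Sizing rule behind the fix just below: a C string n occupies
 * strlen(n) + 1 bytes once its terminating NUL is copied out, e.g.
 * "crypt" takes 6 bytes. The size estimate therefore gains the missing
 * "+ 1", and the iterator's end pointer is bounded by the bytes actually
 * reserved ("needed") rather than the caller-supplied length.
 */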
strlen(tt->name); + *needed += strlen(tt->name) + 1; *needed += ALIGN_MASK; } @@ -720,7 +720,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char iter_info.old_vers = NULL; iter_info.vers = vers; iter_info.flags = 0; - iter_info.end = (char *)vers+len; + iter_info.end = (char *)vers + needed; /* * Now loop through filling out the names & versions. diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 20fd688f72e7..178e13a5b059 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -875,6 +875,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); limits->io_min = limits->physical_block_size; + limits->dma_alignment = limits->logical_block_size - 1; } #if IS_ENABLED(CONFIG_FS_DAX) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index e71068f7759b..844264e1b88c 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -854,6 +854,7 @@ static int qp_notify_peer_local(bool attach, struct vmci_handle handle) u32 context_id = vmci_get_context_id(); struct vmci_event_qp ev; + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); @@ -1467,6 +1468,7 @@ static int qp_notify_peer(bool attach, * kernel. */ + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 95fa8fb1d45f..c5de202f530a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1134,7 +1134,13 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) mmc_power_cycle(host, ocr); } else { bit = fls(ocr) - 1; - ocr &= 3 << bit; + /* + * The bit variable represents the highest voltage bit set in + * the OCR register. + * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), + * we must shift the mask '3' with (bit - 1). 
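 *
 * Worked example under the usual OCR layout (bit 20 = 3.2-3.3 V,
 * bit 21 = 3.3-3.4 V): if bit 21 is the highest bit set, the new mask
 * 3 << 20 keeps bits 20 and 21, i.e. both adjacent windows the card
 * advertises, while the old 3 << 21 also selected the never-set bit 22
 * and therefore collapsed to a single voltage window.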
+ */ + ocr &= 3 << (bit - 1); if (bit != host->ios.vdd) dev_warn(mmc_dev(host), "exceeding card's volts\n"); } diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 34ea1acbb3cc..28dc65023fa9 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1749,6 +1749,8 @@ static int amd_probe(struct sdhci_pci_chip *chip) } } + pci_dev_put(smbus_dev); + if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index ad457cd9cbaa..bca1d095b759 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -32,6 +32,7 @@ #define O2_SD_CAPS 0xE0 #define O2_SD_ADMA1 0xE2 #define O2_SD_ADMA2 0xE7 +#define O2_SD_MISC_CTRL2 0xF0 #define O2_SD_INF_MOD 0xF1 #define O2_SD_MISC_CTRL4 0xFC #define O2_SD_MISC_CTRL 0x1C0 @@ -877,6 +878,12 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* Set Tuning Windows to 5 */ pci_write_config_byte(chip->pdev, O2_SD_TUNING_CTRL, 0x55); + //Adjust 1st and 2nd CD debounce time + pci_read_config_dword(chip->pdev, O2_SD_MISC_CTRL2, &scratch_32); + scratch_32 &= 0xFFE7FFFF; + scratch_32 |= 0x00180000; + pci_write_config_dword(chip->pdev, O2_SD_MISC_CTRL2, scratch_32); + pci_write_config_dword(chip->pdev, O2_SD_DETECT_SETTING, 1); /* Lock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 24150c933fcb..dc3253b318da 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; + int ret = -ENOMEM; dev_dbg(&p_dev->dev, "com20020_attach()\n"); @@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) info->dev = dev; p_dev->priv = info; - return com20020_config(p_dev); + ret = com20020_config(p_dev); + if (ret) + goto fail_config; + + return 0; +fail_config: + free_arcdev(dev); fail_alloc_dev: kfree(info); fail_alloc_info: - return -ENOMEM; + return ret; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1cd4e71916f8..e01bb0412f1c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2524,12 +2524,21 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in /* called with rcu_read_lock() */ static int bond_miimon_inspect(struct bonding *bond) { + bool ignore_updelay = false; int link_state, commit = 0; struct list_head *iter; struct slave *slave; - bool ignore_updelay; - ignore_updelay = !rcu_dereference(bond->curr_active_slave); + if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { + ignore_updelay = !rcu_dereference(bond->curr_active_slave); + } else { + struct bond_up_slave *usable_slaves; + + usable_slaves = rcu_dereference(bond->usable_slaves); + + if (usable_slaves && usable_slaves->count == 0) + ignore_updelay = true; + } bond_for_each_slave_rcu(bond, slave, iter) { bond_propose_link_state(slave, BOND_LINK_NOCHANGE); @@ -3224,16 +3233,23 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct slave *curr_active_slave, *curr_arp_slave; - struct icmp6hdr *hdr = icmp6_hdr(skb); struct in6_addr *saddr, 
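/*
 * The rework below stops dereferencing icmp6_hdr() on an unvalidated
 * skb: skb_header_pointer() copies the IPv6 + ICMPv6 headers into the
 * on-stack _combined buffer when the skb is non-linear and returns NULL
 * when the packet is too short, so malformed frames are dropped up
 * front:
 *
 *     combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined);
 *     if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP ||
 *         combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)
 *         goto out;
 */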
*daddr; + struct { + struct ipv6hdr ip6; + struct icmp6hdr icmp6; + } *combined, _combined; if (skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + skb->pkt_type == PACKET_LOOPBACK) + goto out; + + combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); + if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || + combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) goto out; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; + saddr = &combined->ip6.saddr; + daddr = &combined->ip6.daddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 094197780776..ed3d0b8989a0 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb) { lockdep_assert_held(&elm->lock); - if (!netif_running(elm->dev)) + if (!netif_running(elm->dev)) { + kfree_skb(skb); return; + } /* Queue for NAPI pickup. * rx-offload will update stats and LEDs for us. diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 944529590ffe..0bdec28e7c85 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1919,7 +1919,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->hclk = devm_clk_get(cdev->dev, "hclk"); cdev->cclk = devm_clk_get(cdev->dev, "cclk"); - if (IS_ERR(cdev->cclk)) { + if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { dev_err(cdev->dev, "no clock found\n"); ret = -ENODEV; } diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..f2219aa2824b 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) - return ret; + goto err_free_dev; mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); @@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = m_can_class_register(mcan_class); if (ret) - goto err; + goto err_free_irq; /* Enable interrupt control at CAN wrapper IP */ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); @@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; -err: +err_free_irq: pci_free_irq_vectors(pci); +err_free_dev: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) writel(0x0, priv->base +
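/*
 * The relabelled m_can_pci error path above unwinds in exact reverse
 * order of acquisition, and remove() now mirrors the full teardown:
 *
 *     err_free_irq:
 *         pci_free_irq_vectors(pci);
 *     err_free_dev:
 *         m_can_class_free_dev(mcan_class->net);
 *         return ret;
 */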
CTL_CSR_INT_CTL_OFFSET); m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); pci_free_irq_vectors(pci); } diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 25f863b4f5f0..ddb7c5735c9a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) netdev->dev_port = channel_idx; ret = register_candev(netdev); - if (ret) + if (ret) { + es58x_dev->netdev[channel_idx] = NULL; + free_candev(netdev); return ret; + } netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 218b098b261d..47619e9cb005 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 07507b4820d7..c26755f662c1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -18,6 +18,7 @@ config NET_DSA_BCM_SF2 config NET_DSA_LOOP tristate "DSA mock-up Ethernet switch chip support" + select NET_DSA_TAG_NONE select FIXED_PHY help This enables support for a fake mock-up switch chip which @@ -99,6 +100,7 @@ config NET_DSA_SMSC_LAN9303_MDIO config NET_DSA_VITESSE_VSC73XX tristate + select NET_DSA_TAG_NONE select FIXED_PHY select VITESSE_PHY select GPIOLIB diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig index 90b525160b71..ebaa4a80d544 100644 --- a/drivers/net/dsa/b53/Kconfig +++ b/drivers/net/dsa/b53/Kconfig @@ -2,6 +2,7 @@ menuconfig B53 tristate "Broadcom BCM53xx managed switch support" depends on NET_DSA + select NET_DSA_TAG_NONE select NET_DSA_TAG_BRCM select 
NET_DSA_TAG_BRCM_LEGACY select NET_DSA_TAG_BRCM_PREPEND diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 951f7935c872..595a548bb0a8 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1176,11 +1176,6 @@ static int hellcreek_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct hellcreek *hellcreek = ds->priv; - int ret; - - ret = devlink_info_driver_name_put(req, "hellcreek"); - if (ret) - return ret; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 438e46af03e9..80f07bd20593 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c index 7d746cd9ca1b..1cb41c36bd47 100644 --- a/drivers/net/dsa/lan9303_i2c.c +++ b/drivers/net/dsa/lan9303_i2c.c @@ -29,8 +29,7 @@ static const struct regmap_config lan9303_i2c_regmap_config = { .cache_type = REGCACHE_NONE, }; -static int lan9303_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int lan9303_i2c_probe(struct i2c_client *client) { struct lan9303_i2c *sw_dev; int ret; @@ -106,7 +105,7 @@ static struct i2c_driver lan9303_i2c_driver = { .name = "LAN9303_I2C", .of_match_table = of_match_ptr(lan9303_i2c_of_match), }, - .probe = lan9303_i2c_probe, + .probe_new = lan9303_i2c_probe, .remove = lan9303_i2c_remove, .shutdown = lan9303_i2c_shutdown, .id_table = lan9303_i2c_id, diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig index 06b1efdb5e7d..913f83ef013c 100644 --- a/drivers/net/dsa/microchip/Kconfig +++ b/drivers/net/dsa/microchip/Kconfig @@ -3,6 +3,7 @@ menuconfig NET_DSA_MICROCHIP_KSZ_COMMON tristate "Microchip KSZ8795/KSZ9477/LAN937x series switch support" depends on NET_DSA select NET_DSA_TAG_KSZ + select NET_DSA_TAG_NONE help This driver adds support for Microchip KSZ9477 series switch and KSZ8795/KSZ88x3 switch chips. 
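
The lan9303_i2c change above, and the ksz9477_i2c and xrs700x_i2c changes below, are the same mechanical conversion: the I2C core is phasing out the two-argument probe() callback, and a driver that never reads the i2c_device_id argument moves to probe_new() by simply dropping it. A minimal sketch of the converted shape, using hypothetical "foo" names that are not part of this series; a driver that does still need its match entry can recover it with i2c_client_get_device_id():

#include <linux/i2c.h>
#include <linux/module.h>

static const struct i2c_device_id foo_i2c_id[] = {
	{ "foo", 0 },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(i2c, foo_i2c_id);

/* New-style probe: the i2c_device_id parameter is gone. */
static int foo_i2c_probe(struct i2c_client *client)
{
	/* Only needed if the driver still uses the match entry. */
	const struct i2c_device_id *id = i2c_client_get_device_id(client);

	dev_info(&client->dev, "bound as %s\n", id ? id->name : "unknown");
	return 0;
}

static struct i2c_driver foo_i2c_driver = {
	.driver = {
		.name = "foo-i2c",
	},
	.probe_new = foo_i2c_probe,	/* was: .probe = foo_i2c_probe */
	.id_table = foo_i2c_id,
};
module_i2c_driver(foo_i2c_driver);

MODULE_LICENSE("GPL");

The drivers converted in this series ignore the id argument entirely, so for them the conversion amounts to the signature change plus the .probe_new assignment shown here.
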
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index db4aec0a51dc..c1a633ca1e6d 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -14,8 +14,7 @@ KSZ_REGMAP_TABLE(ksz9477, not_used, 16, 0, 0); -static int ksz9477_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *i2c_id) +static int ksz9477_i2c_probe(struct i2c_client *i2c) { struct regmap_config rc; struct ksz_device *dev; @@ -120,7 +119,7 @@ static struct i2c_driver ksz9477_i2c_driver = { .name = "ksz9477-switch", .of_match_table = of_match_ptr(ksz9477_dt_ids), }, - .probe = ksz9477_i2c_probe, + .probe_new = ksz9477_i2c_probe, .remove = ksz9477_i2c_remove, .shutdown = ksz9477_i2c_shutdown, .id_table = ksz9477_i2c_id, diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 1266eabee086..a08dab75e0c0 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -821,11 +821,6 @@ int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - int err; - - err = devlink_info_driver_name_put(req, "mv88e6xxx"); - if (err) - return err; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 44e160f32067..3b738cb2ae6e 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1363,7 +1363,6 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return -ENOMEM; ocelot->map = felix->info->map; - ocelot->stats_layout = felix->info->stats_layout; ocelot->num_mact_rows = felix->info->num_mact_rows; ocelot->vcap = felix->info->vcap; ocelot->vcap_pol.base = felix->info->vcap_pol_base; diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 42338116eed0..be22d6ccd7c8 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -28,7 +28,6 @@ struct felix_info { const struct ocelot_ops *ops; const u32 *port_modes; int num_mact_rows; - const struct ocelot_stat_layout *stats_layout; int num_ports; int num_tx_queues; struct vcap_props *vcap; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index b0ae8d6156f6..01ac70fd7ddf 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -565,10 +565,6 @@ static const struct reg_field vsc9959_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 7, 4), }; -static const struct ocelot_stat_layout vsc9959_stats_layout[OCELOT_NUM_STATS] = { - OCELOT_COMMON_STATS, -}; - static const struct vcap_field vsc9959_vcap_es0_keys[] = { [VCAP_ES0_EGR_PORT] = { 0, 3}, [VCAP_ES0_IGR_PORT] = { 3, 3}, @@ -2546,7 +2542,6 @@ static const struct felix_info felix_info_vsc9959 = { .regfields = vsc9959_regfields, .map = vsc9959_regmap, .ops = &vsc9959_ops, - .stats_layout = vsc9959_stats_layout, .vcap = vsc9959_vcap_props, .vcap_pol_base = VSC9959_VCAP_POLICER_BASE, .vcap_pol_max = VSC9959_VCAP_POLICER_MAX, diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 6500c1697dd6..88ed3a2e487a 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -543,10 +543,6 @@ static const struct reg_field vsc9953_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 
1, 11, 4), }; -static const struct ocelot_stat_layout vsc9953_stats_layout[OCELOT_NUM_STATS] = { - OCELOT_COMMON_STATS, -}; - static const struct vcap_field vsc9953_vcap_es0_keys[] = { [VCAP_ES0_EGR_PORT] = { 0, 4}, [VCAP_ES0_IGR_PORT] = { 4, 4}, @@ -970,7 +966,6 @@ static const struct felix_info seville_info_vsc9953 = { .regfields = vsc9953_regfields, .map = vsc9953_regmap, .ops = &vsc9953_ops, - .stats_layout = vsc9953_stats_layout, .vcap = vsc9953_vcap_props, .vcap_pol_base = VSC9953_VCAP_POLICER_BASE, .vcap_pol_max = VSC9953_VCAP_POLICER_MAX, diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 10c6fea1227f..da532614f34a 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -120,16 +120,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; - int rc; - - rc = devlink_info_driver_name_put(req, "sja1105"); - if (rc) - return rc; - rc = devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, - priv->info->name); - return rc; + return devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + priv->info->name); } int sja1105_devlink_setup(struct dsa_switch *ds) diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 215dd17ca790..4059fcc8c832 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg) u32 tmp; int rc; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg, &tmp, NULL); if (rc < 0) @@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg, const struct sja1105_regs *regs = priv->info->regs; u32 tmp = val; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg, &tmp, NULL); } diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c index 54065cdedd35..14ff6887a225 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_i2c.c +++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c @@ -76,8 +76,7 @@ static const struct regmap_config xrs700x_i2c_regmap_config = { .val_format_endian = REGMAP_ENDIAN_BIG }; -static int xrs700x_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *i2c_id) +static int xrs700x_i2c_probe(struct i2c_client *i2c) { struct xrs700x *priv; int ret; @@ -148,7 +147,7 @@ static struct i2c_driver xrs700x_i2c_driver = { .name = "xrs700x-i2c", .of_match_table = of_match_ptr(xrs700x_i2c_dt_ids), }, - .probe = xrs700x_i2c_probe, + .probe_new = xrs700x_i2c_probe, .remove = xrs700x_i2c_remove, .shutdown = xrs700x_i2c_shutdown, .id_table = xrs700x_i2c_id, diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 28b5cae60eb5..66e3af73ec41 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -990,6 +990,7 @@ static int tse_shutdown(struct net_device *dev) int ret; phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); netif_stop_queue(dev); napi_disable(&priv->napi); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a08f221e30d4..ac4ea93bd8dd 100644 --- 
a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include <linux/ptp_clock_kernel.h> @@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8a0af371e7dc..77609dc0a08d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -88,7 +88,7 @@ err_exit: return err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index 99870865f66d..a78c1a168d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key); void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 11d15cd03600..77d4cb4ad782 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -795,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) { - struct pci_dev *dev; struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); + struct pci_dev *dev; + bool pending; if (!vf) return false; dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); - if (dev) - return bnx2x_is_pcie_pending(dev); - return false; + if (!dev) + return false; + pending = bnx2x_is_pcie_pending(dev); + pci_dev_put(dev); + + return pending; } int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 8a6f788f6294..26913dc816d3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -892,10 +892,6 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, u32 ver = 0; int rc; - rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME); - if (rc) - return rc; - if (BNXT_PF(bp) && (bp->flags & BNXT_FLAG_DSN_VALID)) { sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c2f663770a7f..cbf17fcfb7ab 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2007,6 +2007,14 @@ static void bnxt_get_fec_stats(struct net_device *dev, rx = bp->rx_port_stats_ext.sw_stats; fec_stats->corrected_bits.total = *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_corrected_bits)); + + if (bp->fw_rx_stats_ext_size <= BNXT_RX_STATS_EXT_NUM_LEGACY) + return; + + fec_stats->corrected_blocks.total = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_fec_corrected_blocks)); + fec_stats->uncorrectable_blocks.total = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_fec_uncorrectable_blocks)); } static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info, diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 7ded559842e8..b615176338b2 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -72,7 +72,6 @@ static void bcmgenet_mac_config(struct net_device *dev) * Receive clock is provided by the PHY. */ reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg &= ~OOB_DISABLE; reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); @@ -95,10 +94,18 @@ static void bcmgenet_mac_config(struct net_device *dev) */ void bcmgenet_mii_setup(struct net_device *dev) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct phy_device *phydev = dev->phydev; + u32 reg; - if (phydev->link) + if (phydev->link) { bcmgenet_mac_config(dev); + } else { + reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~RGMII_LINK; + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + } + phy_print_status(phydev); } @@ -266,18 +273,20 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) (priv->phy_interface != PHY_INTERFACE_MODE_MOCA); /* This is an external PHY (xMII), so we need to enable the RGMII - * block for the interface to work + * block for the interface to work, unconditionally clear the + * Out-of-band disable since we do not need it. */ + reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~OOB_DISABLE; if (priv->ext_phy) { - reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~ID_MODE_DIS; reg |= id_mode_dis; if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) reg |= RGMII_MODE_EN_V123; else reg |= RGMII_MODE_EN; - bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } + bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); diff --git a/drivers/net/ethernet/brocade/bna/bfa_cs.h b/drivers/net/ethernet/brocade/bna/bfa_cs.h index 8f0ac7b99973..858c92129451 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_cs.h +++ b/drivers/net/ethernet/brocade/bna/bfa_cs.h @@ -18,15 +18,43 @@ /* BFA state machine interfaces */ -typedef void (*bfa_sm_t)(void *sm, int event); - /* For converting from state machine function to state encoding. 
*/ -struct bfa_sm_table { - bfa_sm_t sm; /*!< state machine function */ - int state; /*!< state machine encoding */ - char *name; /*!< state name for display */ -}; -#define BFA_SM(_sm) ((bfa_sm_t)(_sm)) +#define BFA_SM_TABLE(n, s, e, t) \ +struct s; \ +enum e; \ +typedef void (*t)(struct s *, enum e); \ + \ +struct n ## _sm_table_s { \ + t sm; /* state machine function */ \ + int state; /* state machine encoding */ \ + char *name; /* state name for display */ \ +}; \ + \ +static inline int \ +n ## _sm_to_state(struct n ## _sm_table_s *smt, t sm) \ +{ \ + int i = 0; \ + \ + while (smt[i].sm && smt[i].sm != sm) \ + i++; \ + return smt[i].state; \ +} + +BFA_SM_TABLE(iocpf, bfa_iocpf, iocpf_event, bfa_fsm_iocpf_t) +BFA_SM_TABLE(ioc, bfa_ioc, ioc_event, bfa_fsm_ioc_t) +BFA_SM_TABLE(cmdq, bfa_msgq_cmdq, cmdq_event, bfa_fsm_msgq_cmdq_t) +BFA_SM_TABLE(rspq, bfa_msgq_rspq, rspq_event, bfa_fsm_msgq_rspq_t) + +BFA_SM_TABLE(ioceth, bna_ioceth, bna_ioceth_event, bna_fsm_ioceth_t) +BFA_SM_TABLE(enet, bna_enet, bna_enet_event, bna_fsm_enet_t) +BFA_SM_TABLE(ethport, bna_ethport, bna_ethport_event, bna_fsm_ethport_t) +BFA_SM_TABLE(tx, bna_tx, bna_tx_event, bna_fsm_tx_t) +BFA_SM_TABLE(rxf, bna_rxf, bna_rxf_event, bna_fsm_rxf_t) +BFA_SM_TABLE(rx, bna_rx, bna_rx_event, bna_fsm_rx_t) + +#undef BFA_SM_TABLE + +#define BFA_SM(_sm) (_sm) /* State machine with entry actions. */ typedef void (*bfa_fsm_t)(void *fsm, int event); @@ -41,24 +69,12 @@ typedef void (*bfa_fsm_t)(void *fsm, int event); static void oc ## _sm_ ## st ## _entry(otype * fsm) #define bfa_fsm_set_state(_fsm, _state) do { \ - (_fsm)->fsm = (bfa_fsm_t)(_state); \ + (_fsm)->fsm = (_state); \ _state ## _entry(_fsm); \ } while (0) #define bfa_fsm_send_event(_fsm, _event) ((_fsm)->fsm((_fsm), (_event))) -#define bfa_fsm_cmp_state(_fsm, _state) \ - ((_fsm)->fsm == (bfa_fsm_t)(_state)) - -static inline int -bfa_sm_to_state(const struct bfa_sm_table *smt, bfa_sm_t sm) -{ - int i = 0; - - while (smt[i].sm && smt[i].sm != sm) - i++; - return smt[i].state; -} - +#define bfa_fsm_cmp_state(_fsm, _state) ((_fsm)->fsm == (_state)) /* Generic wait counter. 
*/ typedef void (*bfa_wc_resume_t) (void *cbarg); diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index cd933817a0b8..b07522ac3e74 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -114,7 +114,7 @@ bfa_fsm_state_decl(bfa_ioc, disabling, struct bfa_ioc, enum ioc_event); bfa_fsm_state_decl(bfa_ioc, disabled, struct bfa_ioc, enum ioc_event); bfa_fsm_state_decl(bfa_ioc, hwfail, struct bfa_ioc, enum ioc_event); -static struct bfa_sm_table ioc_sm_table[] = { +static struct ioc_sm_table_s ioc_sm_table[] = { {BFA_SM(bfa_ioc_sm_uninit), BFA_IOC_UNINIT}, {BFA_SM(bfa_ioc_sm_reset), BFA_IOC_RESET}, {BFA_SM(bfa_ioc_sm_enabling), BFA_IOC_ENABLING}, @@ -183,7 +183,7 @@ bfa_fsm_state_decl(bfa_iocpf, disabling_sync, struct bfa_iocpf, enum iocpf_event); bfa_fsm_state_decl(bfa_iocpf, disabled, struct bfa_iocpf, enum iocpf_event); -static struct bfa_sm_table iocpf_sm_table[] = { +static struct iocpf_sm_table_s iocpf_sm_table[] = { {BFA_SM(bfa_iocpf_sm_reset), BFA_IOCPF_RESET}, {BFA_SM(bfa_iocpf_sm_fwcheck), BFA_IOCPF_FWMISMATCH}, {BFA_SM(bfa_iocpf_sm_mismatch), BFA_IOCPF_FWMISMATCH}, @@ -2860,12 +2860,12 @@ static enum bfa_ioc_state bfa_ioc_get_state(struct bfa_ioc *ioc) { enum bfa_iocpf_state iocpf_st; - enum bfa_ioc_state ioc_st = bfa_sm_to_state(ioc_sm_table, ioc->fsm); + enum bfa_ioc_state ioc_st = ioc_sm_to_state(ioc_sm_table, ioc->fsm); if (ioc_st == BFA_IOC_ENABLING || ioc_st == BFA_IOC_FAIL || ioc_st == BFA_IOC_INITFAIL) { - iocpf_st = bfa_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm); + iocpf_st = iocpf_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm); switch (iocpf_st) { case BFA_IOCPF_SEMWAIT: @@ -2983,7 +2983,7 @@ bfa_nw_iocpf_timeout(struct bfa_ioc *ioc) { enum bfa_iocpf_state iocpf_st; - iocpf_st = bfa_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm); + iocpf_st = iocpf_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm); if (iocpf_st == BFA_IOCPF_HWINIT) bfa_ioc_poll_fwinit(ioc); diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.h b/drivers/net/ethernet/brocade/bna/bfa_ioc.h index edd0ed5b5332..f30d06ec4ffe 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.h +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.h @@ -147,16 +147,20 @@ struct bfa_ioc_notify { (__notify)->cbarg = (__cbarg); \ } while (0) +enum iocpf_event; + struct bfa_iocpf { - bfa_fsm_t fsm; + void (*fsm)(struct bfa_iocpf *s, enum iocpf_event e); struct bfa_ioc *ioc; bool fw_mismatch_notified; bool auto_recover; u32 poll_time; }; +enum ioc_event; + struct bfa_ioc { - bfa_fsm_t fsm; + void (*fsm)(struct bfa_ioc *s, enum ioc_event e); struct bfa *bfa; struct bfa_pcidev pcidev; struct timer_list ioc_timer; diff --git a/drivers/net/ethernet/brocade/bna/bfa_msgq.h b/drivers/net/ethernet/brocade/bna/bfa_msgq.h index 75343b535798..170a4b4bed96 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_msgq.h +++ b/drivers/net/ethernet/brocade/bna/bfa_msgq.h @@ -55,8 +55,10 @@ enum bfa_msgq_cmdq_flags { BFA_MSGQ_CMDQ_F_DB_UPDATE = 1, }; +enum cmdq_event; + struct bfa_msgq_cmdq { - bfa_fsm_t fsm; + void (*fsm)(struct bfa_msgq_cmdq *s, enum cmdq_event e); enum bfa_msgq_cmdq_flags flags; u16 producer_index; @@ -81,8 +83,10 @@ enum bfa_msgq_rspq_flags { typedef void (*bfa_msgq_mcfunc_t)(void *cbarg, struct bfi_msgq_mhdr *mhdr); +enum rspq_event; + struct bfa_msgq_rspq { - bfa_fsm_t fsm; + void (*fsm)(struct bfa_msgq_rspq *s, enum rspq_event e); enum bfa_msgq_rspq_flags flags; u16 producer_index; diff --git a/drivers/net/ethernet/brocade/bna/bna_enet.c 
b/drivers/net/ethernet/brocade/bna/bna_enet.c index a2c983f56b00..883de0ac8de4 100644 --- a/drivers/net/ethernet/brocade/bna/bna_enet.c +++ b/drivers/net/ethernet/brocade/bna/bna_enet.c @@ -1257,7 +1257,7 @@ bna_enet_mtu_get(struct bna_enet *enet) void bna_enet_enable(struct bna_enet *enet) { - if (enet->fsm != (bfa_sm_t)bna_enet_sm_stopped) + if (enet->fsm != bna_enet_sm_stopped) return; enet->flags |= BNA_ENET_F_ENABLED; @@ -1751,12 +1751,12 @@ bna_ioceth_uninit(struct bna_ioceth *ioceth) void bna_ioceth_enable(struct bna_ioceth *ioceth) { - if (ioceth->fsm == (bfa_fsm_t)bna_ioceth_sm_ready) { + if (ioceth->fsm == bna_ioceth_sm_ready) { bnad_cb_ioceth_ready(ioceth->bna->bnad); return; } - if (ioceth->fsm == (bfa_fsm_t)bna_ioceth_sm_stopped) + if (ioceth->fsm == bna_ioceth_sm_stopped) bfa_fsm_send_event(ioceth, IOCETH_E_ENABLE); } diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c index 2623a0da4682..c05dc7a1c4a1 100644 --- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c +++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c @@ -1956,7 +1956,7 @@ static void bna_rx_stop(struct bna_rx *rx) { rx->rx_flags &= ~BNA_RX_F_ENET_STARTED; - if (rx->fsm == (bfa_fsm_t) bna_rx_sm_stopped) + if (rx->fsm == bna_rx_sm_stopped) bna_rx_mod_cb_rx_stopped(&rx->bna->rx_mod, rx); else { rx->stop_cbfn = bna_rx_mod_cb_rx_stopped; @@ -2535,7 +2535,7 @@ bna_rx_destroy(struct bna_rx *rx) void bna_rx_enable(struct bna_rx *rx) { - if (rx->fsm != (bfa_sm_t)bna_rx_sm_stopped) + if (rx->fsm != bna_rx_sm_stopped) return; rx->rx_flags |= BNA_RX_F_ENABLED; @@ -3523,7 +3523,7 @@ bna_tx_destroy(struct bna_tx *tx) void bna_tx_enable(struct bna_tx *tx) { - if (tx->fsm != (bfa_sm_t)bna_tx_sm_stopped) + if (tx->fsm != bna_tx_sm_stopped) return; tx->flags |= BNA_TX_F_ENABLED; diff --git a/drivers/net/ethernet/brocade/bna/bna_types.h b/drivers/net/ethernet/brocade/bna/bna_types.h index 666b6922e24d..a5ebd7110e07 100644 --- a/drivers/net/ethernet/brocade/bna/bna_types.h +++ b/drivers/net/ethernet/brocade/bna/bna_types.h @@ -312,8 +312,10 @@ struct bna_attr { /* IOCEth */ +enum bna_ioceth_event; + struct bna_ioceth { - bfa_fsm_t fsm; + void (*fsm)(struct bna_ioceth *s, enum bna_ioceth_event e); struct bfa_ioc ioc; struct bna_attr attr; @@ -334,8 +336,10 @@ struct bna_pause_config { enum bna_status rx_pause; }; +enum bna_enet_event; + struct bna_enet { - bfa_fsm_t fsm; + void (*fsm)(struct bna_enet *s, enum bna_enet_event e); enum bna_enet_flags flags; enum bna_enet_type type; @@ -360,8 +364,10 @@ struct bna_enet { /* Ethport */ +enum bna_ethport_event; + struct bna_ethport { - bfa_fsm_t fsm; + void (*fsm)(struct bna_ethport *s, enum bna_ethport_event e); enum bna_ethport_flags flags; enum bna_link_status link_status; @@ -454,13 +460,16 @@ struct bna_txq { }; /* Tx object */ + +enum bna_tx_event; + struct bna_tx { /* This should be the first one */ struct list_head qe; int rid; int hw_id; - bfa_fsm_t fsm; + void (*fsm)(struct bna_tx *s, enum bna_tx_event e); enum bna_tx_flags flags; enum bna_tx_type type; @@ -698,8 +707,11 @@ struct bna_rxp { }; /* RxF structure (hardware Rx Function) */ + +enum bna_rxf_event; + struct bna_rxf { - bfa_fsm_t fsm; + void (*fsm)(struct bna_rxf *s, enum bna_rxf_event e); struct bfa_msgq_cmd_entry msgq_cmd; union { @@ -769,13 +781,16 @@ struct bna_rxf { }; /* Rx object */ + +enum bna_rx_event; + struct bna_rx { /* This should be the first one */ struct list_head qe; int rid; int hw_id; - bfa_fsm_t fsm; + void (*fsm)(struct bna_rx *s, enum bna_rx_event e); 
enum bna_rx_type type; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a685f6ef9422..fd7c80edb6e8 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1797,7 +1797,7 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ret = setup_tx_poll_fn(netdev); if (ret) goto err_poll; @@ -1827,7 +1827,7 @@ static int liquidio_open(struct net_device *netdev) return 0; err_rx_ctrl: - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) cleanup_tx_poll_fn(netdev); err_poll: if (lio->ptp_clock) { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2f6484dc186a..7eb2ddbe9bad 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1436,8 +1436,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, return AE_OK; } - if (strncmp(string.pointer, bgx_sel, 4)) + if (strncmp(string.pointer, bgx_sel, 4)) { + kfree(string.pointer); return AE_OK; + } acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, bgx_acpi_register_phy, NULL, bgx, NULL); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index da9973b711f4..1a5fdd755e9e 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1839,9 +1839,7 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, */ if (prior_data_len) { int i = 0; - u8 *data = NULL; skb_frag_t *f; - u8 *vaddr; int frag_size = 0, frag_delta = 0; while (remaining > 0) { @@ -1853,24 +1851,24 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, i++; } f = &record->frags[i]; - vaddr = kmap_atomic(skb_frag_page(f)); - - data = vaddr + skb_frag_off(f) + remaining; frag_delta = skb_frag_size(f) - remaining; if (frag_delta >= prior_data_len) { - memcpy(prior_data, data, prior_data_len); - kunmap_atomic(vaddr); + memcpy_from_page(prior_data, skb_frag_page(f), + skb_frag_off(f) + remaining, + prior_data_len); } else { - memcpy(prior_data, data, frag_delta); - kunmap_atomic(vaddr); + memcpy_from_page(prior_data, skb_frag_page(f), + skb_frag_off(f) + remaining, + frag_delta); + /* get the next page */ f = &record->frags[i + 1]; - vaddr = kmap_atomic(skb_frag_page(f)); - data = vaddr + skb_frag_off(f); - memcpy(prior_data + frag_delta, - data, (prior_data_len - frag_delta)); - kunmap_atomic(vaddr); + + memcpy_from_page(prior_data + frag_delta, + skb_frag_page(f), + skb_frag_off(f), + prior_data_len - frag_delta); } /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c index a523ddda7609..de7105a84747 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -798,8 +798,10 @@ static int dm9051_loop_rx(struct board_info *db) } ret = dm9051_stop_mrcmd(db); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } skb->protocol = eth_type_trans(skb, db->ndev); if (db->ndev->features & NETIF_F_RXCSUM) diff --git 
a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h index 09a723b827c7..f93ba48bac3f 100644 --- a/drivers/net/ethernet/engleder/tsnep.h +++ b/drivers/net/ethernet/engleder/tsnep.h @@ -18,6 +18,8 @@ #define TSNEP "tsnep" #define TSNEP_RING_SIZE 256 +#define TSNEP_RING_RX_REFILL 16 +#define TSNEP_RING_RX_REUSE (TSNEP_RING_SIZE - TSNEP_RING_SIZE / 4) #define TSNEP_RING_ENTRIES_PER_PAGE (PAGE_SIZE / TSNEP_DESC_SIZE) #define TSNEP_RING_PAGE_COUNT (TSNEP_RING_SIZE / TSNEP_RING_ENTRIES_PER_PAGE) @@ -110,6 +112,7 @@ struct tsnep_rx { dma_addr_t page_dma[TSNEP_RING_PAGE_COUNT]; struct tsnep_rx_entry entry[TSNEP_RING_SIZE]; + int write; int read; u32 owner_counter; int increment_owner_counter; @@ -119,6 +122,7 @@ struct tsnep_rx { u32 bytes; u32 dropped; u32 multicast; + u32 alloc_failed; }; struct tsnep_queue { @@ -132,6 +136,8 @@ struct tsnep_queue { int irq; u32 irq_mask; + void __iomem *irq_delay_addr; + u8 irq_delay; }; struct tsnep_adapter { @@ -223,5 +229,7 @@ static inline void tsnep_ethtool_self_test(struct net_device *dev, #endif /* CONFIG_TSNEP_SELFTESTS */ void tsnep_get_system_time(struct tsnep_adapter *adapter, u64 *time); +int tsnep_set_irq_coalesce(struct tsnep_queue *queue, u32 usecs); +u32 tsnep_get_irq_coalesce(struct tsnep_queue *queue); #endif /* _TSNEP_H */ diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index a713a126b227..716815dad7d2 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -8,6 +8,7 @@ static const char tsnep_stats_strings[][ETH_GSTRING_LEN] = { "rx_bytes", "rx_dropped", "rx_multicast", + "rx_alloc_failed", "rx_phy_errors", "rx_forwarded_phy_errors", "rx_invalid_frame_errors", @@ -21,6 +22,7 @@ struct tsnep_stats { u64 rx_bytes; u64 rx_dropped; u64 rx_multicast; + u64 rx_alloc_failed; u64 rx_phy_errors; u64 rx_forwarded_phy_errors; u64 rx_invalid_frame_errors; @@ -36,6 +38,7 @@ static const char tsnep_rx_queue_stats_strings[][ETH_GSTRING_LEN] = { "rx_%d_bytes", "rx_%d_dropped", "rx_%d_multicast", + "rx_%d_alloc_failed", "rx_%d_no_descriptor_errors", "rx_%d_buffer_too_small_errors", "rx_%d_fifo_overflow_errors", @@ -47,6 +50,7 @@ struct tsnep_rx_queue_stats { u64 rx_bytes; u64 rx_dropped; u64 rx_multicast; + u64 rx_alloc_failed; u64 rx_no_descriptor_errors; u64 rx_buffer_too_small_errors; u64 rx_fifo_overflow_errors; @@ -178,6 +182,7 @@ static void tsnep_ethtool_get_ethtool_stats(struct net_device *netdev, tsnep_stats.rx_bytes += adapter->rx[i].bytes; tsnep_stats.rx_dropped += adapter->rx[i].dropped; tsnep_stats.rx_multicast += adapter->rx[i].multicast; + tsnep_stats.rx_alloc_failed += adapter->rx[i].alloc_failed; } reg = ioread32(adapter->addr + ECM_STAT); tsnep_stats.rx_phy_errors = @@ -200,6 +205,8 @@ static void tsnep_ethtool_get_ethtool_stats(struct net_device *netdev, tsnep_rx_queue_stats.rx_bytes = adapter->rx[i].bytes; tsnep_rx_queue_stats.rx_dropped = adapter->rx[i].dropped; tsnep_rx_queue_stats.rx_multicast = adapter->rx[i].multicast; + tsnep_rx_queue_stats.rx_alloc_failed = + adapter->rx[i].alloc_failed; reg = ioread32(adapter->addr + TSNEP_QUEUE(i) + TSNEP_RX_STATISTIC); tsnep_rx_queue_stats.rx_no_descriptor_errors = @@ -250,10 +257,10 @@ static int tsnep_ethtool_get_sset_count(struct net_device *netdev, int sset) } } -static int tsnep_ethtool_get_rxnfc(struct net_device *dev, +static int tsnep_ethtool_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct tsnep_adapter 
*adapter = netdev_priv(dev); + struct tsnep_adapter *adapter = netdev_priv(netdev); switch (cmd->cmd) { case ETHTOOL_GRXRINGS: @@ -273,10 +280,10 @@ static int tsnep_ethtool_get_rxnfc(struct net_device *dev, } } -static int tsnep_ethtool_set_rxnfc(struct net_device *dev, +static int tsnep_ethtool_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { - struct tsnep_adapter *adapter = netdev_priv(dev); + struct tsnep_adapter *adapter = netdev_priv(netdev); switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: @@ -288,10 +295,21 @@ static int tsnep_ethtool_set_rxnfc(struct net_device *dev, } } -static int tsnep_ethtool_get_ts_info(struct net_device *dev, +static void tsnep_ethtool_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + + ch->max_rx = adapter->num_rx_queues; + ch->max_tx = adapter->num_tx_queues; + ch->rx_count = adapter->num_rx_queues; + ch->tx_count = adapter->num_tx_queues; +} + +static int tsnep_ethtool_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) { - struct tsnep_adapter *adapter = netdev_priv(dev); + struct tsnep_adapter *adapter = netdev_priv(netdev); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | @@ -313,7 +331,137 @@ static int tsnep_ethtool_get_ts_info(struct net_device *dev, return 0; } +static struct tsnep_queue *tsnep_get_queue_with_tx(struct tsnep_adapter *adapter, + int index) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) { + if (adapter->queue[i].tx) { + if (index == 0) + return &adapter->queue[i]; + + index--; + } + } + + return NULL; +} + +static struct tsnep_queue *tsnep_get_queue_with_rx(struct tsnep_adapter *adapter, + int index) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) { + if (adapter->queue[i].rx) { + if (index == 0) + return &adapter->queue[i]; + + index--; + } + } + + return NULL; +} + +static int tsnep_ethtool_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + struct tsnep_queue *queue; + + queue = tsnep_get_queue_with_rx(adapter, 0); + if (queue) + ec->rx_coalesce_usecs = tsnep_get_irq_coalesce(queue); + + queue = tsnep_get_queue_with_tx(adapter, 0); + if (queue) + ec->tx_coalesce_usecs = tsnep_get_irq_coalesce(queue); + + return 0; +} + +static int tsnep_ethtool_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + int i; + int retval; + + for (i = 0; i < adapter->num_queues; i++) { + /* RX coalesce has priority for queues with TX and RX */ + if (adapter->queue[i].rx) + retval = tsnep_set_irq_coalesce(&adapter->queue[i], + ec->rx_coalesce_usecs); + else + retval = tsnep_set_irq_coalesce(&adapter->queue[i], + ec->tx_coalesce_usecs); + if (retval != 0) + return retval; + } + + return 0; +} + +static int tsnep_ethtool_get_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + struct tsnep_queue *queue_with_rx; + struct tsnep_queue *queue_with_tx; + + if (queue >= max(adapter->num_tx_queues, adapter->num_rx_queues)) + return -EINVAL; + + queue_with_rx = tsnep_get_queue_with_rx(adapter, queue); + if (queue_with_rx) + ec->rx_coalesce_usecs = tsnep_get_irq_coalesce(queue_with_rx); + 
+ queue_with_tx = tsnep_get_queue_with_tx(adapter, queue); + if (queue_with_tx) + ec->tx_coalesce_usecs = tsnep_get_irq_coalesce(queue_with_tx); + + return 0; +} + +static int tsnep_ethtool_set_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + struct tsnep_adapter *adapter = netdev_priv(netdev); + struct tsnep_queue *queue_with_rx; + struct tsnep_queue *queue_with_tx; + int retval; + + if (queue >= max(adapter->num_tx_queues, adapter->num_rx_queues)) + return -EINVAL; + + queue_with_rx = tsnep_get_queue_with_rx(adapter, queue); + if (queue_with_rx) { + retval = tsnep_set_irq_coalesce(queue_with_rx, ec->rx_coalesce_usecs); + if (retval != 0) + return retval; + } + + /* RX coalesce has priority for queues with TX and RX */ + queue_with_tx = tsnep_get_queue_with_tx(adapter, queue); + if (queue_with_tx && !queue_with_tx->rx) { + retval = tsnep_set_irq_coalesce(queue_with_tx, ec->tx_coalesce_usecs); + if (retval != 0) + return retval; + } + + return 0; +} + const struct ethtool_ops tsnep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = tsnep_ethtool_get_drvinfo, .get_regs_len = tsnep_ethtool_get_regs_len, .get_regs = tsnep_ethtool_get_regs, @@ -327,7 +475,12 @@ const struct ethtool_ops tsnep_ethtool_ops = { .get_sset_count = tsnep_ethtool_get_sset_count, .get_rxnfc = tsnep_ethtool_get_rxnfc, .set_rxnfc = tsnep_ethtool_set_rxnfc, + .get_channels = tsnep_ethtool_get_channels, .get_ts_info = tsnep_ethtool_get_ts_info, + .get_coalesce = tsnep_ethtool_get_coalesce, + .set_coalesce = tsnep_ethtool_set_coalesce, + .get_per_queue_coalesce = tsnep_ethtool_get_per_queue_coalesce, + .set_per_queue_coalesce = tsnep_ethtool_set_per_queue_coalesce, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; diff --git a/drivers/net/ethernet/engleder/tsnep_hw.h b/drivers/net/ethernet/engleder/tsnep_hw.h index 315dada75323..55e1caf193a6 100644 --- a/drivers/net/ethernet/engleder/tsnep_hw.h +++ b/drivers/net/ethernet/engleder/tsnep_hw.h @@ -48,6 +48,13 @@ #define ECM_COUNTER_LOW 0x0028 #define ECM_COUNTER_HIGH 0x002C +/* interrupt delay */ +#define ECM_INT_DELAY 0x0030 +#define ECM_INT_DELAY_MASK 0xF0 +#define ECM_INT_DELAY_SHIFT 4 +#define ECM_INT_DELAY_BASE_US 16 +#define ECM_INT_DELAY_OFFSET 1 + /* control and status */ #define ECM_STATUS 0x0080 #define ECM_LINK_MODE_OFF 0x01000000 diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 48fb391951dd..bf0190e1d2ea 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -39,6 +39,10 @@ #endif #define DMA_ADDR_LOW(dma_addr) ((u32)((dma_addr) & 0xFFFFFFFF)) +#define TSNEP_COALESCE_USECS_DEFAULT 64 +#define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \ + ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1) + static void tsnep_enable_irq(struct tsnep_adapter *adapter, u32 mask) { iowrite32(mask, adapter->addr + ECM_INT_ENABLE); @@ -83,6 +87,33 @@ static irqreturn_t tsnep_irq_txrx(int irq, void *arg) return IRQ_HANDLED; } +int tsnep_set_irq_coalesce(struct tsnep_queue *queue, u32 usecs) +{ + if (usecs > TSNEP_COALESCE_USECS_MAX) + return -ERANGE; + + usecs /= ECM_INT_DELAY_BASE_US; + usecs <<= ECM_INT_DELAY_SHIFT; + usecs &= ECM_INT_DELAY_MASK; + + queue->irq_delay &= ~ECM_INT_DELAY_MASK; + queue->irq_delay |= usecs; + iowrite8(queue->irq_delay, queue->irq_delay_addr); + + return 0; +} + +u32 
tsnep_get_irq_coalesce(struct tsnep_queue *queue) +{ + u32 usecs; + + usecs = (queue->irq_delay & ECM_INT_DELAY_MASK); + usecs >>= ECM_INT_DELAY_SHIFT; + usecs *= ECM_INT_DELAY_BASE_US; + + return usecs; +} + static int tsnep_mdiobus_read(struct mii_bus *bus, int addr, int regnum) { struct tsnep_adapter *adapter = bus->priv; @@ -542,6 +573,27 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) return (budget != 0); } +static bool tsnep_tx_pending(struct tsnep_tx *tx) +{ + unsigned long flags; + struct tsnep_tx_entry *entry; + bool pending = false; + + spin_lock_irqsave(&tx->lock, flags); + + if (tx->read != tx->write) { + entry = &tx->entry[tx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_TX_DESC_OWNER_MASK) == + (entry->properties & TSNEP_TX_DESC_OWNER_MASK)) + pending = true; + } + + spin_unlock_irqrestore(&tx->lock, flags); + + return pending; +} + static int tsnep_tx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_tx *tx) { @@ -608,23 +660,6 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx) } } -static int tsnep_rx_alloc_buffer(struct tsnep_rx *rx, - struct tsnep_rx_entry *entry) -{ - struct page *page; - - page = page_pool_dev_alloc_pages(rx->page_pool); - if (unlikely(!page)) - return -ENOMEM; - - entry->page = page; - entry->len = TSNEP_MAX_RX_BUF_SIZE; - entry->dma = page_pool_get_dma_addr(entry->page); - entry->desc->rx = __cpu_to_le64(entry->dma + TSNEP_SKB_PAD); - - return 0; -} - static int tsnep_rx_ring_init(struct tsnep_rx *rx) { struct device *dmadev = rx->adapter->dmadev; @@ -671,10 +706,6 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx) entry = &rx->entry[i]; next_entry = &rx->entry[(i + 1) % TSNEP_RING_SIZE]; entry->desc->next = __cpu_to_le64(next_entry->desc_dma); - - retval = tsnep_rx_alloc_buffer(rx, entry); - if (retval) - goto failed; } return 0; @@ -684,6 +715,45 @@ failed: return retval; } +static int tsnep_rx_desc_available(struct tsnep_rx *rx) +{ + if (rx->read <= rx->write) + return TSNEP_RING_SIZE - rx->write + rx->read - 1; + else + return rx->read - rx->write - 1; +} + +static void tsnep_rx_set_page(struct tsnep_rx *rx, struct tsnep_rx_entry *entry, + struct page *page) +{ + entry->page = page; + entry->len = TSNEP_MAX_RX_BUF_SIZE; + entry->dma = page_pool_get_dma_addr(entry->page); + entry->desc->rx = __cpu_to_le64(entry->dma + TSNEP_SKB_PAD); +} + +static int tsnep_rx_alloc_buffer(struct tsnep_rx *rx, int index) +{ + struct tsnep_rx_entry *entry = &rx->entry[index]; + struct page *page; + + page = page_pool_dev_alloc_pages(rx->page_pool); + if (unlikely(!page)) + return -ENOMEM; + tsnep_rx_set_page(rx, entry, page); + + return 0; +} + +static void tsnep_rx_reuse_buffer(struct tsnep_rx *rx, int index) +{ + struct tsnep_rx_entry *entry = &rx->entry[index]; + struct tsnep_rx_entry *read = &rx->entry[rx->read]; + + tsnep_rx_set_page(rx, entry, read->page); + read->page = NULL; +} + static void tsnep_rx_activate(struct tsnep_rx *rx, int index) { struct tsnep_rx_entry *entry = &rx->entry[index]; @@ -711,6 +781,48 @@ static void tsnep_rx_activate(struct tsnep_rx *rx, int index) entry->desc->properties = __cpu_to_le32(entry->properties); } +static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse) +{ + int index; + bool alloc_failed = false; + bool enable = false; + int i; + int retval; + + for (i = 0; i < count && !alloc_failed; i++) { + index = (rx->write + i) % TSNEP_RING_SIZE; + + retval = tsnep_rx_alloc_buffer(rx, index); + if (unlikely(retval)) { + 
rx->alloc_failed++; + alloc_failed = true; + + /* reuse only if no other allocation was successful */ + if (i == 0 && reuse) + tsnep_rx_reuse_buffer(rx, index); + else + break; + } + + tsnep_rx_activate(rx, index); + + enable = true; + } + + if (enable) { + rx->write = (rx->write + i) % TSNEP_RING_SIZE; + + /* descriptor properties shall be valid before hardware is + * notified + */ + dma_wmb(); + + iowrite32(TSNEP_CONTROL_RX_ENABLE, rx->addr + TSNEP_CONTROL); + } + + return i; +} + static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page, int length) { @@ -746,23 +858,42 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, int budget) { struct device *dmadev = rx->adapter->dmadev; + int desc_available; int done = 0; enum dma_data_direction dma_dir; struct tsnep_rx_entry *entry; - struct page *page; struct sk_buff *skb; int length; - bool enable = false; - int retval; + desc_available = tsnep_rx_desc_available(rx); dma_dir = page_pool_get_dma_dir(rx->page_pool); - while (likely(done < budget)) { + while (likely(done < budget) && (rx->read != rx->write)) { entry = &rx->entry[rx->read]; if ((__le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_OWNER_COUNTER_MASK) != (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) break; + done++; + + if (desc_available >= TSNEP_RING_RX_REFILL) { + bool reuse = desc_available >= TSNEP_RING_RX_REUSE; + + desc_available -= tsnep_rx_refill(rx, desc_available, + reuse); + if (!entry->page) { + /* buffer has been reused for refill to prevent + * empty RX ring, thus buffer cannot be used for + * RX processing + */ + rx->read = (rx->read + 1) % TSNEP_RING_SIZE; + desc_available++; + + rx->dropped++; + + continue; + } + } /* descriptor properties shall be read first, because valid data * is signaled there @@ -774,58 +905,53 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, TSNEP_DESC_LENGTH_MASK; dma_sync_single_range_for_cpu(dmadev, entry->dma, TSNEP_SKB_PAD, length, dma_dir); - page = entry->page; - /* forward skb only if allocation is successful, otherwise - * page is reused and frame dropped - */ - retval = tsnep_rx_alloc_buffer(rx, entry); - if (!retval) { - skb = tsnep_build_skb(rx, page, length); - if (skb) { - page_pool_release_page(rx->page_pool, page); - - rx->packets++; - rx->bytes += length - - TSNEP_RX_INLINE_METADATA_SIZE; - if (skb->pkt_type == PACKET_MULTICAST) - rx->multicast++; - - napi_gro_receive(napi, skb); - } else { - page_pool_recycle_direct(rx->page_pool, page); + rx->read = (rx->read + 1) % TSNEP_RING_SIZE; + desc_available++; - rx->dropped++; - } - done++; + skb = tsnep_build_skb(rx, entry->page, length); + if (skb) { + page_pool_release_page(rx->page_pool, entry->page); + + rx->packets++; + rx->bytes += length - TSNEP_RX_INLINE_METADATA_SIZE; + if (skb->pkt_type == PACKET_MULTICAST) + rx->multicast++; + + napi_gro_receive(napi, skb); } else { + page_pool_recycle_direct(rx->page_pool, entry->page); + rx->dropped++; } + entry->page = NULL; + } - tsnep_rx_activate(rx, rx->read); - - enable = true; + if (desc_available) + tsnep_rx_refill(rx, desc_available, false); - rx->read = (rx->read + 1) % TSNEP_RING_SIZE; - } + return done; +} - if (enable) { - /* descriptor properties shall be valid before hardware is - * notified - */ - dma_wmb(); +static bool tsnep_rx_pending(struct tsnep_rx *rx) +{ + struct tsnep_rx_entry *entry; - iowrite32(TSNEP_CONTROL_RX_ENABLE, rx->addr + TSNEP_CONTROL); + if (rx->read != rx->write) { + entry = &rx->entry[rx->read]; + if 
((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_DESC_OWNER_COUNTER_MASK) == + (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) + return true; } - return done; + return false; } static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_rx *rx) { dma_addr_t dma; - int i; int retval; memset(rx, 0, sizeof(*rx)); @@ -843,13 +969,7 @@ static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr, rx->owner_counter = 1; rx->increment_owner_counter = TSNEP_RING_SIZE - 1; - for (i = 0; i < TSNEP_RING_SIZE; i++) - tsnep_rx_activate(rx, i); - - /* descriptor properties shall be valid before hardware is notified */ - dma_wmb(); - - iowrite32(TSNEP_CONTROL_RX_ENABLE, rx->addr + TSNEP_CONTROL); + tsnep_rx_refill(rx, tsnep_rx_desc_available(rx), false); return 0; } @@ -866,6 +986,17 @@ static void tsnep_rx_close(struct tsnep_rx *rx) tsnep_rx_ring_cleanup(rx); } +static bool tsnep_pending(struct tsnep_queue *queue) +{ + if (queue->tx && tsnep_tx_pending(queue->tx)) + return true; + + if (queue->rx && tsnep_rx_pending(queue->rx)) + return true; + + return false; +} + static int tsnep_poll(struct napi_struct *napi, int budget) { struct tsnep_queue *queue = container_of(napi, struct tsnep_queue, @@ -886,9 +1017,19 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (!complete) return budget; - if (likely(napi_complete_done(napi, done))) + if (likely(napi_complete_done(napi, done))) { tsnep_enable_irq(queue->adapter, queue->irq_mask); + /* reschedule if work is already pending, prevent rotten packets + * which are transmitted or received after polling but before + * interrupt enable + */ + if (tsnep_pending(queue)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + napi_schedule(napi); + } + } + return min(done, budget - 1); } @@ -1316,6 +1457,11 @@ static int tsnep_queue_init(struct tsnep_adapter *adapter, int queue_count) adapter->queue[0].tx = &adapter->tx[0]; adapter->queue[0].rx = &adapter->rx[0]; adapter->queue[0].irq_mask = irq_mask; + adapter->queue[0].irq_delay_addr = adapter->addr + ECM_INT_DELAY; + retval = tsnep_set_irq_coalesce(&adapter->queue[0], + TSNEP_COALESCE_USECS_DEFAULT); + if (retval < 0) + return retval; adapter->netdev->irq = adapter->queue[0].irq; @@ -1336,6 +1482,12 @@ static int tsnep_queue_init(struct tsnep_adapter *adapter, int queue_count) adapter->queue[i].rx = &adapter->rx[i]; adapter->queue[i].irq_mask = irq_mask << (ECM_INT_TXRX_SHIFT * i); + adapter->queue[i].irq_delay_addr = + adapter->addr + ECM_INT_DELAY + ECM_INT_DELAY_OFFSET * i; + retval = tsnep_set_irq_coalesce(&adapter->queue[i], + TSNEP_COALESCE_USECS_DEFAULT); + if (retval < 0) + return retval; } return 0; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 5c6dd3029e2f..76f808d38066 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -37,18 +37,9 @@ static int dpaa2_eth_dl_info_get(struct devlink *devlink, struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; char buf[10]; - int err; - - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; scnprintf(buf, 10, "%d.%d", priv->dpni_ver_major, priv->dpni_ver_minor); - err = devlink_info_version_running_put(req, "dpni", buf); - if (err) - return err; - - return 0; + return devlink_info_version_running_put(req, "dpni", buf); } 
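
The hellcreek, mv88e6xxx and sja1105 hunks earlier in this section, and the bnxt and dpaa2 hunks here, all delete the same boilerplate: devlink core has started reporting the driver name in .info_get responses itself, so the explicit devlink_info_driver_name_put() call and its error handling are dead code, and version reporting is all that remains. A hedged sketch of the post-conversion callback shape, with hypothetical "foo" identifiers rather than code from this series:

#include <net/devlink.h>

/* Post-conversion .info_get: no driver-name put, the devlink core
 * supplies the name on the driver's behalf; only versions remain.
 */
static int foo_devlink_info_get(struct devlink *devlink,
				struct devlink_info_req *req,
				struct netlink_ext_ack *extack)
{
	return devlink_info_version_fixed_put(req,
					      DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
					      "foo-asic");
}

static const struct devlink_ops foo_devlink_ops = {
	.info_get = foo_devlink_info_get,
};

A callback left with nothing to report could plausibly be dropped altogether once the core supplies the name, although none of the drivers touched here go that far.
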
static struct dpaa2_eth_trap_item * diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 97e1856641b4..0c35abb7d065 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2147,8 +2147,11 @@ static int dpaa2_eth_link_state_update(struct dpaa2_eth_priv *priv) /* When we manage the MAC/PHY using phylink there is no need * to manually update the netif_carrier. + * We can avoid locking because we are called from the "link changed" + * IRQ handler, which is the same as the "endpoint changed" IRQ handler + * (the writer to priv->mac), so we cannot race with it. */ - if (dpaa2_eth_is_type_phy(priv)) + if (dpaa2_mac_is_type_phy(priv->mac)) goto out; /* Chech link state; speed / duplex changes are not treated yet */ @@ -2179,6 +2182,8 @@ static int dpaa2_eth_open(struct net_device *net_dev) dpaa2_eth_seed_pools(priv); + mutex_lock(&priv->mac_lock); + if (!dpaa2_eth_is_type_phy(priv)) { /* We'll only start the txqs when the link is actually ready; * make sure we don't race against the link up notification, @@ -2197,14 +2202,15 @@ static int dpaa2_eth_open(struct net_device *net_dev) err = dpni_enable(priv->mc_io, 0, priv->mc_token); if (err < 0) { + mutex_unlock(&priv->mac_lock); netdev_err(net_dev, "dpni_enable() failed\n"); goto enable_err; } - if (dpaa2_eth_is_type_phy(priv)) { + if (dpaa2_eth_is_type_phy(priv)) dpaa2_mac_start(priv->mac); - phylink_start(priv->mac->phylink); - } + + mutex_unlock(&priv->mac_lock); return 0; @@ -2277,14 +2283,17 @@ static int dpaa2_eth_stop(struct net_device *net_dev) int dpni_enabled = 0; int retries = 10; + mutex_lock(&priv->mac_lock); + if (dpaa2_eth_is_type_phy(priv)) { - phylink_stop(priv->mac->phylink); dpaa2_mac_stop(priv->mac); } else { netif_tx_stop_all_queues(net_dev); netif_carrier_off(net_dev); } + mutex_unlock(&priv->mac_lock); + /* On dpni_disable(), the MC firmware will: * - stop MAC Rx and wait for all Rx frames to be enqueued to software * - cut off WRIOP dequeues from egress FQs and wait until transmission @@ -2610,12 +2619,20 @@ static int dpaa2_eth_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct dpaa2_eth_priv *priv = netdev_priv(dev); + int err; if (cmd == SIOCSHWTSTAMP) return dpaa2_eth_ts_ioctl(dev, rq, cmd); - if (dpaa2_eth_is_type_phy(priv)) - return phylink_mii_ioctl(priv->mac->phylink, rq, cmd); + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_is_type_phy(priv)) { + err = phylink_mii_ioctl(priv->mac->phylink, rq, cmd); + mutex_unlock(&priv->mac_lock); + return err; + } + + mutex_unlock(&priv->mac_lock); return -EOPNOTSUPP; } @@ -3791,7 +3808,7 @@ static int dpaa2_eth_setup_dpni(struct fsl_mc_device *ls_dev) dev_err(dev, "DPNI version %u.%u not supported, need >= %u.%u\n", priv->dpni_ver_major, priv->dpni_ver_minor, DPNI_VER_MAJOR, DPNI_VER_MINOR); - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto close; } @@ -4627,9 +4644,8 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) err = dpaa2_mac_open(mac); if (err) goto err_free_mac; - priv->mac = mac; - if (dpaa2_eth_is_type_phy(priv)) { + if (dpaa2_mac_is_type_phy(mac)) { err = dpaa2_mac_connect(mac); if (err) { if (err == -EPROBE_DEFER) @@ -4643,11 +4659,14 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) } } + mutex_lock(&priv->mac_lock); + priv->mac = mac; + mutex_unlock(&priv->mac_lock); + return 0; err_close_mac: dpaa2_mac_close(mac); - 
priv->mac = NULL; err_free_mac: kfree(mac); return err; @@ -4655,15 +4674,21 @@ err_free_mac: static void dpaa2_eth_disconnect_mac(struct dpaa2_eth_priv *priv) { - if (dpaa2_eth_is_type_phy(priv)) - dpaa2_mac_disconnect(priv->mac); + struct dpaa2_mac *mac; - if (!dpaa2_eth_has_mac(priv)) + mutex_lock(&priv->mac_lock); + mac = priv->mac; + priv->mac = NULL; + mutex_unlock(&priv->mac_lock); + + if (!mac) return; - dpaa2_mac_close(priv->mac); - kfree(priv->mac); - priv->mac = NULL; + if (dpaa2_mac_is_type_phy(mac)) + dpaa2_mac_disconnect(mac); + + dpaa2_mac_close(mac); + kfree(mac); } static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) @@ -4673,6 +4698,7 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) struct fsl_mc_device *dpni_dev = to_fsl_mc_device(dev); struct net_device *net_dev = dev_get_drvdata(dev); struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + bool had_mac; int err; err = dpni_get_irq_status(dpni_dev->mc_io, 0, dpni_dev->mc_handle, @@ -4689,12 +4715,15 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) dpaa2_eth_set_mac_addr(netdev_priv(net_dev)); dpaa2_eth_update_tx_fqids(priv); - rtnl_lock(); - if (dpaa2_eth_has_mac(priv)) + /* We can avoid locking because the "endpoint changed" IRQ + * handler is the only one who changes priv->mac at runtime, + * so we are not racing with anyone. + */ + had_mac = !!priv->mac; + if (had_mac) dpaa2_eth_disconnect_mac(priv); else dpaa2_eth_connect_mac(priv); - rtnl_unlock(); } return IRQ_HANDLED; @@ -4792,6 +4821,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) priv->net_dev = net_dev; SET_NETDEV_DEVLINK_PORT(net_dev, &priv->devlink_port); + mutex_init(&priv->mac_lock); + priv->iommu_domain = iommu_get_domain_for_dev(dev); priv->tx_tstamp_type = HWTSTAMP_TX_OFF; @@ -4899,6 +4930,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) } #endif + err = dpaa2_eth_connect_mac(priv); + if (err) + goto err_connect_mac; + err = dpaa2_eth_setup_irqs(dpni_dev); if (err) { netdev_warn(net_dev, "Failed to set link interrupt, fall back to polling\n"); @@ -4911,10 +4946,6 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) priv->do_link_poll = true; } - err = dpaa2_eth_connect_mac(priv); - if (err) - goto err_connect_mac; - err = dpaa2_eth_dl_alloc(priv); if (err) goto err_dl_register; @@ -4948,13 +4979,13 @@ err_dl_port_add: err_dl_trap_register: dpaa2_eth_dl_free(priv); err_dl_register: - dpaa2_eth_disconnect_mac(priv); -err_connect_mac: if (priv->do_link_poll) kthread_stop(priv->poll_thread); else fsl_mc_free_irqs(dpni_dev); err_poll_thread: + dpaa2_eth_disconnect_mac(priv); +err_connect_mac: dpaa2_eth_free_rings(priv); err_alloc_rings: err_csum: @@ -5002,9 +5033,6 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) #endif unregister_netdev(net_dev); - rtnl_lock(); - dpaa2_eth_disconnect_mac(priv); - rtnl_unlock(); dpaa2_eth_dl_port_del(priv); dpaa2_eth_dl_traps_unregister(priv); @@ -5015,6 +5043,7 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) else fsl_mc_free_irqs(ls_dev); + dpaa2_eth_disconnect_mac(priv); dpaa2_eth_free_rings(priv); free_percpu(priv->fd); free_percpu(priv->sgt_cache); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 5d0fc432e5b2..d56d7a13262e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -615,6 +615,8 @@ struct dpaa2_eth_priv { #endif struct dpaa2_mac *mac; + /* Serializes 
changes to priv->mac */ + struct mutex mac_lock; struct workqueue_struct *dpaa2_ptp_wq; struct work_struct tx_onestep_tstamp; struct sk_buff_head tx_skbs; @@ -768,16 +770,15 @@ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv) static inline bool dpaa2_eth_is_type_phy(struct dpaa2_eth_priv *priv) { - if (priv->mac && - (priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || - priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) - return true; + lockdep_assert_held(&priv->mac_lock); - return false; + return dpaa2_mac_is_type_phy(priv->mac); } static inline bool dpaa2_eth_has_mac(struct dpaa2_eth_priv *priv) { + lockdep_assert_held(&priv->mac_lock); + return priv->mac ? true : false; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 32a38a03db57..e80e9388c71f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -85,11 +85,16 @@ static void dpaa2_eth_get_drvinfo(struct net_device *net_dev, static int dpaa2_eth_nway_reset(struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err = -EOPNOTSUPP; + + mutex_lock(&priv->mac_lock); if (dpaa2_eth_is_type_phy(priv)) - return phylink_ethtool_nway_reset(priv->mac->phylink); + err = phylink_ethtool_nway_reset(priv->mac->phylink); + + mutex_unlock(&priv->mac_lock); - return -EOPNOTSUPP; + return err; } static int @@ -97,10 +102,18 @@ dpaa2_eth_get_link_ksettings(struct net_device *net_dev, struct ethtool_link_ksettings *link_settings) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err; - if (dpaa2_eth_is_type_phy(priv)) - return phylink_ethtool_ksettings_get(priv->mac->phylink, - link_settings); + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_is_type_phy(priv)) { + err = phylink_ethtool_ksettings_get(priv->mac->phylink, + link_settings); + mutex_unlock(&priv->mac_lock); + return err; + } + + mutex_unlock(&priv->mac_lock); link_settings->base.autoneg = AUTONEG_DISABLE; if (!(priv->link_state.options & DPNI_LINK_OPT_HALF_DUPLEX)) @@ -115,11 +128,17 @@ dpaa2_eth_set_link_ksettings(struct net_device *net_dev, const struct ethtool_link_ksettings *link_settings) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err = -EOPNOTSUPP; - if (!dpaa2_eth_is_type_phy(priv)) - return -ENOTSUPP; + mutex_lock(&priv->mac_lock); - return phylink_ethtool_ksettings_set(priv->mac->phylink, link_settings); + if (dpaa2_eth_is_type_phy(priv)) + err = phylink_ethtool_ksettings_set(priv->mac->phylink, + link_settings); + + mutex_unlock(&priv->mac_lock); + + return err; } static void dpaa2_eth_get_pauseparam(struct net_device *net_dev, @@ -128,11 +147,16 @@ static void dpaa2_eth_get_pauseparam(struct net_device *net_dev, struct dpaa2_eth_priv *priv = netdev_priv(net_dev); u64 link_options = priv->link_state.options; + mutex_lock(&priv->mac_lock); + if (dpaa2_eth_is_type_phy(priv)) { phylink_ethtool_get_pauseparam(priv->mac->phylink, pause); + mutex_unlock(&priv->mac_lock); return; } + mutex_unlock(&priv->mac_lock); + pause->rx_pause = dpaa2_eth_rx_pause_enabled(link_options); pause->tx_pause = dpaa2_eth_tx_pause_enabled(link_options); pause->autoneg = AUTONEG_DISABLE; @@ -151,9 +175,17 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev, return -EOPNOTSUPP; } - if (dpaa2_eth_is_type_phy(priv)) - return phylink_ethtool_set_pauseparam(priv->mac->phylink, - pause); + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_is_type_phy(priv)) { + 
err = phylink_ethtool_set_pauseparam(priv->mac->phylink, + pause); + mutex_unlock(&priv->mac_lock); + return err; + } + + mutex_unlock(&priv->mac_lock); + if (pause->autoneg) return -EOPNOTSUPP; @@ -185,7 +217,6 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev, static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { - struct dpaa2_eth_priv *priv = netdev_priv(netdev); u8 *p = data; int i; @@ -199,22 +230,17 @@ static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset, strscpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } - if (dpaa2_eth_has_mac(priv)) - dpaa2_mac_get_strings(p); + dpaa2_mac_get_strings(p); break; } } static int dpaa2_eth_get_sset_count(struct net_device *net_dev, int sset) { - int num_ss_stats = DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS; - struct dpaa2_eth_priv *priv = netdev_priv(net_dev); - switch (sset) { case ETH_SS_STATS: /* ethtool_get_stats(), ethtool_get_drvinfo() */ - if (dpaa2_eth_has_mac(priv)) - num_ss_stats += dpaa2_mac_get_sset_count(); - return num_ss_stats; + return DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS + + dpaa2_mac_get_sset_count(); default: return -EOPNOTSUPP; } @@ -315,8 +341,12 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, } *(data + i++) = buf_cnt_total; + mutex_lock(&priv->mac_lock); + if (dpaa2_eth_has_mac(priv)) dpaa2_mac_get_ethtool_stats(priv->mac, data + i); + + mutex_unlock(&priv->mac_lock); } static int dpaa2_eth_prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 51c9da8e1be2..c886f33f8c6f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -338,12 +338,20 @@ static void dpaa2_mac_set_supported_interfaces(struct dpaa2_mac *mac) void dpaa2_mac_start(struct dpaa2_mac *mac) { + ASSERT_RTNL(); + if (mac->serdes_phy) phy_power_on(mac->serdes_phy); + + phylink_start(mac->phylink); } void dpaa2_mac_stop(struct dpaa2_mac *mac) { + ASSERT_RTNL(); + + phylink_stop(mac->phylink); + if (mac->serdes_phy) phy_power_off(mac->serdes_phy); } @@ -422,7 +430,9 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) } mac->phylink = phylink; + rtnl_lock(); err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0); + rtnl_unlock(); if (err) { netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err); goto err_phylink_destroy; @@ -440,10 +450,10 @@ err_pcs_destroy: void dpaa2_mac_disconnect(struct dpaa2_mac *mac) { - if (!mac->phylink) - return; - + rtnl_lock(); phylink_disconnect_phy(mac->phylink); + rtnl_unlock(); + phylink_destroy(mac->phylink); dpaa2_pcs_destroy(mac); of_phy_put(mac->serdes_phy); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h index a58cab188a99..c1ec9efd413a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h @@ -30,8 +30,14 @@ struct dpaa2_mac { struct phy *serdes_phy; }; -bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev, - struct fsl_mc_io *mc_io); +static inline bool dpaa2_mac_is_type_phy(struct dpaa2_mac *mac) +{ + if (!mac) + return false; + + return mac->attr.link_type == DPMAC_LINK_TYPE_PHY || + mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE; +} int dpaa2_mac_open(struct dpaa2_mac *mac); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c 
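/* dpaa2_mac_start()/dpaa2_mac_stop() absorbed phylink_start()/stop(),
 * which must run under the RTNL; the new ASSERT_RTNL() calls make that
 * contract explicit. The .ndo_open/.ndo_stop callers already satisfy it,
 * since the core wraps dev_open()/dev_close() in rtnl_lock(). A
 * hypothetical caller outside those paths would need to take it
 * explicitly (sketch only):
 *
 *	rtnl_lock();
 *	dpaa2_mac_start(mac);
 *	rtnl_unlock();
 */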
b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c index 720c9230cab5..6bc1988be311 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -60,11 +60,18 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev, { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct dpsw_link_state state = {0}; - int err = 0; + int err; + + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_is_type_phy(port_priv)) { + err = phylink_ethtool_ksettings_get(port_priv->mac->phylink, + link_ksettings); + mutex_unlock(&port_priv->mac_lock); + return err; + } - if (dpaa2_switch_port_is_type_phy(port_priv)) - return phylink_ethtool_ksettings_get(port_priv->mac->phylink, - link_ksettings); + mutex_unlock(&port_priv->mac_lock); err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, @@ -99,9 +106,16 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, bool if_running; int err = 0, ret; - if (dpaa2_switch_port_is_type_phy(port_priv)) - return phylink_ethtool_ksettings_set(port_priv->mac->phylink, - link_ksettings); + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_is_type_phy(port_priv)) { + err = phylink_ethtool_ksettings_set(port_priv->mac->phylink, + link_ksettings); + mutex_unlock(&port_priv->mac_lock); + return err; + } + + mutex_unlock(&port_priv->mac_lock); /* Interface needs to be down to change link settings */ if_running = netif_running(netdev); @@ -145,14 +159,9 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, static int dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset) { - struct ethsw_port_priv *port_priv = netdev_priv(netdev); - int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS; - switch (sset) { case ETH_SS_STATS: - if (port_priv->mac) - num_ss_stats += dpaa2_mac_get_sset_count(); - return num_ss_stats; + return DPAA2_SWITCH_NUM_COUNTERS + dpaa2_mac_get_sset_count(); default: return -EOPNOTSUPP; } @@ -161,7 +170,6 @@ dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset) static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { - struct ethsw_port_priv *port_priv = netdev_priv(netdev); u8 *p = data; int i; @@ -172,8 +180,7 @@ static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } - if (port_priv->mac) - dpaa2_mac_get_strings(p); + dpaa2_mac_get_strings(p); break; } } @@ -196,8 +203,12 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, dpaa2_switch_ethtool_counters[i].name, err); } - if (port_priv->mac) + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_has_mac(port_priv)) dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i); + + mutex_unlock(&port_priv->mac_lock); } const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 14f739e04a3c..f4ae4289c41a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -602,8 +602,11 @@ static int dpaa2_switch_port_link_state_update(struct net_device *netdev) /* When we manage the MAC/PHY using phylink there is no need * to manually update the netif_carrier. 
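/* Why dpaa2_mac_get_sset_count()/dpaa2_mac_get_strings() are now called
 * unconditionally: ethtool fetches the ETH_SS_STATS count, the strings
 * and the values in separate syscalls, and the MAC may connect or
 * disconnect in between. Reserving the MAC slots permanently keeps the
 * layout stable across those calls; with no MAC attached the slots just
 * read back as zero. Sketch of the fill side, all names illustrative:
 */
#define DEMO_NUM_SW_COUNTERS	16	/* always-present counters */

struct demo_mac;

struct demo_port {
	struct mutex mac_lock;		/* serializes ->mac */
	struct demo_mac *mac;		/* NULL when disconnected */
};

static void demo_mac_fill_stats(struct demo_mac *mac, u64 *data);

static void demo_get_ethtool_stats(struct demo_port *port, u64 *data)
{
	int i = DEMO_NUM_SW_COUNTERS;	/* switch counters already filled */

	mutex_lock(&port->mac_lock);
	if (port->mac)			/* MAC slots stay zeroed otherwise */
		demo_mac_fill_stats(port->mac, data + i);
	mutex_unlock(&port->mac_lock);
}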
+ * We can avoid locking because we are called from the "link changed" + * IRQ handler, which is the same as the "endpoint changed" IRQ handler + * (the writer to port_priv->mac), so we cannot race with it. */ - if (dpaa2_switch_port_is_type_phy(port_priv)) + if (dpaa2_mac_is_type_phy(port_priv->mac)) return 0; /* Interrupts are received even though no one issued an 'ifconfig up' @@ -683,6 +686,8 @@ static int dpaa2_switch_port_open(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; + mutex_lock(&port_priv->mac_lock); + if (!dpaa2_switch_port_is_type_phy(port_priv)) { /* Explicitly set carrier off, otherwise * netif_carrier_ok() will return true and cause 'ip link show' @@ -696,16 +701,17 @@ static int dpaa2_switch_port_open(struct net_device *netdev) port_priv->ethsw_data->dpsw_handle, port_priv->idx); if (err) { + mutex_unlock(&port_priv->mac_lock); netdev_err(netdev, "dpsw_if_enable err %d\n", err); return err; } dpaa2_switch_enable_ctrl_if_napi(ethsw); - if (dpaa2_switch_port_is_type_phy(port_priv)) { + if (dpaa2_switch_port_is_type_phy(port_priv)) dpaa2_mac_start(port_priv->mac); - phylink_start(port_priv->mac->phylink); - } + + mutex_unlock(&port_priv->mac_lock); return 0; } @@ -716,14 +722,17 @@ static int dpaa2_switch_port_stop(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; + mutex_lock(&port_priv->mac_lock); + if (dpaa2_switch_port_is_type_phy(port_priv)) { - phylink_stop(port_priv->mac->phylink); dpaa2_mac_stop(port_priv->mac); } else { netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); } + mutex_unlock(&port_priv->mac_lock); + err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx); @@ -1452,9 +1461,8 @@ static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) err = dpaa2_mac_open(mac); if (err) goto err_free_mac; - port_priv->mac = mac; - if (dpaa2_switch_port_is_type_phy(port_priv)) { + if (dpaa2_mac_is_type_phy(mac)) { err = dpaa2_mac_connect(mac); if (err) { netdev_err(port_priv->netdev, @@ -1464,11 +1472,14 @@ static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) } } + mutex_lock(&port_priv->mac_lock); + port_priv->mac = mac; + mutex_unlock(&port_priv->mac_lock); + return 0; err_close_mac: dpaa2_mac_close(mac); - port_priv->mac = NULL; err_free_mac: kfree(mac); return err; @@ -1476,15 +1487,21 @@ err_free_mac: static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv) { - if (dpaa2_switch_port_is_type_phy(port_priv)) - dpaa2_mac_disconnect(port_priv->mac); + struct dpaa2_mac *mac; + + mutex_lock(&port_priv->mac_lock); + mac = port_priv->mac; + port_priv->mac = NULL; + mutex_unlock(&port_priv->mac_lock); - if (!dpaa2_switch_port_has_mac(port_priv)) + if (!mac) return; - dpaa2_mac_close(port_priv->mac); - kfree(port_priv->mac); - port_priv->mac = NULL; + if (dpaa2_mac_is_type_phy(mac)) + dpaa2_mac_disconnect(mac); + + dpaa2_mac_close(mac); + kfree(mac); } static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) @@ -1494,6 +1511,7 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) struct ethsw_port_priv *port_priv; u32 status = ~0; int err, if_id; + bool had_mac; err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, &status); @@ -1511,12 +1529,15 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) } if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) { - rtnl_lock(); - if 
(dpaa2_switch_port_has_mac(port_priv)) + /* We can avoid locking because the "endpoint changed" IRQ + * handler is the only one who changes priv->mac at runtime, + * so we are not racing with anyone. + */ + had_mac = !!port_priv->mac; + if (had_mac) dpaa2_switch_port_disconnect_mac(port_priv); else dpaa2_switch_port_connect_mac(port_priv); - rtnl_unlock(); } out: @@ -2934,9 +2955,7 @@ static void dpaa2_switch_remove_port(struct ethsw_core *ethsw, { struct ethsw_port_priv *port_priv = ethsw->ports[port_idx]; - rtnl_lock(); dpaa2_switch_port_disconnect_mac(port_priv); - rtnl_unlock(); free_netdev(port_priv->netdev); ethsw->ports[port_idx] = NULL; } @@ -3255,6 +3274,8 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, port_priv->netdev = port_netdev; port_priv->ethsw_data = ethsw; + mutex_init(&port_priv->mac_lock); + port_priv->idx = port_idx; port_priv->stp_state = BR_STATE_FORWARDING; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index 0002dca4d417..42b3ca73f55d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -161,6 +161,8 @@ struct ethsw_port_priv { struct dpaa2_switch_filter_block *filter_block; struct dpaa2_mac *mac; + /* Protects against changes to port_priv->mac */ + struct mutex mac_lock; }; /* Switch data */ @@ -230,12 +232,7 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) static inline bool dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv) { - if (port_priv->mac && - (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || - port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) - return true; - - return false; + return dpaa2_mac_is_type_phy(port_priv->mac); } static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index f8c06c3f9464..8671591cb750 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2058,7 +2058,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); - tbmr = ENETC_TBMR_EN; + tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) tbmr |= ENETC_TBMR_VIH; @@ -2461,7 +2461,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, 0); + tx_ring->prio = 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } return 0; @@ -2480,7 +2481,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, i); + tx_ring->prio = i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } /* Reset the number of netdev queues based on the TC count */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 161930a65f61..c6d8cc15c270 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -95,6 +95,7 @@ struct enetc_bdr { void __iomem *rcir; }; u16 index; + u16 prio; int bd_count; /* # of BDs */ int 
next_to_use; int next_to_clean; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index a842e1999122..fcebb54224c0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -137,6 +137,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; int err; int i; @@ -145,16 +146,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) if (priv->tx_ring[i]->tsd_enable) return -EBUSY; - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? i : 0); + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? i : 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } err = enetc_setup_taprio(ndev, taprio); - - if (err) - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? 0 : i); + if (err) { + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? 0 : i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + } return err; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 879dfee3dc41..9471baa11d39 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -74,7 +74,7 @@ #include "fec.h" static void set_multicast_list(struct net_device *ndev); -static void fec_enet_itr_coal_init(struct net_device *ndev); +static void fec_enet_itr_coal_set(struct net_device *ndev); #define DRIVER_NAME "fec" @@ -1216,8 +1216,7 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_init(ndev); - + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) @@ -2958,19 +2957,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -static void fec_enet_itr_coal_init(struct net_device *ndev) -{ - struct ethtool_coalesce ec; - - ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - fec_enet_set_coalesce(ndev, &ec, NULL, NULL); -} - static int fec_enet_get_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, void *data) @@ -3874,6 +3860,10 @@ static int fec_enet_init(struct net_device *ndev) fep->rx_align = 0x3; fep->tx_align = 0x3; #endif + fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; + fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; /* Check mask of the streaming and coherent API */ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index e76a3d262b2b..a55542c1ad65 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -4,7 +4,6 @@ config FSL_FMAN depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR select PHYLINK - select PCS select PCS_LYNX select CRC32 default n diff --git 
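/* enetc now mirrors each ring's priority into tx_ring->prio so that
 * enetc_setup_txbdr() can program ENETC_TBMR_SET_PRIO(tx_ring->prio)
 * again after a ring reset; previously the priority existed only in the
 * hardware register and was lost on reconfiguration. Every update site
 * keeps the cached value and the register in lockstep, e.g.:
 *
 *	tx_ring->prio = taprio->enable ? i : 0;
 *	enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio);
 */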
a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c index d50c222948b4..4fbeb3fd71a8 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -3,14 +3,7 @@ #include "funeth.h" #include "funeth_devlink.h" -static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - return devlink_info_driver_name_put(req, KBUILD_MODNAME); -} - static const struct devlink_ops fun_dl_ops = { - .info_get = fun_dl_info_get, }; struct devlink *fun_devlink_alloc(struct device *dev) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 5655da9cd236..64eb0442c82f 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -563,6 +563,7 @@ struct gve_priv { u32 adminq_report_stats_cnt; u32 adminq_report_link_speed_cnt; u32 adminq_get_ptype_map_cnt; + u32 adminq_verify_driver_compatibility_cnt; /* Global stats */ u32 interface_up_cnt; /* count of times interface turned up since last reset */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index f7621ab672b9..60061288ad9d 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -289,7 +289,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED: return -ENOMEM; case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: - return -ENOTSUPP; + return -EOPNOTSUPP; default: dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status); return -EINVAL; @@ -407,6 +407,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_GET_PTYPE_MAP: priv->adminq_get_ptype_map_cnt++; break; + case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: + priv->adminq_verify_driver_compatibility_cnt++; + break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); } @@ -878,6 +881,22 @@ int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, return gve_adminq_execute_cmd(priv, &cmd); } +int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, + u64 driver_info_len, + dma_addr_t driver_info_addr) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY); + cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) { + .driver_info_len = cpu_to_be64(driver_info_len), + .driver_info_addr = cpu_to_be64(driver_info_addr), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_report_link_speed(struct gve_priv *priv) { union gve_adminq_command gvnic_cmd; diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 83c0b40cd2d9..cf29662e6ad1 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -24,6 +24,7 @@ enum gve_adminq_opcodes { GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, + GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, }; /* Admin queue status codes */ @@ -146,6 +147,51 @@ enum gve_sup_feature_mask { #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 +#define GVE_VERSION_STR_LEN 128 + +enum gve_driver_capability { + gve_driver_capability_gqi_qpl = 0, + gve_driver_capability_gqi_rda = 1, + gve_driver_capability_dqo_qpl = 2, /*
reserved for future use */ + gve_driver_capability_dqo_rda = 3, + gve_driver_capability_alt_miss_compl = 4, +}; + +#define GVE_CAP1(a) BIT((int)a) +#define GVE_CAP2(a) BIT(((int)a) - 64) +#define GVE_CAP3(a) BIT(((int)a) - 128) +#define GVE_CAP4(a) BIT(((int)a) - 192) + +#define GVE_DRIVER_CAPABILITY_FLAGS1 \ + (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ + GVE_CAP1(gve_driver_capability_gqi_rda) | \ + GVE_CAP1(gve_driver_capability_dqo_rda) | \ + GVE_CAP1(gve_driver_capability_alt_miss_compl)) + +#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 +#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 +#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 + +struct gve_driver_info { + u8 os_type; /* 0x01 = Linux */ + u8 driver_major; + u8 driver_minor; + u8 driver_sub; + __be32 os_version_major; + __be32 os_version_minor; + __be32 os_version_sub; + __be64 driver_capability_flags[4]; + u8 os_version_str1[GVE_VERSION_STR_LEN]; + u8 os_version_str2[GVE_VERSION_STR_LEN]; +}; + +struct gve_adminq_verify_driver_compatibility { + __be64 driver_info_len; + __be64 driver_info_addr; +}; + +static_assert(sizeof(struct gve_adminq_verify_driver_compatibility) == 16); + struct gve_adminq_configure_device_resources { __be64 counter_array; __be64 irq_db_addr; @@ -345,6 +391,8 @@ union gve_adminq_command { struct gve_adminq_report_stats report_stats; struct gve_adminq_report_link_speed report_link_speed; struct gve_adminq_get_ptype_map get_ptype_map; + struct gve_adminq_verify_driver_compatibility + verify_driver_compatibility; }; }; u8 reserved[64]; @@ -372,6 +420,9 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval); +int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, + u64 driver_info_len, + dma_addr_t driver_info_addr); int gve_adminq_report_link_speed(struct gve_priv *priv); struct gve_ptype_lut; diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h index e8fe9adef7f2..f79cd0591110 100644 --- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h @@ -176,6 +176,11 @@ static_assert(sizeof(struct gve_tx_compl_desc) == 8); #define GVE_COMPL_TYPE_DQO_MISS 0x1 /* Miss path completion */ #define GVE_COMPL_TYPE_DQO_REINJECTION 0x3 /* Re-injection completion */ +/* The most significant bit in the completion tag can change the completion + * type from packet completion to miss path completion. + */ +#define GVE_ALT_MISS_COMPL_BIT BIT(15) + /* Descriptor to post buffers to HW on buffer queue. 
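/* Layout of the four capability words: capability n is advertised in
 * driver_capability_flags[n / 64] at bit (n % 64), which is what the
 * per-word GVE_CAP1..GVE_CAP4 helpers hard-code. A generic setter
 * (illustrative only, not part of the driver) would be:
 */
#include <linux/bits.h>
#include <linux/types.h>

static inline void demo_set_capability(__be64 *flags, unsigned int cap)
{
	u64 word = be64_to_cpu(flags[cap / 64]);

	word |= BIT_ULL(cap % 64);		/* bit within its word */
	flags[cap / 64] = cpu_to_be64(word);	/* device expects big endian */
}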
*/ struct gve_rx_desc_dqo { __le16 buf_id; /* ID returned in Rx completion descriptor */ diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5a229a01f49d..5b40f9c53196 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -12,6 +12,8 @@ #include <linux/sched.h> #include <linux/timer.h> #include <linux/workqueue.h> +#include <linux/utsname.h> +#include <linux/version.h> #include <net/sch_generic.h> #include "gve.h" #include "gve_dqo.h" @@ -30,6 +32,49 @@ const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; +static int gve_verify_driver_compatibility(struct gve_priv *priv) +{ + int err; + struct gve_driver_info *driver_info; + dma_addr_t driver_info_bus; + + driver_info = dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct gve_driver_info), + &driver_info_bus, GFP_KERNEL); + if (!driver_info) + return -ENOMEM; + + *driver_info = (struct gve_driver_info) { + .os_type = 1, /* Linux */ + .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), + .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), + .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), + .driver_capability_flags = { + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), + }, + }; + strscpy(driver_info->os_version_str1, utsname()->release, + sizeof(driver_info->os_version_str1)); + strscpy(driver_info->os_version_str2, utsname()->version, + sizeof(driver_info->os_version_str2)); + + err = gve_adminq_verify_driver_compatibility(priv, + sizeof(struct gve_driver_info), + driver_info_bus); + + /* It's ok if the device doesn't support this */ + if (err == -EOPNOTSUPP) + err = 0; + + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_driver_info), + driver_info, driver_info_bus); + return err; +} + static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -1368,6 +1413,13 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) return err; } + err = gve_verify_driver_compatibility(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Could not verify driver compatibility: err=%d\n", err); + goto err; + } + if (skip_describe_device) goto setup_device; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 588d64819ed5..b76143bfd594 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -953,12 +953,18 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, atomic_set_release(&tx->dqo_compl.hw_tx_head, tx_head); } else if (type == GVE_COMPL_TYPE_DQO_PKT) { u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); - - gve_handle_packet_completion(priv, tx, !!napi, - compl_tag, - &pkt_compl_bytes, - &pkt_compl_pkts, - /*is_reinjection=*/false); + if (compl_tag & GVE_ALT_MISS_COMPL_BIT) { + compl_tag &= ~GVE_ALT_MISS_COMPL_BIT; + gve_handle_miss_completion(priv, tx, compl_tag, + &miss_compl_bytes, + &miss_compl_pkts); + } else { + gve_handle_packet_completion(priv, tx, !!napi, + compl_tag, + &pkt_compl_bytes, + &pkt_compl_pkts, + false); + } } else if (type == GVE_COMPL_TYPE_DQO_MISS) { u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); @@ -972,7 +978,7 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, 
compl_tag, &reinject_compl_bytes, &reinject_compl_pkts, - /*is_reinjection=*/true); + true); } tx->dqo_compl.head = diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 4c441e6a5082..3d3b69605423 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -13,11 +13,6 @@ static int hclge_devlink_info_get(struct devlink *devlink, struct hclge_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; struct hclge_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index fdc19868b818..a6c3c5e8f0ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -13,11 +13,6 @@ static int hclgevf_devlink_info_get(struct devlink *devlink, struct hclgevf_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN]; struct hclgevf_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 560d1d442232..d3fdc290937f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4a6630586ec9..fc373472e4e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4880b740fa6e..6861b3e2ced3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16645,6 +16645,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -16662,7 +16664,14 @@ static int __init 
i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 3f6187c16424..0d1bab4ac1b0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,7 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) -#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 258bdf8906dd..c4e451ef7942 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1087,12 +1087,6 @@ static int iavf_set_mac(struct net_device *netdev, void *p) if (ret) return ret; - /* If this is an initial set MAC during VF spawn do not wait */ - if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { - adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; - return 0; - } - ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, iavf_is_mac_set_handled(netdev, addr->sa_data), msecs_to_jiffies(2500)); @@ -2605,8 +2599,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; - adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -2921,7 +2913,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -3021,6 +3012,11 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (netif_running(netdev)) { + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3033,6 +3029,7 @@ continue_reset: if (running) { netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); } @@ -3172,6 +3169,16 @@ reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + + if (netif_running(netdev)) { + /* Close device to ensure that Tx queues will not be started + * during netif_device_attach() at the end of the reset task. 
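/* fm10k, i40e and iavf all gain the same module-init unwind: state set
 * up before pci_register_driver() (workqueues, debugfs) must be torn
 * down again, in reverse order, when registration fails, or it leaks on
 * a failed insmod. Minimal shape of the fix, names illustrative:
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct pci_driver demo_driver;	/* .name/.id_table/.probe elided */

static int __init demo_init_module(void)
{
	int err;

	demo_wq = alloc_workqueue("demo_wq", 0, 0);
	if (!demo_wq)
		return -ENOMEM;

	err = pci_register_driver(&demo_driver);
	if (err)
		destroy_workqueue(demo_wq);	/* undo on failure */

	return err;
}
module_init(demo_init_module);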
+ */ + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); reset_finish: rtnl_lock(); @@ -5035,23 +5042,21 @@ static int __maybe_unused iavf_resume(struct device *dev_d) static void iavf_remove(struct pci_dev *pdev) { struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; - struct iavf_cloud_filter *cf, *cftmp; - struct iavf_hw *hw = &adapter->hw; + struct net_device *netdev; + struct iavf_hw *hw; int err; - /* When reboot/shutdown is in progress no need to do anything - * as the adapter is already REMOVE state that was set during - * iavf_shutdown() callback. - */ - if (adapter->state == __IAVF_REMOVE) + netdev = adapter->netdev; + hw = &adapter->hw; + + if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race. */ @@ -5191,6 +5196,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -5201,7 +5208,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index f88ee051e71c..2f0b604abc5e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -320,6 +320,11 @@ enum ice_vsi_state { ICE_VSI_STATE_NBITS /* must be last */ }; +struct ice_vsi_stats { + struct ice_ring_stats **tx_ring_stats; /* Tx ring stats array */ + struct ice_ring_stats **rx_ring_stats; /* Rx ring stats array */ +}; + /* struct that defines a VSI, associated with a dev */ struct ice_vsi { struct net_device *netdev; @@ -373,6 +378,7 @@ struct ice_vsi { /* VSI stats */ struct rtnl_link_stats64 net_stats; + struct rtnl_link_stats64 net_stats_prev; struct ice_eth_stats eth_stats; struct ice_eth_stats eth_stats_prev; @@ -540,6 +546,7 @@ struct ice_pf { u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */ struct ice_vsi **vsi; /* VSIs created by the driver */ + struct ice_vsi_stats **vsi_stats; struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ struct ice_vfs vfs; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e864634d66bc..554095b25f44 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -389,7 +389,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) * Indicates the starting address of the descriptor queue defined in * 128 Byte units. 
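/* The bare ">> 7" above becomes ICE_RLAN_BASE_S: the Rx queue context
 * stores the descriptor ring base in 128-byte units, and 128 == BIT(7),
 * so a named shift keeps the unit size and the shift width visibly tied
 * together. Assuming the macro carries the same value as the literal it
 * replaces:
 *
 *	#define ICE_RLAN_BASE_S	7
 *	rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S;
 */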
*/ - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 216370ec60d4..d02b55b6aa9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2948,8 +2948,8 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw) * Note: In the structure of [phy_type_low, phy_type_high], there should * be one bit set, as this function will convert one PHY type to its * speed. - * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned - * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned + * If no bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned + * If more than one bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned */ static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high) @@ -5515,3 +5515,40 @@ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) ICE_FW_API_REPORT_DFLT_CFG_MIN, ICE_FW_API_REPORT_DFLT_CFG_PATCH); } + +/* each of the indexes into the following array match the speed of a return + * value from the list of AQ returned speeds like the range: + * ICE_AQ_LINK_SPEED_10MB .. ICE_AQ_LINK_SPEED_100GB excluding + * ICE_AQ_LINK_SPEED_UNKNOWN which is BIT(15) and maps to BIT(14) in this + * array. The array is defined as 15 elements long because the link_speed + * returned by the firmware is a 16 bit * value, but is indexed + * by [fls(speed) - 1] + */ +static const u32 ice_aq_to_link_speed[15] = { + SPEED_10, /* BIT(0) */ + SPEED_100, + SPEED_1000, + SPEED_2500, + SPEED_5000, + SPEED_10000, + SPEED_20000, + SPEED_25000, + SPEED_40000, + SPEED_50000, + SPEED_100000, /* BIT(10) */ + 0, + 0, + 0, + 0 /* BIT(14) */ +}; + +/** + * ice_get_link_speed - get integer speed from table + * @index: array index from fls(aq speed) - 1 + * + * Returns: u32 value containing integer speed + */ +u32 ice_get_link_speed(u16 index) +{ + return ice_aq_to_link_speed[index]; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8b6712b92e84..4c6a0b5c9304 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -163,6 +163,7 @@ int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, bool write, struct ice_sq_cd *cd); +u32 ice_get_link_speed(u16 index); int ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap, diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 9defb9d0fe88..4f24d441c35e 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -881,6 +881,9 @@ void ice_update_dcb_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + for (i = 0; i < 8; i++) { ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i), pf->stat_prev_loaded, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 1d638216484d..8286e47b4bae 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -311,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink, } } - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) { - 
NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); - goto out_free_ctx; - } - ice_info_get_dsn(pf, ctx); err = devlink_info_serial_number_put(req, ctx->buf); @@ -1596,21 +1590,22 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the nvm-flash devlink region. It captures a snapshot of the full NVM flash - * contents, including both banks of flash. This snapshot can later be viewed - * via the devlink-region interface. + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. * - * It captures the flash using the FLASH_ONLY bit set when reading via - * firmware, so it does not read the current Shadow RAM contents. For that, - * use the shadow-ram region. + * It captures a snapshot of the NVM or Shadow RAM flash contents. This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. @@ -1622,17 +1617,27 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; u8 *nvm_data, *tmp, i; u32 nvm_size, left; s8 num_blks; int status; - nvm_size = hw->flash.flash_size; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + nvm_data = vzalloc(nvm_size); if (!nvm_data) return -ENOMEM; - num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); tmp = nvm_data; left = nvm_size; @@ -1656,7 +1661,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, - &read_sz, tmp, false); + &read_sz, tmp, read_shadow_ram); if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", read_sz, status, hw->adminq.sq_last_status); @@ -1677,62 +1682,69 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } /** - * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents + * ice_devlink_nvm_read - Read a portion of NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure - * @data: on exit points to snapshot data buffer + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the shadow-ram devlink region. It captures a snapshot of the shadow ram - * contents. This snapshot can later be viewed via the devlink-region - * interface. 
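/* With a .read handler wired into both region ops, userspace can pull
 * flash contents directly instead of first triggering a snapshot.
 * Example iproute2 invocations (the PCI address is illustrative):
 *
 *	devlink region read pci/0000:01:00.0/nvm-flash address 0 length 256
 *	devlink region new pci/0000:01:00.0/shadow-ram snapshot 1
 *	devlink region dump pci/0000:01:00.0/shadow-ram snapshot 1
 */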
+ * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. */ -static int -ice_devlink_sram_snapshot(struct devlink *devlink, - const struct devlink_region_ops __always_unused *ops, - struct netlink_ext_ack *extack, u8 **data) +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - u8 *sram_data; - u32 sram_size; - int err; + bool read_shadow_ram; + u64 nvm_size; + int status; - sram_size = hw->flash.sr_words * 2u; - sram_data = vzalloc(sram_size); - if (!sram_data) - return -ENOMEM; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } - err = ice_acquire_nvm(hw, ICE_RES_READ); - if (err) { + if (offset + size >= nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - err, hw->adminq.sq_last_status); + status, hw->adminq.sq_last_status); NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(sram_data); - return err; + return -EIO; } - /* Read from the Shadow RAM, rather than directly from NVM */ - err = ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true); - if (err) { + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - sram_size, err, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, - "Failed to read Shadow RAM contents"); + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); ice_release_nvm(hw); - vfree(sram_data); - return err; + return -EIO; } - ice_release_nvm(hw); - *data = sram_data; - return 0; } @@ -1784,12 +1796,14 @@ static const struct devlink_region_ops ice_nvm_region_ops = { .name = "nvm-flash", .destructor = vfree, .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_sram_region_ops = { .name = "shadow-ram", .destructor = vfree, - .snapshot = ice_devlink_sram_snapshot, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_devcaps_region_ops = { diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f71a7521c7bd..4191994d8f3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1544,9 +1544,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_txq(vsi, j) { tx_ring = READ_ONCE(vsi->tx_rings[j]); - if (tx_ring) { - data[i++] = tx_ring->stats.pkts; - data[i++] = tx_ring->stats.bytes; + if (tx_ring && tx_ring->ring_stats) { + data[i++] = tx_ring->ring_stats->stats.pkts; + data[i++] = tx_ring->ring_stats->stats.bytes; } else { data[i++] = 
0; data[i++] = 0; @@ -1555,9 +1555,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_rxq(vsi, j) { rx_ring = READ_ONCE(vsi->rx_rings[j]); - if (rx_ring) { - data[i++] = rx_ring->stats.pkts; - data[i++] = rx_ring->stats.bytes; + if (rx_ring && rx_ring->ring_stats) { + data[i++] = rx_ring->ring_stats->stats.pkts; + data[i++] = rx_ring->ring_stats->stats.bytes; } else { data[i++] = 0; data[i++] = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index b3baf7c3f910..89f986a75cc8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -908,17 +908,5 @@ static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) return ice_ptype_lkup[ptype]; } -#define ICE_LINK_SPEED_UNKNOWN 0 -#define ICE_LINK_SPEED_10MBPS 10 -#define ICE_LINK_SPEED_100MBPS 100 -#define ICE_LINK_SPEED_1000MBPS 1000 -#define ICE_LINK_SPEED_2500MBPS 2500 -#define ICE_LINK_SPEED_5000MBPS 5000 -#define ICE_LINK_SPEED_10000MBPS 10000 -#define ICE_LINK_SPEED_20000MBPS 20000 -#define ICE_LINK_SPEED_25000MBPS 25000 -#define ICE_LINK_SPEED_40000MBPS 40000 -#define ICE_LINK_SPEED_50000MBPS 50000 -#define ICE_LINK_SPEED_100000MBPS 100000 #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7276badfa19e..94aa834cd9a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -448,6 +448,49 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d } /** + * ice_vsi_alloc_stat_arrays - Allocate statistics arrays + * @vsi: VSI pointer + */ +static int ice_vsi_alloc_stat_arrays(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + + if (vsi->type == ICE_VSI_CHNL) + return 0; + if (!pf->vsi_stats) + return -ENOENT; + + vsi_stat = kzalloc(sizeof(*vsi_stat), GFP_KERNEL); + if (!vsi_stat) + return -ENOMEM; + + vsi_stat->tx_ring_stats = + kcalloc(vsi->alloc_txq, sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->tx_ring_stats) + goto err_alloc_tx; + + vsi_stat->rx_ring_stats = + kcalloc(vsi->alloc_rxq, sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->rx_ring_stats) + goto err_alloc_rx; + + pf->vsi_stats[vsi->idx] = vsi_stat; + + return 0; + +err_alloc_rx: + kfree(vsi_stat->rx_ring_stats); +err_alloc_tx: + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; + return -ENOMEM; +} + +/** * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI @@ -560,6 +603,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, if (vsi->type == ICE_VSI_CTRL && vf) vf->ctrl_vsi_idx = vsi->idx; + + /* allocate memory for Tx/Rx ring stat pointers */ + if (ice_vsi_alloc_stat_arrays(vsi)) + goto err_rings; + goto unlock_pf; err_rings: @@ -1536,6 +1584,106 @@ err_out: } /** + * ice_vsi_free_stats - Free the ring statistics structures + * @vsi: VSI pointer + */ +static void ice_vsi_free_stats(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (vsi->type == ICE_VSI_CHNL) + return; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + ice_for_each_alloc_txq(vsi, i) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + 
WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + + ice_for_each_alloc_rxq(vsi, i) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat->rx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; +} + +/** + * ice_vsi_alloc_ring_stats - Allocates Tx and Rx ring stats for the VSI + * @vsi: VSI which is having stats allocated + */ +static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi) +{ + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; + struct ice_vsi_stats *vsi_stats; + struct ice_pf *pf = vsi->back; + u16 i; + + vsi_stats = pf->vsi_stats[vsi->idx]; + tx_ring_stats = vsi_stats->tx_ring_stats; + rx_ring_stats = vsi_stats->rx_ring_stats; + + /* Allocate Tx ring stats */ + ice_for_each_alloc_txq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_tx_ring *ring; + + ring = vsi->tx_rings[i]; + ring_stats = tx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(tx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + /* Allocate Rx ring stats */ + ice_for_each_alloc_rxq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_rx_ring *ring; + + ring = vsi->rx_rings[i]; + ring_stats = rx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(rx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + return 0; + +err_out: + ice_vsi_free_stats(vsi); + return -ENOMEM; +} + +/** * ice_vsi_manage_rss_lut - disable/enable RSS * @vsi: the VSI being changed * @ena: boolean value indicating if this is an enable or disable request @@ -1795,11 +1943,15 @@ void ice_update_eth_stats(struct ice_vsi *vsi) { struct ice_eth_stats *prev_es, *cur_es; struct ice_hw *hw = &vsi->back->hw; + struct ice_pf *pf = vsi->back; u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ prev_es = &vsi->eth_stats_prev; cur_es = &vsi->eth_stats; + if (ice_is_reset_in_progress(pf->state)) + vsi->stat_offsets_loaded = false; + ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded, &prev_es->rx_bytes, &cur_es->rx_bytes); @@ -2576,6 +2728,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + ice_vsi_map_rings_to_vectors(vsi); /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -2614,6 +2770,9 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. 
Hence no need to capture * return value @@ -2627,6 +2786,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ret = ice_vsi_alloc_rings(vsi); if (ret) goto unroll_vsi_init; + + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + break; default: /* clean up the resources and exit */ @@ -2686,6 +2850,7 @@ unroll_vector_base: unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: + ice_vsi_free_stats(vsi); ice_vsi_delete(vsi); unroll_get_qs: ice_vsi_put_qs(vsi); @@ -3077,7 +3242,7 @@ int ice_vsi_release(struct ice_vsi *vsi) vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; ice_vsi_clear_rings(vsi); - + ice_vsi_free_stats(vsi); ice_vsi_put_qs(vsi); /* retain SW VSI data structure since it is needed to unregister and @@ -3205,6 +3370,47 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** + * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * @vsi: VSI pointer + * @prev_txq: Number of Tx rings before ring reallocation + * @prev_rxq: Number of Rx rings before ring reallocation + */ +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (!prev_txq || !prev_rxq) + return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; + + vsi_stat = pf->vsi_stats[vsi->idx]; + + if (vsi->num_txq < prev_txq) { + for (i = vsi->num_txq; i < prev_txq; i++) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + } + + if (vsi->num_rxq < prev_rxq) { + for (i = vsi->num_rxq; i < prev_rxq; i++) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + } + + return 0; +} + +/** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild * @init_vsi: is this an initialization or a reconfigure of the VSI @@ -3215,10 +3421,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_coalesce_stored *coalesce; + int ret, i, prev_txq, prev_rxq; int prev_num_q_vectors = 0; enum ice_vsi_type vtype; struct ice_pf *pf; - int ret, i; if (!vsi) return -EINVAL; @@ -3237,6 +3443,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + prev_txq = vsi->num_txq; + prev_rxq = vsi->num_rxq; + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (ret) @@ -3303,7 +3512,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + ice_vsi_map_rings_to_vectors(vsi); + + vsi->stat_offsets_loaded = false; if (ice_is_xdp_ena_vsi(vsi)) { ret = ice_vsi_determine_xdp_res(vsi); if (ret) @@ -3340,6 +3555,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + + vsi->stat_offsets_loaded = false; break; case ICE_VSI_CHNL: if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { @@ -3387,6 +3607,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return ice_schedule_reset(pf, ICE_RESET_PFR); } } + + if (ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq)) + goto err_vectors; + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); @@ -3728,9 +3952,9 @@ static void 
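The kfree_rcu()/WRITE_ONCE(..., NULL) pairs in ice_vsi_free_stats() and ice_vsi_realloc_stat_arrays() exist because the stats walkers read these blocks locklessly. A sketch of the assumed reader side, combining an RCU-protected pointer load with the u64_stats seqcount loop (generic names; one common form of this pattern, which ice wraps in ice_fetch_u64_stats_per_ring()):

    struct foo_ring_stats {
            struct u64_stats_sync syncp;
            u64 pkts, bytes;
    };

    rcu_read_lock();
    for (i = 0; i < nq; i++) {
            struct foo_ring_stats *s = READ_ONCE(stats_arr[i]);
            unsigned int start;
            u64 p, b;

            if (!s)
                    continue;       /* slot already retired */
            do {
                    start = u64_stats_fetch_begin(&s->syncp);
                    p = s->pkts;
                    b = s->bytes;
            } while (u64_stats_fetch_retry(&s->syncp, start));
            pkts += p;
            bytes += b;
    }
    rcu_read_unlock();

On 64-bit kernels the seqcount loop compiles away; on 32-bit it prevents torn reads of the 64-bit counters, and RCU guarantees the block stays valid until the reader leaves the critical section.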
ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes */ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(&tx_ring->stats, pkts, bytes); - u64_stats_update_end(&tx_ring->syncp); + u64_stats_update_begin(&tx_ring->ring_stats->syncp); + ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&tx_ring->ring_stats->syncp); } /** @@ -3741,9 +3965,9 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) */ void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(&rx_ring->stats, pkts, bytes); - u64_stats_update_end(&rx_ring->syncp); + u64_stats_update_begin(&rx_ring->ring_stats->syncp); + ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&rx_ring->ring_stats->syncp); } /** @@ -3850,33 +4074,11 @@ int ice_clear_dflt_vsi(struct ice_vsi *vsi) */ int ice_get_link_speed_mbps(struct ice_vsi *vsi) { - switch (vsi->port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - return SPEED_100000; - case ICE_AQ_LINK_SPEED_50GB: - return SPEED_50000; - case ICE_AQ_LINK_SPEED_40GB: - return SPEED_40000; - case ICE_AQ_LINK_SPEED_25GB: - return SPEED_25000; - case ICE_AQ_LINK_SPEED_20GB: - return SPEED_20000; - case ICE_AQ_LINK_SPEED_10GB: - return SPEED_10000; - case ICE_AQ_LINK_SPEED_5GB: - return SPEED_5000; - case ICE_AQ_LINK_SPEED_2500MB: - return SPEED_2500; - case ICE_AQ_LINK_SPEED_1000MB: - return SPEED_1000; - case ICE_AQ_LINK_SPEED_100MB: - return SPEED_100; - case ICE_AQ_LINK_SPEED_10MB: - return SPEED_10; - case ICE_AQ_LINK_SPEED_UNKNOWN: - default: - return 0; - } + unsigned int link_speed; + + link_speed = vsi->port_info->phy.link_info.link_speed; + + return (int)ice_get_link_speed(fls(link_speed) - 1); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d6f460ff1b72..2b23b4714a26 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -130,12 +130,17 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; + struct ice_ring_stats *ring_stats; if (!tx_ring) continue; if (ice_ring_ch_enabled(tx_ring)) continue; + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this @@ -144,8 +149,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * prev_pkt would be negative if there was no * pending work. */ - packets = tx_ring->stats.pkts & INT_MAX; - if (tx_ring->tx_stats.prev_pkt == packets) { + packets = ring_stats->stats.pkts & INT_MAX; + if (ring_stats->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; @@ -155,7 +160,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * to ice_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt = + ring_stats->tx_stats.prev_pkt = ice_get_tx_pending(tx_ring) ? 
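ice_get_link_speed_mbps() above replaces an 11-way switch with a table lookup: the ICE_AQ_LINK_SPEED_* values are one-hot bits, and fls() ("find last set") returns the 1-based position of the highest set bit, so fls(BIT(n)) - 1 == n can index an array. A self-contained illustration; the table values are examples, not the driver's actual table:

    #include <linux/bitops.h>

    static const u32 speed_mbps[] = {
            10, 100, 1000, 2500, 5000, 10000, 20000, 25000,
    };

    static u32 one_hot_to_mbps(u16 speed_flag)
    {
            int idx = fls(speed_flag) - 1;  /* fls(0) == 0, so idx == -1 */

            if (idx < 0 || idx >= (int)ARRAY_SIZE(speed_mbps))
                    return 0;               /* unknown speed */
            return speed_mbps[idx];
    }

The fls(0) == 0 case is what makes ICE_AQ_LINK_SPEED_UNKNOWN (no bit set) fall out of the table naturally; the same conversion is reused for the legacy virtchnl speed table in ice_vf_mbx.c later in this section.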
packets : -1; } } @@ -2546,13 +2551,20 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = vsi->alloc_txq + i; + struct ice_ring_stats *ring_stats; struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); - if (!xdp_ring) goto free_xdp_rings; + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) { + ice_free_tx_ring(xdp_ring); + goto free_xdp_rings; + } + + xdp_ring->ring_stats = ring_stats; xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; @@ -2575,9 +2587,13 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) return 0; free_xdp_rings: - for (; i >= 0; i--) - if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + for (; i >= 0; i--) { + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) { + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; ice_free_tx_ring(vsi->xdp_rings[i]); + } + } return -ENOMEM; } @@ -2778,6 +2794,8 @@ free_qmap: synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); } + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } @@ -3131,15 +3149,15 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) */ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { - irqreturn_t ret = IRQ_HANDLED; struct ice_pf *pf = data; - bool irq_handled; - irq_handled = ice_ptp_process_ts(pf); - if (!irq_handled) - ret = IRQ_WAKE_THREAD; + if (ice_is_reset_in_progress(pf->state)) + return IRQ_HANDLED; + + while (!ice_ptp_process_ts(pf)) + usleep_range(50, 100); - return ret; + return IRQ_HANDLED; } /** @@ -4756,11 +4774,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_init_pf_unroll; } + pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi, + sizeof(*pf->vsi_stats), GFP_KERNEL); + if (!pf->vsi_stats) { + err = -ENOMEM; + goto err_init_vsi_unroll; + } + err = ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); err = -EIO; - goto err_init_vsi_unroll; + goto err_init_vsi_stats_unroll; } /* In case of MSIX we are going to setup the misc vector right here @@ -4941,6 +4966,9 @@ err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: ice_clear_interrupt_scheme(pf); +err_init_vsi_stats_unroll: + devm_kfree(dev, pf->vsi_stats); + pf->vsi_stats = NULL; err_init_vsi_unroll: devm_kfree(dev, pf->vsi); err_init_pf_unroll: @@ -5063,6 +5091,8 @@ static void ice_remove(struct pci_dev *pdev) continue; ice_vsi_free_q_vectors(pf->vsi[i]); } + devm_kfree(&pdev->dev, pf->vsi_stats); + pf->vsi_stats = NULL; ice_deinit_pf(pf); ice_devlink_destroy_regions(pf); ice_deinit_hw(&pf->hw); @@ -6380,14 +6410,16 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (!ring) + if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, + ring->ring_stats->stats, &pkts, + &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->tx_stats.restart_q; - vsi->tx_busy += ring->tx_stats.tx_busy; - vsi->tx_linearize += ring->tx_stats.tx_linearize; + vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; + vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; + vsi->tx_linearize += 
ring->ring_stats->tx_stats.tx_linearize; } } @@ -6397,6 +6429,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { + struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; @@ -6421,12 +6454,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring_stats *ring_stats; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ring_stats = ring->ring_stats; + ice_fetch_u64_stats_per_ring(&ring_stats->syncp, + ring_stats->stats, &pkts, + &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; - vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; } /* update XDP Tx rings counters */ @@ -6436,10 +6473,28 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_unlock(); - vsi->net_stats.tx_packets = vsi_stats->tx_packets; - vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; - vsi->net_stats.rx_packets = vsi_stats->rx_packets; - vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + net_stats = &vsi->net_stats; + stats_prev = &vsi->net_stats_prev; + + /* clear prev counters after reset */ + if (vsi_stats->tx_packets < stats_prev->tx_packets || + vsi_stats->rx_packets < stats_prev->rx_packets) { + stats_prev->tx_packets = 0; + stats_prev->tx_bytes = 0; + stats_prev->rx_packets = 0; + stats_prev->rx_bytes = 0; + } + + /* update netdev counters */ + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; + + stats_prev->tx_packets = vsi_stats->tx_packets; + stats_prev->tx_bytes = vsi_stats->tx_bytes; + stats_prev->rx_packets = vsi_stats->rx_packets; + stats_prev->rx_bytes = vsi_stats->rx_bytes; kfree(vsi_stats); } @@ -6501,6 +6556,9 @@ void ice_update_pf_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 5cf198a33e26..13e75279e71c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -614,11 +614,14 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * 2) extend the 40b timestamp value to get a 64bit timestamp * 3) send that timestamp to the stack * - * After looping, if we still have waiting SKBs, return true. This may cause us - * effectively poll even when not strictly necessary. We do this because it's - * possible a new timestamp was requested around the same time as the interrupt. - * In some cases hardware might not interrupt us again when the timestamp is - * captured. + * Returns true if all timestamps were handled, and false if any slots remain + * without a timestamp. + * + * After looping, if we still have waiting SKBs, return false. This may cause + * us to effectively poll even when not strictly necessary. 
We do this because + * it's possible a new timestamp was requested around the same time as the + * interrupt. In some cases hardware might not interrupt us again when the + * timestamp is captured. * * Note that we only take the tracking lock when clearing the bit and when * checking if we need to re-queue this task. The only place where bits can be @@ -641,7 +644,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return false; + return true; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -2367,10 +2370,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) */ bool ice_ptp_process_ts(struct ice_pf *pf) { - if (pf->ptp.port.tx.init) - return ice_ptp_tx_tstamp(&pf->ptp.port.tx); - - return false; + return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } static void ice_ptp_periodic_work(struct kthread_work *work) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 772b1f566d6e..1f8dd50db524 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -2963,16 +2963,18 @@ bool ice_ptp_lock(struct ice_hw *hw) u32 hw_lock; int i; -#define MAX_TRIES 5 +#define MAX_TRIES 15 for (i = 0; i < MAX_TRIES; i++) { hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); hw_lock = hw_lock & PFTSYN_SEM_BUSY_M; - if (!hw_lock) - break; + if (hw_lock) { + /* Somebody is holding the lock */ + usleep_range(5000, 6000); + continue; + } - /* Somebody is holding the lock */ - usleep_range(10000, 20000); + break; } return !hw_lock; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 109761c8c858..fd1f8b0ad0ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -156,18 +156,20 @@ ice_repr_sp_stats64(const struct net_device *dev, u64 pkts, bytes; tx_ring = np->vsi->tx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&tx_ring->syncp, tx_ring->stats, + ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp, + tx_ring->ring_stats->stats, &pkts, &bytes); stats->rx_packets = pkts; stats->rx_bytes = bytes; rx_ring = np->vsi->rx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&rx_ring->syncp, rx_ring->stats, + ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp, + rx_ring->ring_stats->stats, &pkts, &bytes); stats->tx_packets = pkts; stats->tx_bytes = bytes; - stats->tx_dropped = rx_ring->rx_stats.alloc_page_failed + - rx_ring->rx_stats.alloc_buf_failed; + stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed + + rx_ring->ring_stats->rx_stats.alloc_buf_failed; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dbe80e5053a8..086f0b3ab68d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -325,7 +325,7 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) if (netif_tx_queue_stopped(txring_txq(tx_ring)) && !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_tx_wake_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; } } @@ -367,7 +367,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt = -1; + tx_ring->ring_stats->tx_stats.prev_pkt = -1; return 0; err: @@ -667,7 +667,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) 
/* alloc new page for storage */ page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -680,7 +680,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_pages(page, ice_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -1091,7 +1091,7 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) return false; - rx_ring->rx_stats.non_eop_descs++; + rx_ring->ring_stats->rx_stats.non_eop_descs++; return true; } @@ -1222,7 +1222,7 @@ construct_skb: } /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; if (rx_buf) rx_buf->pagecnt_bias++; break; @@ -1275,7 +1275,9 @@ construct_skb: ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; - ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); + if (rx_ring->ring_stats) + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, + total_rx_bytes); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_pkts; @@ -1292,15 +1294,25 @@ static void __ice_update_sample(struct ice_q_vector *q_vector, struct ice_tx_ring *tx_ring; ice_for_each_tx_ring(tx_ring, *rc) { - packets += tx_ring->stats.pkts; - bytes += tx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } else { struct ice_rx_ring *rx_ring; ice_for_each_rx_ring(rx_ring, *rc) { - packets += rx_ring->stats.pkts; - bytes += rx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = rx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } @@ -1549,7 +1561,7 @@ static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_tx_start_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; return 0; } @@ -2293,7 +2305,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) if (__skb_linearize(skb)) goto out_drop; count = ice_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + tx_ring->ring_stats->tx_stats.tx_linearize++; } /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, @@ -2304,7 +2316,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) */ if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_CTX_DESC)) { - tx_ring->tx_stats.tx_busy++; + tx_ring->ring_stats->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 932b5661ec4d..4fd0e5d0a313 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -191,6 +191,16 @@ struct ice_rxq_stats { u64 alloc_buf_failed; }; +struct ice_ring_stats { + struct rcu_head rcu; /* to avoid race on free */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; +}; + enum ice_ring_state_t { ICE_TX_XPS_INIT_DONE, ICE_TX_NBITS, @@ -283,9 +293,7 @@ struct ice_rx_ring { u16 rx_buf_len; /* stats structs */ - struct ice_rxq_stats rx_stats; - struct ice_q_stats stats; - struct u64_stats_sync syncp; + struct ice_ring_stats *ring_stats; struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ @@ -325,10 +333,8 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_txq_stats tx_stats; + struct ice_ring_stats *ring_stats; /* CL3 - 3rd cacheline starts here */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7ee38d02d1e5..25f04266c668 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -285,7 +285,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; + xdp_ring->ring_stats->tx_stats.tx_busy++; return ICE_XDP_CONSUMED; } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index fc8c93fa4455..d4a4001b6e5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -39,6 +39,24 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); } +static const u32 ice_legacy_aq_to_vc_speed[15] = { + VIRTCHNL_LINK_SPEED_100MB, /* BIT(0) */ + VIRTCHNL_LINK_SPEED_100MB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_10GB, + VIRTCHNL_LINK_SPEED_20GB, + VIRTCHNL_LINK_SPEED_25GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN /* 
BIT(14) */ +}; + /** * ice_conv_link_speed_to_virtchnl * @adv_link_support: determines the format of the returned link speed @@ -55,79 +73,17 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) { u32 speed; - if (adv_link_support) - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - speed = ICE_LINK_SPEED_10MBPS; - break; - case ICE_AQ_LINK_SPEED_100MB: - speed = ICE_LINK_SPEED_100MBPS; - break; - case ICE_AQ_LINK_SPEED_1000MB: - speed = ICE_LINK_SPEED_1000MBPS; - break; - case ICE_AQ_LINK_SPEED_2500MB: - speed = ICE_LINK_SPEED_2500MBPS; - break; - case ICE_AQ_LINK_SPEED_5GB: - speed = ICE_LINK_SPEED_5000MBPS; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = ICE_LINK_SPEED_10000MBPS; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = ICE_LINK_SPEED_20000MBPS; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = ICE_LINK_SPEED_25000MBPS; - break; - case ICE_AQ_LINK_SPEED_40GB: - speed = ICE_LINK_SPEED_40000MBPS; - break; - case ICE_AQ_LINK_SPEED_50GB: - speed = ICE_LINK_SPEED_50000MBPS; - break; - case ICE_AQ_LINK_SPEED_100GB: - speed = ICE_LINK_SPEED_100000MBPS; - break; - default: - speed = ICE_LINK_SPEED_UNKNOWN; - break; - } - else + if (adv_link_support) { + /* convert a BIT() value into an array index */ + speed = ice_get_link_speed(fls(link_speed) - 1); + } else { /* Virtchnl speeds are not defined for every speed supported in * the hardware. To maintain compatibility with older AVF * drivers, while reporting the speed the new speed values are * resolved to the closest known virtchnl speeds */ - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - case ICE_AQ_LINK_SPEED_100MB: - speed = (u32)VIRTCHNL_LINK_SPEED_100MB; - break; - case ICE_AQ_LINK_SPEED_1000MB: - case ICE_AQ_LINK_SPEED_2500MB: - case ICE_AQ_LINK_SPEED_5GB: - speed = (u32)VIRTCHNL_LINK_SPEED_1GB; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = (u32)VIRTCHNL_LINK_SPEED_10GB; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = (u32)VIRTCHNL_LINK_SPEED_20GB; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = (u32)VIRTCHNL_LINK_SPEED_25GB; - break; - case ICE_AQ_LINK_SPEED_40GB: - case ICE_AQ_LINK_SPEED_50GB: - case ICE_AQ_LINK_SPEED_100GB: - speed = (u32)VIRTCHNL_LINK_SPEED_40GB; - break; - default: - speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; - break; - } + speed = ice_legacy_aq_to_vc_speed[fls(link_speed) - 1]; + } return speed; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index ec90e594986e..dab3cd5d300e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1621,9 +1621,6 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } for (i = 0; i < qci->num_queue_pairs; i++) { - struct ice_hw *hw; - u32 rxdid; - u16 pf_q; qpi = &qci->qpair[i]; if (qpi->txq.vsi_id != qci->vsi_id || qpi->rxq.vsi_id != qci->vsi_id || @@ -1664,6 +1661,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { u16 max_frame_size = ice_vc_get_max_frame_size(vf); + u32 rxdid; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -1691,26 +1689,25 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vf->vf_id, i); goto error_param; } - } - /* VF Rx queue RXDID configuration */ - pf_q = vsi->rxq_map[qpi->rxq.queue_id]; - rxdid = qpi->rxq.rxdid; - hw = &vsi->back->hw; + /* If Rx flex desc is supported, select RXDID for Rx + * queues. 
Otherwise, use legacy 32byte descriptor + * format. Legacy 16byte descriptor is not supported. + * If this RXDID is selected, return error. + */ + if (vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + rxdid = qpi->rxq.rxdid; + if (!(BIT(rxdid) & pf->supported_rxdids)) + goto error_param; + } else { + rxdid = ICE_RXDID_LEGACY_1; + } - /* If Rx flex desc is supported, select RXDID for Rx queues. - * Otherwise, use legacy 32byte descriptor format. - * Legacy 16byte descriptor is not supported. If this RXDID - * is selected, return error. - */ - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { - if (!(BIT(rxdid) & pf->supported_rxdids)) - goto error_param; - } else { - rxdid = ICE_RXDID_LEGACY_1; + ice_write_qrxflxp_cntxt(&vsi->back->hw, + vsi->rxq_map[q_idx], + rxdid, 0x03, false); } - - ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x03, false); } /* send the response to the VF */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 056c904b83cc..907055b77af0 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -24,13 +24,24 @@ static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) */ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) { - memset(&vsi->rx_rings[q_idx]->rx_stats, 0, - sizeof(vsi->rx_rings[q_idx]->rx_stats)); - memset(&vsi->tx_rings[q_idx]->stats, 0, - sizeof(vsi->tx_rings[q_idx]->stats)); + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf; + + pf = vsi->back; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, + sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); + memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, + sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); if (ice_is_xdp_ena_vsi(vsi)) - memset(&vsi->xdp_rings[q_idx]->stats, 0, - sizeof(vsi->xdp_rings[q_idx]->stats)); + memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } /** @@ -722,7 +733,7 @@ construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; break; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0aaf70c063da..ea0a230c1153 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4876,6 +4876,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4884,7 +4886,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index d98f7e9a480e..4da45c5abba5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6104,6 +6104,13 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, } } + /* Only valid on OF enabled platforms */ + if 
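The ixgbevf hunk above fixes a leak: if pci_register_driver() fails, the workqueue created just before it was never destroyed. The general rule, every resource acquired before a failing step is released in reverse order, reduced to a sketch with hypothetical foo_* names:

    static int __init foo_init_module(void)
    {
            int err;

            foo_wq = create_singlethread_workqueue("foo");
            if (!foo_wq)
                    return -ENOMEM;

            err = pci_register_driver(&foo_driver);
            if (err)
                    destroy_workqueue(foo_wq);  /* undo step 1 on failure */

            return err;
    }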
(!of_get_mac_address_nvmem(to_of_node(fwnode), fw_mac_addr)) { + *mac_from = "nvmem cell"; + eth_hw_addr_set(dev, fw_mac_addr); + return; + } + *mac_from = "random"; eth_hw_addr_random(dev); } @@ -7349,6 +7356,7 @@ static int mvpp2_get_sram(struct platform_device *pdev, struct mvpp2 *priv) { struct resource *res; + void __iomem *base; res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { @@ -7359,9 +7367,12 @@ static int mvpp2_get_sram(struct platform_device *pdev, return 0; } - priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); - return PTR_ERR_OR_ZERO(priv->cm3_base); + priv->cm3_base = base; + return 0; } static int mvpp2_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 6b4f640163f7..993ac180a5db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,7 +32,6 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK - depends on MACSEC || !MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 8d5d5a0f68c4..d2584ebb7a70 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -245,6 +245,9 @@ M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats, \ M(NPC_GET_SECRET_KEY, 0x6013, npc_get_secret_key, \ npc_get_secret_key_req, \ npc_get_secret_key_rsp) \ +M(NPC_GET_FIELD_STATUS, 0x6014, npc_get_field_status, \ + npc_get_field_status_req, \ + npc_get_field_status_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1437,6 +1440,10 @@ struct flow_msg { u8 tc; __be16 sport; __be16 dport; + union { + u8 ip_flag; + u8 next_header; + }; }; struct npc_install_flow_req { @@ -1541,6 +1548,17 @@ struct ptp_rsp { u64 clk; }; +struct npc_get_field_status_req { + struct mbox_msghdr hdr; + u8 intf; + u8 field; +}; + +struct npc_get_field_status_rsp { + struct mbox_msghdr hdr; + u8 enable; +}; + struct set_vf_perm { struct mbox_msghdr hdr; u16 vf; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 4a343f853b28..c0bedf402da9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -951,7 +951,7 @@ static void mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction d else event.intr_mask = (dir == MCS_RX) ? 
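The mvpp2 hunk earlier in this run adds of_get_mac_address_nvmem() as an intermediate step when selecting a MAC address. The resulting precedence, firmware-provided address first, then an nvmem cell, then a random fallback, sketched with generic names (these helpers follow the usual return-0-on-success convention):

    if (!fwnode_get_mac_address(fwnode, addr)) {
            source = "firmware node";           /* DT/ACPI property */
    } else if (!of_get_mac_address_nvmem(to_of_node(fwnode), addr)) {
            source = "nvmem cell";              /* e.g. an EEPROM cell */
    } else {
            eth_random_addr(addr);              /* last resort */
            source = "random";
    }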
MCS_BBE_RX_PLFIFO_OVERFLOW_INT : - MCS_BBE_RX_PLFIFO_OVERFLOW_INT; + MCS_BBE_TX_PLFIFO_OVERFLOW_INT; /* Notify the lmac_id info which ran into BBE fatal error */ event.lmac_id = i & 0x3ULL; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index d027c23b8ef8..9beeead56d7b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -185,8 +185,10 @@ enum key_fields { NPC_VLAN_ETYPE_STAG, /* 0x88A8 */ NPC_OUTER_VID, NPC_TOS, + NPC_IPFRAG_IPV4, NPC_SIP_IPV4, NPC_DIP_IPV4, + NPC_IPFRAG_IPV6, NPC_SIP_IPV6, NPC_DIP_IPV6, NPC_IPPROTO_TCP, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 76474385a602..f718cbd32a94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -851,6 +851,7 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, u64 chan_val, u64 chan_mask, u64 exact_val, u64 exact_mask, u64 bcast_mcast_val, u64 bcast_mcast_mask); void npc_mcam_rsrcs_reserve(struct rvu *rvu, int blkaddr, int entry_idx); +bool npc_is_feature_supported(struct rvu *rvu, u64 features, u8 intf); /* CPT APIs */ int rvu_cpt_register_interrupts(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 642e58a04da0..0eb3085c4c21 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -880,6 +880,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) sprintf(lmac, "LMAC%d", lmac_id); seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + + pci_dev_put(pdev); } return 0; } @@ -2566,6 +2568,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) } } + pci_dev_put(pdev); return 0; } @@ -2799,6 +2802,14 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, seq_printf(s, "%pI6 ", rule->packet.ip6dst); seq_printf(s, "mask %pI6\n", rule->mask.ip6dst); break; + case NPC_IPFRAG_IPV6: + seq_printf(s, "0x%x ", rule->packet.next_header); + seq_printf(s, "mask 0x%x\n", rule->mask.next_header); + break; + case NPC_IPFRAG_IPV4: + seq_printf(s, "0x%x ", rule->packet.ip_flag); + seq_printf(s, "mask 0x%x\n", rule->mask.ip_flag); + break; case NPC_SPORT_TCP: case NPC_SPORT_UDP: case NPC_SPORT_SCTP: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 88dee589cb21..bda1a6fa2ec4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1547,14 +1547,7 @@ static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, return 0; } -static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - return devlink_info_driver_name_put(req, DRV_NAME); -} - static const struct devlink_ops rvu_devlink_ops = { - .info_get = rvu_devlink_info_get, .eswitch_mode_get = rvu_devlink_eswitch_mode_get, .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 7646bb2ec89b..a62c1b322012 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4985,6 +4985,8 @@ static int nix_setup_ipolicers(struct rvu *rvu, ipolicer->ref_count = devm_kcalloc(rvu->dev, ipolicer->band_prof.max, sizeof(u16), GFP_KERNEL); + if (!ipolicer->ref_count) + return -ENOMEM; } /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 1e348fd0d930..16cfc802e348 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -617,6 +617,12 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, if (blkaddr < 0) return; + /* Ucast rule should not be installed if DMAC + * extraction is not supported by the profile. + */ + if (!npc_is_feature_supported(rvu, BIT_ULL(NPC_DMAC), pfvf->nix_rx_intf)) + return; + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_UCAST_ENTRY); @@ -778,6 +784,14 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, /* Get 'pcifunc' of PF device */ pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; pfvf = rvu_get_pfvf(rvu, pcifunc); + + /* Bcast rule should not be installed if both DMAC + * and LXMB extraction is not supported by the profile. + */ + if (!npc_is_feature_supported(rvu, BIT_ULL(NPC_DMAC), pfvf->nix_rx_intf) && + !npc_is_feature_supported(rvu, BIT_ULL(NPC_LXMB), pfvf->nix_rx_intf)) + return; + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); @@ -848,6 +862,14 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, vf_func = pcifunc & RVU_PFVF_FUNC_MASK; pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; pfvf = rvu_get_pfvf(rvu, pcifunc); + + /* Mcast rule should not be installed if both DMAC + * and LXMB extraction is not supported by the profile. + */ + if (!npc_is_feature_supported(rvu, BIT_ULL(NPC_DMAC), pfvf->nix_rx_intf) && + !npc_is_feature_supported(rvu, BIT_ULL(NPC_LXMB), pfvf->nix_rx_intf)) + return; + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_ALLMULTI_ENTRY); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 50d3994efa97..006beb5cf98d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -26,8 +26,10 @@ static const char * const npc_flow_names[] = { [NPC_VLAN_ETYPE_STAG] = "vlan ether type stag", [NPC_OUTER_VID] = "outer vlan id", [NPC_TOS] = "tos", + [NPC_IPFRAG_IPV4] = "fragmented IPv4 header ", [NPC_SIP_IPV4] = "ipv4 source ip", [NPC_DIP_IPV4] = "ipv4 destination ip", + [NPC_IPFRAG_IPV6] = "fragmented IPv6 header ", [NPC_SIP_IPV6] = "ipv6 source ip", [NPC_DIP_IPV6] = "ipv6 destination ip", [NPC_IPPROTO_TCP] = "ip proto tcp", @@ -47,6 +49,19 @@ static const char * const npc_flow_names[] = { [NPC_UNKNOWN] = "unknown", }; +bool npc_is_feature_supported(struct rvu *rvu, u64 features, u8 intf) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 mcam_features; + u64 unsupported; + + mcam_features = is_npc_intf_tx(intf) ? 
mcam->tx_features : mcam->rx_features; + unsupported = (mcam_features ^ features) & ~mcam_features; + + /* Return false if at least one of the input flows is not extracted */ + return !unsupported; +} + const char *npc_get_field_name(u8 hdr) { if (hdr >= ARRAY_SIZE(npc_flow_names)) @@ -436,8 +451,6 @@ static void npc_scan_ldata(struct rvu *rvu, int blkaddr, u8 lid, nr_bytes = FIELD_GET(NPC_BYTESM, cfg) + 1; hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); key = FIELD_GET(NPC_KEY_OFFSET, cfg); - start_kwi = key / 8; - offset = (key * 8) % 64; /* For Tx, Layer A has NIX_INST_HDR_S(64 bytes) preceding * ethernet header. @@ -452,13 +465,18 @@ static void npc_scan_ldata(struct rvu *rvu, int blkaddr, u8 lid, #define NPC_SCAN_HDR(name, hlid, hlt, hstart, hlen) \ do { \ + start_kwi = key / 8; \ + offset = (key * 8) % 64; \ if (lid == (hlid) && lt == (hlt)) { \ if ((hstart) >= hdr && \ ((hstart) + (hlen)) <= (hdr + nr_bytes)) { \ bit_offset = (hdr + nr_bytes - (hstart) - (hlen)) * 8; \ npc_set_layer_mdata(mcam, (name), cfg, lid, lt, intf); \ + offset += bit_offset; \ + start_kwi += offset / 64; \ + offset %= 64; \ npc_set_kw_masks(mcam, (name), (hlen) * 8, \ - start_kwi, offset + bit_offset, intf);\ + start_kwi, offset, intf); \ } \ } \ } while (0) @@ -468,8 +486,10 @@ do { \ * Example: Source IP is 4 bytes and starts at 12th byte of IP header */ NPC_SCAN_HDR(NPC_TOS, NPC_LID_LC, NPC_LT_LC_IP, 1, 1); + NPC_SCAN_HDR(NPC_IPFRAG_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 6, 1); NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4); NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4); + NPC_SCAN_HDR(NPC_IPFRAG_IPV6, NPC_LID_LC, NPC_LT_LC_IP6_EXT, 6, 1); NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); NPC_SCAN_HDR(NPC_DIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 24, 16); NPC_SCAN_HDR(NPC_SPORT_UDP, NPC_LID_LD, NPC_LT_LD_UDP, 0, 2); @@ -650,9 +670,9 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf) unsupported = (*mcam_features ^ features) & ~(*mcam_features); if (unsupported) { - dev_info(rvu->dev, "Unsupported flow(s):\n"); + dev_warn(rvu->dev, "Unsupported flow(s):\n"); for_each_set_bit(bit, (unsigned long *)&unsupported, 64) - dev_info(rvu->dev, "%s ", npc_get_field_name(bit)); + dev_warn(rvu->dev, "%s ", npc_get_field_name(bit)); return -EOPNOTSUPP; } @@ -883,6 +903,8 @@ do { \ NPC_WRITE_FLOW(NPC_ETYPE, etype, ntohs(pkt->etype), 0, ntohs(mask->etype), 0); NPC_WRITE_FLOW(NPC_TOS, tos, pkt->tos, 0, mask->tos, 0); + NPC_WRITE_FLOW(NPC_IPFRAG_IPV4, ip_flag, pkt->ip_flag, 0, + mask->ip_flag, 0); NPC_WRITE_FLOW(NPC_SIP_IPV4, ip4src, ntohl(pkt->ip4src), 0, ntohl(mask->ip4src), 0); NPC_WRITE_FLOW(NPC_DIP_IPV4, ip4dst, ntohl(pkt->ip4dst), 0, @@ -903,6 +925,8 @@ do { \ NPC_WRITE_FLOW(NPC_OUTER_VID, vlan_tci, ntohs(pkt->vlan_tci), 0, ntohs(mask->vlan_tci), 0); + NPC_WRITE_FLOW(NPC_IPFRAG_IPV6, next_header, pkt->next_header, 0, + mask->next_header, 0); npc_update_ipv6_flow(rvu, entry, features, pkt, mask, output, intf); npc_update_vlan_features(rvu, entry, features, intf); @@ -1007,8 +1031,20 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, action.match_id = req->match_id; action.flow_key_alg = req->flow_key_alg; - if (req->op == NIX_RX_ACTION_DEFAULT && pfvf->def_ucast_rule) - action = pfvf->def_ucast_rule->rx_action; + if (req->op == NIX_RX_ACTION_DEFAULT) { + if (pfvf->def_ucast_rule) { + action = pfvf->def_ucast_rule->rx_action; + } else { + /* For profiles which do not extract DMAC, the default + * unicast entry is unused. 
Hence modify action for the + * requests which use same action as default unicast + * entry + */ + *(u64 *)&action = 0; + action.pf_func = target; + action.op = NIX_RX_ACTIONOP_UCAST; + } + } entry->action = *(u64 *)&action; @@ -1239,18 +1275,19 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, if (npc_check_field(rvu, blkaddr, NPC_DMAC, req->intf)) goto process_flow; - if (is_pffunc_af(req->hdr.pcifunc)) { + if (is_pffunc_af(req->hdr.pcifunc) && + req->features & BIT_ULL(NPC_DMAC)) { if (is_unicast_ether_addr(req->packet.dmac)) { - dev_err(rvu->dev, - "%s: mkex profile does not support ucast flow\n", - __func__); + dev_warn(rvu->dev, + "%s: mkex profile does not support ucast flow\n", + __func__); return NPC_FLOW_NOT_SUPPORTED; } if (!npc_is_field_present(rvu, NPC_LXMB, req->intf)) { - dev_err(rvu->dev, - "%s: mkex profile does not support bcast/mcast flow", - __func__); + dev_warn(rvu->dev, + "%s: mkex profile does not support bcast/mcast flow", + __func__); return NPC_FLOW_NOT_SUPPORTED; } @@ -1603,3 +1640,22 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, return 0; } + +int rvu_mbox_handler_npc_get_field_status(struct rvu *rvu, + struct npc_get_field_status_req *req, + struct npc_get_field_status_rsp *rsp) +{ + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + if (!is_npc_interface_valid(rvu, req->intf)) + return NPC_FLOW_INTF_INVALID; + + if (npc_check_field(rvu, blkaddr, req->field, req->intf)) + rsp->enable = 1; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 594029007f85..00aef8f5ac29 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -490,7 +490,7 @@ static bool rvu_npc_exact_alloc_id(struct rvu *rvu, u32 *seq_id) if (idx == table->tot_ids) { mutex_unlock(&table->lock); dev_err(rvu->dev, "%s: No space in id bitmap (%d)\n", - __func__, bitmap_weight(table->id_bmap, table->tot_ids)); + __func__, table->tot_ids); return false; } @@ -1870,12 +1870,11 @@ int rvu_npc_exact_init(struct rvu *rvu) /* Set capability to true */ rvu->hw->cap.npc_exact_match_enabled = true; - table = kmalloc(sizeof(*table), GFP_KERNEL); + table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; dev_dbg(rvu->dev, "%s: Memory allocation for table success\n", __func__); - memset(table, 0, sizeof(*table)); rvu->hw->table = table; /* Read table size, ways and depth */ @@ -1899,24 +1898,24 @@ int rvu_npc_exact_init(struct rvu *rvu) table_size = table->mem_table.depth * table->mem_table.ways; /* Allocate bitmap for 4way 2K table */ - table->mem_table.bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table_size), - sizeof(long), GFP_KERNEL); + table->mem_table.bmap = devm_bitmap_zalloc(rvu->dev, table_size, + GFP_KERNEL); if (!table->mem_table.bmap) return -ENOMEM; dev_dbg(rvu->dev, "%s: Allocated bitmap for 4way 2K entry table\n", __func__); /* Allocate bitmap for 32 entry mcam */ - table->cam_table.bmap = devm_kcalloc(rvu->dev, 1, sizeof(long), GFP_KERNEL); + table->cam_table.bmap = devm_bitmap_zalloc(rvu->dev, 32, GFP_KERNEL); if (!table->cam_table.bmap) return -ENOMEM; dev_dbg(rvu->dev, "%s: Allocated bitmap for 32 entry cam\n", __func__); - table->tot_ids = (table->mem_table.depth * table->mem_table.ways) + table->cam_table.depth; - table->id_bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table->tot_ids), - 
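npc_is_feature_supported() above, like npc_check_unsupported_flows() near it, computes unsupported = (mcam_features ^ features) & ~mcam_features, which simplifies to features & ~mcam_features: the requested bits that the MKEX profile cannot extract. A worked example with hypothetical values (the NPC_* bits are the real enum from npc.h in this series):

    u64 have = BIT_ULL(NPC_DMAC) | BIT_ULL(NPC_SIP_IPV4);      /* profile extracts */
    u64 want = BIT_ULL(NPC_SIP_IPV4) | BIT_ULL(NPC_OUTER_VID); /* rule needs */
    u64 unsupported = (have ^ want) & ~have;

    /* (have ^ want) == NPC_DMAC | NPC_OUTER_VID; masking with ~have drops
     * NPC_DMAC, leaving NPC_OUTER_VID: the one field we cannot match on. */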
table->tot_ids, GFP_KERNEL); + table->tot_ids = table_size + table->cam_table.depth; + table->id_bmap = devm_bitmap_zalloc(rvu->dev, table->tot_ids, + GFP_KERNEL); if (!table->id_bmap) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c index b04fb226f708..ae50d56258ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c @@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu) pfvf->sdp_info = devm_kzalloc(rvu->dev, sizeof(struct sdp_node_info), GFP_KERNEL); - if (!pfvf->sdp_info) + if (!pfvf->sdp_info) { + pci_dev_put(pdev); return -ENOMEM; + } dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]); - put_device(&pdev->dev); i++; } + pci_dev_put(pdev); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 282db6fe3b08..5bee3c3a7ce4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -28,6 +28,9 @@ #include "otx2_devlink.h" #include <rvu_trace.h> +/* IPv4 flag more fragment bit */ +#define IPV4_FLAG_MORE 0x20 + /* PCI device IDs */ #define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063 #define PCI_DEVID_OCTEONTX2_RVU_VF 0xA064 @@ -884,7 +887,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { #ifdef CONFIG_DCB - if (pfvf->pfc_alloc_status[qidx]) + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 777a27047c8e..63ef7c41d18d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -77,22 +77,7 @@ static const struct devlink_param otx2_dl_params[] = { otx2_dl_mcam_count_validate), }; -/* Devlink OPs */ -static int otx2_devlink_info_get(struct devlink *devlink, - struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct otx2_devlink *otx2_dl = devlink_priv(devlink); - struct otx2_nic *pfvf = otx2_dl->pfvf; - - if (is_otx2_vf(pfvf->pcifunc)) - return devlink_info_driver_name_put(req, "rvu_nicvf"); - - return devlink_info_driver_name_put(req, "rvu_nicpf"); -} - static const struct devlink_ops otx2_devlink_ops = { - .info_get = otx2_devlink_info_get, }; int otx2_register_dl(struct otx2_nic *pfvf) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 709fc0114fbd..684cb8ec9f21 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -164,6 +164,8 @@ EXPORT_SYMBOL(otx2_alloc_mcam_entries); static int otx2_mcam_entry_init(struct otx2_nic *pfvf) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_get_field_status_req *freq; + struct npc_get_field_status_rsp *frsp; struct npc_mcam_alloc_entry_req *req; struct npc_mcam_alloc_entry_rsp *rsp; int vf_vlan_max_flows; @@ -214,8 +216,29 @@ static int otx2_mcam_entry_init(struct otx2_nic *pfvf) flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset + OTX2_MAX_UNICAST_FLOWS; pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT; - pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; - pfvf->flags |= 
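The rvu_npc_hash.c conversion above also fixes a sizing mistake: the removed call passed table->tot_ids as the devm_kcalloc() element size, allocating BITS_TO_LONGS(tot_ids) * tot_ids bytes instead of BITS_TO_LONGS(tot_ids) * sizeof(long). The two idioms below allocate the same correctly sized, zeroed bitmap; the helper states the intent directly:

    unsigned long *bmap;

    /* open-coded */
    bmap = devm_kcalloc(dev, BITS_TO_LONGS(nbits), sizeof(*bmap), GFP_KERNEL);

    /* preferred helper, equivalent size */
    bmap = devm_bitmap_zalloc(dev, nbits, GFP_KERNEL);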
OTX2_FLAG_VF_VLAN_SUPPORT; + + /* Check if NPC_DMAC field is supported + * by the mkex profile before setting VLAN support flag. + */ + freq = otx2_mbox_alloc_msg_npc_get_field_status(&pfvf->mbox); + if (!freq) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + freq->field = NPC_DMAC; + if (otx2_sync_mbox_msg(&pfvf->mbox)) { + mutex_unlock(&pfvf->mbox.lock); + return -EINVAL; + } + + frsp = (struct npc_get_field_status_rsp *)otx2_mbox_get_rsp + (&pfvf->mbox.mbox, 0, &freq->hdr); + + if (frsp->enable) { + pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; + pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT; + } pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC; mutex_unlock(&pfvf->mbox.lock); @@ -688,6 +711,11 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip6dst)); req->features |= BIT_ULL(NPC_DIP_IPV6); } + if (ipv6_usr_hdr->l4_proto == IPPROTO_FRAGMENT) { + pkt->next_header = ipv6_usr_hdr->l4_proto; + pmask->next_header = ipv6_usr_mask->l4_proto; + req->features |= BIT_ULL(NPC_IPFRAG_IPV6); + } pkt->etype = cpu_to_be16(ETH_P_IPV6); pmask->etype = cpu_to_be16(0xFFFF); req->features |= BIT_ULL(NPC_ETYPE); @@ -868,10 +896,22 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, req->features |= BIT_ULL(NPC_OUTER_VID); } - /* Not Drop/Direct to queue but use action in default entry */ - if (fsp->m_ext.data[1] && - fsp->h_ext.data[1] == cpu_to_be32(OTX2_DEFAULT_ACTION)) - req->op = NIX_RX_ACTION_DEFAULT; + if (fsp->m_ext.data[1]) { + if (flow_type == IP_USER_FLOW) { + if (be32_to_cpu(fsp->h_ext.data[1]) != IPV4_FLAG_MORE) + return -EINVAL; + + pkt->ip_flag = be32_to_cpu(fsp->h_ext.data[1]); + pmask->ip_flag = be32_to_cpu(fsp->m_ext.data[1]); + req->features |= BIT_ULL(NPC_IPFRAG_IPV4); + } else if (fsp->h_ext.data[1] == + cpu_to_be32(OTX2_DEFAULT_ACTION)) { + /* Not Drop/Direct to queue but use action + * in default entry + */ + req->op = NIX_RX_ACTION_DEFAULT; + } + } } if (fsp->flow_type & FLOW_MAC_EXT && diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index e64318c110fd..e421714524c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -532,6 +532,31 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, req->features |= BIT_ULL(NPC_IPPROTO_ICMP6); } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { + NL_SET_ERR_MSG_MOD(extack, "HW doesn't support frag first/later"); + return -EOPNOTSUPP; + } + + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { + if (ntohs(flow_spec->etype) == ETH_P_IP) { + flow_spec->ip_flag = IPV4_FLAG_MORE; + flow_mask->ip_flag = 0xff; + req->features |= BIT_ULL(NPC_IPFRAG_IPV4); + } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) { + flow_spec->next_header = IPPROTO_FRAGMENT; + flow_mask->next_header = 0xff; + req->features |= BIT_ULL(NPC_IPFRAG_IPV6); + } else { + NL_SET_ERR_MSG_MOD(extack, "flow-type should be either IPv4 or IPv6"); + return -EOPNOTSUPP; + } + } + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index 84ad05c9f12d..2a4c9df4eb79 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ 
b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -355,11 +355,6 @@ static int prestera_dl_info_get(struct devlink *dl, { struct prestera_switch *sw = devlink_priv(dl); char buf[16]; - int err; - - err = devlink_info_driver_name_put(req, PRESTERA_DRV_NAME); - if (err) - return err; snprintf(buf, sizeof(buf), "%d.%d.%d", sw->dev->fw_rev.maj, diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index f8deaee84398..9d504142e51a 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -743,6 +743,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) return 0; err_sfp_bind: + unregister_netdev(dev); err_register_netdev: prestera_port_list_del(port); err_port_init: @@ -858,17 +859,10 @@ static void prestera_event_handlers_unregister(struct prestera_switch *sw) static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw) { - struct device_node *base_mac_np; - int ret = 0; - - if (sw->np) { - base_mac_np = of_parse_phandle(sw->np, "base-mac-provider", 0); - if (base_mac_np) { - ret = of_get_mac_address(base_mac_np, sw->base_mac); - of_node_put(base_mac_np); - } - } + int ret; + if (sw->np) + ret = of_get_mac_address(sw->np, sw->base_mac); if (!is_valid_ether_addr(sw->base_mac) || ret) { eth_random_addr(sw->base_mac); dev_info(prestera_dev(sw), "using random base mac address\n"); @@ -1372,7 +1366,7 @@ static int prestera_switch_init(struct prestera_switch *sw) { int err; - sw->np = of_find_compatible_node(NULL, NULL, "marvell,prestera"); + sw->np = sw->dev->dev->of_node; err = prestera_hw_switch_init(sw); if (err) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 4046be0e86ff..a9a1028cb17b 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -457,7 +457,7 @@ prestera_kern_neigh_cache_find(struct prestera_switch *sw, n_cache = rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key, __prestera_kern_neigh_cache_ht_params); - return IS_ERR(n_cache) ? NULL : n_cache; + return n_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index aa080dc57ff0..02faaea2aefa 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw, nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht, key, __prestera_nh_neigh_ht_params); - return IS_ERR(nh_neigh) ? NULL : nh_neigh; + return nh_neigh; } struct prestera_nh_neigh * @@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw, nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht, key, __prestera_nexthop_group_ht_params); - return IS_ERR(nh_grp) ? 
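The prestera lookup fixes in this file work because rhashtable_lookup_fast() returns either the object or NULL on a miss; it never returns an ERR_PTR(), so the removed IS_ERR() ternaries were dead code that only obscured the contract. The intended usage pattern:

    obj = rhashtable_lookup_fast(&ht, &key, ht_params);
    if (!obj)
            return NULL;    /* miss: plain NULL, never an ERR_PTR() */
    return obj;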
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index cdc0ff596196..8b93dab79141 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -55,6 +55,7 @@ static const struct mtk_reg_map mtk_reg_map = { }, .qdma = { .qtx_cfg = 0x1800, + .qtx_sch = 0x1804, .rx_ptr = 0x1900, .rx_cnt_cfg = 0x1904, .qcrx_ptr = 0x1908, @@ -62,6 +63,7 @@ static const struct mtk_reg_map mtk_reg_map = { .rst_idx = 0x1a08, .delay_irq = 0x1a0c, .fc_th = 0x1a10, + .tx_sch_rate = 0x1a14, .int_grp = 0x1a20, .hred = 0x1a44, .ctx_ptr = 0x1b00, @@ -114,6 +116,7 @@ static const struct mtk_reg_map mt7986_reg_map = { }, .qdma = { .qtx_cfg = 0x4400, + .qtx_sch = 0x4404, .rx_ptr = 0x4500, .rx_cnt_cfg = 0x4504, .qcrx_ptr = 0x4508, @@ -131,6 +134,7 @@ static const struct mtk_reg_map mt7986_reg_map = { .fq_tail = 0x4724, .fq_count = 0x4728, .fq_blen = 0x472c, + .tx_sch_rate = 0x4798, }, .gdm1_cnt = 0x1c00, .gdma_to_ppe = 0x3333, @@ -614,6 +618,75 @@ static void mtk_mac_link_down(struct phylink_config *config, unsigned int mode, mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); } +static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, + int speed) +{ + const struct mtk_soc_data *soc = eth->soc; + u32 ofs, val; + + if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) + return; + + val = MTK_QTX_SCH_MIN_RATE_EN | + /* minimum: 10 Mbps */ + FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) | + MTK_QTX_SCH_LEAKY_BUCKET_SIZE; + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + val |= MTK_QTX_SCH_LEAKY_BUCKET_EN; + + if (IS_ENABLED(CONFIG_SOC_MT7621)) { + switch (speed) { + case SPEED_10: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 2) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); + break; + case SPEED_100: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); + break; + case SPEED_1000: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 105) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); + break; + default: + break; + } + } else { + switch (speed) { + case SPEED_10: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); + break; + case SPEED_100: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); + break; + case SPEED_1000: + val |= MTK_QTX_SCH_MAX_RATE_EN | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); + break; + default: + break; + } + } + + ofs = MTK_QTX_OFFSET * idx; + mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs); +} + static void mtk_mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, @@ -639,6 +712,8 @@ static void mtk_mac_link_up(struct phylink_config *config, break; } + mtk_set_queue_speed(mac->hw, mac->id, speed); + /* Configure duplex */ if (duplex == DUPLEX_FULL) mcr |= MAC_MCR_FORCE_DPX;
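The shaper rate programmed by mtk_set_queue_speed() above appears to be a decimal mantissa/exponent pair: the "minimum: 10 Mbps" comment corresponds to MAN=1/EXP=4, i.e. 1 * 10^4 kbit/s, and the 1 Gbit/s case uses MAN=10/EXP=5 = 10^6 kbit/s. A minimal helper illustrating that reading; the encoding is inferred from the in-line comments, not from a datasheet:

/* Inferred: shaper rate in kbit/s = man * 10^exp, so
 * man=1,  exp=4 -> 10^4 kbit/s = 10 Mbit/s (the documented minimum),
 * man=10, exp=5 -> 10^6 kbit/s = 1 Gbit/s. */
static u32 mtk_qtx_sch_rate_kbps(u32 man, u32 exp)
{
	u32 rate = man;

	while (exp--)
		rate *= 10;

	return rate;
}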
@@ -938,7 +1013,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; dma_addr_t phy_ring_tail; - int cnt = MTK_DMA_SIZE; + int cnt = MTK_QDMA_RING_SIZE; dma_addr_t dma_addr; int i; @@ -1099,7 +1174,8 @@ static void mtk_tx_set_dma_desc_v1(struct net_device *dev, void *txd, WRITE_ONCE(desc->txd1, info->addr); - data = TX_DMA_SWC | TX_DMA_PLEN0(info->size); + data = TX_DMA_SWC | TX_DMA_PLEN0(info->size) | + FIELD_PREP(TX_DMA_PQID, info->qid); if (info->last) data |= TX_DMA_LS0; WRITE_ONCE(desc->txd3, data); @@ -1133,9 +1209,6 @@ static void mtk_tx_set_dma_desc_v2(struct net_device *dev, void *txd, data |= TX_DMA_LS0; WRITE_ONCE(desc->txd3, data); - if (!info->qid && mac->id) - info->qid = MTK_QDMA_GMAC2_QID; - data = (mac->id + 1) << TX_DMA_FPORT_SHIFT_V2; /* forward port */ data |= TX_DMA_SWC_V2 | QID_BITS_V2(info->qid); WRITE_ONCE(desc->txd4, data); @@ -1179,11 +1252,12 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, .gso = gso, .csum = skb->ip_summed == CHECKSUM_PARTIAL, .vlan = skb_vlan_tag_present(skb), - .qid = skb->mark & MTK_QDMA_TX_MASK, + .qid = skb_get_queue_mapping(skb), .vlan_tci = skb_vlan_tag_get(skb), .first = true, .last = !skb_is_nonlinear(skb), }; + struct netdev_queue *txq; struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; const struct mtk_soc_data *soc = eth->soc; @@ -1191,8 +1265,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, struct mtk_tx_dma *itxd_pdma, *txd_pdma; struct mtk_tx_buf *itx_buf, *tx_buf; int i, n_desc = 1; + int queue = skb_get_queue_mapping(skb); int k = 0; + txq = netdev_get_tx_queue(dev, queue); itxd = ring->next_free; itxd_pdma = qdma_to_pdma(ring, itxd); if (itxd == ring->last_free) @@ -1241,7 +1317,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info)); txd_info.size = min_t(unsigned int, frag_size, soc->txrx.dma_max_len); - txd_info.qid = skb->mark & MTK_QDMA_TX_MASK; + txd_info.qid = queue; txd_info.last = i == skb_shinfo(skb)->nr_frags - 1 && !(frag_size - txd_info.size); txd_info.addr = skb_frag_dma_map(eth->dma_dev, frag, @@ -1280,7 +1356,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, txd_pdma->txd2 |= TX_DMA_LS1; } - netdev_sent_queue(dev, skb->len); + netdev_tx_sent_queue(txq, skb->len); skb_tx_timestamp(skb); ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2); @@ -1292,8 +1368,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, wmb(); if (MTK_HAS_CAPS(soc->caps, MTK_QDMA)) { - if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || - !netdev_xmit_more()) + if (netif_xmit_stopped(txq) || !netdev_xmit_more()) mtk_w32(eth, txd->txd2, soc->reg_map->qdma.ctx_ptr); } else { int next_idx; @@ -1362,7 +1437,7 @@ static void mtk_wake_queue(struct mtk_eth *eth) for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) continue; - netif_wake_queue(eth->netdev[i]); + netif_tx_wake_all_queues(eth->netdev[i]); } } @@ -1386,7 +1461,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_num = mtk_cal_txd_req(eth, skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); netif_err(eth, tx_queued, dev, "Tx Ring full when queue awake!\n"); spin_unlock(&eth->page_lock); @@ -1412,7 +1487,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); spin_unlock(&eth->page_lock);
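The netdev_sent_queue() to netdev_tx_sent_queue() conversion above moves byte queue limit (BQL) accounting from the device as a whole to the individual TX queue. The invariant to keep in mind: every byte reported on a struct netdev_queue at transmit time must later be reported back on that same queue at completion time, otherwise BQL eventually stalls the queue. A minimal sketch of the pairing, with illustrative function names:

static void my_xmit_account(struct net_device *dev, struct sk_buff *skb)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
	netdev_tx_sent_queue(txq, skb->len);		/* on the xmit path */
}

static void my_completion_account(struct netdev_queue *txq,
				  unsigned int pkts, unsigned int bytes)
{
	netdev_tx_completed_queue(txq, pkts, bytes);	/* from NAPI poll */
}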
@@ -1579,10 +1654,12 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); const struct mtk_soc_data *soc = eth->soc; struct mtk_tx_ring *ring = &eth->tx_ring; + struct mtk_mac *mac = netdev_priv(dev); struct mtk_tx_dma_desc_info txd_info = { .size = xdpf->len, .first = true, .last = !xdp_frame_has_frags(xdpf), + .qid = mac->id, }; int err, index = 0, n_desc = 1, nr_frags; struct mtk_tx_buf *htx_buf, *tx_buf; @@ -1632,6 +1709,7 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info)); txd_info.size = skb_frag_size(&sinfo->frags[index]); txd_info.last = index + 1 == nr_frags; + txd_info.qid = mac->id; data = skb_frag_address(&sinfo->frags[index]); index++; @@ -1992,8 +2070,46 @@ rx_done: return done; } +struct mtk_poll_state { + struct netdev_queue *txq; + unsigned int total; + unsigned int done; + unsigned int bytes; +}; + +static void +mtk_poll_tx_done(struct mtk_eth *eth, struct mtk_poll_state *state, u8 mac, + struct sk_buff *skb) +{ + struct netdev_queue *txq; + struct net_device *dev; + unsigned int bytes = skb->len; + + state->total++; + eth->tx_packets++; + eth->tx_bytes += bytes; + + dev = eth->netdev[mac]; + if (!dev) + return; + + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + if (state->txq == txq) { + state->done++; + state->bytes += bytes; + return; + } + + if (state->txq) + netdev_tx_completed_queue(state->txq, state->done, state->bytes); + + state->txq = txq; + state->done = 1; + state->bytes = bytes; +} + static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, - unsigned int *done, unsigned int *bytes) + struct mtk_poll_state *state) { const struct mtk_reg_map *reg_map = eth->soc->reg_map; struct mtk_tx_ring *ring = &eth->tx_ring; @@ -2025,12 +2141,9 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, break; if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { - if (tx_buf->type == MTK_TYPE_SKB) { - struct sk_buff *skb = tx_buf->data; + if (tx_buf->type == MTK_TYPE_SKB) + mtk_poll_tx_done(eth, state, mac, tx_buf->data); - bytes[mac] += skb->len; - done[mac]++; - } budget--; } mtk_tx_unmap(eth, tx_buf, &bq, true); @@ -2049,7 +2162,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, } static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, - unsigned int *done, unsigned int *bytes) + struct mtk_poll_state *state) { struct mtk_tx_ring *ring = &eth->tx_ring; struct mtk_tx_buf *tx_buf; @@ -2067,12 +2180,8 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, break; if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) { - if (tx_buf->type == MTK_TYPE_SKB) { - struct sk_buff *skb = tx_buf->data; - - bytes[0] += skb->len; - done[0]++; - } + if (tx_buf->type == MTK_TYPE_SKB) + mtk_poll_tx_done(eth, state, 0, tx_buf->data); budget--; } mtk_tx_unmap(eth, tx_buf, &bq, true);
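mtk_poll_tx_done() above batches completions by run length: consecutive completions on the same TX queue are accumulated, and a BQL update is flushed only when the queue changes, leaving the final batch open for the caller to flush (see the mtk_poll_tx hunk that follows). A compact restatement of the pattern, with illustrative names:

struct tx_batch {
	struct netdev_queue *txq;
	unsigned int pkts, bytes;
};

static void tx_batch_add(struct tx_batch *b, struct netdev_queue *txq,
			 unsigned int bytes)
{
	if (b->txq != txq) {
		if (b->txq)	/* flush the previous queue's batch */
			netdev_tx_completed_queue(b->txq, b->pkts, b->bytes);
		b->txq = txq;
		b->pkts = 0;
		b->bytes = 0;
	}
	b->pkts++;
	b->bytes += bytes;
}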
@@ -2094,26 +2203,15 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) { struct mtk_tx_ring *ring = &eth->tx_ring; struct dim_sample dim_sample = {}; - unsigned int done[MTK_MAX_DEVS]; - unsigned int bytes[MTK_MAX_DEVS]; - int total = 0, i; - - memset(done, 0, sizeof(done)); - memset(bytes, 0, sizeof(bytes)); + struct mtk_poll_state state = {}; if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) - budget = mtk_poll_tx_qdma(eth, budget, done, bytes); + budget = mtk_poll_tx_qdma(eth, budget, &state); else - budget = mtk_poll_tx_pdma(eth, budget, done, bytes); + budget = mtk_poll_tx_pdma(eth, budget, &state); - for (i = 0; i < MTK_MAC_COUNT; i++) { - if (!eth->netdev[i] || !done[i]) - continue; - netdev_completed_queue(eth->netdev[i], done[i], bytes[i]); - total += done[i]; - eth->tx_packets += done[i]; - eth->tx_bytes += bytes[i]; - } + if (state.txq) + netdev_tx_completed_queue(state.txq, state.done, state.bytes); dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes, &dim_sample); @@ -2123,7 +2221,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); - return total; + return state.total; } static void mtk_handle_status_irq(struct mtk_eth *eth) @@ -2208,19 +2306,26 @@ static int mtk_tx_alloc(struct mtk_eth *eth) struct mtk_tx_ring *ring = &eth->tx_ring; int i, sz = soc->txrx.txd_size; struct mtk_tx_dma_v2 *txd; + int ring_size; + u32 ofs, val; + + if (MTK_HAS_CAPS(soc->caps, MTK_QDMA)) + ring_size = MTK_QDMA_RING_SIZE; + else + ring_size = MTK_DMA_SIZE; - ring->buf = kcalloc(MTK_DMA_SIZE, sizeof(*ring->buf), + ring->buf = kcalloc(ring_size, sizeof(*ring->buf), GFP_KERNEL); if (!ring->buf) goto no_tx_mem; - ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz, + ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz, &ring->phys, GFP_KERNEL); if (!ring->dma) goto no_tx_mem; - for (i = 0; i < MTK_DMA_SIZE; i++) { - int next = (i + 1) % MTK_DMA_SIZE; + for (i = 0; i < ring_size; i++) { + int next = (i + 1) % ring_size; u32 next_ptr = ring->phys + next * sz; txd = ring->dma + i * sz; @@ -2240,22 +2345,22 @@ static int mtk_tx_alloc(struct mtk_eth *eth) * descriptors in ring->dma_pdma. */ if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) { - ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz, + ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, ring_size * sz, &ring->phys_pdma, GFP_KERNEL); if (!ring->dma_pdma) goto no_tx_mem; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < ring_size; i++) { ring->dma_pdma[i].txd2 = TX_DMA_DESP2_DEF; ring->dma_pdma[i].txd4 = 0; } } - ring->dma_size = MTK_DMA_SIZE; - atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); + ring->dma_size = ring_size; + atomic_set(&ring->free_count, ring_size - 2); ring->next_free = ring->dma; ring->last_free = (void *)txd; - ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz)); + ring->last_free_ptr = (u32)(ring->phys + ((ring_size - 1) * sz)); ring->thresh = MAX_SKB_FRAGS;
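The allocation loop above links the TX descriptors into a hardware ring by storing, in each descriptor, the DMA address of the following slot, with (i + 1) % ring_size wrapping the last descriptor back to slot 0. The address arithmetic, restated as a one-liner with an illustrative helper name:

/* DMA address of the descriptor after slot i in a circular ring of
 * ring_size descriptors of sz bytes each, based at phys. */
static dma_addr_t next_txd_addr(dma_addr_t phys, int i, int ring_size, int sz)
{
	return phys + (dma_addr_t)((i + 1) % ring_size) * sz;
}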
/* make sure that all changes to the dma ring are flushed before we @@ -2267,14 +2372,31 @@ static int mtk_tx_alloc(struct mtk_eth *eth) mtk_w32(eth, ring->phys, soc->reg_map->qdma.ctx_ptr); mtk_w32(eth, ring->phys, soc->reg_map->qdma.dtx_ptr); mtk_w32(eth, - ring->phys + ((MTK_DMA_SIZE - 1) * sz), + ring->phys + ((ring_size - 1) * sz), soc->reg_map->qdma.crx_ptr); mtk_w32(eth, ring->last_free_ptr, soc->reg_map->qdma.drx_ptr); - mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, - soc->reg_map->qdma.qtx_cfg); + + for (i = 0, ofs = 0; i < MTK_QDMA_NUM_QUEUES; i++) { + val = (QDMA_RES_THRES << 8) | QDMA_RES_THRES; + mtk_w32(eth, val, soc->reg_map->qdma.qtx_cfg + ofs); + + val = MTK_QTX_SCH_MIN_RATE_EN | + /* minimum: 10 Mbps */ + FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) | + MTK_QTX_SCH_LEAKY_BUCKET_SIZE; + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + val |= MTK_QTX_SCH_LEAKY_BUCKET_EN; + mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs); + ofs += MTK_QTX_OFFSET; + } + val = MTK_QDMA_TX_SCH_MAX_WFQ | (MTK_QDMA_TX_SCH_MAX_WFQ << 16); + mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate + 4); } else { mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MT7628_TX_MAX_CNT0); + mtk_w32(eth, ring_size, MT7628_TX_MAX_CNT0); mtk_w32(eth, 0, MT7628_TX_CTX_IDX0); mtk_w32(eth, MT7628_PST_DTX_IDX0, soc->reg_map->pdma.rst_idx); } @@ -2292,7 +2414,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) int i; if (ring->buf) { - for (i = 0; i < MTK_DMA_SIZE; i++) + for (i = 0; i < ring->dma_size; i++) mtk_tx_unmap(eth, &ring->buf[i], NULL, false); kfree(ring->buf); ring->buf = NULL; @@ -2300,14 +2422,14 @@ if (ring->dma) { dma_free_coherent(eth->dma_dev, - MTK_DMA_SIZE * soc->txrx.txd_size, + ring->dma_size * soc->txrx.txd_size, ring->dma, ring->phys); ring->dma = NULL; } if (ring->dma_pdma) { dma_free_coherent(eth->dma_dev, - MTK_DMA_SIZE * soc->txrx.txd_size, + ring->dma_size * soc->txrx.txd_size, ring->dma_pdma, ring->phys_pdma); ring->dma_pdma = NULL; } @@ -2384,8 +2506,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) data + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(eth->dma_dev, - dma_addr))) + dma_addr))) { + skb_free_frag(data); return -ENOMEM; + } } rxd->rxd1 = (unsigned int)dma_addr; ring->data[i] = data; @@ -2842,7 +2966,7 @@ static void mtk_dma_free(struct mtk_eth *eth) netdev_reset_queue(eth->netdev[i]); if (eth->scratch_ring) { dma_free_coherent(eth->dma_dev, - MTK_DMA_SIZE * soc->txrx.txd_size, + MTK_QDMA_RING_SIZE * soc->txrx.txd_size, eth->scratch_ring, eth->phy_scratch_ring); eth->scratch_ring = NULL; eth->phy_scratch_ring = 0; @@ -2951,7 +3075,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) val |= MTK_MUTLI_CNT | MTK_RESV_BUF | MTK_WCOMP_EN | MTK_DMAD_WR_WDONE | - MTK_CHK_DDONE_EN; + MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN; else val |= MTK_RX_BT_32DWORDS; mtk_w32(eth, val, reg_map->qdma.glo_cfg); @@ -3008,6 +3132,45 @@ static bool mtk_uses_dsa(struct net_device *dev) #endif } +static int mtk_device_event(struct notifier_block *n, unsigned long event, void *ptr) +{ + struct mtk_mac *mac = container_of(n, struct mtk_mac, device_notifier); + struct mtk_eth *eth = mac->hw; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ethtool_link_ksettings s; + struct net_device *ldev; + struct list_head *iter; + struct dsa_port *dp; + + if (event != NETDEV_CHANGE) + return NOTIFY_DONE; + + netdev_for_each_lower_dev(dev, ldev, iter) { + if (netdev_priv(ldev) == mac) + goto found; + } + + return NOTIFY_DONE; + +found: + if (!dsa_slave_dev_check(dev)) + return NOTIFY_DONE; + + if (__ethtool_get_link_ksettings(dev, &s)) + return NOTIFY_DONE; + + if (s.base.speed == 0 || s.base.speed == ((__u32)-1)) + return NOTIFY_DONE; + + dp = dsa_port_from_netdev(dev); + if (dp->index >= MTK_QDMA_NUM_QUEUES) + return NOTIFY_DONE; + + mtk_set_queue_speed(eth, dp->index + 3, s.base.speed); + + return NOTIFY_DONE; +} + static int mtk_open(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -3052,8 +3215,10 @@ static int mtk_open(struct net_device *dev) int i; err = mtk_start_dma(eth); - if (err) + if (err) { + phylink_disconnect_phy(mac->phylink); return err; + } for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) mtk_ppe_start(eth->ppe[i]); @@ -3072,7 +3237,8 @@ static int mtk_open(struct net_device *dev) refcount_inc(&eth->dma_refcnt);
phylink_start(mac->phylink); - netif_start_queue(dev); + netif_tx_start_all_queues(dev); + return 0; } @@ -3344,16 +3510,17 @@ static int mtk_hw_init(struct mtk_eth *eth) return 0; } - val = RSTCTRL_FE | RSTCTRL_PPE; if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { regmap_write(eth->ethsys, ETHSYS_FE_RST_CHK_IDLE_EN, 0); - - val |= RSTCTRL_ETH; - if (MTK_HAS_CAPS(eth->soc->caps, MTK_RSTCTRL_PPE1)) - val |= RSTCTRL_PPE1; + val = RSTCTRL_PPE0_V2; + } else { + val = RSTCTRL_PPE0; } - ethsys_reset(eth, val); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_RSTCTRL_PPE1)) + val |= RSTCTRL_PPE1; + + ethsys_reset(eth, RSTCTRL_ETH | RSTCTRL_FE | val); if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { regmap_write(eth->ethsys, ETHSYS_FE_RST_CHK_IDLE_EN, @@ -3419,9 +3586,12 @@ static int mtk_hw_init(struct mtk_eth *eth) mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { - /* PSE should not drop port8 and port9 packets */ + /* PSE should not drop port8 and port9 packets from WDMA Tx */ mtk_w32(eth, 0x00000300, PSE_DROP_CFG); + /* PSE should drop packets to port 8/9 on WDMA Rx ring full */ + mtk_w32(eth, 0x00000300, PSE_PPE0_DROP); + /* PSE Free Queue Flow Control */ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2); @@ -3610,8 +3780,12 @@ static int mtk_unreg_dev(struct mtk_eth *eth) int i; for (i = 0; i < MTK_MAC_COUNT; i++) { + struct mtk_mac *mac; if (!eth->netdev[i]) continue; + mac = netdev_priv(eth->netdev[i]); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + unregister_netdevice_notifier(&mac->device_notifier); unregister_netdev(eth->netdev[i]); } @@ -3827,6 +4001,23 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return ret; } +static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mtk_mac *mac = netdev_priv(dev); + unsigned int queue = 0; + + if (netdev_uses_dsa(dev)) + queue = skb_get_queue_mapping(skb) + 3; + else + queue = mac->id; + + if (queue >= dev->num_tx_queues) + queue = 0; + + return queue; +} + static const struct ethtool_ops mtk_ethtool_ops = { .get_link_ksettings = mtk_get_link_ksettings, .set_link_ksettings = mtk_set_link_ksettings, @@ -3862,6 +4053,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_setup_tc = mtk_eth_setup_tc, .ndo_bpf = mtk_xdp, .ndo_xdp_xmit = mtk_xdp_xmit, + .ndo_select_queue = mtk_select_queue, }; static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) @@ -3871,6 +4063,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) struct phylink *phylink; struct mtk_mac *mac; int id, err; + int txqs = 1; if (!_id) { dev_err(eth->dev, "missing mac id\n"); @@ -3888,7 +4081,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) return -EINVAL; } - eth->netdev[id] = alloc_etherdev(sizeof(*mac)); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + eth->netdev[id] = alloc_etherdev_mqs(sizeof(*mac), txqs, 1); if (!eth->netdev[id]) { dev_err(eth->dev, "alloc_etherdev failed\n"); return -ENOMEM; @@ -3985,6 +4181,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) else eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { + mac->device_notifier.notifier_call = mtk_device_event; + register_netdevice_notifier(&mac->device_notifier); + } + return 0; free_netdev: @@ -4221,13 +4422,13 @@ static int mtk_probe(struct platform_device *pdev) eth->soc->offload_version, i); if (!eth->ppe[i]) { err = 
-ENOMEM; - goto err_free_dev; + goto err_deinit_ppe; } } err = mtk_eth_offload_init(eth); if (err) - goto err_free_dev; + goto err_deinit_ppe; } for (i = 0; i < MTK_MAX_DEVS; i++) { @@ -4237,7 +4438,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_deinit_mdio; + goto err_deinit_ppe; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -4255,7 +4456,8 @@ static int mtk_probe(struct platform_device *pdev) return 0; -err_deinit_mdio: +err_deinit_ppe: + mtk_ppe_deinit(eth); mtk_mdio_cleanup(eth); err_free_dev: mtk_free_dev(eth); @@ -4315,7 +4517,7 @@ static const struct mtk_soc_data mt7621_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7621_CLKS_BITMAP, .required_pctl = false, - .offload_version = 2, + .offload_version = 1, .hash_offset = 2, .foe_entry_size = sizeof(struct mtk_foe_entry) - 16, .txrx = { @@ -4354,7 +4556,7 @@ static const struct mtk_soc_data mt7623_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7623_CLKS_BITMAP, .required_pctl = true, - .offload_version = 2, + .offload_version = 1, .hash_offset = 2, .foe_entry_size = sizeof(struct mtk_foe_entry) - 16, .txrx = { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index a572416e25de..18a50529ce7b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -25,11 +25,13 @@ #define MTK_MAX_DSA_PORTS 7 #define MTK_DSA_PORT_MASK GENMASK(2, 0) +#define MTK_QDMA_NUM_QUEUES 16 #define MTK_QDMA_PAGE_SIZE 2048 #define MTK_MAX_RX_LENGTH 1536 #define MTK_MAX_RX_LENGTH_2K 2048 #define MTK_TX_DMA_BUF_LEN 0x3fff #define MTK_TX_DMA_BUF_LEN_V2 0xffff +#define MTK_QDMA_RING_SIZE 2048 #define MTK_DMA_SIZE 512 #define MTK_MAC_COUNT 2 #define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN) @@ -126,6 +128,7 @@ #define PSE_FQFC_CFG1 0x100 #define PSE_FQFC_CFG2 0x104 #define PSE_DROP_CFG 0x108 +#define PSE_PPE0_DROP 0x110 /* PSE Input Queue Reservation Register*/ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2)) @@ -208,8 +211,26 @@ #define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3) /* QDMA TX Queue Configuration Registers */ +#define MTK_QTX_OFFSET 0x10 #define QDMA_RES_THRES 4 +/* QDMA Tx Queue Scheduler Configuration Registers */ +#define MTK_QTX_SCH_TX_SEL BIT(31) +#define MTK_QTX_SCH_TX_SEL_V2 GENMASK(31, 30) + +#define MTK_QTX_SCH_LEAKY_BUCKET_EN BIT(30) +#define MTK_QTX_SCH_LEAKY_BUCKET_SIZE GENMASK(29, 28) +#define MTK_QTX_SCH_MIN_RATE_EN BIT(27) +#define MTK_QTX_SCH_MIN_RATE_MAN GENMASK(26, 20) +#define MTK_QTX_SCH_MIN_RATE_EXP GENMASK(19, 16) +#define MTK_QTX_SCH_MAX_RATE_WEIGHT GENMASK(15, 12) +#define MTK_QTX_SCH_MAX_RATE_EN BIT(11) +#define MTK_QTX_SCH_MAX_RATE_MAN GENMASK(10, 4) +#define MTK_QTX_SCH_MAX_RATE_EXP GENMASK(3, 0) + +/* QDMA TX Scheduler Rate Control Register */ +#define MTK_QDMA_TX_SCH_MAX_WFQ BIT(15) + /* QDMA Global Configuration Register */ #define MTK_RX_2B_OFFSET BIT(31) #define MTK_RX_BT_32DWORDS (3 << 11) @@ -228,6 +249,7 @@ #define MTK_WCOMP_EN BIT(24) #define MTK_RESV_BUF (0x40 << 16) #define MTK_MUTLI_CNT (0x4 << 12) +#define MTK_LEAKY_BUCKET_EN BIT(11) /* QDMA Flow Control Register */ #define FC_THRES_DROP_MODE BIT(20) @@ -256,8 +278,6 @@ #define MTK_STAT_OFFSET 0x40 /* QDMA TX NUM */ -#define MTK_QDMA_TX_NUM 16 -#define MTK_QDMA_TX_MASK (MTK_QDMA_TX_NUM - 1) #define QID_BITS_V2(x) (((x) & 0x3f) << 16) #define 
MTK_QDMA_GMAC2_QID 8 @@ -287,6 +307,7 @@ #define TX_DMA_PLEN0(x) (((x) & eth->soc->txrx.dma_max_len) << eth->soc->txrx.dma_len_offset) #define TX_DMA_PLEN1(x) ((x) & eth->soc->txrx.dma_max_len) #define TX_DMA_SWC BIT(14) +#define TX_DMA_PQID GENMASK(3, 0) /* PDMA on MT7628 */ #define TX_DMA_DONE BIT(31) @@ -453,18 +474,14 @@ /* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 #define RSTCTRL_FE BIT(6) -#define RSTCTRL_PPE BIT(31) -#define RSTCTRL_PPE1 BIT(30) +#define RSTCTRL_PPE0 BIT(31) +#define RSTCTRL_PPE0_V2 BIT(30) +#define RSTCTRL_PPE1 BIT(31) #define RSTCTRL_ETH BIT(23) /* ethernet reset check idle register */ #define ETHSYS_FE_RST_CHK_IDLE_EN 0x28 -/* ethernet reset control register */ -#define ETHSYS_RSTCTRL 0x34 -#define RSTCTRL_FE BIT(6) -#define RSTCTRL_PPE BIT(31) - /* ethernet dma channel agent map */ #define ETHSYS_DMA_AG_MAP 0x408 #define ETHSYS_DMA_AG_MAP_PDMA BIT(0) @@ -945,6 +962,7 @@ struct mtk_reg_map { } pdma; struct { u32 qtx_cfg; /* tx queue configuration */ + u32 qtx_sch; /* tx queue scheduler configuration */ u32 rx_ptr; /* rx base pointer */ u32 rx_cnt_cfg; /* rx max count configuration */ u32 qcrx_ptr; /* rx cpu pointer */ @@ -962,6 +980,7 @@ struct mtk_reg_map { u32 fq_tail; /* fq tail pointer */ u32 fq_count; /* fq free page count */ u32 fq_blen; /* fq free page buffer length */ + u32 tx_sch_rate; /* tx scheduler rate control registers */ } qdma; u32 gdm1_cnt; u32 gdma_to_ppe; @@ -1155,6 +1174,7 @@ struct mtk_mac { __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; int hwlro_ip_cnt; unsigned int syscfg0; + struct notifier_block device_notifier; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 2d8ca99f2467..269208a841c7 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -175,6 +175,8 @@ int mtk_foe_entry_prepare(struct mtk_eth *eth, struct mtk_foe_entry *entry, val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, pse_port) | FIELD_PREP(MTK_FOE_IB2_PORT_AG_V2, 0xf); } else { + int port_mg = eth->soc->offload_version > 1 ? 
0 : 0x3f; + val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) | FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) | FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) | @@ -182,7 +184,7 @@ int mtk_foe_entry_prepare(struct mtk_eth *eth, struct mtk_foe_entry *entry, entry->ib1 = val; val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port) | - FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) | + FIELD_PREP(MTK_FOE_IB2_PORT_MG, port_mg) | FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f); } @@ -397,6 +399,24 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, return 0; } +int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry, + unsigned int queue) +{ + u32 *ib2 = mtk_foe_entry_ib2(eth, entry); + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) { + *ib2 &= ~MTK_FOE_IB2_QID_V2; + *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID_V2, queue); + *ib2 |= MTK_FOE_IB2_PSE_QOS_V2; + } else { + *ib2 &= ~MTK_FOE_IB2_QID; + *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID, queue); + *ib2 |= MTK_FOE_IB2_PSE_QOS; + } + + return 0; +} + static bool mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry, struct mtk_foe_entry *data) @@ -737,7 +757,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, MTK_PPE_ENTRIES * soc->foe_entry_size, &ppe->foe_phys, GFP_KERNEL); if (!foe) - return NULL; + goto err_free_l2_flows; ppe->foe_table = foe; @@ -745,11 +765,26 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, sizeof(*ppe->foe_flow); ppe->foe_flow = devm_kzalloc(dev, foe_flow_size, GFP_KERNEL); if (!ppe->foe_flow) - return NULL; + goto err_free_l2_flows; mtk_ppe_debugfs_init(ppe, index); return ppe; + +err_free_l2_flows: + rhashtable_destroy(&ppe->l2_flows); + return NULL; +} + +void mtk_ppe_deinit(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) { + if (!eth->ppe[i]) + return; + rhashtable_destroy(ð->ppe[i]->l2_flows); + } } static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 0b7a67a958e4..ea64fac1d425 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -68,7 +68,9 @@ enum { #define MTK_FOE_IB2_DSCP GENMASK(31, 24) /* CONFIG_MEDIATEK_NETSYS_V2 */ +#define MTK_FOE_IB2_QID_V2 GENMASK(6, 0) #define MTK_FOE_IB2_PORT_MG_V2 BIT(7) +#define MTK_FOE_IB2_PSE_QOS_V2 BIT(8) #define MTK_FOE_IB2_DEST_PORT_V2 GENMASK(12, 9) #define MTK_FOE_IB2_MULTICAST_V2 BIT(13) #define MTK_FOE_IB2_WDMA_WINFO_V2 BIT(19) @@ -304,6 +306,7 @@ struct mtk_ppe { struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version, int index); +void mtk_ppe_deinit(struct mtk_eth *eth); void mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); @@ -350,6 +353,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry, int sid); int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, int wdma_idx, int txq, int bss, int wcid); +int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry, + unsigned int queue); int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 28bbd1df3e30..81afd5ee3fbf 100644 --- 
a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -188,7 +188,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, int *wed_index) { struct mtk_wdma_info info = {}; - int pse_port, dsa_port; + int pse_port, dsa_port, queue; if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) { mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue, @@ -212,8 +212,6 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, } dsa_port = mtk_flow_get_dsa_port(&dev); - if (dsa_port >= 0) - mtk_foe_entry_set_dsa(eth, foe, dsa_port); if (dev == eth->netdev[0]) pse_port = 1; @@ -222,6 +220,14 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, else return -EOPNOTSUPP; + if (dsa_port >= 0) { + mtk_foe_entry_set_dsa(eth, foe, dsa_port); + queue = 3 + dsa_port; + } else { + queue = pse_port - 1; + } + mtk_foe_entry_set_queue(eth, foe, queue); + out: mtk_foe_entry_set_pse_port(eth, foe, pse_port); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 7d8842378c2b..d041615b2bac 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -101,17 +101,21 @@ mtk_wdma_read_reset(struct mtk_wed_device *dev) return wdma_r32(dev, MTK_WDMA_GLO_CFG); } -static void +static int mtk_wdma_rx_reset(struct mtk_wed_device *dev) { u32 status, mask = MTK_WDMA_GLO_CFG_RX_DMA_BUSY; - int i; + int i, ret; wdma_clr(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_DMA_EN); - if (readx_poll_timeout(mtk_wdma_read_reset, dev, status, - !(status & mask), 0, 1000)) + ret = readx_poll_timeout(mtk_wdma_read_reset, dev, status, + !(status & mask), 0, 10000); + if (ret) dev_err(dev->hw->dev, "rx reset failed\n"); + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + for (i = 0; i < ARRAY_SIZE(dev->rx_wdma); i++) { if (dev->rx_wdma[i].desc) continue; @@ -119,6 +123,8 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev) wdma_w32(dev, MTK_WDMA_RING_RX(i) + MTK_WED_RING_OFS_CPU_IDX, 0); } + + return ret; } static void @@ -129,16 +135,15 @@ mtk_wdma_tx_reset(struct mtk_wed_device *dev) wdma_clr(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN); if (readx_poll_timeout(mtk_wdma_read_reset, dev, status, - !(status & mask), 0, 1000)) + !(status & mask), 0, 10000)) dev_err(dev->hw->dev, "tx reset failed\n"); - for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) { - if (dev->tx_wdma[i].desc) - continue; + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) wdma_w32(dev, MTK_WDMA_RING_TX(i) + MTK_WED_RING_OFS_CPU_IDX, 0); - } } static void @@ -526,22 +531,16 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) MTK_WED_WPDMA_RX_D_RX_DRV_EN); wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); - - mtk_wed_set_512_support(dev, false); } + + mtk_wed_set_512_support(dev, false); } static void mtk_wed_stop(struct mtk_wed_device *dev) { - mtk_wed_dma_disable(dev); mtk_wed_set_ext_int(dev, false); - wed_clr(dev, MTK_WED_CTRL, - MTK_WED_CTRL_WDMA_INT_AGENT_EN | - MTK_WED_CTRL_WPDMA_INT_AGENT_EN | - MTK_WED_CTRL_WED_TX_BM_EN | - MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0); wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0); wdma_w32(dev, MTK_WDMA_INT_MASK, 0); @@ -553,7 +552,27 @@ mtk_wed_stop(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_EXT_INT_MASK1, 0); wed_w32(dev, 
MTK_WED_EXT_INT_MASK2, 0); - wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN); +} + +static void +mtk_wed_deinit(struct mtk_wed_device *dev) +{ + mtk_wed_stop(dev); + mtk_wed_dma_disable(dev); + + wed_clr(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | + MTK_WED_CTRL_WPDMA_INT_AGENT_EN | + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + + if (dev->hw->version == 1) + return; + + wed_clr(dev, MTK_WED_CTRL, + MTK_WED_CTRL_RX_ROUTE_QM_EN | + MTK_WED_CTRL_WED_RX_BM_EN | + MTK_WED_CTRL_RX_RRO_QM_EN); } static void @@ -563,18 +582,10 @@ mtk_wed_detach(struct mtk_wed_device *dev) mutex_lock(&hw_lock); - mtk_wed_stop(dev); - - wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); - wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + mtk_wed_deinit(dev); + mtk_wdma_rx_reset(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); - if (mtk_wed_get_rx_capa(dev)) { - wdma_clr(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN); - wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX); - wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); - } - mtk_wed_free_tx_buffer(dev); mtk_wed_free_tx_rings(dev); @@ -582,7 +593,6 @@ mtk_wed_detach(struct mtk_wed_device *dev) mtk_wed_wo_reset(dev); mtk_wed_free_rx_rings(dev); mtk_wed_wo_deinit(hw); - mtk_wdma_rx_reset(dev); } if (dev->wlan.bus_type == MTK_WED_BUS_PCIE) { @@ -674,7 +684,7 @@ mtk_wed_hw_init_early(struct mtk_wed_device *dev) { u32 mask, set; - mtk_wed_stop(dev); + mtk_wed_deinit(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); mtk_wed_set_wpdma(dev); @@ -934,42 +944,130 @@ mtk_wed_ring_reset(struct mtk_wed_ring *ring, int size, bool tx) } static u32 -mtk_wed_check_busy(struct mtk_wed_device *dev) +mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) { - if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY) - return true; - - if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) & - MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY) - return true; - - if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY) - return true; - - if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) & - MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) - return true; - - if (wdma_r32(dev, MTK_WDMA_GLO_CFG) & - MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) - return true; - - if (wed_r32(dev, MTK_WED_CTRL) & - (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY)) - return true; - - return false; + return !!(wed_r32(dev, reg) & mask); } static int -mtk_wed_poll_busy(struct mtk_wed_device *dev) +mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) { int sleep = 15000; int timeout = 100 * sleep; u32 val; return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, - timeout, false, dev); + timeout, false, dev, reg, mask); +} + +static int +mtk_wed_rx_reset(struct mtk_wed_device *dev) +{ + struct mtk_wed_wo *wo = dev->hw->wed_wo; + u8 val = MTK_WED_WO_STATE_SER_RESET; + int i, ret; + + ret = mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, + MTK_WED_WO_CMD_CHANGE_STATE, &val, + sizeof(val), true); + if (ret) + return ret; + + wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_EN); + ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, + MTK_WED_WPDMA_RX_D_RX_DRV_BUSY); + if (ret) { + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_D_DRV); + } else { + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, + MTK_WED_WPDMA_RX_D_RST_CRX_IDX | + MTK_WED_WPDMA_RX_D_RST_DRV_IDX); + + wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, + MTK_WED_WPDMA_RX_D_RST_INIT_COMPLETE | + MTK_WED_WPDMA_RX_D_FSM_RETURN_IDLE); + wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, + 
MTK_WED_WPDMA_RX_D_RST_INIT_COMPLETE | + MTK_WED_WPDMA_RX_D_FSM_RETURN_IDLE); + + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0); + } + + /* reset rro qm */ + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_RRO_QM_EN); + ret = mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_RX_RRO_QM_BUSY); + if (ret) { + mtk_wed_reset(dev, MTK_WED_RESET_RX_RRO_QM); + } else { + wed_set(dev, MTK_WED_RROQM_RST_IDX, + MTK_WED_RROQM_RST_IDX_MIOD | + MTK_WED_RROQM_RST_IDX_FDBK); + wed_w32(dev, MTK_WED_RROQM_RST_IDX, 0); + } + + /* reset route qm */ + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN); + ret = mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_RX_ROUTE_QM_BUSY); + if (ret) + mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM); + else + wed_set(dev, MTK_WED_RTQM_GLO_CFG, + MTK_WED_RTQM_Q_RST); + + /* reset tx wdma */ + mtk_wdma_tx_reset(dev); + + /* reset tx wdma drv */ + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DRV_EN); + mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_BUSY); + mtk_wed_reset(dev, MTK_WED_RESET_WDMA_TX_DRV); + + /* reset wed rx dma */ + ret = mtk_wed_poll_busy(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_RX_DMA_BUSY); + wed_clr(dev, MTK_WED_GLO_CFG, MTK_WED_GLO_CFG_RX_DMA_EN); + if (ret) { + mtk_wed_reset(dev, MTK_WED_RESET_WED_RX_DMA); + } else { + struct mtk_eth *eth = dev->hw->eth; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + wed_set(dev, MTK_WED_RESET_IDX, + MTK_WED_RESET_IDX_RX_V2); + else + wed_set(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_RX); + wed_w32(dev, MTK_WED_RESET_IDX, 0); + } + + /* reset rx bm */ + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN); + mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WED_RX_BM_BUSY); + mtk_wed_reset(dev, MTK_WED_RESET_RX_BM); + + /* wo change to enable state */ + val = MTK_WED_WO_STATE_ENABLE; + ret = mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, + MTK_WED_WO_CMD_CHANGE_STATE, &val, + sizeof(val), true); + if (ret) + return ret; + + /* wed_rx_ring_reset */ + for (i = 0; i < ARRAY_SIZE(dev->rx_ring); i++) { + if (!dev->rx_ring[i].desc) + continue; + + mtk_wed_ring_reset(&dev->rx_ring[i], MTK_WED_RX_RING_SIZE, + false); + } + mtk_wed_free_rx_buffer(dev); + + return 0; } static void @@ -987,23 +1085,23 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) true); } - if (mtk_wed_poll_busy(dev)) - busy = mtk_wed_check_busy(dev); - + /* 1. reset WED tx DMA */ + wed_clr(dev, MTK_WED_GLO_CFG, MTK_WED_GLO_CFG_TX_DMA_EN); + busy = mtk_wed_poll_busy(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_TX_DMA_BUSY); if (busy) { mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA); } else { - wed_w32(dev, MTK_WED_RESET_IDX, - MTK_WED_RESET_IDX_TX | - MTK_WED_RESET_IDX_RX); + wed_w32(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_TX); wed_w32(dev, MTK_WED_RESET_IDX, 0); } - wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); - wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); - - if (mtk_wed_get_rx_capa(dev)) - mtk_wdma_rx_reset(dev); + /* 2. reset WDMA rx DMA */ + busy = !!mtk_wdma_rx_reset(dev); + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); + if (!busy) + busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY); if (busy) { mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT); @@ -1020,6 +1118,9 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE); } + /* 3. 
reset WED WPDMA tx */ + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + for (i = 0; i < 100; i++) { val = wed_r32(dev, MTK_WED_TX_BM_INTF); if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40) @@ -1027,8 +1128,19 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) } mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT); + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_BM_EN); mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + /* 4. reset WED WPDMA tx */ + busy = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY); + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + if (!busy) + busy = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY); + if (busy) { mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV); @@ -1039,6 +1151,17 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) MTK_WED_WPDMA_RESET_IDX_RX); wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0); } + + dev->init_done = false; + if (dev->hw->version == 1) + return; + + if (!busy) { + wed_w32(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_WPDMA_IDX_RX); + wed_w32(dev, MTK_WED_RESET_IDX, 0); + } + + mtk_wed_rx_reset(dev); } static int @@ -1058,7 +1181,8 @@ mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, } static int -mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size) +mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size, + bool reset) { u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version; struct mtk_wed_ring *wdma; @@ -1067,8 +1191,8 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size) return -EINVAL; wdma = &dev->rx_wdma[idx]; - if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, desc_size, - true)) + if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, + desc_size, true)) return -ENOMEM; wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, @@ -1261,9 +1385,12 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) { int i; + if (mtk_wed_get_rx_capa(dev) && mtk_wed_rx_buffer_alloc(dev)) + return; + for (i = 0; i < ARRAY_SIZE(dev->rx_wdma); i++) if (!dev->rx_wdma[i].desc) - mtk_wed_wdma_rx_ring_setup(dev, i, 16); + mtk_wed_wdma_rx_ring_setup(dev, i, 16, false); mtk_wed_hw_init(dev); mtk_wed_configure_irq(dev, irq_mask); @@ -1290,9 +1417,10 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) if (mtk_wed_rro_cfg(dev)) return; - mtk_wed_set_512_support(dev, dev->wlan.wcid_512); } + mtk_wed_set_512_support(dev, dev->wlan.wcid_512); + mtk_wed_dma_enable(dev); dev->running = true; } @@ -1348,21 +1476,19 @@ mtk_wed_attach(struct mtk_wed_device *dev) goto out; if (mtk_wed_get_rx_capa(dev)) { - ret = mtk_wed_rx_buffer_alloc(dev); - if (ret) - goto out; - ret = mtk_wed_rro_alloc(dev); if (ret) goto out; } mtk_wed_hw_init_early(dev); - if (hw->version == 1) + if (hw->version == 1) { regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0); - else + } else { + dev->rev_id = wed_r32(dev, MTK_WED_REV_ID); ret = mtk_wed_wo_init(hw); + } out: if (ret) mtk_wed_detach(dev); @@ -1373,7 +1499,8 @@ unlock: } static int -mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, + bool reset) { struct mtk_wed_ring *ring = &dev->tx_ring[idx]; @@ -1392,11 +1519,12 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) if 
(WARN_ON(idx >= ARRAY_SIZE(dev->tx_ring))) return -EINVAL; - if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE, - sizeof(*ring->desc), true)) + if (!reset && mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE, + sizeof(*ring->desc), true)) return -ENOMEM; - if (mtk_wed_wdma_rx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE)) + if (mtk_wed_wdma_rx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE, + reset)) return -ENOMEM; ring->reg_base = MTK_WED_RING_TX(idx); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h index 9e39dace95eb..0a50bb98c5ea 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -20,13 +20,19 @@ struct mtk_wdma_desc { __le32 info; } __packed __aligned(4); +#define MTK_WED_REV_ID 0x004 + #define MTK_WED_RESET 0x008 #define MTK_WED_RESET_TX_BM BIT(0) +#define MTK_WED_RESET_RX_BM BIT(1) #define MTK_WED_RESET_TX_FREE_AGENT BIT(4) #define MTK_WED_RESET_WPDMA_TX_DRV BIT(8) #define MTK_WED_RESET_WPDMA_RX_DRV BIT(9) +#define MTK_WED_RESET_WPDMA_RX_D_DRV BIT(10) #define MTK_WED_RESET_WPDMA_INT_AGENT BIT(11) #define MTK_WED_RESET_WED_TX_DMA BIT(12) +#define MTK_WED_RESET_WED_RX_DMA BIT(13) +#define MTK_WED_RESET_WDMA_TX_DRV BIT(16) #define MTK_WED_RESET_WDMA_RX_DRV BIT(17) #define MTK_WED_RESET_WDMA_INT_AGENT BIT(19) #define MTK_WED_RESET_RX_RRO_QM BIT(20) @@ -156,6 +162,8 @@ struct mtk_wdma_desc { #define MTK_WED_RESET_IDX 0x20c #define MTK_WED_RESET_IDX_TX GENMASK(3, 0) #define MTK_WED_RESET_IDX_RX GENMASK(17, 16) +#define MTK_WED_RESET_IDX_RX_V2 GENMASK(7, 6) +#define MTK_WED_RESET_WPDMA_IDX_RX GENMASK(31, 30) #define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4) #define MTK_WED_RX_MIB(_n) (0x2e0 + (_n) * 4) @@ -265,6 +273,9 @@ struct mtk_wdma_desc { #define MTK_WED_WPDMA_RX_D_GLO_CFG 0x75c #define MTK_WED_WPDMA_RX_D_RX_DRV_EN BIT(0) +#define MTK_WED_WPDMA_RX_D_RX_DRV_BUSY BIT(1) +#define MTK_WED_WPDMA_RX_D_FSM_RETURN_IDLE BIT(3) +#define MTK_WED_WPDMA_RX_D_RST_INIT_COMPLETE BIT(4) #define MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL GENMASK(11, 7) #define MTK_WED_WPDMA_RX_D_RXD_READ_LEN GENMASK(31, 24) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index b149e601f673..48cfaa7eaf50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, err = mlx4_bitmap_init(*bitmap + k, 1, MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, 0); - mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e0d59ca62b5..d3ca745d107d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -37,7 +37,6 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/random.h> -#include <linux/io-mapping.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/eq.h> #include <linux/debugfs.h> @@ -45,6 +44,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" enum { CMD_IF_REV = 5, @@ -785,27 +786,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; - u32 syndrome; - u8 status; u16 uid; - int err; - - syndrome = MLX5_GET(mbox_out, out, syndrome); - status = MLX5_GET(mbox_out, 
out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - err = cmd_status_to_err(status); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) mlx5_cmd_out_err(dev, opcode, op_mod, out); - else - mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", - mlx5_command_str(opcode), opcode, op_mod, uid, - cmd_status_str(status), status, syndrome, err); } int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) @@ -1016,6 +1004,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { ent->ret = -ENXIO; @@ -1023,7 +1012,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1508,8 +1496,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) @@ -1672,8 +1660,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); @@ -1892,6 +1880,16 @@ out_in: return err; } +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { @@ -1914,7 +1912,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ -static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); u8 status = MLX5_GET(mbox_out, out, status); @@ -1922,8 +1920,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void * if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ err = -EIO; - if (!err && status != MLX5_CMD_STAT_OK) + if (!err && status != MLX5_CMD_STAT_OK) { err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } cmd_status_log(dev, opcode, status, syndrome, err); return err; @@ -1951,9 +1951,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); - return err; + 
return cmd_status_err(dev, err, opcode, op_mod, out); } EXPORT_SYMBOL(mlx5_cmd_do); @@ -1997,8 +1997,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); + err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); @@ -2034,7 +2035,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_ctx *ctx; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); @@ -2049,6 +2050,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; work->opcode = MLX5_GET(mbox_in, in, opcode); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index cc2ae427dcb0..751bc4a9edcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,10 +46,6 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 000000000000..406ebe17405f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 978a2bb8e122..21831386b26e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); else - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ff5b302531d5..65790ff58a74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -103,11 +103,11 @@ struct page_pool; * size actually used at runtime, but it's not a problem when calculating static * array sizes. */ -#define MLX5_UMR_MAX_MTT_SPACE \ +#define MLX5_UMR_MAX_FLEX_SPACE \ (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \ - MLX5_UMR_MTT_ALIGNMENT)) + MLX5_UMR_FLEX_ALIGNMENT)) #define MLX5_MPWRQ_MAX_PAGES_PER_WQE \ - rounddown_pow_of_two(MLX5_UMR_MAX_MTT_SPACE / sizeof(struct mlx5_mtt)) + rounddown_pow_of_two(MLX5_UMR_MAX_FLEX_SPACE / sizeof(struct mlx5_mtt)) #define MLX5E_MAX_RQ_NUM_MTTS \ (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. 
*/ @@ -160,7 +160,7 @@ struct page_pool; (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) #define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ - ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) + ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT) #define MLX5E_MAX_KLM_PER_WQE(mdev) \ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 9ec9662d1d0b..585bdc8383ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -107,7 +107,7 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), - MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size; + MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); @@ -146,7 +146,7 @@ u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, u16 umr_wqe_sz; umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) + - ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); + ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c index 53b270f652b9..915ce201aeb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -3,6 +3,7 @@ #include "act.h" #include "en/tc_priv.h" +#include "eswitch.h" static bool tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, @@ -10,13 +11,6 @@ tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, int act_index, struct mlx5_flow_attr *attr) { - struct netlink_ext_ack *extack = parse_state->extack; - - if (parse_state->flow_action->num_entries != 1) { - NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only"); - return false; - } - return true; } @@ -27,7 +21,7 @@ tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *attr) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + attr->dest_ft = mlx5_eswitch_get_slow_fdb(priv->mdev->priv.eswitch); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5aff97914367..ff73d25bc6eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - spec = &flow->attr->parse_attr->spec; - - /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + 
esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -251,6 +252,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; } static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, @@ -762,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) + struct net_device **encap_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -878,9 +879,8 @@ attach_flow: if (e->flags & MLX5_ENCAP_ENTRY_VALID) { attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; } else { - *encap_valid = false; + flow_flag_set(flow, SLOW); } mutex_unlock(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index d542b8476491..8ad273dde40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid); + struct net_device **encap_dev); int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 285d32d2fd08..d7c020f72401 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index a715601865d3..1b03ab03fc5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -345,29 +345,27 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) kfree(sa_entry); } -int mlx5e_ipsec_init(struct mlx5e_priv *priv) +void mlx5e_ipsec_init(struct mlx5e_priv *priv) { struct mlx5e_ipsec *ipsec; - int ret; + int ret = -ENOMEM; if (!mlx5_ipsec_device_caps(priv->mdev)) { netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); - return 0; + return; } ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); if (!ipsec) - return -ENOMEM; + return; hash_init(ipsec->sadb_rx); spin_lock_init(&ipsec->sadb_rx_lock); ipsec->mdev = priv->mdev; ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, priv->netdev->name); - if 
(!ipsec->wq) { - ret = -ENOMEM; + if (!ipsec->wq) goto err_wq; - } ret = mlx5e_accel_ipsec_fs_init(ipsec); if (ret) @@ -375,13 +373,14 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) priv->ipsec = ipsec; netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); - return 0; + return; err_fs_init: destroy_workqueue(ipsec->wq); err_wq: kfree(ipsec); - return (ret != -EOPNOTSUPP) ? ret : 0; + mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret); + return; } void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 16bcceec16c4..4c47347d0ee2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -146,7 +146,7 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_modify_state_work modify_work; }; -int mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_init(struct mlx5e_priv *priv); void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); @@ -174,9 +174,8 @@ mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) return sa_entry->ipsec->mdev; } #else -static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) +static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv) { - return 0; } static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 2ef36cb9555a..9369a580743e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -186,7 +186,7 @@ static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macs return err; } - dma_device = &mdev->pdev->dev; + dma_device = mlx5_core_dma_dev(mdev); dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL); err = dma_mapping_error(dma_device, dma_addr); if (err) { @@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - if (is_tx) { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); - key = &ctx->sa.tx_sa->key; - } else { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - key = &ctx->sa.rx_sa->key; + key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = (is_tx) ? 
cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : + cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); + + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); } - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); obj_attrs.replay_window = ctx->secy->replay_window; obj_attrs.replay_protect = ctx->secy->replay_protect; @@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) return NULL; } -static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *rx_sa, - bool active) +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) { - struct mlx5_core_dev *mdev = macsec->mdev; - struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; int err = 0; - if (rx_sa->active != active) + if (rx_sa->active == active) return 0; rx_sa->active = active; @@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, return 0; } - attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); - attrs.enc_key_id = rx_sa->enc_key_id; - err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) - return err; + rx_sa->active = false; - return 0; + return err; } static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) @@ -476,6 +474,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) return false; } + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + return true; } @@ -620,6 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) if (tx_sa->active == ctx_tx_sa->active) goto out; + tx_sa->active = ctx_tx_sa->active; if (tx_sa->assoc_num != tx_sc->encoding_sa) goto out; @@ -635,8 +639,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); } - - tx_sa->active = ctx_tx_sa->active; out: mutex_unlock(&macsec->lock); @@ -736,9 +738,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) sc_xarray_element->rx_sc = rx_sc; err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, - XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); - if (err) + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); goto destroy_sc_xarray_elemenet; + } rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!rx_sc->md_dst) { @@ -798,16 +805,16 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) goto out; } - rx_sc->active = ctx_rx_sc->active; if (rx_sc->active == ctx_rx_sc->active) goto out; + rx_sc->active = ctx_rx_sc->active; for (i = 0; i < MACSEC_NUM_AN; ++i) { rx_sa = rx_sc->rx_sa[i]; if (!rx_sa) continue; - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); if (err) goto out; } @@ -818,16 +825,43 @@ out: return err; } +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + 
mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule is already removed by + * mlx5e_macsec_cleanup_sa. Wait for the datapath to finish with in-flight + * Rx data: xa_erase uses RCU to sync, so once fs_id is erased this rx_sc + * is hidden from the datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec *macsec; struct list_head *list; int err = 0; - int i; mutex_lock(&priv->macsec->lock); @@ -849,31 +883,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - -/* - * At this point the relevant MACsec offload Rx rule already removed at - * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current - * Rx related data propagating using xa_erase which uses rcu to sync, - * once fs_id is erased then this rx_sc is hidden from datapath. - */ - list_del_rcu(&rx_sc->rx_sc_list_element); - xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); - metadata_dst_free(rx_sc->md_dst); - kfree(rx_sc->sc_xarray_element); - - kfree_rcu(rx_sc); - + macsec_del_rxsc_ctx(macsec, rx_sc); out: mutex_unlock(&macsec->lock); @@ -1015,7 +1025,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); out: mutex_unlock(&macsec->lock); @@ -1155,7 +1165,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) goto out; } @@ -1234,7 +1244,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec_sa *tx_sa; struct mlx5e_macsec *macsec; struct list_head *list; @@ -1263,28 +1272,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) } list = &macsec_device->macsec_rx_sc_list_head; - list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - - list_del_rcu(&rx_sc->rx_sc_list_element); - - kfree_rcu(rx_sc); - } + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; list_del_rcu(&macsec_device->macsec_device_list_element); --macsec->num_of_devices; + kfree(macsec_device); out: mutex_unlock(&macsec->lock); @@ 
-1303,12 +1299,12 @@ static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso, struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, struct mlx5_aso_ctrl_param *param) { + struct mlx5e_macsec_umr *umr = macsec_aso->umr; + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); - if (macsec_aso->umr->dma_addr) { - aso_ctrl->va_l = cpu_to_be32(macsec_aso->umr->dma_addr | ASO_CTRL_READ_EN); - aso_ctrl->va_h = cpu_to_be32((u64)macsec_aso->umr->dma_addr >> 32); - aso_ctrl->l_key = cpu_to_be32(macsec_aso->umr->mkey); - } + aso_ctrl->va_l = cpu_to_be32(umr->dma_addr | ASO_CTRL_READ_EN); + aso_ctrl->va_h = cpu_to_be32((u64)umr->dma_addr >> 32); + aso_ctrl->l_key = cpu_to_be32(umr->mkey); if (!param) return; @@ -1536,6 +1532,8 @@ static void macsec_async_event(struct work_struct *work) async_work = container_of(work, struct mlx5e_macsec_async_work, work); macsec = async_work->macsec; + mutex_lock(&macsec->lock); + mdev = async_work->mdev; obj_id = async_work->obj_id; macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); @@ -1557,6 +1555,7 @@ static void macsec_async_event(struct work_struct *work) out_async_work: kfree(async_work); + mutex_unlock(&macsec->lock); } static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) @@ -1745,7 +1744,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, if (!macsec) return; - fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); rcu_read_lock(); sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index d580b4a91253..347380a2cd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -10,9 +10,11 @@ #include <net/macsec.h> #include <net/dst_metadata.h> -/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) struct mlx5e_priv; struct mlx5e_macsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 1ac0cf04e811..5b658a5588c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); if (!ns) @@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto out_spec; + } tx_tables = &tx_fs->tables; ft_crypto = &tx_tables->ft_crypto; @@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = 
mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); if (!ns) @@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto free_spec; + } rx_tables = &rx_fs->tables; ft_crypto = &rx_tables->ft_crypto; @@ -1142,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, ft_crypto = &rx_tables->ft_crypto; /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[3-0] fs id */ + /* Set bit[15-0] fs id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); MLX5_SET(set_action_in, action, offset, 0); MLX5_SET(set_action_in, action, length, 32); @@ -1205,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, rx_rule->rule[1] = rule; } + kvfree(spec); return macsec_rule; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d9397ffb396b..7708acc9b2ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -35,7 +35,6 @@ #include "en.h" #include "en/port.h" #include "en/params.h" -#include "en/xsk/pool.h" #include "en/ptp.h" #include "lib/clock.h" #include "en/fs_ethtool.h" @@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { mutex_lock(&priv->state_lock); - ch->max_combined = priv->max_nch; ch->combined_count = priv->channels.params.num_channels; - if (priv->xsk.refcnt) { - /* The upper half are XSK queues. */ - ch->max_combined *= 2; - ch->combined_count *= 2; - } - mutex_unlock(&priv->state_lock); } @@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - /* Don't allow changing the number of channels if there is an active - * XSK, because the numeration of the XSK and regular RQs will change. - */ - if (priv->xsk.refcnt) { - err = -EINVAL; - netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", - __func__); - goto out; - } - /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. 
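[Editor's note] The macsec_fs_tx_create()/macsec_fs_rx_create() hunks above fix a classic goto-unwind pitfall: with err pre-initialized to 0 and no assignment on the kvzalloc() failure path, the function would jump to its cleanup label and report success despite the failed allocation; the macsec_fs_rx_add_rule() hunk additionally adds the kvfree(spec) that was missing on the success path. A minimal userspace sketch of the corrected pattern follows, with malloc/free standing in for kvzalloc/kvfree and create_tables() a purely hypothetical name:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Every failure path sets err before jumping to a cleanup label, and
 * scratch buffers are freed on success and failure alike.
 */
static int create_tables(void)
{
	void *spec, *flow_group_in;
	int err;

	spec = malloc(64);
	if (!spec)
		return -ENOMEM;

	flow_group_in = malloc(64);
	if (!flow_group_in) {
		err = -ENOMEM;	/* without this, a stale or zero err leaks out */
		goto out_spec;
	}

	/* ... create flow groups and rules, setting err on any failure ... */
	err = 0;

	free(flow_group_in);
out_spec:
	free(spec);	/* freed on every path, matching the kvfree(spec) fix */
	return err;
}

int main(void)
{
	printf("create_tables() = %d\n", create_tables());
	return 0;
}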
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 14bd86e368d5..8d36e2de53a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; - WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); - return entries * umr_entry_size / MLX5_OCTWORD; + return sz / MLX5_OCTWORD; } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -5237,10 +5238,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, } priv->fs = fs; - err = mlx5e_ipsec_init(priv); - if (err) - mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); - err = mlx5e_ktls_init(priv); if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); @@ -5253,7 +5250,6 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); mlx5e_ktls_cleanup(priv); - mlx5e_ipsec_cleanup(priv); mlx5e_fs_cleanup(priv->fs); } @@ -5382,6 +5378,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) int err; mlx5e_fs_init_l2_addr(priv->fs, netdev); + mlx5e_ipsec_init(priv); err = mlx5e_macsec_init(priv); if (err) @@ -5445,6 +5442,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); mlx5e_macsec_cleanup(priv); + mlx5e_ipsec_cleanup(priv); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1b53e8852c86..623886462c10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -751,7 +751,6 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - int err; priv->fs = mlx5e_fs_init(priv->profile, mdev, !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); @@ -760,10 +759,6 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, return -ENOMEM; } - err = mlx5e_ipsec_init(priv); - if (err) - mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err); - mlx5e_vxlan_set_netdev_info(priv); mlx5e_build_rep_params(netdev); mlx5e_timestamp_init(priv); @@ -773,7 +768,6 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { mlx5e_fs_cleanup(priv->fs); - mlx5e_ipsec_cleanup(priv); } static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) @@ -1112,6 +1106,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; + mlx5e_ipsec_init(priv); + netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); @@ -1158,6 +1154,8 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5e_rep_tc_disable(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); + + mlx5e_ipsec_cleanup(priv); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 
b1ea0b995d9c..c8820ab22169 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -593,8 +593,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, int headroom, i; headroom = rq->buff.headroom; - new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); - entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT); + new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); + entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); @@ -603,7 +603,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < - MLX5_UMR_KLM_ALIGNMENT)) + MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)) goto update_klm; header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; @@ -668,8 +668,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) if (!klm_entries) return 0; - klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); - index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT); + klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); entries_before = shampo->hd_per_wq - index; if (unlikely(entries_before < klm_entries)) @@ -727,6 +727,17 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) }; } + /* Pad if needed, in case the value set to ucseg->xlt_octowords + * in mlx5e_build_umr_wqe() needed alignment. + */ + if (rq->mpwqe.pages_per_wqe & (MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT - 1)) { + int pad = ALIGN(rq->mpwqe.pages_per_wqe, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT) - + rq->mpwqe.pages_per_wqe; + + memset(&umr_wqe->inline_mtts[rq->mpwqe.pages_per_wqe], 0, + sizeof(*umr_wqe->inline_mtts) * pad); + } + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3782f0097292..10d1609ece58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1631,7 +1631,6 @@ set_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, struct netlink_ext_ack *extack, - bool *encap_valid, bool *vf_tun) { struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -1648,7 +1647,6 @@ set_encap_dests(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; - *encap_valid = true; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; @@ -1665,7 +1663,7 @@ set_encap_dests(struct mlx5e_priv *priv, goto out; } err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev, encap_valid); + extack, &encap_dev); dev_put(out_dev); if (err) goto out; @@ -1729,8 +1727,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun, encap_valid; u32 max_prio, max_chain; + bool vf_tun; int err = 0; parse_attr = attr->parse_attr; @@ -1820,7 +1818,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = set_encap_dests(priv, 
flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -1850,7 +1848,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid || flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -3756,7 +3754,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *next_attr = NULL; struct mlx5e_post_act_handle *handle; - bool vf_tun, encap_valid = true; + bool vf_tun; int err; /* This is going in reverse order as needed. @@ -3778,13 +3776,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (list_is_last(&attr->list, &flow->attrs)) break; - err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); if (err) goto out_free; - if (!encap_valid) - flow_flag_set(flow, SLOW); - err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); if (err) goto out_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 48241317a535..0db41fa4a9a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -97,8 +97,8 @@ struct mlx5_flow_attr { } lag; /* keep this union last */ union { - struct mlx5_esw_flow_attr esw_attr[0]; - struct mlx5_nic_flow_attr nic_attr[0]; + DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr); + DECLARE_FLEX_ARRAY(struct mlx5_nic_flow_attr, nic_attr); }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2169486c4bfb..374e3fbdc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) devl_rate_nodes_destroy(devlink); } + /* Destroy legacy fdb when disabling sriov in legacy mode. 
*/ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); esw->esw_funcs.num_vfs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f68dc2d0dbe6..42d9df417e20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -736,6 +736,19 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + +static inline struct mlx5_flow_table * +mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.offloads.slow_fdb; +} #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 728ca9f2bb9d..9b6fbb19c22a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -248,7 +248,7 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_ac if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = esw->fdb_table.offloads.slow_fdb; + dest[i].ft = mlx5_eswitch_get_slow_fdb(esw); } static int @@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f mlx5_lag_mpesw_is_activated(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } - if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; @@ -479,13 +479,15 @@ esw_setup_dests(struct mlx5_flow_destination *dest, esw_src_port_rewrite_supported(esw)) attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; - if (attr->flags & MLX5_ATTR_FLAG_SAMPLE && - !(attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)) { - esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); - (*i)++; - } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { esw_setup_slow_path_dest(dest, flow_act, esw, *i); (*i)++; + goto out; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); + (*i)++; } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; @@ -506,6 +508,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest, } } +out: return err; } @@ -1046,7 +1049,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, if (rep->vport == MLX5_VPORT_UPLINK) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; - flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(on_esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", @@ -1095,7 +1098,7 @@ mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 
vport_num mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); dest.vport.num = vport_num; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n", @@ -1248,7 +1251,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, spec, MLX5_VPORT_PF); - flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); @@ -1260,7 +1263,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (mlx5_ecpf_vport_exists(esw->dev)) { vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); - flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); @@ -1274,7 +1277,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, peer_dev->priv.eswitch, spec, vport->vport); - flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); @@ -1363,7 +1366,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -1378,7 +1381,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); dmac_v[0] = 0x01; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -1927,7 +1930,7 @@ send_vport_err: fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); tc_miss_table_err: - mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); + mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw)); slow_fdb_err: /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); @@ -1938,7 +1941,7 @@ ns_err: static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { - if (!esw->fdb_table.offloads.slow_fdb) + if (!mlx5_eswitch_get_slow_fdb(esw)) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); @@ -1954,7 +1957,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_chains_destroy(esw, esw_chains(esw)); mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); - mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); + mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw)); /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); @@ -3387,6 +3390,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err; esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov 
enabled, + * no need to create legacy fdb. + */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 108a3503f413..3a9a6bb9158d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -210,6 +210,18 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, return (port_mask & port_value) == MLX5_VPORT_UPLINK; } +static bool +mlx5_eswitch_is_push_vlan_no_cap(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act) +{ + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) + return true; + + return false; +} + bool mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, @@ -225,10 +237,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) return false; - /* push vlan on RX */ - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && - !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & - MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) + if (mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act)) return true; /* hairpin */ @@ -252,19 +261,31 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, struct mlx5_flow_act term_tbl_act = {}; struct mlx5_flow_handle *rule = NULL; bool term_table_created = false; + bool is_push_vlan_on_rx; int num_vport_dests = 0; int i, curr_dest; + is_push_vlan_on_rx = mlx5_eswitch_is_push_vlan_no_cap(esw, flow_act); mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < num_dest; i++) { struct mlx5_termtbl_handle *tt; + bool hairpin = false; /* only vport destinations can be terminated */ if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) continue; + if (attr->dests[num_vport_dests].rep && + attr->dests[num_vport_dests].rep->vport == MLX5_VPORT_UPLINK) + hairpin = true; + + if (!is_push_vlan_on_rx && !hairpin) { + num_vport_dests++; + continue; + } + if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; @@ -312,6 +333,11 @@ revert_changes: for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + if (!tt) + continue; + + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 9d908a0ccfef..1e46f9afa40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -9,7 +9,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, - MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED }; struct mlx5_fw_reset { @@ -406,7 +407,7 @@ static void 
mlx5_sync_reset_now_event(struct work_struct *work) err = mlx5_pci_link_toggle(dev); if (err) { mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); - goto done; + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); } mlx5_enter_error_state(dev, true); @@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) goto out; } err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev); + mlx5_load_one_devl_locked(dev, false); + } out: clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a9f4ede4a9bf..32c3e0a649a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); - mlx5_lag_mpesw_cleanup(ldev); - cancel_work_sync(&ldev->mpesw_work); destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } @@ -701,10 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - dev = ldev->pf[MLX5_LAG_P1].dev; - if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) - return false; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index ce2ce8ccbd70..f30ac2de639f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -50,6 +50,19 @@ struct lag_tracker { enum netdev_lag_hash hash_type; }; +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + /* LAG data of a ConnectX card. * It serves both its phys functions. 
*/ @@ -66,7 +79,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; - struct work_struct mpesw_work; struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index f643202b29c6..c17e8f1ec914 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -7,63 +7,95 @@ #include "eswitch.h" #include "lib/mlx5.h" -void mlx5_mpesw_work(struct work_struct *work) +static int add_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int err; - mutex_lock(&ldev->lock); - mlx5_disable_lag(ldev); - mutex_unlock(&ldev->lock); -} + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + return 0; -static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev = dev->priv.lag; + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out_err; + } - if (!queue_work(ldev->wq, &ldev->mpesw_work)) - mlx5_core_warn(dev, "failed to queue work\n"); + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) { + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + goto out_err; + } + + return 0; + +out_err: + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); + return err; } -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +static void del_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = dev->priv.lag; + if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && + ldev->mode == MLX5_LAG_MODE_MPESW) + mlx5_disable_lag(ldev); +} - if (!ldev) - return; +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; mutex_lock(&ldev->lock); - if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && - ldev->mode == MLX5_LAG_MODE_MPESW) - mlx5_lag_disable_mpesw(dev); + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = add_mpesw_rule(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + del_mpesw_rule(ldev); mutex_unlock(&ldev->lock); + + complete(&mpesww->comp); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) { struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_mpesw_work_st *work; int err = 0; if (!ldev) return 0; - mutex_lock(&ldev->lock); - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - goto out; + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; - if (ldev->mode != MLX5_LAG_MODE_NONE) { + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); err = -EINVAL; goto out; } - - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); - if (err) - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - + wait_for_completion(&work->comp); + err = work->result; out: - mutex_unlock(&ldev->lock); + kfree(work); return err; } +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_add_mpesw_rule(struct 
mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) { struct mlx5_lag *ldev = mdev->priv.lag; @@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) if (!netif_is_bond_master(out_dev) || !ldev) return 0; - mutex_lock(&ldev->lock); - if (ldev->mode == MLX5_LAG_MODE_MPESW) { - mutex_unlock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) return -EOPNOTSUPP; - } - mutex_unlock(&ldev->lock); + return 0; } @@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) { - INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); } void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) { - cancel_delayed_work_sync(&ldev->bond_work); + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index be4abcb8fcd5..88e8daffcf92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -12,7 +12,6 @@ struct lag_mpesw { atomic_t mpesw_rule_count; }; -void mlx5_mpesw_work(struct work_struct *work); int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index c971ff04dd04..0f9e4f01c85a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -334,9 +334,6 @@ err_cq: void mlx5_aso_destroy(struct mlx5_aso *aso) { - if (IS_ERR_OR_NULL(aso)) - return; - mlx5_aso_destroy_sq(aso); mlx5_aso_destroy_cq(&aso->cq); kfree(aso); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6d7c102861ea..7f5db13e3550 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -37,7 +37,6 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> -#include <linux/io-mapping.h> #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/mlx5/driver.h> @@ -1604,8 +1603,6 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) int err; memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); - INIT_LIST_HEAD(&priv->ctx_list); - spin_lock_init(&priv->ctx_lock); lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); @@ -1817,7 +1814,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; - mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); return res; } @@ -1856,7 +1854,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_pci_trace(dev, "Enter\n"); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. 
Enter\n", + __func__, dev->state, dev->pci_status); err = mlx5_pci_enable_device(dev); if (err) { @@ -1878,7 +1877,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) res = PCI_ERS_RESULT_RECOVERED; out: - mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); return res; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 7da012ff0d41..8e2abbab05f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -18,6 +18,10 @@ struct mlx5_sf_dev_table { phys_addr_t base_address; u64 sf_bar_length; struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; struct mlx5_core_dev *dev; }; @@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ return 0; sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); sf_dev = xa_load(&table->devices, sf_index); switch (event->new_vhca_state) { case MLX5_VHCA_STATE_INVALID: @@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ default: break; } + mutex_unlock(&table->table_lock); return 0; } @@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) return 0; } +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of a specific vhca doesn't mean the others + * will fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe a device which is already probed */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where the SF became inactive after the query + * above, e.g. the query returns that the state of the + * SF is active, and after that the eswitch manager sets it to + * inactive. + * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactivation is done after the SF performs init_hca(), + * the SF will be fully probed and then removed. If it was + * done before init_hca(), the SF probe will fail. 
+ */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. + */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table; @@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) table->base_address = pci_resource_start(dev->pdev, 2); table->max_sfs = max_sfs; xa_init(&table->devices); + mutex_init(&table->table_lock); dev->priv.sf_dev_table = table; err = mlx5_vhca_event_notifier_register(dev, &table->nb); if (err) goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + err = mlx5_sf_dev_vhca_arm_all(table); if (err) goto arm_err; @@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) return; arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: mlx5_vhca_event_notifier_unregister(dev, &table->nb); vhca_err: table->max_sfs = 0; @@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) if (!table) return; + mlx5_sf_dev_destroy_active_work(table); mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); /* Now that event handler is not running, it is safe to destroy * the sf device without race. 
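[Editor's note] The sf/dev/dev.c hunks above serialize SF life-cycle changes against the vhca state-change handler with table_lock, and add a single-threaded workqueue that probes SFs which were created externally and are already active; teardown sets stop_active_wq before destroy_workqueue(), which drains the pending work so the walker exits early. A compressed, hypothetical module sketch of that start/stop pattern (not the driver code itself; all names are illustrative):

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

struct probe_table {
	struct workqueue_struct *active_wq;
	struct work_struct work;
	u8 stop_active_wq:1;
};

static struct probe_table tbl;

static void probe_active_fn(struct work_struct *work)
{
	struct probe_table *t = container_of(work, struct probe_table, work);
	int i;

	for (i = 0; i < 1024; i++) {
		if (t->stop_active_wq)	/* teardown requested: bail out early */
			return;
		/* query function i here and probe it if it is active */
		cond_resched();
	}
}

static int __init probe_table_init(void)
{
	tbl.active_wq = create_singlethread_workqueue("probe_active");
	if (!tbl.active_wq)
		return -ENOMEM;
	INIT_WORK(&tbl.work, probe_active_fn);
	queue_work(tbl.active_wq, &tbl.work);
	return 0;
}

static void __exit probe_table_exit(void)
{
	tbl.stop_active_wq = true;	/* destroy_workqueue() flushes the work */
	destroy_workqueue(tbl.active_wq);
}

module_init(probe_table_init);
module_exit(probe_table_exit);
MODULE_LICENSE("GPL");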
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 9c3dfd68f8df..69294a66fd7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action) { - int ret; + int ret = -EOPNOTSUPP; if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, goto out; } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 8455e79bc44a..1513112ecec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -31,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/io-mapping.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index a83f6bc30072..a0a06e2eff82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1459,11 +1459,6 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, - mlxsw_core->bus_info->device_kind); - if (err) - return err; - mlxsw_reg_mgir_pack(mgir_pl); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index f5f5f8dc3d19..2c586c2308ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -632,9 +632,9 @@ static const struct mlxsw_bus mlxsw_i2c_bus = { .cmd_exec = mlxsw_i2c_cmd_exec, }; -static int mlxsw_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int mlxsw_i2c_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); const struct i2c_adapter_quirks *quirks = client->adapter->quirks; struct mlxsw_i2c *mlxsw_i2c; u8 status; @@ -751,7 +751,7 @@ static void mlxsw_i2c_remove(struct i2c_client *client) int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver) { - i2c_driver->probe = mlxsw_i2c_probe; + i2c_driver->probe_new = mlxsw_i2c_probe; i2c_driver->remove = mlxsw_i2c_remove; return i2c_add_driver(i2c_driver); } diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index b7ae5ce7d3f7..8bcd60f17d6d 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -8,5 +8,6 @@ config LAN966X_SWITCH select PHYLINK select PACKING select PAGE_POOL + select VCAP help This driver supports the Lan966x network switch device. 
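[Editor's note] The mlxsw_i2c change above follows the tree-wide conversion from the i2c .probe(client, id) callback to .probe_new(client); drivers that still need the matched ID recover it with i2c_client_get_device_id(). A self-contained sketch of the converted driver shape, with purely illustrative names:

#include <linux/i2c.h>
#include <linux/module.h>

static int example_i2c_probe(struct i2c_client *client)
{
	/* the id is no longer passed in; look it up when needed */
	const struct i2c_device_id *id = i2c_client_get_device_id(client);

	dev_info(&client->dev, "probed %s (driver_data %lu)\n",
		 client->name, id ? id->driver_data : 0);
	return 0;
}

static void example_i2c_remove(struct i2c_client *client)
{
	dev_info(&client->dev, "removed\n");
}

static const struct i2c_device_id example_ids[] = {
	{ "example", 0 },
	{ }
};
MODULE_DEVICE_TABLE(i2c, example_ids);

static struct i2c_driver example_i2c_driver = {
	.driver		= { .name = "example" },
	.probe_new	= example_i2c_probe,
	.remove		= example_i2c_remove,
	.id_table	= example_ids,
};
module_i2c_driver(example_i2c_driver);

MODULE_LICENSE("GPL");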
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 251a7d561d63..56afd694f3c7 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -12,4 +12,8 @@ lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_tc.o lan966x_mqprio.o lan966x_taprio.o \ lan966x_tbf.o lan966x_cbs.o lan966x_ets.o \ lan966x_tc_matchall.o lan966x_police.o lan966x_mirror.o \ - lan966x_xdp.o + lan966x_xdp.o lan966x_vcap_impl.o lan966x_vcap_ag_api.o \ + lan966x_tc_flower.o lan966x_goto.o + +# Provide include files +ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 5fbbd479cfb0..5314c064ceae 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0+ +#include <linux/bpf.h> +#include <linux/filter.h> + #include "lan966x_main.h" static int lan966x_fdma_channel_active(struct lan966x *lan966x) @@ -16,7 +19,7 @@ static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx, if (unlikely(!page)) return NULL; - db->dataptr = page_pool_get_dma_addr(page); + db->dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM; return page; } @@ -72,12 +75,28 @@ static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) .nid = NUMA_NO_NODE, .dev = lan966x->dev, .dma_dir = DMA_FROM_DEVICE, - .offset = 0, + .offset = XDP_PACKET_HEADROOM, .max_len = rx->max_mtu - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), }; + if (lan966x_xdp_present(lan966x)) + pp_params.dma_dir = DMA_BIDIRECTIONAL; + rx->page_pool = page_pool_create(&pp_params); + + for (int i = 0; i < lan966x->num_phys_ports; ++i) { + struct lan966x_port *port; + + if (!lan966x->ports[i]) + continue; + + port = lan966x->ports[i]; + xdp_rxq_info_unreg_mem_model(&port->xdp_rxq); + xdp_rxq_info_reg_mem_model(&port->xdp_rxq, MEM_TYPE_PAGE_POOL, + rx->page_pool); + } + return PTR_ERR_OR_ZERO(rx->page_pool); } @@ -372,11 +391,14 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) { struct lan966x_tx *tx = &lan966x->tx; struct lan966x_tx_dcb_buf *dcb_buf; + struct xdp_frame_bulk bq; struct lan966x_db *db; unsigned long flags; bool clear = false; int i; + xdp_frame_bulk_init(&bq); + spin_lock_irqsave(&lan966x->tx_lock, flags); for (i = 0; i < FDMA_DCB_MAX; ++i) { dcb_buf = &tx->dcbs_buf[i]; @@ -389,19 +411,35 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) continue; dcb_buf->dev->stats.tx_packets++; - dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len; + dcb_buf->dev->stats.tx_bytes += dcb_buf->len; dcb_buf->used = false; - dma_unmap_single(lan966x->dev, - dcb_buf->dma_addr, - dcb_buf->skb->len, - DMA_TO_DEVICE); - if (!dcb_buf->ptp) - dev_kfree_skb_any(dcb_buf->skb); + if (dcb_buf->use_skb) { + dma_unmap_single(lan966x->dev, + dcb_buf->dma_addr, + dcb_buf->len, + DMA_TO_DEVICE); + + if (!dcb_buf->ptp) + napi_consume_skb(dcb_buf->data.skb, weight); + } else { + if (dcb_buf->xdp_ndo) + dma_unmap_single(lan966x->dev, + dcb_buf->dma_addr, + dcb_buf->len, + DMA_TO_DEVICE); + + if (dcb_buf->xdp_ndo) + xdp_return_frame_bulk(dcb_buf->data.xdpf, &bq); + else + xdp_return_frame_rx_napi(dcb_buf->data.xdpf); + } clear = true; } + xdp_flush_frame_bulk(&bq); + if (clear) lan966x_fdma_wakeup_netdev(lan966x); @@ -432,11 +470,13 
@@ static int lan966x_fdma_rx_check_frame(struct lan966x_rx *rx, u64 *src_port) if (unlikely(!page)) return FDMA_ERROR; - dma_sync_single_for_cpu(lan966x->dev, (dma_addr_t)db->dataptr, + dma_sync_single_for_cpu(lan966x->dev, + (dma_addr_t)db->dataptr + XDP_PACKET_HEADROOM, FDMA_DCB_STATUS_BLOCKL(db->status), DMA_FROM_DEVICE); - lan966x_ifh_get_src_port(page_address(page), src_port); + lan966x_ifh_get_src_port(page_address(page) + XDP_PACKET_HEADROOM, + src_port); if (WARN_ON(*src_port >= lan966x->num_phys_ports)) return FDMA_ERROR; @@ -466,6 +506,7 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx, skb_mark_for_recycle(skb); + skb_reserve(skb, XDP_PACKET_HEADROOM); skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status)); lan966x_ifh_get_timestamp(skb->data, &timestamp); @@ -505,6 +546,7 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) int dcb_reload = rx->dcb_index; struct lan966x_rx_dcb *old_dcb; struct lan966x_db *db; + bool redirect = false; struct sk_buff *skb; struct page *page; int counter = 0; @@ -527,6 +569,12 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) lan966x_fdma_rx_free_page(rx); lan966x_fdma_rx_advance_dcb(rx); goto allocate_new; + case FDMA_REDIRECT: + redirect = true; + fallthrough; + case FDMA_TX: + lan966x_fdma_rx_advance_dcb(rx); + continue; case FDMA_DROP: lan966x_fdma_rx_free_page(rx); lan966x_fdma_rx_advance_dcb(rx); @@ -563,6 +611,9 @@ allocate_new: if (counter < weight && napi_complete_done(napi, counter)) lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA); + if (redirect) + xdp_do_flush(); + return counter; } @@ -607,14 +658,139 @@ static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx) return -1; } +static void lan966x_fdma_tx_setup_dcb(struct lan966x_tx *tx, + int next_to_use, int len, + dma_addr_t dma_addr) +{ + struct lan966x_tx_dcb *next_dcb; + struct lan966x_db *next_db; + + next_dcb = &tx->dcbs[next_to_use]; + next_dcb->nextptr = FDMA_DCB_INVALID_DATA; + + next_db = &next_dcb->db[0]; + next_db->dataptr = dma_addr; + next_db->status = FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(len); +} + +static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use) +{ + struct lan966x *lan966x = tx->lan966x; + struct lan966x_tx_dcb *dcb; + + if (likely(lan966x->tx.activated)) { + /* Connect current dcb to the next db */ + dcb = &tx->dcbs[tx->last_in_use]; + dcb->nextptr = tx->dma + (next_to_use * + sizeof(struct lan966x_tx_dcb)); + + lan966x_fdma_tx_reload(tx); + } else { + /* Because it is the first time, just activate */ + lan966x->tx.activated = true; + lan966x_fdma_tx_activate(tx); + } + + /* Move to the next dcb because this one is the last in use */ + tx->last_in_use = next_to_use; +} + +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, + struct xdp_frame *xdpf, + struct page *page, + bool dma_map) +{ + struct lan966x *lan966x = port->lan966x; + struct lan966x_tx_dcb_buf *next_dcb_buf; + struct lan966x_tx *tx = &lan966x->tx; + dma_addr_t dma_addr; + int next_to_use; + __be32 *ifh; + int ret = 0; + + spin_lock(&lan966x->tx_lock); + + /* Get next index */ + next_to_use = lan966x_fdma_get_next_dcb(tx); + if (next_to_use < 0) { + netif_stop_queue(port->dev); + ret = NETDEV_TX_BUSY; + goto out; + } + + /* Generate new IFH */ + if (dma_map) { + if (xdpf->headroom < IFH_LEN_BYTES) { + ret = NETDEV_TX_OK; + goto out; + } + + ifh = xdpf->data - IFH_LEN_BYTES; + memset(ifh, 0x0, sizeof(__be32) * IFH_LEN); + lan966x_ifh_set_bypass(ifh,
1); + lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port)); + + dma_addr = dma_map_single(lan966x->dev, + xdpf->data - IFH_LEN_BYTES, + xdpf->len + IFH_LEN_BYTES, + DMA_TO_DEVICE); + if (dma_mapping_error(lan966x->dev, dma_addr)) { + ret = NETDEV_TX_OK; + goto out; + } + + /* Setup next dcb */ + lan966x_fdma_tx_setup_dcb(tx, next_to_use, + xdpf->len + IFH_LEN_BYTES, + dma_addr); + } else { + ifh = page_address(page) + XDP_PACKET_HEADROOM; + memset(ifh, 0x0, sizeof(__be32) * IFH_LEN); + lan966x_ifh_set_bypass(ifh, 1); + lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port)); + + dma_addr = page_pool_get_dma_addr(page); + dma_sync_single_for_device(lan966x->dev, + dma_addr + XDP_PACKET_HEADROOM, + xdpf->len + IFH_LEN_BYTES, + DMA_TO_DEVICE); + + /* Setup next dcb */ + lan966x_fdma_tx_setup_dcb(tx, next_to_use, + xdpf->len + IFH_LEN_BYTES, + dma_addr + XDP_PACKET_HEADROOM); + } + + /* Fill up the buffer */ + next_dcb_buf = &tx->dcbs_buf[next_to_use]; + next_dcb_buf->use_skb = false; + next_dcb_buf->data.xdpf = xdpf; + next_dcb_buf->xdp_ndo = dma_map; + next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES; + next_dcb_buf->dma_addr = dma_addr; + next_dcb_buf->used = true; + next_dcb_buf->ptp = false; + next_dcb_buf->dev = port->dev; + + /* Start the transmission */ + lan966x_fdma_tx_start(tx, next_to_use); + +out: + spin_unlock(&lan966x->tx_lock); + + return ret; +} + int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) { struct lan966x_port *port = netdev_priv(dev); struct lan966x *lan966x = port->lan966x; struct lan966x_tx_dcb_buf *next_dcb_buf; - struct lan966x_tx_dcb *next_dcb, *dcb; struct lan966x_tx *tx = &lan966x->tx; - struct lan966x_db *next_db; int needed_headroom; int needed_tailroom; dma_addr_t dma_addr; @@ -660,20 +836,14 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) } /* Setup next dcb */ - next_dcb = &tx->dcbs[next_to_use]; - next_dcb->nextptr = FDMA_DCB_INVALID_DATA; - - next_db = &next_dcb->db[0]; - next_db->dataptr = dma_addr; - next_db->status = FDMA_DCB_STATUS_SOF | - FDMA_DCB_STATUS_EOF | - FDMA_DCB_STATUS_INTR | - FDMA_DCB_STATUS_BLOCKO(0) | - FDMA_DCB_STATUS_BLOCKL(skb->len); + lan966x_fdma_tx_setup_dcb(tx, next_to_use, skb->len, dma_addr); /* Fill up the buffer */ next_dcb_buf = &tx->dcbs_buf[next_to_use]; - next_dcb_buf->skb = skb; + next_dcb_buf->use_skb = true; + next_dcb_buf->data.skb = skb; + next_dcb_buf->xdp_ndo = false; + next_dcb_buf->len = skb->len; next_dcb_buf->dma_addr = dma_addr; next_dcb_buf->used = true; next_dcb_buf->ptp = false; @@ -683,21 +853,8 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) next_dcb_buf->ptp = true; - if (likely(lan966x->tx.activated)) { - /* Connect current dcb to the next db */ - dcb = &tx->dcbs[tx->last_in_use]; - dcb->nextptr = tx->dma + (next_to_use * - sizeof(struct lan966x_tx_dcb)); - - lan966x_fdma_tx_reload(tx); - } else { - /* Because it is first time, then just activate */ - lan966x->tx.activated = true; - lan966x_fdma_tx_activate(tx); - } - - /* Move to next dcb because this last in use */ - tx->last_in_use = next_to_use; + /* Start the transmission */ + lan966x_fdma_tx_start(tx, next_to_use); return NETDEV_TX_OK; @@ -786,19 +943,15 @@ static int lan966x_fdma_get_max_frame(struct lan966x *lan966x) return lan966x_fdma_get_max_mtu(lan966x) + IFH_LEN_BYTES + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - VLAN_HLEN * 2; + VLAN_HLEN * 2 + + XDP_PACKET_HEADROOM; } -int 
lan966x_fdma_change_mtu(struct lan966x *lan966x) +static int __lan966x_fdma_reload(struct lan966x *lan966x, int max_mtu) { - int max_mtu; int err; u32 val; - max_mtu = lan966x_fdma_get_max_frame(lan966x); - if (max_mtu == lan966x->rx.max_mtu) - return 0; - /* Disable the CPU port */ lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(0), QSYS_SW_PORT_MODE_PORT_ENA, @@ -824,6 +977,25 @@ int lan966x_fdma_change_mtu(struct lan966x *lan966x) return err; } +int lan966x_fdma_change_mtu(struct lan966x *lan966x) +{ + int max_mtu; + + max_mtu = lan966x_fdma_get_max_frame(lan966x); + if (max_mtu == lan966x->rx.max_mtu) + return 0; + + return __lan966x_fdma_reload(lan966x, max_mtu); +} + +int lan966x_fdma_reload_page_pool(struct lan966x *lan966x) +{ + int max_mtu; + + max_mtu = lan966x_fdma_get_max_frame(lan966x); + return __lan966x_fdma_reload(lan966x, max_mtu); +} + void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev) { if (lan966x->fdma_ndev) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_goto.c b/drivers/net/ethernet/microchip/lan966x/lan966x_goto.c new file mode 100644 index 000000000000..bf0cfe24a8fc --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_goto.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" +#include "vcap_api_client.h" + +int lan966x_goto_port_add(struct lan966x_port *port, + struct flow_action_entry *act, + unsigned long goto_id, + struct netlink_ext_ack *extack) +{ + struct lan966x *lan966x = port->lan966x; + int err; + + err = vcap_enable_lookups(lan966x->vcap_ctrl, port->dev, + act->chain_index, goto_id, + true); + if (err == -EFAULT) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported goto chain"); + return -EOPNOTSUPP; + } + + if (err == -EADDRINUSE) { + NL_SET_ERR_MSG_MOD(extack, "VCAP already enabled"); + return -EOPNOTSUPP; + } + + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not enable VCAP lookups"); + return err; + } + + port->tc.goto_id = goto_id; + + return 0; +} + +int lan966x_goto_port_del(struct lan966x_port *port, + unsigned long goto_id, + struct netlink_ext_ack *extack) +{ + struct lan966x *lan966x = port->lan966x; + int err; + + err = vcap_enable_lookups(lan966x->vcap_ctrl, port->dev, 0, + goto_id, false); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not disable VCAP lookups"); + return err; + } + + port->tc.goto_id = 0; + + return 0; +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 42be5d0f1f01..f6092983d028 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -47,6 +47,9 @@ static const struct lan966x_main_io_resource lan966x_main_iomap[] = { { TARGET_PTP, 0xc000, 1 }, /* 0xe200c000 */ { TARGET_CHIP_TOP, 0x10000, 1 }, /* 0xe2010000 */ { TARGET_REW, 0x14000, 1 }, /* 0xe2014000 */ + { TARGET_VCAP, 0x18000, 1 }, /* 0xe2018000 */ + { TARGET_VCAP + 1, 0x20000, 1 }, /* 0xe2020000 */ + { TARGET_VCAP + 2, 0x24000, 1 }, /* 0xe2024000 */ { TARGET_SYS, 0x28000, 1 }, /* 0xe2028000 */ { TARGET_DEV, 0x34000, 1 }, /* 0xe2034000 */ { TARGET_DEV + 1, 0x38000, 1 }, /* 0xe2038000 */ @@ -302,13 +305,13 @@ err: return NETDEV_TX_BUSY; } -static void lan966x_ifh_set_bypass(void *ifh, u64 bypass) +void lan966x_ifh_set_bypass(void *ifh, u64 bypass) { packing(ifh, &bypass, IFH_POS_BYPASS + IFH_WID_BYPASS - 1, IFH_POS_BYPASS, IFH_LEN * 4, PACK, 0); } -static void lan966x_ifh_set_port(void *ifh, u64 bypass) +void lan966x_ifh_set_port(void *ifh, u64 
bypass) { packing(ifh, &bypass, IFH_POS_DSTS + IFH_WID_DSTS - 1, IFH_POS_DSTS, IFH_LEN * 4, PACK, 0); @@ -469,6 +472,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = { .ndo_eth_ioctl = lan966x_port_ioctl, .ndo_setup_tc = lan966x_tc_setup, .ndo_bpf = lan966x_xdp, + .ndo_xdp_xmit = lan966x_xdp_xmit, }; bool lan966x_netdevice_check(const struct net_device *dev) @@ -1156,8 +1160,15 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_ptp; + err = lan966x_vcap_init(lan966x); + if (err) + goto cleanup_fdma; + return 0; +cleanup_fdma: + lan966x_fdma_deinit(lan966x); + cleanup_ptp: lan966x_ptp_deinit(lan966x); @@ -1181,6 +1192,7 @@ static int lan966x_remove(struct platform_device *pdev) struct lan966x *lan966x = platform_get_drvdata(pdev); lan966x_taprio_deinit(lan966x); + lan966x_vcap_deinit(lan966x); lan966x_fdma_deinit(lan966x); lan966x_cleanup_ports(lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index bc93051aa079..f2e45da7ffd4 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -105,11 +105,15 @@ enum macaccess_entry_type { * FDMA_PASS, frame is valid and can be used * FDMA_ERROR, something went wrong, stop getting more frames * FDMA_DROP, frame is dropped, but continue to get more frames + * FDMA_TX, frame is given to TX, but continue to get more frames + * FDMA_REDIRECT, frame is redirected to another interface, but continue to get more frames */ enum lan966x_fdma_action { FDMA_PASS = 0, FDMA_ERROR, FDMA_DROP, + FDMA_TX, + FDMA_REDIRECT, }; struct lan966x_port; @@ -173,11 +177,17 @@ struct lan966x_rx { }; struct lan966x_tx_dcb_buf { - struct net_device *dev; - struct sk_buff *skb; dma_addr_t dma_addr; - bool used; - bool ptp; + struct net_device *dev; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + } data; + u32 len; + u32 used : 1; + u32 ptp : 1; + u32 use_skb : 1; + u32 xdp_ndo : 1; }; struct lan966x_tx { @@ -290,6 +300,9 @@ struct lan966x { struct lan966x_port *mirror_monitor; u32 mirror_mask[2]; u32 mirror_count; + + /* vcap */ + struct vcap_control *vcap_ctrl; }; struct lan966x_port_config { @@ -307,6 +320,7 @@ struct lan966x_port_tc { unsigned long police_id; unsigned long ingress_mirror_id; unsigned long egress_mirror_id; + unsigned long goto_id; struct flow_stats police_stat; struct flow_stats mirror_stat; }; @@ -359,6 +373,8 @@ bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb); void lan966x_ifh_get_src_port(void *ifh, u64 *src_port); void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp); +void lan966x_ifh_set_bypass(void *ifh, u64 bypass); +void lan966x_ifh_set_port(void *ifh, u64 bypass); void lan966x_stats_get(struct net_device *dev, struct rtnl_link_stats64 *stats); @@ -459,12 +475,17 @@ u32 lan966x_ptp_get_period_ps(void); int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts); int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev); +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, + struct xdp_frame *frame, + struct page *page, + bool dma_map); int lan966x_fdma_change_mtu(struct lan966x *lan966x); void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev); void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev); int lan966x_fdma_init(struct lan966x *lan966x); void lan966x_fdma_deinit(struct lan966x *lan966x); irqreturn_t lan966x_fdma_irq_handler(int
irq, void *args); +int lan966x_fdma_reload_page_pool(struct lan966x *lan966x); int lan966x_lag_port_join(struct lan966x_port *port, struct net_device *brport_dev, @@ -555,11 +576,30 @@ int lan966x_xdp(struct net_device *dev, struct netdev_bpf *xdp); int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len); +int lan966x_xdp_xmit(struct net_device *dev, + int n, + struct xdp_frame **frames, + u32 flags); +bool lan966x_xdp_present(struct lan966x *lan966x); static inline bool lan966x_xdp_port_present(struct lan966x_port *port) { return !!port->xdp_prog; } +int lan966x_vcap_init(struct lan966x *lan966x); +void lan966x_vcap_deinit(struct lan966x *lan966x); + +int lan966x_tc_flower(struct lan966x_port *port, + struct flow_cls_offload *f); + +int lan966x_goto_port_add(struct lan966x_port *port, + struct flow_action_entry *act, + unsigned long goto_id, + struct netlink_ext_ack *extack); +int lan966x_goto_port_del(struct lan966x_port *port, + unsigned long goto_id, + struct netlink_ext_ack *extack); + static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, int gbase, int ginst, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index fb5087fef22e..9767b5a1c958 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -25,6 +25,7 @@ enum lan966x_target { TARGET_QSYS = 46, TARGET_REW = 47, TARGET_SYS = 52, + TARGET_VCAP = 61, NUM_TARGETS = 66 }; @@ -315,6 +316,69 @@ enum lan966x_target { #define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\ FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) +/* ANA:PORT:VCAP_S2_CFG */ +#define ANA_VCAP_S2_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 28, 0, 1, 4) + +#define ANA_VCAP_S2_CFG_ISDX_ENA GENMASK(20, 19) +#define ANA_VCAP_S2_CFG_ISDX_ENA_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_ISDX_ENA, x) +#define ANA_VCAP_S2_CFG_ISDX_ENA_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_ISDX_ENA, x) + +#define ANA_VCAP_S2_CFG_UDP_PAYLOAD_ENA GENMASK(18, 17) +#define ANA_VCAP_S2_CFG_UDP_PAYLOAD_ENA_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_UDP_PAYLOAD_ENA, x) +#define ANA_VCAP_S2_CFG_UDP_PAYLOAD_ENA_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_UDP_PAYLOAD_ENA, x) + +#define ANA_VCAP_S2_CFG_ETYPE_PAYLOAD_ENA GENMASK(16, 15) +#define ANA_VCAP_S2_CFG_ETYPE_PAYLOAD_ENA_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_ETYPE_PAYLOAD_ENA, x) +#define ANA_VCAP_S2_CFG_ETYPE_PAYLOAD_ENA_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_ETYPE_PAYLOAD_ENA, x) + +#define ANA_VCAP_S2_CFG_ENA BIT(14) +#define ANA_VCAP_S2_CFG_ENA_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_ENA, x) +#define ANA_VCAP_S2_CFG_ENA_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_ENA, x) + +#define ANA_VCAP_S2_CFG_SNAP_DIS GENMASK(13, 12) +#define ANA_VCAP_S2_CFG_SNAP_DIS_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_SNAP_DIS, x) +#define ANA_VCAP_S2_CFG_SNAP_DIS_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_SNAP_DIS, x) + +#define ANA_VCAP_S2_CFG_ARP_DIS GENMASK(11, 10) +#define ANA_VCAP_S2_CFG_ARP_DIS_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_ARP_DIS, x) +#define ANA_VCAP_S2_CFG_ARP_DIS_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_ARP_DIS, x) + +#define ANA_VCAP_S2_CFG_IP_TCPUDP_DIS GENMASK(9, 8) +#define ANA_VCAP_S2_CFG_IP_TCPUDP_DIS_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_IP_TCPUDP_DIS, x) +#define ANA_VCAP_S2_CFG_IP_TCPUDP_DIS_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_IP_TCPUDP_DIS, x) + +#define ANA_VCAP_S2_CFG_IP_OTHER_DIS GENMASK(7, 6) +#define ANA_VCAP_S2_CFG_IP_OTHER_DIS_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_IP_OTHER_DIS, x) +#define 
ANA_VCAP_S2_CFG_IP_OTHER_DIS_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_IP_OTHER_DIS, x) + +#define ANA_VCAP_S2_CFG_IP6_CFG GENMASK(5, 2) +#define ANA_VCAP_S2_CFG_IP6_CFG_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_IP6_CFG, x) +#define ANA_VCAP_S2_CFG_IP6_CFG_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_IP6_CFG, x) + +#define ANA_VCAP_S2_CFG_OAM_DIS GENMASK(1, 0) +#define ANA_VCAP_S2_CFG_OAM_DIS_SET(x)\ + FIELD_PREP(ANA_VCAP_S2_CFG_OAM_DIS, x) +#define ANA_VCAP_S2_CFG_OAM_DIS_GET(x)\ + FIELD_GET(ANA_VCAP_S2_CFG_OAM_DIS, x) + /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) @@ -1506,4 +1570,136 @@ enum lan966x_target { #define SYS_RAM_INIT_RAM_INIT_GET(x)\ FIELD_GET(SYS_RAM_INIT_RAM_INIT, x) +/* VCAP:VCAP_CORE_CFG:VCAP_UPDATE_CTRL */ +#define VCAP_UPDATE_CTRL(t) __REG(TARGET_VCAP, t, 3, 0, 0, 1, 8, 0, 0, 1, 4) + +#define VCAP_UPDATE_CTRL_UPDATE_CMD GENMASK(24, 22) +#define VCAP_UPDATE_CTRL_UPDATE_CMD_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_CMD, x) +#define VCAP_UPDATE_CTRL_UPDATE_CMD_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_CMD, x) + +#define VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS BIT(21) +#define VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS, x) +#define VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS, x) + +#define VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS BIT(20) +#define VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS, x) +#define VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS, x) + +#define VCAP_UPDATE_CTRL_UPDATE_CNT_DIS BIT(19) +#define VCAP_UPDATE_CTRL_UPDATE_CNT_DIS_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_CNT_DIS, x) +#define VCAP_UPDATE_CTRL_UPDATE_CNT_DIS_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_CNT_DIS, x) + +#define VCAP_UPDATE_CTRL_UPDATE_ADDR GENMASK(18, 3) +#define VCAP_UPDATE_CTRL_UPDATE_ADDR_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_ADDR, x) +#define VCAP_UPDATE_CTRL_UPDATE_ADDR_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_ADDR, x) + +#define VCAP_UPDATE_CTRL_UPDATE_SHOT BIT(2) +#define VCAP_UPDATE_CTRL_UPDATE_SHOT_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_UPDATE_SHOT, x) +#define VCAP_UPDATE_CTRL_UPDATE_SHOT_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_UPDATE_SHOT, x) + +#define VCAP_UPDATE_CTRL_CLEAR_CACHE BIT(1) +#define VCAP_UPDATE_CTRL_CLEAR_CACHE_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_CLEAR_CACHE, x) +#define VCAP_UPDATE_CTRL_CLEAR_CACHE_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_CLEAR_CACHE, x) + +#define VCAP_UPDATE_CTRL_MV_TRAFFIC_IGN BIT(0) +#define VCAP_UPDATE_CTRL_MV_TRAFFIC_IGN_SET(x)\ + FIELD_PREP(VCAP_UPDATE_CTRL_MV_TRAFFIC_IGN, x) +#define VCAP_UPDATE_CTRL_MV_TRAFFIC_IGN_GET(x)\ + FIELD_GET(VCAP_UPDATE_CTRL_MV_TRAFFIC_IGN, x) + +/* VCAP:VCAP_CORE_CFG:VCAP_MV_CFG */ +#define VCAP_MV_CFG(t) __REG(TARGET_VCAP, t, 3, 0, 0, 1, 8, 4, 0, 1, 4) + +#define VCAP_MV_CFG_MV_NUM_POS GENMASK(31, 16) +#define VCAP_MV_CFG_MV_NUM_POS_SET(x)\ + FIELD_PREP(VCAP_MV_CFG_MV_NUM_POS, x) +#define VCAP_MV_CFG_MV_NUM_POS_GET(x)\ + FIELD_GET(VCAP_MV_CFG_MV_NUM_POS, x) + +#define VCAP_MV_CFG_MV_SIZE GENMASK(15, 0) +#define VCAP_MV_CFG_MV_SIZE_SET(x)\ + FIELD_PREP(VCAP_MV_CFG_MV_SIZE, x) +#define VCAP_MV_CFG_MV_SIZE_GET(x)\ + FIELD_GET(VCAP_MV_CFG_MV_SIZE, x) + +/* VCAP:VCAP_CORE_CACHE:VCAP_ENTRY_DAT */ +#define VCAP_ENTRY_DAT(t, r) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 0, r, 64, 4) + +/* VCAP:VCAP_CORE_CACHE:VCAP_MASK_DAT */ +#define VCAP_MASK_DAT(t, r) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 
256, r, 64, 4) + +/* VCAP:VCAP_CORE_CACHE:VCAP_ACTION_DAT */ +#define VCAP_ACTION_DAT(t, r) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 512, r, 64, 4) + +/* VCAP:VCAP_CORE_CACHE:VCAP_CNT_DAT */ +#define VCAP_CNT_DAT(t, r) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 768, r, 32, 4) + +/* VCAP:VCAP_CORE_CACHE:VCAP_CNT_FW_DAT */ +#define VCAP_CNT_FW_DAT(t) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 896, 0, 1, 4) + +/* VCAP:VCAP_CORE_CACHE:VCAP_TG_DAT */ +#define VCAP_TG_DAT(t) __REG(TARGET_VCAP, t, 3, 8, 0, 1, 904, 900, 0, 1, 4) + +/* VCAP:VCAP_CORE_MAP:VCAP_CORE_IDX */ +#define VCAP_CORE_IDX(t) __REG(TARGET_VCAP, t, 3, 912, 0, 1, 8, 0, 0, 1, 4) + +#define VCAP_CORE_IDX_CORE_IDX GENMASK(3, 0) +#define VCAP_CORE_IDX_CORE_IDX_SET(x)\ + FIELD_PREP(VCAP_CORE_IDX_CORE_IDX, x) +#define VCAP_CORE_IDX_CORE_IDX_GET(x)\ + FIELD_GET(VCAP_CORE_IDX_CORE_IDX, x) + +/* VCAP:VCAP_CORE_MAP:VCAP_CORE_MAP */ +#define VCAP_CORE_MAP(t) __REG(TARGET_VCAP, t, 3, 912, 0, 1, 8, 4, 0, 1, 4) + +#define VCAP_CORE_MAP_CORE_MAP GENMASK(2, 0) +#define VCAP_CORE_MAP_CORE_MAP_SET(x)\ + FIELD_PREP(VCAP_CORE_MAP_CORE_MAP, x) +#define VCAP_CORE_MAP_CORE_MAP_GET(x)\ + FIELD_GET(VCAP_CORE_MAP_CORE_MAP, x) + +/* VCAP:VCAP_CONST:VCAP_VER */ +#define VCAP_VER(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 0, 0, 1, 4) + +/* VCAP:VCAP_CONST:ENTRY_WIDTH */ +#define VCAP_ENTRY_WIDTH(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 4, 0, 1, 4) + +/* VCAP:VCAP_CONST:ENTRY_CNT */ +#define VCAP_ENTRY_CNT(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 8, 0, 1, 4) + +/* VCAP:VCAP_CONST:ENTRY_SWCNT */ +#define VCAP_ENTRY_SWCNT(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 12, 0, 1, 4) + +/* VCAP:VCAP_CONST:ENTRY_TG_WIDTH */ +#define VCAP_ENTRY_TG_WIDTH(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 16, 0, 1, 4) + +/* VCAP:VCAP_CONST:ACTION_DEF_CNT */ +#define VCAP_ACTION_DEF_CNT(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 20, 0, 1, 4) + +/* VCAP:VCAP_CONST:ACTION_WIDTH */ +#define VCAP_ACTION_WIDTH(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 24, 0, 1, 4) + +/* VCAP:VCAP_CONST:CNT_WIDTH */ +#define VCAP_CNT_WIDTH(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 28, 0, 1, 4) + +/* VCAP:VCAP_CONST:CORE_CNT */ +#define VCAP_CORE_CNT(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 32, 0, 1, 4) + +/* VCAP:VCAP_CONST:IF_CNT */ +#define VCAP_IF_CNT(t) __REG(TARGET_VCAP, t, 3, 924, 0, 1, 40, 36, 0, 1, 4) + #endif /* _LAN966X_REGS_H_ */ diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c index 651d5493ae55..01072121c999 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c @@ -69,6 +69,8 @@ static int lan966x_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSMATCHALL: return lan966x_tc_matchall(port, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return lan966x_tc_flower(port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c new file mode 100644 index 000000000000..04a2afd683cc --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" +#include "vcap_api.h" +#include "vcap_api_client.h" + +/* Controls how PORT_MASK is applied */ +enum LAN966X_PORT_MASK_MODE { + LAN966X_PMM_NO_ACTION, + LAN966X_PMM_REPLACE, + LAN966X_PMM_FORWARDING, + LAN966X_PMM_REDIRECT, +}; + +struct lan966x_tc_flower_parse_usage { 
+ struct flow_cls_offload *f; + struct flow_rule *frule; + struct vcap_rule *vrule; + unsigned int used_keys; + u16 l3_proto; +}; + +static int lan966x_tc_flower_handler_ethaddr_usage(struct lan966x_tc_flower_parse_usage *st) +{ + enum vcap_key_field smac_key = VCAP_KF_L2_SMAC; + enum vcap_key_field dmac_key = VCAP_KF_L2_DMAC; + struct flow_match_eth_addrs match; + struct vcap_u48_key smac, dmac; + int err = 0; + + flow_rule_match_eth_addrs(st->frule, &match); + + if (!is_zero_ether_addr(match.mask->src)) { + vcap_netbytes_copy(smac.value, match.key->src, ETH_ALEN); + vcap_netbytes_copy(smac.mask, match.mask->src, ETH_ALEN); + err = vcap_rule_add_key_u48(st->vrule, smac_key, &smac); + if (err) + goto out; + } + + if (!is_zero_ether_addr(match.mask->dst)) { + vcap_netbytes_copy(dmac.value, match.key->dst, ETH_ALEN); + vcap_netbytes_copy(dmac.mask, match.mask->dst, ETH_ALEN); + err = vcap_rule_add_key_u48(st->vrule, dmac_key, &dmac); + if (err) + goto out; + } + + st->used_keys |= BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + + return err; + +out: + NL_SET_ERR_MSG_MOD(st->f->common.extack, "eth_addr parse error"); + return err; +} + +static int +(*lan966x_tc_flower_handlers_usage[])(struct lan966x_tc_flower_parse_usage *st) = { + [FLOW_DISSECTOR_KEY_ETH_ADDRS] = lan966x_tc_flower_handler_ethaddr_usage, +}; + +static int lan966x_tc_flower_use_dissectors(struct flow_cls_offload *f, + struct vcap_admin *admin, + struct vcap_rule *vrule, + u16 *l3_proto) +{ + struct lan966x_tc_flower_parse_usage state = { + .f = f, + .vrule = vrule, + .l3_proto = ETH_P_ALL, + }; + int err = 0; + + state.frule = flow_cls_offload_flow_rule(f); + for (int i = 0; i < ARRAY_SIZE(lan966x_tc_flower_handlers_usage); ++i) { + if (!flow_rule_match_key(state.frule, i) || + !lan966x_tc_flower_handlers_usage[i]) + continue; + + err = lan966x_tc_flower_handlers_usage[i](&state); + if (err) + return err; + } + + if (l3_proto) + *l3_proto = state.l3_proto; + + return err; +} + +static int lan966x_tc_flower_action_check(struct vcap_control *vctrl, + struct flow_cls_offload *fco, + struct vcap_admin *admin) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(fco); + struct flow_action_entry *actent, *last_actent = NULL; + struct flow_action *act = &rule->action; + u64 action_mask = 0; + int idx; + + if (!flow_action_has_entries(act)) { + NL_SET_ERR_MSG_MOD(fco->common.extack, "No actions"); + return -EINVAL; + } + + if (!flow_action_basic_hw_stats_check(act, fco->common.extack)) + return -EOPNOTSUPP; + + flow_action_for_each(idx, actent, act) { + if (action_mask & BIT(actent->id)) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "More actions of the same type"); + return -EINVAL; + } + action_mask |= BIT(actent->id); + last_actent = actent; /* Save last action for later check */ + } + + /* Check that last action is a goto */ + if (last_actent->id != FLOW_ACTION_GOTO) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "Last action must be 'goto'"); + return -EINVAL; + } + + /* Check if the goto chain is in the next lookup */ + if (!vcap_is_next_lookup(vctrl, fco->common.chain_index, + last_actent->chain_index)) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "Invalid goto chain"); + return -EINVAL; + } + + /* Catch unsupported combinations of actions */ + if (action_mask & BIT(FLOW_ACTION_TRAP) && + action_mask & BIT(FLOW_ACTION_ACCEPT)) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "Cannot combine pass and trap action"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int lan966x_tc_flower_add(struct lan966x_port *port, + struct 
flow_cls_offload *f, + struct vcap_admin *admin) +{ + struct flow_action_entry *act; + u16 l3_proto = ETH_P_ALL; + struct flow_rule *frule; + struct vcap_rule *vrule; + int err, idx; + + err = lan966x_tc_flower_action_check(port->lan966x->vcap_ctrl, f, + admin); + if (err) + return err; + + vrule = vcap_alloc_rule(port->lan966x->vcap_ctrl, port->dev, + f->common.chain_index, VCAP_USER_TC, + f->common.prio, 0); + if (IS_ERR(vrule)) + return PTR_ERR(vrule); + + vrule->cookie = f->cookie; + err = lan966x_tc_flower_use_dissectors(f, admin, vrule, &l3_proto); + if (err) + goto out; + + frule = flow_cls_offload_flow_rule(f); + + flow_action_for_each(idx, act, &frule->action) { + switch (act->id) { + case FLOW_ACTION_TRAP: + err = vcap_rule_add_action_bit(vrule, + VCAP_AF_CPU_COPY_ENA, + VCAP_BIT_1); + err |= vcap_rule_add_action_u32(vrule, + VCAP_AF_CPU_QUEUE_NUM, + 0); + err |= vcap_rule_add_action_u32(vrule, VCAP_AF_MASK_MODE, + LAN966X_PMM_REPLACE); + err |= vcap_set_rule_set_actionset(vrule, + VCAP_AFS_BASE_TYPE); + if (err) + goto out; + + break; + case FLOW_ACTION_GOTO: + break; + default: + NL_SET_ERR_MSG_MOD(f->common.extack, + "Unsupported TC action"); + err = -EOPNOTSUPP; + goto out; + } + } + + err = vcap_val_rule(vrule, l3_proto); + if (err) { + vcap_set_tc_exterr(f, vrule); + goto out; + } + + err = vcap_add_rule(vrule); + if (err) + NL_SET_ERR_MSG_MOD(f->common.extack, + "Could not add the filter"); +out: + vcap_free_rule(vrule); + return err; +} + +static int lan966x_tc_flower_del(struct lan966x_port *port, + struct flow_cls_offload *f, + struct vcap_admin *admin) +{ + struct vcap_control *vctrl; + int err = -ENOENT, rule_id; + + vctrl = port->lan966x->vcap_ctrl; + while (true) { + rule_id = vcap_lookup_rule_by_cookie(vctrl, f->cookie); + if (rule_id <= 0) + break; + + err = vcap_del_rule(vctrl, port->dev, rule_id); + if (err) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Cannot delete rule"); + break; + } + } + + return err; +} + +int lan966x_tc_flower(struct lan966x_port *port, + struct flow_cls_offload *f) +{ + struct vcap_admin *admin; + + admin = vcap_find_admin(port->lan966x->vcap_ctrl, + f->common.chain_index); + if (!admin) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Invalid chain"); + return -EINVAL; + } + + switch (f->command) { + case FLOW_CLS_REPLACE: + return lan966x_tc_flower_add(port, f, admin); + case FLOW_CLS_DESTROY: + return lan966x_tc_flower_del(port, f, admin); + default: + return -EOPNOTSUPP; + } + + return 0; +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_matchall.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_matchall.c index 7368433b9277..a539abaad9b6 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_matchall.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_matchall.c @@ -23,6 +23,9 @@ static int lan966x_tc_matchall_add(struct lan966x_port *port, case FLOW_ACTION_MIRRED: return lan966x_mirror_port_add(port, act, f->cookie, ingress, f->common.extack); + case FLOW_ACTION_GOTO: + return lan966x_goto_port_add(port, act, f->cookie, + f->common.extack); default: NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported action"); @@ -43,6 +46,9 @@ static int lan966x_tc_matchall_del(struct lan966x_port *port, f->cookie == port->tc.egress_mirror_id) { return lan966x_mirror_port_del(port, ingress, f->common.extack); + } else if (f->cookie == port->tc.goto_id) { + return lan966x_goto_port_del(port, f->cookie, + f->common.extack); } else { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported action"); diff --git 
a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c new file mode 100644 index 000000000000..928e711960e6 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c @@ -0,0 +1,1608 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) + +#include <linux/types.h> +#include <linux/kernel.h> + +#include "lan966x_vcap_ag_api.h" + +/* keyfields */ +static const struct vcap_field is2_mac_etype_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 43, + .width = 48, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 91, + .width = 48, + }, + [VCAP_KF_ETYPE] = { + .type = VCAP_FIELD_U32, + .offset = 139, + .width = 16, + }, + [VCAP_KF_L2_FRM_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 155, + .width = 4, + }, + [VCAP_KF_L2_PAYLOAD0] = { + .type = VCAP_FIELD_U32, + .offset = 159, + .width = 16, + }, + [VCAP_KF_L2_PAYLOAD1] = { + .type = VCAP_FIELD_U32, + .offset = 175, + .width = 8, + }, + [VCAP_KF_L2_PAYLOAD2] = { + .type = VCAP_FIELD_U32, + .offset = 183, + .width = 3, + }, +}; + +static const struct vcap_field is2_mac_llc_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 43, + .width = 48, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 91, + .width = 48, + }, + [VCAP_KF_L2_LLC] = { + .type = VCAP_FIELD_U48, + 
.offset = 139, + .width = 40, + }, +}; + +static const struct vcap_field is2_mac_snap_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 43, + .width = 48, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 91, + .width = 48, + }, + [VCAP_KF_L2_SNAP] = { + .type = VCAP_FIELD_U48, + .offset = 139, + .width = 40, + }, +}; + +static const struct vcap_field is2_arp_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 43, + .width = 48, + }, + [VCAP_KF_ARP_ADDR_SPACE_OK_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 91, + .width = 1, + }, + [VCAP_KF_ARP_PROTO_SPACE_OK_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 92, + .width = 1, + }, + [VCAP_KF_ARP_LEN_OK_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 93, + .width = 1, + }, + [VCAP_KF_ARP_TGT_MATCH_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 94, + .width = 1, + }, + [VCAP_KF_ARP_SENDER_MATCH_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 95, + .width = 1, + }, + [VCAP_KF_ARP_OPCODE_UNKNOWN_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 96, + .width = 1, + }, + [VCAP_KF_ARP_OPCODE] = { + .type = VCAP_FIELD_U32, + .offset = 97, + .width = 2, + }, + [VCAP_KF_L3_IP4_DIP] = { + .type = VCAP_FIELD_U32, + .offset = 99, + .width = 32, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 131, + .width = 32, + }, + [VCAP_KF_L3_DIP_EQ_SIP_IS] = { + .type = 
VCAP_FIELD_BIT, + .offset = 163, + .width = 1, + }, +}; + +static const struct vcap_field is2_ip4_tcp_udp_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 43, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 44, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 45, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 46, + .width = 1, + }, + [VCAP_KF_L3_TTL_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 47, + .width = 1, + }, + [VCAP_KF_L3_TOS] = { + .type = VCAP_FIELD_U32, + .offset = 48, + .width = 8, + }, + [VCAP_KF_L3_IP4_DIP] = { + .type = VCAP_FIELD_U32, + .offset = 56, + .width = 32, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 88, + .width = 32, + }, + [VCAP_KF_L3_DIP_EQ_SIP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 120, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 121, + .width = 1, + }, + [VCAP_KF_L4_DPORT] = { + .type = VCAP_FIELD_U32, + .offset = 122, + .width = 16, + }, + [VCAP_KF_L4_SPORT] = { + .type = VCAP_FIELD_U32, + .offset = 138, + .width = 16, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 154, + .width = 8, + }, + [VCAP_KF_L4_SPORT_EQ_DPORT_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 162, + .width = 1, + }, + [VCAP_KF_L4_SEQUENCE_EQ0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 163, + .width = 1, + }, + [VCAP_KF_L4_FIN] = { + .type = VCAP_FIELD_BIT, + .offset = 164, + .width = 1, + }, + [VCAP_KF_L4_SYN] = { + .type = VCAP_FIELD_BIT, + .offset = 165, + .width = 1, + }, + [VCAP_KF_L4_RST] = { + .type = VCAP_FIELD_BIT, + .offset = 166, + .width = 1, + }, + [VCAP_KF_L4_PSH] = { + .type = VCAP_FIELD_BIT, + .offset = 167, + .width = 1, + }, + [VCAP_KF_L4_ACK] = { + .type = VCAP_FIELD_BIT, + .offset = 168, + .width = 1, + }, + [VCAP_KF_L4_URG] = { + .type = VCAP_FIELD_BIT, + .offset = 169, + .width = 1, + }, + [VCAP_KF_L4_1588_DOM] = { + .type = VCAP_FIELD_U32, + .offset = 170, + .width = 8, + }, + [VCAP_KF_L4_1588_VER] = { + .type = VCAP_FIELD_U32, + .offset = 178, + .width = 4, + }, +}; + +static const struct vcap_field is2_ip4_other_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + 
}, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_IP4_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 43, + .width = 1, + }, + [VCAP_KF_L3_FRAGMENT] = { + .type = VCAP_FIELD_BIT, + .offset = 44, + .width = 1, + }, + [VCAP_KF_L3_FRAG_OFS_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 45, + .width = 1, + }, + [VCAP_KF_L3_OPTIONS_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 46, + .width = 1, + }, + [VCAP_KF_L3_TTL_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 47, + .width = 1, + }, + [VCAP_KF_L3_TOS] = { + .type = VCAP_FIELD_U32, + .offset = 48, + .width = 8, + }, + [VCAP_KF_L3_IP4_DIP] = { + .type = VCAP_FIELD_U32, + .offset = 56, + .width = 32, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 88, + .width = 32, + }, + [VCAP_KF_L3_DIP_EQ_SIP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 120, + .width = 1, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 121, + .width = 8, + }, + [VCAP_KF_L3_PAYLOAD] = { + .type = VCAP_FIELD_U56, + .offset = 129, + .width = 56, + }, +}; + +static const struct vcap_field is2_ip6_std_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L3_TTL_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 43, + .width = 1, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 44, + .width = 128, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 172, + .width = 8, + }, +}; + +static const struct vcap_field is2_oam_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 8, + }, + 
[VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 25, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 26, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 27, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 39, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 40, + .width = 3, + }, + [VCAP_KF_L2_DMAC] = { + .type = VCAP_FIELD_U48, + .offset = 43, + .width = 48, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 91, + .width = 48, + }, + [VCAP_KF_OAM_MEL_FLAGS] = { + .type = VCAP_FIELD_U32, + .offset = 139, + .width = 7, + }, + [VCAP_KF_OAM_VER] = { + .type = VCAP_FIELD_U32, + .offset = 146, + .width = 5, + }, + [VCAP_KF_OAM_OPCODE] = { + .type = VCAP_FIELD_U32, + .offset = 151, + .width = 8, + }, + [VCAP_KF_OAM_FLAGS] = { + .type = VCAP_FIELD_U32, + .offset = 159, + .width = 8, + }, + [VCAP_KF_OAM_MEPID] = { + .type = VCAP_FIELD_U32, + .offset = 167, + .width = 16, + }, + [VCAP_KF_OAM_CCM_CNTS_EQ0] = { + .type = VCAP_FIELD_BIT, + .offset = 183, + .width = 1, + }, + [VCAP_KF_OAM_Y1731_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 184, + .width = 1, + }, + [VCAP_KF_OAM_DETECTED] = { + .type = VCAP_FIELD_BIT, + .offset = 185, + .width = 1, + }, +}; + +static const struct vcap_field is2_ip6_tcp_udp_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 2, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 11, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 20, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 21, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 25, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 37, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 38, + .width = 3, + }, + [VCAP_KF_L3_TTL_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 41, + .width = 1, + }, + [VCAP_KF_L3_TOS] = { + .type = VCAP_FIELD_U32, + .offset = 42, + .width = 8, + }, + [VCAP_KF_L3_IP6_DIP] = { + .type = VCAP_FIELD_U128, + .offset = 50, + .width = 128, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 178, + .width = 128, + }, + [VCAP_KF_L3_DIP_EQ_SIP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 306, + .width = 1, + }, + [VCAP_KF_TCP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 307, + .width = 1, + }, + [VCAP_KF_L4_DPORT] = { + .type = VCAP_FIELD_U32, + .offset = 308, + .width = 16, + }, + [VCAP_KF_L4_SPORT] = { + .type = VCAP_FIELD_U32, + .offset = 324, + .width = 
16, + }, + [VCAP_KF_L4_RNG] = { + .type = VCAP_FIELD_U32, + .offset = 340, + .width = 8, + }, + [VCAP_KF_L4_SPORT_EQ_DPORT_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 348, + .width = 1, + }, + [VCAP_KF_L4_SEQUENCE_EQ0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 349, + .width = 1, + }, + [VCAP_KF_L4_FIN] = { + .type = VCAP_FIELD_BIT, + .offset = 350, + .width = 1, + }, + [VCAP_KF_L4_SYN] = { + .type = VCAP_FIELD_BIT, + .offset = 351, + .width = 1, + }, + [VCAP_KF_L4_RST] = { + .type = VCAP_FIELD_BIT, + .offset = 352, + .width = 1, + }, + [VCAP_KF_L4_PSH] = { + .type = VCAP_FIELD_BIT, + .offset = 353, + .width = 1, + }, + [VCAP_KF_L4_ACK] = { + .type = VCAP_FIELD_BIT, + .offset = 354, + .width = 1, + }, + [VCAP_KF_L4_URG] = { + .type = VCAP_FIELD_BIT, + .offset = 355, + .width = 1, + }, + [VCAP_KF_L4_1588_DOM] = { + .type = VCAP_FIELD_U32, + .offset = 356, + .width = 8, + }, + [VCAP_KF_L4_1588_VER] = { + .type = VCAP_FIELD_U32, + .offset = 364, + .width = 4, + }, +}; + +static const struct vcap_field is2_ip6_other_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_KF_LOOKUP_FIRST_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 2, + .width = 1, + }, + [VCAP_KF_LOOKUP_PAG] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 8, + }, + [VCAP_KF_IF_IGR_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 11, + .width = 9, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 20, + .width = 1, + }, + [VCAP_KF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 21, + .width = 1, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 22, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 23, + .width = 1, + }, + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 24, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 25, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 37, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 38, + .width = 3, + }, + [VCAP_KF_L3_TTL_GT0] = { + .type = VCAP_FIELD_BIT, + .offset = 41, + .width = 1, + }, + [VCAP_KF_L3_TOS] = { + .type = VCAP_FIELD_U32, + .offset = 42, + .width = 8, + }, + [VCAP_KF_L3_IP6_DIP] = { + .type = VCAP_FIELD_U128, + .offset = 50, + .width = 128, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 178, + .width = 128, + }, + [VCAP_KF_L3_DIP_EQ_SIP_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 306, + .width = 1, + }, + [VCAP_KF_L3_IP_PROTO] = { + .type = VCAP_FIELD_U32, + .offset = 307, + .width = 8, + }, + [VCAP_KF_L3_PAYLOAD] = { + .type = VCAP_FIELD_U56, + .offset = 315, + .width = 56, + }, +}; + +static const struct vcap_field is2_smac_sip4_keyfield[] = { + [VCAP_KF_IF_IGR_PORT] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 4, + .width = 48, + }, + [VCAP_KF_L3_IP4_SIP] = { + .type = VCAP_FIELD_U32, + .offset = 52, + .width = 32, + }, +}; + +static const struct vcap_field is2_smac_sip6_keyfield[] = { + [VCAP_KF_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_IF_IGR_PORT] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 4, + }, + [VCAP_KF_L2_SMAC] = { + .type = VCAP_FIELD_U48, + .offset = 8, + .width = 48, + }, + [VCAP_KF_L3_IP6_SIP] = { + .type = VCAP_FIELD_U128, + .offset = 56, + .width = 128, + }, +}; + +/* keyfield_set */ +static const struct vcap_set 
is2_keyfield_set[] = { + [VCAP_KFS_MAC_ETYPE] = { + .type_id = 0, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_MAC_LLC] = { + .type_id = 1, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_MAC_SNAP] = { + .type_id = 2, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_ARP] = { + .type_id = 3, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_IP4_TCP_UDP] = { + .type_id = 4, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_IP4_OTHER] = { + .type_id = 5, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_IP6_STD] = { + .type_id = 6, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_OAM] = { + .type_id = 7, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_KFS_IP6_TCP_UDP] = { + .type_id = 0, + .sw_per_item = 4, + .sw_cnt = 1, + }, + [VCAP_KFS_IP6_OTHER] = { + .type_id = 1, + .sw_per_item = 4, + .sw_cnt = 1, + }, + [VCAP_KFS_SMAC_SIP4] = { + .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 4, + }, + [VCAP_KFS_SMAC_SIP6] = { + .type_id = 8, + .sw_per_item = 2, + .sw_cnt = 2, + }, +}; + +/* keyfield_set map */ +static const struct vcap_field *is2_keyfield_set_map[] = { + [VCAP_KFS_MAC_ETYPE] = is2_mac_etype_keyfield, + [VCAP_KFS_MAC_LLC] = is2_mac_llc_keyfield, + [VCAP_KFS_MAC_SNAP] = is2_mac_snap_keyfield, + [VCAP_KFS_ARP] = is2_arp_keyfield, + [VCAP_KFS_IP4_TCP_UDP] = is2_ip4_tcp_udp_keyfield, + [VCAP_KFS_IP4_OTHER] = is2_ip4_other_keyfield, + [VCAP_KFS_IP6_STD] = is2_ip6_std_keyfield, + [VCAP_KFS_OAM] = is2_oam_keyfield, + [VCAP_KFS_IP6_TCP_UDP] = is2_ip6_tcp_udp_keyfield, + [VCAP_KFS_IP6_OTHER] = is2_ip6_other_keyfield, + [VCAP_KFS_SMAC_SIP4] = is2_smac_sip4_keyfield, + [VCAP_KFS_SMAC_SIP6] = is2_smac_sip6_keyfield, +}; + +/* keyfield_set map sizes */ +static int is2_keyfield_set_map_size[] = { + [VCAP_KFS_MAC_ETYPE] = ARRAY_SIZE(is2_mac_etype_keyfield), + [VCAP_KFS_MAC_LLC] = ARRAY_SIZE(is2_mac_llc_keyfield), + [VCAP_KFS_MAC_SNAP] = ARRAY_SIZE(is2_mac_snap_keyfield), + [VCAP_KFS_ARP] = ARRAY_SIZE(is2_arp_keyfield), + [VCAP_KFS_IP4_TCP_UDP] = ARRAY_SIZE(is2_ip4_tcp_udp_keyfield), + [VCAP_KFS_IP4_OTHER] = ARRAY_SIZE(is2_ip4_other_keyfield), + [VCAP_KFS_IP6_STD] = ARRAY_SIZE(is2_ip6_std_keyfield), + [VCAP_KFS_OAM] = ARRAY_SIZE(is2_oam_keyfield), + [VCAP_KFS_IP6_TCP_UDP] = ARRAY_SIZE(is2_ip6_tcp_udp_keyfield), + [VCAP_KFS_IP6_OTHER] = ARRAY_SIZE(is2_ip6_other_keyfield), + [VCAP_KFS_SMAC_SIP4] = ARRAY_SIZE(is2_smac_sip4_keyfield), + [VCAP_KFS_SMAC_SIP6] = ARRAY_SIZE(is2_smac_sip6_keyfield), +}; + +/* actionfields */ +static const struct vcap_field is2_base_type_actionfield[] = { + [VCAP_AF_HIT_ME_ONCE] = { + .type = VCAP_FIELD_BIT, + .offset = 0, + .width = 1, + }, + [VCAP_AF_CPU_COPY_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 1, + .width = 1, + }, + [VCAP_AF_CPU_QUEUE_NUM] = { + .type = VCAP_FIELD_U32, + .offset = 2, + .width = 3, + }, + [VCAP_AF_MASK_MODE] = { + .type = VCAP_FIELD_U32, + .offset = 5, + .width = 2, + }, + [VCAP_AF_MIRROR_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 7, + .width = 1, + }, + [VCAP_AF_LRN_DIS] = { + .type = VCAP_FIELD_BIT, + .offset = 8, + .width = 1, + }, + [VCAP_AF_POLICE_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 9, + .width = 1, + }, + [VCAP_AF_POLICE_IDX] = { + .type = VCAP_FIELD_U32, + .offset = 10, + .width = 9, + }, + [VCAP_AF_POLICE_VCAP_ONLY] = { + .type = VCAP_FIELD_BIT, + .offset = 19, + .width = 1, + }, + [VCAP_AF_PORT_MASK] = { + .type = VCAP_FIELD_U32, + .offset = 20, + .width = 8, + }, + [VCAP_AF_REW_OP] = { + .type = VCAP_FIELD_U32, + .offset = 28, + .width = 16, + }, + [VCAP_AF_ISDX_ENA] = { + .type = VCAP_FIELD_BIT, + 
.offset = 44, + .width = 1, + }, + [VCAP_AF_ACL_ID] = { + .type = VCAP_FIELD_U32, + .offset = 45, + .width = 6, + }, +}; + +static const struct vcap_field is2_smac_sip_actionfield[] = { + [VCAP_AF_CPU_COPY_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 0, + .width = 1, + }, + [VCAP_AF_CPU_QUEUE_NUM] = { + .type = VCAP_FIELD_U32, + .offset = 1, + .width = 3, + }, + [VCAP_AF_FWD_KILL_ENA] = { + .type = VCAP_FIELD_BIT, + .offset = 4, + .width = 1, + }, + [VCAP_AF_HOST_MATCH] = { + .type = VCAP_FIELD_BIT, + .offset = 5, + .width = 1, + }, +}; + +/* actionfield_set */ +static const struct vcap_set is2_actionfield_set[] = { + [VCAP_AFS_BASE_TYPE] = { + .type_id = -1, + .sw_per_item = 2, + .sw_cnt = 2, + }, + [VCAP_AFS_SMAC_SIP] = { + .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 4, + }, +}; + +/* actionfield_set map */ +static const struct vcap_field *is2_actionfield_set_map[] = { + [VCAP_AFS_BASE_TYPE] = is2_base_type_actionfield, + [VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield, +}; + +/* actionfield_set map size */ +static int is2_actionfield_set_map_size[] = { + [VCAP_AFS_BASE_TYPE] = ARRAY_SIZE(is2_base_type_actionfield), + [VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield), +}; + +/* Type Groups */ +static const struct vcap_typegroup is2_x4_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 3, + .value = 4, + }, + { + .offset = 96, + .width = 1, + .value = 0, + }, + { + .offset = 192, + .width = 2, + .value = 0, + }, + { + .offset = 288, + .width = 1, + .value = 0, + }, + {} +}; + +static const struct vcap_typegroup is2_x2_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 2, + .value = 2, + }, + { + .offset = 96, + .width = 1, + .value = 0, + }, + {} +}; + +static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = { + { + .offset = 0, + .width = 1, + .value = 1, + }, + {} +}; + +static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = { + [4] = is2_x4_keyfield_set_typegroups, + [2] = is2_x2_keyfield_set_typegroups, + [1] = is2_x1_keyfield_set_typegroups, + [5] = NULL, +}; + +static const struct vcap_typegroup is2_x2_actionfield_set_typegroups[] = { + { + .offset = 0, + .width = 2, + .value = 2, + }, + { + .offset = 31, + .width = 1, + .value = 0, + }, + {} +}; + +static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = { + { + .offset = 0, + .width = 1, + .value = 1, + }, + {} +}; + +static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = { + [2] = is2_x2_actionfield_set_typegroups, + [1] = is2_x1_actionfield_set_typegroups, + [5] = NULL, +}; + +/* Keyfieldset names */ +static const char * const vcap_keyfield_set_names[] = { + [VCAP_KFS_NO_VALUE] = "(None)", + [VCAP_KFS_ARP] = "VCAP_KFS_ARP", + [VCAP_KFS_IP4_OTHER] = "VCAP_KFS_IP4_OTHER", + [VCAP_KFS_IP4_TCP_UDP] = "VCAP_KFS_IP4_TCP_UDP", + [VCAP_KFS_IP6_OTHER] = "VCAP_KFS_IP6_OTHER", + [VCAP_KFS_IP6_STD] = "VCAP_KFS_IP6_STD", + [VCAP_KFS_IP6_TCP_UDP] = "VCAP_KFS_IP6_TCP_UDP", + [VCAP_KFS_MAC_ETYPE] = "VCAP_KFS_MAC_ETYPE", + [VCAP_KFS_MAC_LLC] = "VCAP_KFS_MAC_LLC", + [VCAP_KFS_MAC_SNAP] = "VCAP_KFS_MAC_SNAP", + [VCAP_KFS_OAM] = "VCAP_KFS_OAM", + [VCAP_KFS_SMAC_SIP4] = "VCAP_KFS_SMAC_SIP4", + [VCAP_KFS_SMAC_SIP6] = "VCAP_KFS_SMAC_SIP6", +}; + +/* Actionfieldset names */ +static const char * const vcap_actionfield_set_names[] = { + [VCAP_AFS_NO_VALUE] = "(None)", + [VCAP_AFS_BASE_TYPE] = "VCAP_AFS_BASE_TYPE", + [VCAP_AFS_SMAC_SIP] = "VCAP_AFS_SMAC_SIP", +}; + +/* Keyfield names */ +static const char * const vcap_keyfield_names[] = { + [VCAP_KF_NO_VALUE] = 
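/* Illustrative sketch (not part of the patch): how the typegroup tables
 * above encode rule sizes. An IS2 row consists of 4 subwords of 96 bits;
 * an X4 rule owns the whole row and is tagged by typegroup bits at the
 * subword borders (offsets 0, 96, 192, 288 with values 4/0/0/0), an X2
 * rule owns two subwords, an X1 rule one. Assuming only the geometry
 * published in lan966x_vcaps below, the rules-per-row for a keyset
 * follows from its sw_per_item:
 *
 *	static int rules_per_row(int sw_count, int sw_per_item)
 *	{
 *		return sw_count / sw_per_item;	// e.g. 4 / 2 = 2 X2 rules
 *	}
 *
 * which reproduces the .sw_cnt values listed in is2_keyfield_set above.
 */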
"(None)", + [VCAP_KF_8021Q_DEI_CLS] = "8021Q_DEI_CLS", + [VCAP_KF_8021Q_PCP_CLS] = "8021Q_PCP_CLS", + [VCAP_KF_8021Q_VID_CLS] = "8021Q_VID_CLS", + [VCAP_KF_8021Q_VLAN_TAGGED_IS] = "8021Q_VLAN_TAGGED_IS", + [VCAP_KF_ARP_ADDR_SPACE_OK_IS] = "ARP_ADDR_SPACE_OK_IS", + [VCAP_KF_ARP_LEN_OK_IS] = "ARP_LEN_OK_IS", + [VCAP_KF_ARP_OPCODE] = "ARP_OPCODE", + [VCAP_KF_ARP_OPCODE_UNKNOWN_IS] = "ARP_OPCODE_UNKNOWN_IS", + [VCAP_KF_ARP_PROTO_SPACE_OK_IS] = "ARP_PROTO_SPACE_OK_IS", + [VCAP_KF_ARP_SENDER_MATCH_IS] = "ARP_SENDER_MATCH_IS", + [VCAP_KF_ARP_TGT_MATCH_IS] = "ARP_TGT_MATCH_IS", + [VCAP_KF_ETYPE] = "ETYPE", + [VCAP_KF_HOST_MATCH] = "HOST_MATCH", + [VCAP_KF_IF_IGR_PORT] = "IF_IGR_PORT", + [VCAP_KF_IF_IGR_PORT_MASK] = "IF_IGR_PORT_MASK", + [VCAP_KF_IP4_IS] = "IP4_IS", + [VCAP_KF_ISDX_GT0_IS] = "ISDX_GT0_IS", + [VCAP_KF_L2_BC_IS] = "L2_BC_IS", + [VCAP_KF_L2_DMAC] = "L2_DMAC", + [VCAP_KF_L2_FRM_TYPE] = "L2_FRM_TYPE", + [VCAP_KF_L2_LLC] = "L2_LLC", + [VCAP_KF_L2_MC_IS] = "L2_MC_IS", + [VCAP_KF_L2_PAYLOAD0] = "L2_PAYLOAD0", + [VCAP_KF_L2_PAYLOAD1] = "L2_PAYLOAD1", + [VCAP_KF_L2_PAYLOAD2] = "L2_PAYLOAD2", + [VCAP_KF_L2_SMAC] = "L2_SMAC", + [VCAP_KF_L2_SNAP] = "L2_SNAP", + [VCAP_KF_L3_DIP_EQ_SIP_IS] = "L3_DIP_EQ_SIP_IS", + [VCAP_KF_L3_FRAGMENT] = "L3_FRAGMENT", + [VCAP_KF_L3_FRAG_OFS_GT0] = "L3_FRAG_OFS_GT0", + [VCAP_KF_L3_IP4_DIP] = "L3_IP4_DIP", + [VCAP_KF_L3_IP4_SIP] = "L3_IP4_SIP", + [VCAP_KF_L3_IP6_DIP] = "L3_IP6_DIP", + [VCAP_KF_L3_IP6_SIP] = "L3_IP6_SIP", + [VCAP_KF_L3_IP_PROTO] = "L3_IP_PROTO", + [VCAP_KF_L3_OPTIONS_IS] = "L3_OPTIONS_IS", + [VCAP_KF_L3_PAYLOAD] = "L3_PAYLOAD", + [VCAP_KF_L3_TOS] = "L3_TOS", + [VCAP_KF_L3_TTL_GT0] = "L3_TTL_GT0", + [VCAP_KF_L4_1588_DOM] = "L4_1588_DOM", + [VCAP_KF_L4_1588_VER] = "L4_1588_VER", + [VCAP_KF_L4_ACK] = "L4_ACK", + [VCAP_KF_L4_DPORT] = "L4_DPORT", + [VCAP_KF_L4_FIN] = "L4_FIN", + [VCAP_KF_L4_PSH] = "L4_PSH", + [VCAP_KF_L4_RNG] = "L4_RNG", + [VCAP_KF_L4_RST] = "L4_RST", + [VCAP_KF_L4_SEQUENCE_EQ0_IS] = "L4_SEQUENCE_EQ0_IS", + [VCAP_KF_L4_SPORT] = "L4_SPORT", + [VCAP_KF_L4_SPORT_EQ_DPORT_IS] = "L4_SPORT_EQ_DPORT_IS", + [VCAP_KF_L4_SYN] = "L4_SYN", + [VCAP_KF_L4_URG] = "L4_URG", + [VCAP_KF_LOOKUP_FIRST_IS] = "LOOKUP_FIRST_IS", + [VCAP_KF_LOOKUP_PAG] = "LOOKUP_PAG", + [VCAP_KF_OAM_CCM_CNTS_EQ0] = "OAM_CCM_CNTS_EQ0", + [VCAP_KF_OAM_DETECTED] = "OAM_DETECTED", + [VCAP_KF_OAM_FLAGS] = "OAM_FLAGS", + [VCAP_KF_OAM_MEL_FLAGS] = "OAM_MEL_FLAGS", + [VCAP_KF_OAM_MEPID] = "OAM_MEPID", + [VCAP_KF_OAM_OPCODE] = "OAM_OPCODE", + [VCAP_KF_OAM_VER] = "OAM_VER", + [VCAP_KF_OAM_Y1731_IS] = "OAM_Y1731_IS", + [VCAP_KF_TCP_IS] = "TCP_IS", + [VCAP_KF_TYPE] = "TYPE", +}; + +/* Actionfield names */ +static const char * const vcap_actionfield_names[] = { + [VCAP_AF_NO_VALUE] = "(None)", + [VCAP_AF_ACL_ID] = "ACL_ID", + [VCAP_AF_CPU_COPY_ENA] = "CPU_COPY_ENA", + [VCAP_AF_CPU_QUEUE_NUM] = "CPU_QUEUE_NUM", + [VCAP_AF_FWD_KILL_ENA] = "FWD_KILL_ENA", + [VCAP_AF_HIT_ME_ONCE] = "HIT_ME_ONCE", + [VCAP_AF_HOST_MATCH] = "HOST_MATCH", + [VCAP_AF_ISDX_ENA] = "ISDX_ENA", + [VCAP_AF_LRN_DIS] = "LRN_DIS", + [VCAP_AF_MASK_MODE] = "MASK_MODE", + [VCAP_AF_MIRROR_ENA] = "MIRROR_ENA", + [VCAP_AF_POLICE_ENA] = "POLICE_ENA", + [VCAP_AF_POLICE_IDX] = "POLICE_IDX", + [VCAP_AF_POLICE_VCAP_ONLY] = "POLICE_VCAP_ONLY", + [VCAP_AF_PORT_MASK] = "PORT_MASK", + [VCAP_AF_REW_OP] = "REW_OP", +}; + +/* VCAPs */ +const struct vcap_info lan966x_vcaps[] = { + [VCAP_TYPE_IS2] = { + .name = "is2", + .rows = 64, + .sw_count = 4, + .sw_width = 96, + .sticky_width = 32, + .act_width = 31, + .default_cnt = 11, + 
.require_cnt_dis = 1, + .version = 1, + .keyfield_set = is2_keyfield_set, + .keyfield_set_size = ARRAY_SIZE(is2_keyfield_set), + .actionfield_set = is2_actionfield_set, + .actionfield_set_size = ARRAY_SIZE(is2_actionfield_set), + .keyfield_set_map = is2_keyfield_set_map, + .keyfield_set_map_size = is2_keyfield_set_map_size, + .actionfield_set_map = is2_actionfield_set_map, + .actionfield_set_map_size = is2_actionfield_set_map_size, + .keyfield_set_typegroups = is2_keyfield_set_typegroups, + .actionfield_set_typegroups = is2_actionfield_set_typegroups, + }, +}; + +const struct vcap_statistics lan966x_vcap_stats = { + .name = "lan966x", + .count = 1, + .keyfield_set_names = vcap_keyfield_set_names, + .actionfield_set_names = vcap_actionfield_set_names, + .keyfield_names = vcap_keyfield_names, + .actionfield_names = vcap_actionfield_names, +}; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.h b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.h new file mode 100644 index 000000000000..0d8bbee73b2a --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef __LAN966X_VCAP_AG_API_H__ +#define __LAN966X_VCAP_AG_API_H__ + +#include "vcap_api.h" + +extern const struct vcap_info lan966x_vcaps[]; +extern const struct vcap_statistics lan966x_vcap_stats; + +#endif /* __LAN966X_VCAP_AG_API_H__ */ diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c new file mode 100644 index 000000000000..44f40d914947 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" +#include "lan966x_vcap_ag_api.h" +#include "vcap_api.h" +#include "vcap_api_client.h" + +#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */ +#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ +#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ + +#define STREAMSIZE (64 * 4) + +#define LAN966X_IS2_LOOKUPS 2 + +enum vcap_is2_port_sel_ipv6 { + VCAP_IS2_PS_IPV6_TCPUDP_OTHER, + VCAP_IS2_PS_IPV6_STD, + VCAP_IS2_PS_IPV6_IP4_TCPUDP_IP4_OTHER, + VCAP_IS2_PS_IPV6_MAC_ETYPE, +}; + +static struct lan966x_vcap_inst { + enum vcap_type vtype; /* type of vcap */ + int tgt_inst; /* hardware instance number */ + int lookups; /* number of lookups in this vcap type */ + int first_cid; /* first chain id in this vcap */ + int last_cid; /* last chain id in this vcap */ + int count; /* number of available addresses */ +} lan966x_vcap_inst_cfg[] = { + { + .vtype = VCAP_TYPE_IS2, /* IS2-0 */ + .tgt_inst = 2, + .lookups = LAN966X_IS2_LOOKUPS, + .first_cid = LAN966X_VCAP_CID_IS2_L0, + .last_cid = LAN966X_VCAP_CID_IS2_MAX, + .count = 256, + }, +}; + +struct lan966x_vcap_cmd_cb { + struct lan966x *lan966x; + u32 instance; +}; + +static u32 lan966x_vcap_read_update_ctrl(const struct lan966x_vcap_cmd_cb *cb) +{ + return lan_rd(cb->lan966x, VCAP_UPDATE_CTRL(cb->instance)); +} + +static void lan966x_vcap_wait_update(struct lan966x *lan966x, int instance) +{ + const struct lan966x_vcap_cmd_cb cb = { .lan966x = lan966x, + .instance = instance }; + u32 val; + + readx_poll_timeout(lan966x_vcap_read_update_ctrl, &cb, val, + (val & VCAP_UPDATE_CTRL_UPDATE_SHOT) == 0, 10, + 100000); +} + +static void __lan966x_vcap_range_init(struct lan966x *lan966x, + struct vcap_admin *admin, + u32 addr, + u32 
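/* Note on lan966x_vcap_wait_update() above: readx_poll_timeout() re-reads
 * VCAP_UPDATE_CTRL through lan966x_vcap_read_update_ctrl() every 10 us
 * and gives up after 100 ms (the last two arguments), completing as soon
 * as the UPDATE_SHOT bit clears, i.e. when the hardware has consumed the
 * requested cache operation.
 */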
count) +{ + lan_wr(VCAP_MV_CFG_MV_NUM_POS_SET(0) | + VCAP_MV_CFG_MV_SIZE_SET(count - 1), + lan966x, VCAP_MV_CFG(admin->tgt_inst)); + + lan_wr(VCAP_UPDATE_CTRL_UPDATE_CMD_SET(VCAP_CMD_INITIALIZE) | + VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_CNT_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_ADDR_SET(addr) | + VCAP_UPDATE_CTRL_CLEAR_CACHE_SET(true) | + VCAP_UPDATE_CTRL_UPDATE_SHOT_SET(1), + lan966x, VCAP_UPDATE_CTRL(admin->tgt_inst)); + + lan966x_vcap_wait_update(lan966x, admin->tgt_inst); +} + +static int lan966x_vcap_cid_to_lookup(int cid) +{ + if (cid >= LAN966X_VCAP_CID_IS2_L1 && + cid < LAN966X_VCAP_CID_IS2_MAX) + return 1; + + return 0; +} + +static int +lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup, + struct vcap_keyset_list *keysetlist, + u16 l3_proto) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + bool found = false; + u32 val; + + /* Check if the port keyset selection is enabled */ + val = lan_rd(lan966x, ANA_VCAP_S2_CFG(port->chip_port)); + if (!ANA_VCAP_S2_CFG_ENA_GET(val)) + return -ENOENT; + + /* Collect all keysets for the port in a list */ + if (l3_proto == ETH_P_ALL) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_SNAP) { + if (ANA_VCAP_S2_CFG_SNAP_DIS_GET(val) & (BIT(0) << lookup)) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_LLC); + else + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_SNAP); + + found = true; + } + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_CFM) { + if (ANA_VCAP_S2_CFG_OAM_DIS_GET(val) & (BIT(0) << lookup)) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + else + vcap_keyset_list_add(keysetlist, VCAP_KFS_OAM); + + found = true; + } + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_ARP) { + if (ANA_VCAP_S2_CFG_ARP_DIS_GET(val) & (BIT(0) << lookup)) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + else + vcap_keyset_list_add(keysetlist, VCAP_KFS_ARP); + + found = true; + } + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_IP) { + if (ANA_VCAP_S2_CFG_IP_OTHER_DIS_GET(val) & (BIT(0) << lookup)) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + else + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP4_OTHER); + + if (ANA_VCAP_S2_CFG_IP_TCPUDP_DIS_GET(val) & (BIT(0) << lookup)) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + else + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP4_TCP_UDP); + + found = true; + } + + if (l3_proto == ETH_P_ALL || l3_proto == ETH_P_IPV6) { + switch (ANA_VCAP_S2_CFG_IP6_CFG_GET(val) & (0x3 << lookup)) { + case VCAP_IS2_PS_IPV6_TCPUDP_OTHER: + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP6_OTHER); + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP6_TCP_UDP); + break; + case VCAP_IS2_PS_IPV6_STD: + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP6_STD); + break; + case VCAP_IS2_PS_IPV6_IP4_TCPUDP_IP4_OTHER: + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP4_OTHER); + vcap_keyset_list_add(keysetlist, VCAP_KFS_IP4_TCP_UDP); + break; + case VCAP_IS2_PS_IPV6_MAC_ETYPE: + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + break; + } + + found = true; + } + + if (!found) + vcap_keyset_list_add(keysetlist, VCAP_KFS_MAC_ETYPE); + + return 0; +} + +static enum vcap_keyfield_set +lan966x_vcap_validate_keyset(struct net_device *dev, + struct vcap_admin *admin, + struct vcap_rule *rule, + struct vcap_keyset_list *kslist, + u16 l3_proto) +{ + struct vcap_keyset_list keysetlist = {}; + enum vcap_keyfield_set keysets[10] = {}; 
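/* Illustrative usage sketch (not from the patch): callers hand
 * lan966x_vcap_is2_get_port_keysets() a vcap_keyset_list backed by a
 * local array, exactly as this function does below. Assuming a netdev
 * and lookup 0:
 *
 *	enum vcap_keyfield_set sets[10];
 *	struct vcap_keyset_list list = {
 *		.max = ARRAY_SIZE(sets),
 *		.keysets = sets,
 *	};
 *
 *	if (!lan966x_vcap_is2_get_port_keysets(dev, 0, &list, ETH_P_IP))
 *		for (int i = 0; i < list.cnt; ++i)
 *			pr_debug("keyset %d\n", list.keysets[i]);
 */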
+ int lookup; + int err; + + if (!kslist || kslist->cnt == 0) + return VCAP_KFS_NO_VALUE; + + lookup = lan966x_vcap_cid_to_lookup(rule->vcap_chain_id); + keysetlist.max = ARRAY_SIZE(keysets); + keysetlist.keysets = keysets; + err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist, + l3_proto); + if (err) + return VCAP_KFS_NO_VALUE; + + /* Check if there is a match and return the match */ + for (int i = 0; i < kslist->cnt; ++i) + for (int j = 0; j < keysetlist.cnt; ++j) + if (kslist->keysets[i] == keysets[j]) + return kslist->keysets[i]; + + return VCAP_KFS_NO_VALUE; +} + +static bool lan966x_vcap_is_first_chain(struct vcap_rule *rule) +{ + return (rule->vcap_chain_id >= LAN966X_VCAP_CID_IS2_L0 && + rule->vcap_chain_id < LAN966X_VCAP_CID_IS2_L1); +} + +static void lan966x_vcap_add_default_fields(struct net_device *dev, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + struct lan966x_port *port = netdev_priv(dev); + + vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0, + ~BIT(port->chip_port)); + + if (lan966x_vcap_is_first_chain(rule)) + vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, + VCAP_BIT_1); + else + vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, + VCAP_BIT_0); +} + +static void lan966x_vcap_cache_erase(struct vcap_admin *admin) +{ + memset(admin->cache.keystream, 0, STREAMSIZE); + memset(admin->cache.maskstream, 0, STREAMSIZE); + memset(admin->cache.actionstream, 0, STREAMSIZE); + memset(&admin->cache.counter, 0, sizeof(admin->cache.counter)); +} + +static void lan966x_vcap_cache_write(struct net_device *dev, + struct vcap_admin *admin, + enum vcap_selection sel, + u32 start, + u32 count) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + u32 *keystr, *mskstr, *actstr; + + keystr = &admin->cache.keystream[start]; + mskstr = &admin->cache.maskstream[start]; + actstr = &admin->cache.actionstream[start]; + + switch (sel) { + case VCAP_SEL_ENTRY: + for (int i = 0; i < count; ++i) { + lan_wr(keystr[i] & mskstr[i], lan966x, + VCAP_ENTRY_DAT(admin->tgt_inst, i)); + lan_wr(~mskstr[i], lan966x, + VCAP_MASK_DAT(admin->tgt_inst, i)); + } + break; + case VCAP_SEL_ACTION: + for (int i = 0; i < count; ++i) + lan_wr(actstr[i], lan966x, + VCAP_ACTION_DAT(admin->tgt_inst, i)); + break; + case VCAP_SEL_COUNTER: + admin->cache.sticky = admin->cache.counter > 0; + lan_wr(admin->cache.counter, lan966x, + VCAP_CNT_DAT(admin->tgt_inst, 0)); + break; + default: + break; + } +} + +static void lan966x_vcap_cache_read(struct net_device *dev, + struct vcap_admin *admin, + enum vcap_selection sel, + u32 start, + u32 count) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + int instance = admin->tgt_inst; + u32 *keystr, *mskstr, *actstr; + + keystr = &admin->cache.keystream[start]; + mskstr = &admin->cache.maskstream[start]; + actstr = &admin->cache.actionstream[start]; + + if (sel & VCAP_SEL_ENTRY) { + for (int i = 0; i < count; ++i) { + keystr[i] = + lan_rd(lan966x, VCAP_ENTRY_DAT(instance, i)); + mskstr[i] = + ~lan_rd(lan966x, VCAP_MASK_DAT(instance, i)); + } + } + + if (sel & VCAP_SEL_ACTION) + for (int i = 0; i < count; ++i) + actstr[i] = + lan_rd(lan966x, VCAP_ACTION_DAT(instance, i)); + + if (sel & VCAP_SEL_COUNTER) { + admin->cache.counter = + lan_rd(lan966x, VCAP_CNT_DAT(instance, 0)); + admin->cache.sticky = admin->cache.counter > 0; + } +} + +static void lan966x_vcap_range_init(struct net_device *dev, + struct vcap_admin *admin, + u32 addr, + u32 count) +{ + struct lan966x_port 
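/* Note on the cache helpers above: the hardware holds the mask inverted,
 * so lan966x_vcap_cache_write() stores (key & mask) and ~mask, and
 * lan966x_vcap_cache_read() inverts it back on the way in. In the
 * software cache a set mask bit therefore means "must match": matching
 * IP protocol 6 exactly in one 32-bit word would be keystr = 6,
 * mskstr = 0xff, written to VCAP_MASK_DAT as ~0xff.
 */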
*port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + + __lan966x_vcap_range_init(lan966x, admin, addr, count); +} + +static void lan966x_vcap_update(struct net_device *dev, + struct vcap_admin *admin, + enum vcap_command cmd, + enum vcap_selection sel, + u32 addr) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + bool clear; + + clear = (cmd == VCAP_CMD_INITIALIZE); + + lan_wr(VCAP_MV_CFG_MV_NUM_POS_SET(0) | + VCAP_MV_CFG_MV_SIZE_SET(0), + lan966x, VCAP_MV_CFG(admin->tgt_inst)); + + lan_wr(VCAP_UPDATE_CTRL_UPDATE_CMD_SET(cmd) | + VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS_SET((VCAP_SEL_ENTRY & sel) == 0) | + VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS_SET((VCAP_SEL_ACTION & sel) == 0) | + VCAP_UPDATE_CTRL_UPDATE_CNT_DIS_SET((VCAP_SEL_COUNTER & sel) == 0) | + VCAP_UPDATE_CTRL_UPDATE_ADDR_SET(addr) | + VCAP_UPDATE_CTRL_CLEAR_CACHE_SET(clear) | + VCAP_UPDATE_CTRL_UPDATE_SHOT, + lan966x, VCAP_UPDATE_CTRL(admin->tgt_inst)); + + lan966x_vcap_wait_update(lan966x, admin->tgt_inst); +} + +static void lan966x_vcap_move(struct net_device *dev, + struct vcap_admin *admin, + u32 addr, int offset, int count) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + enum vcap_command cmd; + u16 mv_num_pos; + u16 mv_size; + + mv_size = count - 1; + if (offset > 0) { + mv_num_pos = offset - 1; + cmd = VCAP_CMD_MOVE_DOWN; + } else { + mv_num_pos = -offset - 1; + cmd = VCAP_CMD_MOVE_UP; + } + + lan_wr(VCAP_MV_CFG_MV_NUM_POS_SET(mv_num_pos) | + VCAP_MV_CFG_MV_SIZE_SET(mv_size), + lan966x, VCAP_MV_CFG(admin->tgt_inst)); + + lan_wr(VCAP_UPDATE_CTRL_UPDATE_CMD_SET(cmd) | + VCAP_UPDATE_CTRL_UPDATE_ENTRY_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_ACTION_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_CNT_DIS_SET(0) | + VCAP_UPDATE_CTRL_UPDATE_ADDR_SET(addr) | + VCAP_UPDATE_CTRL_CLEAR_CACHE_SET(false) | + VCAP_UPDATE_CTRL_UPDATE_SHOT, + lan966x, VCAP_UPDATE_CTRL(admin->tgt_inst)); + + lan966x_vcap_wait_update(lan966x, admin->tgt_inst); +} + +static int lan966x_vcap_port_info(struct net_device *dev, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + return 0; +} + +static int lan966x_vcap_enable(struct net_device *dev, + struct vcap_admin *admin, + bool enable) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + + lan_rmw(ANA_VCAP_S2_CFG_ENA_SET(enable), + ANA_VCAP_S2_CFG_ENA, + lan966x, ANA_VCAP_S2_CFG(port->chip_port)); + + return 0; +} + +static struct vcap_operations lan966x_vcap_ops = { + .validate_keyset = lan966x_vcap_validate_keyset, + .add_default_fields = lan966x_vcap_add_default_fields, + .cache_erase = lan966x_vcap_cache_erase, + .cache_write = lan966x_vcap_cache_write, + .cache_read = lan966x_vcap_cache_read, + .init = lan966x_vcap_range_init, + .update = lan966x_vcap_update, + .move = lan966x_vcap_move, + .port_info = lan966x_vcap_port_info, + .enable = lan966x_vcap_enable, +}; + +static void lan966x_vcap_admin_free(struct vcap_admin *admin) +{ + if (!admin) + return; + + kfree(admin->cache.keystream); + kfree(admin->cache.maskstream); + kfree(admin->cache.actionstream); + mutex_destroy(&admin->lock); + kfree(admin); +} + +static struct vcap_admin * +lan966x_vcap_admin_alloc(struct lan966x *lan966x, struct vcap_control *ctrl, + const struct lan966x_vcap_inst *cfg) +{ + struct vcap_admin *admin; + + admin = kzalloc(sizeof(*admin), GFP_KERNEL); + if (!admin) + return ERR_PTR(-ENOMEM); + + mutex_init(&admin->lock); + INIT_LIST_HEAD(&admin->list); + 
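/* Worked example for lan966x_vcap_move() above (illustrative): with
 * offset = 1 and count = 3 the code programs MV_SIZE = count - 1 = 2 and
 * MV_NUM_POS = offset - 1 = 0 and issues VCAP_CMD_MOVE_DOWN; a negative
 * offset flips to VCAP_CMD_MOVE_UP with MV_NUM_POS = -offset - 1.
 */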
INIT_LIST_HEAD(&admin->rules); + INIT_LIST_HEAD(&admin->enabled); + + admin->vtype = cfg->vtype; + admin->vinst = 0; + admin->w32be = true; + admin->tgt_inst = cfg->tgt_inst; + + admin->lookups = cfg->lookups; + admin->lookups_per_instance = cfg->lookups; + + admin->first_cid = cfg->first_cid; + admin->last_cid = cfg->last_cid; + + admin->cache.keystream = kzalloc(STREAMSIZE, GFP_KERNEL); + admin->cache.maskstream = kzalloc(STREAMSIZE, GFP_KERNEL); + admin->cache.actionstream = kzalloc(STREAMSIZE, GFP_KERNEL); + if (!admin->cache.keystream || + !admin->cache.maskstream || + !admin->cache.actionstream) { + lan966x_vcap_admin_free(admin); + return ERR_PTR(-ENOMEM); + } + + return admin; +} + +static void lan966x_vcap_block_init(struct lan966x *lan966x, + struct vcap_admin *admin, + struct lan966x_vcap_inst *cfg) +{ + admin->first_valid_addr = 0; + admin->last_used_addr = cfg->count; + admin->last_valid_addr = cfg->count - 1; + + lan_wr(VCAP_CORE_IDX_CORE_IDX_SET(0), + lan966x, VCAP_CORE_IDX(admin->tgt_inst)); + lan_wr(VCAP_CORE_MAP_CORE_MAP_SET(1), + lan966x, VCAP_CORE_MAP(admin->tgt_inst)); + + __lan966x_vcap_range_init(lan966x, admin, admin->first_valid_addr, + admin->last_valid_addr - + admin->first_valid_addr); +} + +static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x, + struct vcap_admin *admin) +{ + for (int p = 0; p < lan966x->num_phys_ports; ++p) + lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p)); +} + +int lan966x_vcap_init(struct lan966x *lan966x) +{ + struct lan966x_vcap_inst *cfg; + struct vcap_control *ctrl; + struct vcap_admin *admin; + + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) + return -ENOMEM; + + ctrl->vcaps = lan966x_vcaps; + ctrl->stats = &lan966x_vcap_stats; + ctrl->ops = &lan966x_vcap_ops; + + INIT_LIST_HEAD(&ctrl->list); + for (int i = 0; i < ARRAY_SIZE(lan966x_vcap_inst_cfg); ++i) { + cfg = &lan966x_vcap_inst_cfg[i]; + + admin = lan966x_vcap_admin_alloc(lan966x, ctrl, cfg); + if (IS_ERR(admin)) + return PTR_ERR(admin); + + lan966x_vcap_block_init(lan966x, admin, cfg); + lan966x_vcap_port_key_deselection(lan966x, admin); + + list_add_tail(&admin->list, &ctrl->list); + } + + lan966x->vcap_ctrl = ctrl; + + return 0; +} + +void lan966x_vcap_deinit(struct lan966x *lan966x) +{ + struct vcap_admin *admin, *admin_next; + struct vcap_control *ctrl; + + ctrl = lan966x->vcap_ctrl; + if (!ctrl) + return; + + list_for_each_entry_safe(admin, admin_next, &ctrl->list, list) { + lan966x_vcap_port_key_deselection(lan966x, admin); + vcap_del_rules(ctrl, admin); + list_del(&admin->list); + lan966x_vcap_admin_free(admin); + } + + kfree(ctrl); +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c index e77d9f2aad2b..2e6f486ec67d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c @@ -11,6 +11,8 @@ static int lan966x_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) struct lan966x_port *port = netdev_priv(dev); struct lan966x *lan966x = port->lan966x; struct bpf_prog *old_prog; + bool old_xdp, new_xdp; + int err; if (!lan966x->fdma) { NL_SET_ERR_MSG_MOD(xdp->extack, @@ -18,7 +20,20 @@ static int lan966x_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) return -EOPNOTSUPP; } + old_xdp = lan966x_xdp_present(lan966x); old_prog = xchg(&port->xdp_prog, xdp->prog); + new_xdp = lan966x_xdp_present(lan966x); + + if (old_xdp == new_xdp) + goto out; + + err = lan966x_fdma_reload_page_pool(lan966x); + if (err) { + 
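/* The page pool reload failed: put the previous program back so the
 * installed XDP program and the FDMA buffer layout stay consistent. */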
xchg(&port->xdp_prog, old_prog); + return err; + } + +out: if (old_prog) bpf_prog_put(old_prog); @@ -35,21 +50,57 @@ int lan966x_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +int lan966x_xdp_xmit(struct net_device *dev, + int n, + struct xdp_frame **frames, + u32 flags) +{ + struct lan966x_port *port = netdev_priv(dev); + int nxmit = 0; + + for (int i = 0; i < n; ++i) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = lan966x_fdma_xmit_xdpf(port, xdpf, NULL, true); + if (err) + break; + + nxmit++; + } + + return nxmit; +} + int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len) { struct bpf_prog *xdp_prog = port->xdp_prog; struct lan966x *lan966x = port->lan966x; + struct xdp_frame *xdpf; struct xdp_buff xdp; u32 act; xdp_init_buff(&xdp, PAGE_SIZE << lan966x->rx.page_order, &port->xdp_rxq); - xdp_prepare_buff(&xdp, page_address(page), IFH_LEN_BYTES, + xdp_prepare_buff(&xdp, page_address(page), + IFH_LEN_BYTES + XDP_PACKET_HEADROOM, data_len - IFH_LEN_BYTES, false); act = bpf_prog_run_xdp(xdp_prog, &xdp); switch (act) { case XDP_PASS: return FDMA_PASS; + case XDP_TX: + xdpf = xdp_convert_buff_to_frame(&xdp); + if (!xdpf) + return FDMA_DROP; + + return lan966x_fdma_xmit_xdpf(port, xdpf, page, false) ? + FDMA_DROP : FDMA_TX; + case XDP_REDIRECT: + if (xdp_do_redirect(port->dev, &xdp, xdp_prog)) + return FDMA_DROP; + + return FDMA_REDIRECT; default: bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act); fallthrough; @@ -61,6 +112,19 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len) } } +bool lan966x_xdp_present(struct lan966x *lan966x) +{ + for (int p = 0; p < lan966x->num_phys_ports; ++p) { + if (!lan966x->ports[p]) + continue; + + if (lan966x_xdp_port_present(lan966x->ports[p])) + return true; + } + + return false; +} + int lan966x_xdp_port_init(struct lan966x_port *port) { struct lan966x *lan966x = port->lan966x; diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index cff07b8841bd..d0ed7090aa54 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -12,6 +12,7 @@ sparx5-switch-y := sparx5_main.o sparx5_packet.o \ sparx5_vcap_impl.o sparx5_vcap_ag_api.o sparx5_tc_flower.o sparx5_tc_matchall.o sparx5-switch-$(CONFIG_SPARX5_DCB) += sparx5_dcb.o +sparx5-switch-$(CONFIG_DEBUG_FS) += sparx5_vcap_debugfs.o # Provide include files ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c index 8108f3767767..74abb946b2a3 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_dcb.c @@ -90,7 +90,7 @@ static int sparx5_dcb_app_validate(struct net_device *dev, static int sparx5_dcb_apptrust_validate(struct net_device *dev, u8 *selectors, int nselectors, int *err) { - bool match; + bool match = false; int i, ii; for (i = 0; i < ARRAY_SIZE(sparx5_dcb_apptrust_policies); i++) { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 33e17d4f5406..f8382d3124e0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -763,6 +763,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) /* Default values, some from DT */ sparx5->coreclock = SPX5_CORE_CLOCK_DEFAULT; + sparx5->debugfs_root = 
debugfs_create_dir("sparx5", NULL); + ports = of_get_child_by_name(np, "ethernet-ports"); if (!ports) { dev_err(sparx5->dev, "no ethernet-ports child node found\n"); @@ -906,6 +908,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) { struct sparx5 *sparx5 = platform_get_drvdata(pdev); + debugfs_remove_recursive(sparx5->debugfs_root); if (sparx5->xtr_irq) { disable_irq(sparx5->xtr_irq); sparx5->xtr_irq = -ENXIO; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 5985f2087d7f..4a574cdcb584 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -17,6 +17,7 @@ #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> #include <linux/hrtimer.h> +#include <linux/debugfs.h> #include "sparx5_main_regs.h" @@ -292,6 +293,8 @@ struct sparx5 { struct vcap_control *vcap_ctrl; /* PGID allocation map */ u8 pgid_map[PGID_TABLE_SIZE]; + /* Common root for debugfs */ + struct dentry *debugfs_root; }; /* sparx5_switchdev.c */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 19516ccad533..d078156581d5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -104,7 +104,7 @@ static int sparx5_port_open(struct net_device *ndev) err = phylink_of_phy_connect(port->phylink, port->of_node, 0); if (err) { netdev_err(ndev, "Could not attach to PHY\n"); - return err; + goto err_connect; } phylink_start(port->phylink); @@ -116,10 +116,20 @@ static int sparx5_port_open(struct net_device *ndev) err = sparx5_serdes_set(port->sparx5, port, &port->conf); else err = phy_power_on(port->serdes); - if (err) + if (err) { netdev_err(ndev, "%s failed\n", __func__); + goto out_power; + } } + return 0; + +out_power: + phylink_stop(port->phylink); + phylink_disconnect_phy(port->phylink); +err_connect: + sparx5_port_enable(port, false); + return err; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c index edd4c53dcce2..205246b5af82 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c @@ -139,13 +139,10 @@ static int sparx5_tc_setup_qdisc_ets(struct net_device *ndev, } } - sparx5_tc_ets_add(port, params); - break; + return sparx5_tc_ets_add(port, params); case TC_ETS_DESTROY: - sparx5_tc_ets_del(port); - - break; + return sparx5_tc_ets_del(port); case TC_ETS_GRAFT: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c index a48baeacc1d2..1ed304a816cc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c @@ -12,6 +12,20 @@ #include "sparx5_main.h" #include "sparx5_vcap_impl.h" +#define SPX5_MAX_RULE_SIZE 13 /* allows X1, X2, X4, X6 and X12 rules */ + +/* Collect keysets and type ids for multiple rules per size */ +struct sparx5_wildcard_rule { + bool selected; + u8 value; + u8 mask; + enum vcap_keyfield_set keyset; +}; + +struct sparx5_multiple_rules { + struct sparx5_wildcard_rule rule[SPX5_MAX_RULE_SIZE]; +}; + struct sparx5_tc_flower_parse_usage { struct flow_cls_offload *fco; struct flow_rule *frule; @@ -341,7 +355,7 @@ sparx5_tc_flower_handler_vlan_usage(struct sparx5_tc_flower_parse_usage *st) st->used_keys |= 
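/* Illustrative note on struct sparx5_wildcard_rule above: when one tc
 * filter must cover several keysets of the same size, the selection code
 * further below folds their type ids into a value/mask pair via
 * value &= type_id, mask |= type_id and finally mask ^= value. E.g. two
 * keysets with type ids 4 and 5 yield value = 4, mask = 1, i.e. "match
 * type 4 but ignore bit 0", so a single VCAP_KF_TYPE match covers both
 * keysets.
 */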
BIT(FLOW_DISSECTOR_KEY_VLAN); - return err; + return 0; out: NL_SET_ERR_MSG_MOD(st->fco->common.extack, "vlan parse error"); return err; @@ -452,8 +466,10 @@ sparx5_tc_flower_handler_arp_usage(struct sparx5_tc_flower_parse_usage *st) /* The IS2 ARP keyset does not support ARP hardware addresses */ if (!is_zero_ether_addr(mt.mask->sha) || - !is_zero_ether_addr(mt.mask->tha)) + !is_zero_ether_addr(mt.mask->tha)) { + err = -EINVAL; goto out; + } if (mt.mask->sip) { ipval = be32_to_cpu((__force __be32)mt.key->sip); @@ -477,7 +493,7 @@ sparx5_tc_flower_handler_arp_usage(struct sparx5_tc_flower_parse_usage *st) st->used_keys |= BIT(FLOW_DISSECTOR_KEY_ARP); - return err; + return 0; out: NL_SET_ERR_MSG_MOD(st->fco->common.extack, "arp parse error"); @@ -616,7 +632,7 @@ static int sparx5_tc_add_rule_counter(struct vcap_admin *admin, { int err; - err = vcap_rule_add_action_u32(vrule, VCAP_AF_CNT_ID, vrule->id); + err = vcap_rule_mod_action_u32(vrule, VCAP_AF_CNT_ID, vrule->id); if (err) return err; @@ -624,11 +640,190 @@ static int sparx5_tc_add_rule_counter(struct vcap_admin *admin, return err; } +/* Collect all port keysets and apply the first of them, possibly wildcarded */ +static int sparx5_tc_select_protocol_keyset(struct net_device *ndev, + struct vcap_rule *vrule, + struct vcap_admin *admin, + u16 l3_proto, + struct sparx5_multiple_rules *multi) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct vcap_keyset_list portkeysetlist = {}; + enum vcap_keyfield_set portkeysets[10] = {}; + struct vcap_keyset_list matches = {}; + enum vcap_keyfield_set keysets[10]; + int idx, jdx, err = 0, count = 0; + struct sparx5_wildcard_rule *mru; + const struct vcap_set *kinfo; + struct vcap_control *vctrl; + + vctrl = port->sparx5->vcap_ctrl; + + /* Find the keysets that the rule can use */ + matches.keysets = keysets; + matches.max = ARRAY_SIZE(keysets); + if (vcap_rule_find_keysets(vrule, &matches) == 0) + return -EINVAL; + + /* Find the keysets that the port configuration supports */ + portkeysetlist.max = ARRAY_SIZE(portkeysets); + portkeysetlist.keysets = portkeysets; + err = sparx5_vcap_get_port_keyset(ndev, + admin, vrule->vcap_chain_id, + l3_proto, + &portkeysetlist); + if (err) + return err; + + /* Find the intersection of the two sets of keyset */ + for (idx = 0; idx < portkeysetlist.cnt; ++idx) { + kinfo = vcap_keyfieldset(vctrl, admin->vtype, + portkeysetlist.keysets[idx]); + if (!kinfo) + continue; + + /* Find a port keyset that matches the required keys + * If there are multiple keysets then compose a type id mask + */ + for (jdx = 0; jdx < matches.cnt; ++jdx) { + if (portkeysetlist.keysets[idx] != matches.keysets[jdx]) + continue; + + mru = &multi->rule[kinfo->sw_per_item]; + if (!mru->selected) { + mru->selected = true; + mru->keyset = portkeysetlist.keysets[idx]; + mru->value = kinfo->type_id; + } + mru->value &= kinfo->type_id; + mru->mask |= kinfo->type_id; + ++count; + } + } + if (count == 0) + return -EPROTO; + + if (l3_proto == ETH_P_ALL && count < portkeysetlist.cnt) + return -ENOENT; + + for (idx = 0; idx < SPX5_MAX_RULE_SIZE; ++idx) { + mru = &multi->rule[idx]; + if (!mru->selected) + continue; + + /* Align the mask to the combined value */ + mru->mask ^= mru->value; + } + + /* Set the chosen keyset on the rule and set a wildcarded type if there + * are more than one keyset + */ + for (idx = 0; idx < SPX5_MAX_RULE_SIZE; ++idx) { + mru = &multi->rule[idx]; + if (!mru->selected) + continue; + + vcap_set_rule_set_keyset(vrule, mru->keyset); + if (count > 1) + /* Some keysets do 
not have a type field */ + vcap_rule_mod_key_u32(vrule, VCAP_KF_TYPE, + mru->value, + ~mru->mask); + mru->selected = false; /* mark as done */ + break; /* Stop here and add more rules later */ + } + return err; +} + +static int sparx5_tc_add_rule_copy(struct vcap_control *vctrl, + struct flow_cls_offload *fco, + struct vcap_rule *erule, + struct vcap_admin *admin, + struct sparx5_wildcard_rule *rule) +{ + enum vcap_key_field keylist[] = { + VCAP_KF_IF_IGR_PORT_MASK, + VCAP_KF_IF_IGR_PORT_MASK_SEL, + VCAP_KF_IF_IGR_PORT_MASK_RNG, + VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_TYPE, + }; + struct vcap_rule *vrule; + int err; + + /* Add an extra rule with a special user and the new keyset */ + erule->user = VCAP_USER_TC_EXTRA; + vrule = vcap_copy_rule(erule); + if (IS_ERR(vrule)) + return PTR_ERR(vrule); + + /* Link the new rule to the existing rule with the cookie */ + vrule->cookie = erule->cookie; + vcap_filter_rule_keys(vrule, keylist, ARRAY_SIZE(keylist), true); + err = vcap_set_rule_set_keyset(vrule, rule->keyset); + if (err) { + pr_err("%s:%d: could not set keyset %s in rule: %u\n", + __func__, __LINE__, + vcap_keyset_name(vctrl, rule->keyset), + vrule->id); + goto out; + } + + /* Some keysets do not have a type field, so ignore return value */ + vcap_rule_mod_key_u32(vrule, VCAP_KF_TYPE, rule->value, ~rule->mask); + + err = vcap_set_rule_set_actionset(vrule, erule->actionset); + if (err) + goto out; + + err = sparx5_tc_add_rule_counter(admin, vrule); + if (err) + goto out; + + err = vcap_val_rule(vrule, ETH_P_ALL); + if (err) { + pr_err("%s:%d: could not validate rule: %u\n", + __func__, __LINE__, vrule->id); + vcap_set_tc_exterr(fco, vrule); + goto out; + } + err = vcap_add_rule(vrule); + if (err) { + pr_err("%s:%d: could not add rule: %u\n", + __func__, __LINE__, vrule->id); + goto out; + } +out: + vcap_free_rule(vrule); + return err; +} + +static int sparx5_tc_add_remaining_rules(struct vcap_control *vctrl, + struct flow_cls_offload *fco, + struct vcap_rule *erule, + struct vcap_admin *admin, + struct sparx5_multiple_rules *multi) +{ + int idx, err = 0; + + for (idx = 0; idx < SPX5_MAX_RULE_SIZE; ++idx) { + if (!multi->rule[idx].selected) + continue; + + err = sparx5_tc_add_rule_copy(vctrl, fco, erule, admin, + &multi->rule[idx]); + if (err) + break; + } + return err; +} + static int sparx5_tc_flower_replace(struct net_device *ndev, struct flow_cls_offload *fco, struct vcap_admin *admin) { struct sparx5_port *port = netdev_priv(ndev); + struct sparx5_multiple_rules multi = {}; struct flow_action_entry *act; struct vcap_control *vctrl; struct flow_rule *frule; @@ -648,7 +843,11 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, return PTR_ERR(vrule); vrule->cookie = fco->cookie; - sparx5_tc_use_dissectors(fco, admin, vrule, &l3_proto); + + l3_proto = ETH_P_ALL; + err = sparx5_tc_use_dissectors(fco, admin, vrule, &l3_proto); + if (err) + goto out; err = sparx5_tc_add_rule_counter(admin, vrule); if (err) @@ -694,6 +893,15 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, goto out; } } + + err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin, l3_proto, + &multi); + if (err) { + NL_SET_ERR_MSG_MOD(fco->common.extack, + "No matching port keyset for filter protocol and keys"); + goto out; + } + /* provide the l3 protocol to guide the keyset selection */ err = vcap_val_rule(vrule, l3_proto); if (err) { @@ -704,6 +912,11 @@ static int sparx5_tc_flower_replace(struct net_device *ndev, if (err) NL_SET_ERR_MSG_MOD(fco->common.extack, "Could not add the filter"); + + if 
(l3_proto == ETH_P_ALL) + err = sparx5_tc_add_remaining_rules(vctrl, fco, vrule, admin, + &multi); + out: vcap_free_rule(vrule); return err; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c new file mode 100644 index 000000000000..b91e05ffe2f4 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip Sparx5 Switch driver VCAP debugFS implementation + * + * Copyright (c) 2022 Microchip Technology Inc. and its subsidiaries. + */ + +#include <linux/types.h> +#include <linux/list.h> + +#include "sparx5_vcap_debugfs.h" +#include "sparx5_main_regs.h" +#include "sparx5_main.h" +#include "sparx5_vcap_impl.h" +#include "sparx5_vcap_ag_api.h" + +static void sparx5_vcap_port_keys(struct sparx5 *sparx5, + struct vcap_admin *admin, + struct sparx5_port *port, + struct vcap_output_print *out) +{ + int lookup; + u32 value; + + out->prf(out->dst, " port[%02d] (%s): ", port->portno, + netdev_name(port->ndev)); + for (lookup = 0; lookup < admin->lookups; ++lookup) { + out->prf(out->dst, "\n Lookup %d: ", lookup); + + /* Get lookup state */ + value = spx5_rd(sparx5, ANA_ACL_VCAP_S2_CFG(port->portno)); + out->prf(out->dst, "\n state: "); + if (ANA_ACL_VCAP_S2_CFG_SEC_ENA_GET(value)) + out->prf(out->dst, "on"); + else + out->prf(out->dst, "off"); + + /* Get key selection state */ + value = spx5_rd(sparx5, + ANA_ACL_VCAP_S2_KEY_SEL(port->portno, lookup)); + + out->prf(out->dst, "\n noneth: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_NON_ETH_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_NONETH_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_NONETH_CUSTOM_1: + out->prf(out->dst, "custom1"); + break; + case VCAP_IS2_PS_NONETH_CUSTOM_2: + out->prf(out->dst, "custom2"); + break; + case VCAP_IS2_PS_NONETH_NO_LOOKUP: + out->prf(out->dst, "none"); + break; + } + out->prf(out->dst, "\n ipv4_mc: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_IP4_MC_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_IPV4_MC_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_IPV4_MC_IP4_TCP_UDP_OTHER: + out->prf(out->dst, "ip4_tcp_udp ip4_other"); + break; + case VCAP_IS2_PS_IPV4_MC_IP_7TUPLE: + out->prf(out->dst, "ip_7tuple"); + break; + case VCAP_IS2_PS_IPV4_MC_IP4_VID: + out->prf(out->dst, "ip4_vid"); + break; + } + out->prf(out->dst, "\n ipv4_uc: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_IP4_UC_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_IPV4_UC_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_IPV4_UC_IP4_TCP_UDP_OTHER: + out->prf(out->dst, "ip4_tcp_udp ip4_other"); + break; + case VCAP_IS2_PS_IPV4_UC_IP_7TUPLE: + out->prf(out->dst, "ip_7tuple"); + break; + } + out->prf(out->dst, "\n ipv6_mc: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_IP6_MC_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_IPV6_MC_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_IPV6_MC_IP_7TUPLE: + out->prf(out->dst, "ip_7tuple"); + break; + case VCAP_IS2_PS_IPV6_MC_IP6_VID: + out->prf(out->dst, "ip6_vid"); + break; + case VCAP_IS2_PS_IPV6_MC_IP6_STD: + out->prf(out->dst, "ip6_std"); + break; + case VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER: + out->prf(out->dst, "ip4_tcp_udp ipv4_other"); + break; + } + out->prf(out->dst, "\n ipv6_uc: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_IPV6_UC_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_IPV6_UC_IP_7TUPLE: + out->prf(out->dst, 
"ip_7tuple"); + break; + case VCAP_IS2_PS_IPV6_UC_IP6_STD: + out->prf(out->dst, "ip6_std"); + break; + case VCAP_IS2_PS_IPV6_UC_IP4_TCP_UDP_OTHER: + out->prf(out->dst, "ip4_tcp_udp ip4_other"); + break; + } + out->prf(out->dst, "\n arp: "); + switch (ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL_GET(value)) { + case VCAP_IS2_PS_ARP_MAC_ETYPE: + out->prf(out->dst, "mac_etype"); + break; + case VCAP_IS2_PS_ARP_ARP: + out->prf(out->dst, "arp"); + break; + } + } + out->prf(out->dst, "\n"); +} + +static void sparx5_vcap_port_stickies(struct sparx5 *sparx5, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + int lookup; + u32 value; + + out->prf(out->dst, " Sticky bits: "); + for (lookup = 0; lookup < admin->lookups; ++lookup) { + out->prf(out->dst, "\n Lookup %d: ", lookup); + /* Get lookup sticky bits */ + value = spx5_rd(sparx5, ANA_ACL_SEC_LOOKUP_STICKY(lookup)); + + if (ANA_ACL_SEC_LOOKUP_STICKY_KEY_SEL_CLM_STICKY_GET(value)) + out->prf(out->dst, " sel_clm"); + if (ANA_ACL_SEC_LOOKUP_STICKY_KEY_SEL_IRLEG_STICKY_GET(value)) + out->prf(out->dst, " sel_irleg"); + if (ANA_ACL_SEC_LOOKUP_STICKY_KEY_SEL_ERLEG_STICKY_GET(value)) + out->prf(out->dst, " sel_erleg"); + if (ANA_ACL_SEC_LOOKUP_STICKY_KEY_SEL_PORT_STICKY_GET(value)) + out->prf(out->dst, " sel_port"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_CUSTOM2_STICKY_GET(value)) + out->prf(out->dst, " custom2"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_CUSTOM1_STICKY_GET(value)) + out->prf(out->dst, " custom1"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_OAM_STICKY_GET(value)) + out->prf(out->dst, " oam"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP6_VID_STICKY_GET(value)) + out->prf(out->dst, " ip6_vid"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP6_STD_STICKY_GET(value)) + out->prf(out->dst, " ip6_std"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP6_TCPUDP_STICKY_GET(value)) + out->prf(out->dst, " ip6_tcpudp"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP_7TUPLE_STICKY_GET(value)) + out->prf(out->dst, " ip_7tuple"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP4_VID_STICKY_GET(value)) + out->prf(out->dst, " ip4_vid"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP4_TCPUDP_STICKY_GET(value)) + out->prf(out->dst, " ip4_tcpudp"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_IP4_OTHER_STICKY_GET(value)) + out->prf(out->dst, " ip4_other"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_ARP_STICKY_GET(value)) + out->prf(out->dst, " arp"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_MAC_SNAP_STICKY_GET(value)) + out->prf(out->dst, " mac_snap"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_MAC_LLC_STICKY_GET(value)) + out->prf(out->dst, " mac_llc"); + if (ANA_ACL_SEC_LOOKUP_STICKY_SEC_TYPE_MAC_ETYPE_STICKY_GET(value)) + out->prf(out->dst, " mac_etype"); + /* Clear stickies */ + spx5_wr(value, sparx5, ANA_ACL_SEC_LOOKUP_STICKY(lookup)); + } + out->prf(out->dst, "\n"); +} + +/* Provide port information via a callback interface */ +int sparx5_port_info(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + struct sparx5_port *port = netdev_priv(ndev); + struct sparx5 *sparx5 = port->sparx5; + const struct vcap_info *vcap; + struct vcap_control *vctrl; + + vctrl = sparx5->vcap_ctrl; + vcap = &vctrl->vcaps[admin->vtype]; + out->prf(out->dst, "%s:\n", vcap->name); + sparx5_vcap_port_keys(sparx5, admin, port, out); + sparx5_vcap_port_stickies(sparx5, admin, out); + return 0; +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.h new file mode 100644 index 
000000000000..f9ede03441f2 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Microchip Sparx5 Switch driver VCAP implementation + * + * Copyright (c) 2022 Microchip Technology Inc. and its subsidiaries. + */ + +#ifndef __SPARX5_VCAP_DEBUGFS_H__ +#define __SPARX5_VCAP_DEBUGFS_H__ + +#include <linux/netdevice.h> + +#include <vcap_api.h> +#include <vcap_api_client.h> + +#if defined(CONFIG_DEBUG_FS) + +/* Provide port information via a callback interface */ +int sparx5_port_info(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_output_print *out); + +#else + +static inline int sparx5_port_info(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + return 0; +} + +#endif + +#endif /* __SPARX5_VCAP_DEBUGFS_H__ */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c index e8f3d030eba2..a0c126ba9a87 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c @@ -7,15 +7,12 @@ * https://github.com/microchip-ung/sparx-5_reginfo */ -#include <linux/types.h> -#include <linux/list.h> - -#include "vcap_api.h" -#include "vcap_api_client.h" +#include "vcap_api_debugfs.h" #include "sparx5_main_regs.h" #include "sparx5_main.h" #include "sparx5_vcap_impl.h" #include "sparx5_vcap_ag_api.h" +#include "sparx5_vcap_debugfs.h" #define SUPER_VCAP_BLK_SIZE 3072 /* addresses per Super VCAP block */ #define STREAMSIZE (64 * 4) /* bytes in the VCAP cache area */ @@ -30,54 +27,6 @@ ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL_SET(_v6_uc) | \ ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL_SET(_arp)) -/* IS2 port keyset selection control */ - -/* IS2 non-ethernet traffic type keyset generation */ -enum vcap_is2_port_sel_noneth { - VCAP_IS2_PS_NONETH_MAC_ETYPE, - VCAP_IS2_PS_NONETH_CUSTOM_1, - VCAP_IS2_PS_NONETH_CUSTOM_2, - VCAP_IS2_PS_NONETH_NO_LOOKUP -}; - -/* IS2 IPv4 unicast traffic type keyset generation */ -enum vcap_is2_port_sel_ipv4_uc { - VCAP_IS2_PS_IPV4_UC_MAC_ETYPE, - VCAP_IS2_PS_IPV4_UC_IP4_TCP_UDP_OTHER, - VCAP_IS2_PS_IPV4_UC_IP_7TUPLE, -}; - -/* IS2 IPv4 multicast traffic type keyset generation */ -enum vcap_is2_port_sel_ipv4_mc { - VCAP_IS2_PS_IPV4_MC_MAC_ETYPE, - VCAP_IS2_PS_IPV4_MC_IP4_TCP_UDP_OTHER, - VCAP_IS2_PS_IPV4_MC_IP_7TUPLE, - VCAP_IS2_PS_IPV4_MC_IP4_VID, -}; - -/* IS2 IPv6 unicast traffic type keyset generation */ -enum vcap_is2_port_sel_ipv6_uc { - VCAP_IS2_PS_IPV6_UC_MAC_ETYPE, - VCAP_IS2_PS_IPV6_UC_IP_7TUPLE, - VCAP_IS2_PS_IPV6_UC_IP6_STD, - VCAP_IS2_PS_IPV6_UC_IP4_TCP_UDP_OTHER, -}; - -/* IS2 IPv6 multicast traffic type keyset generation */ -enum vcap_is2_port_sel_ipv6_mc { - VCAP_IS2_PS_IPV6_MC_MAC_ETYPE, - VCAP_IS2_PS_IPV6_MC_IP_7TUPLE, - VCAP_IS2_PS_IPV6_MC_IP6_VID, - VCAP_IS2_PS_IPV6_MC_IP6_STD, - VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER, -}; - -/* IS2 ARP traffic type keyset generation */ -enum vcap_is2_port_sel_arp { - VCAP_IS2_PS_ARP_MAC_ETYPE, - VCAP_IS2_PS_ARP_ARP, -}; - static struct sparx5_vcap_inst { enum vcap_type vtype; /* type of vcap */ int vinst; /* instance number within the same type */ @@ -325,6 +274,19 @@ static int sparx5_vcap_is2_get_port_keysets(struct net_device *ndev, return 0; } +/* Get the port keyset for the vcap lookup */ +int sparx5_vcap_get_port_keyset(struct net_device *ndev, + struct vcap_admin *admin, + int cid, + u16 l3_proto, + struct vcap_keyset_list *kslist) +{ + int lookup; + + 
lookup = sparx5_vcap_cid_to_lookup(cid); + return sparx5_vcap_is2_get_port_keysets(ndev, lookup, kslist, l3_proto); +} + /* API callback used for validating a field keyset (check the port keysets) */ static enum vcap_keyfield_set sparx5_vcap_validate_keyset(struct net_device *ndev, @@ -548,15 +510,6 @@ static void sparx5_vcap_move(struct net_device *ndev, struct vcap_admin *admin, sparx5_vcap_wait_super_update(sparx5); } -/* Provide port information via a callback interface */ -static int sparx5_port_info(struct net_device *ndev, enum vcap_type vtype, - int (*pf)(void *out, int arg, const char *fmt, ...), - void *out, int arg) -{ - /* this will be added later */ - return 0; -} - /* Enable all lookups in the VCAP instance */ static int sparx5_vcap_enable(struct net_device *ndev, struct vcap_admin *admin, @@ -634,6 +587,7 @@ static void sparx5_vcap_admin_free(struct vcap_admin *admin) { if (!admin) return; + mutex_destroy(&admin->lock); kfree(admin->cache.keystream); kfree(admin->cache.maskstream); kfree(admin->cache.actionstream); @@ -653,6 +607,7 @@ sparx5_vcap_admin_alloc(struct sparx5 *sparx5, struct vcap_control *ctrl, INIT_LIST_HEAD(&admin->list); INIT_LIST_HEAD(&admin->rules); INIT_LIST_HEAD(&admin->enabled); + mutex_init(&admin->lock); admin->vtype = cfg->vtype; admin->vinst = cfg->vinst; admin->lookups = cfg->lookups; @@ -702,6 +657,7 @@ int sparx5_vcap_init(struct sparx5 *sparx5) const struct sparx5_vcap_inst *cfg; struct vcap_control *ctrl; struct vcap_admin *admin; + struct dentry *dir; int err = 0, idx; /* Create a VCAP control instance that owns the platform specific VCAP @@ -740,6 +696,11 @@ int sparx5_vcap_init(struct sparx5 *sparx5) sparx5_vcap_port_key_selection(sparx5, admin); list_add_tail(&admin->list, &ctrl->list); } + dir = vcap_debugfs(sparx5->dev, sparx5->debugfs_root, ctrl); + for (idx = 0; idx < SPX5_PORTS; ++idx) + if (sparx5->ports[idx]) + vcap_port_debugfs(sparx5->dev, dir, ctrl, + sparx5->ports[idx]->ndev); return err; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h index 8e44ebd76b41..0a0f2412c980 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h @@ -10,6 +10,12 @@ #ifndef __SPARX5_VCAP_IMPL_H__ #define __SPARX5_VCAP_IMPL_H__ +#include <linux/types.h> +#include <linux/list.h> + +#include "vcap_api.h" +#include "vcap_api_client.h" + #define SPARX5_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */ #define SPARX5_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ #define SPARX5_VCAP_CID_IS2_L2 VCAP_CID_INGRESS_STAGE2_L2 /* IS2 lookup 2 */ @@ -17,4 +23,59 @@ #define SPARX5_VCAP_CID_IS2_MAX \ (VCAP_CID_INGRESS_STAGE2_L3 + VCAP_CID_LOOKUP_SIZE - 1) /* IS2 Max */ +/* IS2 port keyset selection control */ + +/* IS2 non-ethernet traffic type keyset generation */ +enum vcap_is2_port_sel_noneth { + VCAP_IS2_PS_NONETH_MAC_ETYPE, + VCAP_IS2_PS_NONETH_CUSTOM_1, + VCAP_IS2_PS_NONETH_CUSTOM_2, + VCAP_IS2_PS_NONETH_NO_LOOKUP +}; + +/* IS2 IPv4 unicast traffic type keyset generation */ +enum vcap_is2_port_sel_ipv4_uc { + VCAP_IS2_PS_IPV4_UC_MAC_ETYPE, + VCAP_IS2_PS_IPV4_UC_IP4_TCP_UDP_OTHER, + VCAP_IS2_PS_IPV4_UC_IP_7TUPLE, +}; + +/* IS2 IPv4 multicast traffic type keyset generation */ +enum vcap_is2_port_sel_ipv4_mc { + VCAP_IS2_PS_IPV4_MC_MAC_ETYPE, + VCAP_IS2_PS_IPV4_MC_IP4_TCP_UDP_OTHER, + VCAP_IS2_PS_IPV4_MC_IP_7TUPLE, + VCAP_IS2_PS_IPV4_MC_IP4_VID, +}; + +/* IS2 IPv6 unicast traffic type 
keyset generation */ +enum vcap_is2_port_sel_ipv6_uc { + VCAP_IS2_PS_IPV6_UC_MAC_ETYPE, + VCAP_IS2_PS_IPV6_UC_IP_7TUPLE, + VCAP_IS2_PS_IPV6_UC_IP6_STD, + VCAP_IS2_PS_IPV6_UC_IP4_TCP_UDP_OTHER, +}; + +/* IS2 IPv6 multicast traffic type keyset generation */ +enum vcap_is2_port_sel_ipv6_mc { + VCAP_IS2_PS_IPV6_MC_MAC_ETYPE, + VCAP_IS2_PS_IPV6_MC_IP_7TUPLE, + VCAP_IS2_PS_IPV6_MC_IP6_VID, + VCAP_IS2_PS_IPV6_MC_IP6_STD, + VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER, +}; + +/* IS2 ARP traffic type keyset generation */ +enum vcap_is2_port_sel_arp { + VCAP_IS2_PS_ARP_MAC_ETYPE, + VCAP_IS2_PS_ARP_ARP, +}; + +/* Get the port keyset for the vcap lookup */ +int sparx5_vcap_get_port_keyset(struct net_device *ndev, + struct vcap_admin *admin, + int cid, + u16 l3_proto, + struct vcap_keyset_list *kslist); + #endif /* __SPARX5_VCAP_IMPL_H__ */ diff --git a/drivers/net/ethernet/microchip/vcap/Kconfig b/drivers/net/ethernet/microchip/vcap/Kconfig index 1af30a358a15..97f43fd4473f 100644 --- a/drivers/net/ethernet/microchip/vcap/Kconfig +++ b/drivers/net/ethernet/microchip/vcap/Kconfig @@ -40,6 +40,7 @@ config VCAP_KUNIT_TEST bool "KUnit test for VCAP library" if !KUNIT_ALL_TESTS depends on KUNIT depends on KUNIT=y && VCAP=y && y + select DEBUG_FS default KUNIT_ALL_TESTS help This builds unit tests for the VCAP library. diff --git a/drivers/net/ethernet/microchip/vcap/Makefile b/drivers/net/ethernet/microchip/vcap/Makefile index b377569f92d8..0adb8f5a8735 100644 --- a/drivers/net/ethernet/microchip/vcap/Makefile +++ b/drivers/net/ethernet/microchip/vcap/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_VCAP) += vcap.o obj-$(CONFIG_VCAP_KUNIT_TEST) += vcap_model_kunit.o +vcap-$(CONFIG_DEBUG_FS) += vcap_api_debugfs.o vcap-y += vcap_api.o diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h index 804d57b9b60a..84de2aee4169 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h @@ -11,6 +11,8 @@ #define __VCAP_AG_API__ enum vcap_type { + VCAP_TYPE_ES2, + VCAP_TYPE_IS0, VCAP_TYPE_IS2, VCAP_TYPE_MAX }; @@ -18,12 +20,29 @@ enum vcap_type { /* Keyfieldset names with origin information */ enum vcap_keyfield_set { VCAP_KFS_NO_VALUE, /* initial value */ - VCAP_KFS_ARP, /* sparx5 is2 X6 */ - VCAP_KFS_IP4_OTHER, /* sparx5 is2 X6 */ - VCAP_KFS_IP4_TCP_UDP, /* sparx5 is2 X6 */ + VCAP_KFS_ARP, /* sparx5 is2 X6, sparx5 es2 X6 */ + VCAP_KFS_ETAG, /* sparx5 is0 X2 */ + VCAP_KFS_IP4_OTHER, /* sparx5 is2 X6, sparx5 es2 X6 */ + VCAP_KFS_IP4_TCP_UDP, /* sparx5 is2 X6, sparx5 es2 X6 */ + VCAP_KFS_IP4_VID, /* sparx5 es2 X3 */ VCAP_KFS_IP6_STD, /* sparx5 is2 X6 */ - VCAP_KFS_IP_7TUPLE, /* sparx5 is2 X12 */ - VCAP_KFS_MAC_ETYPE, /* sparx5 is2 X6 */ + VCAP_KFS_IP6_VID, /* sparx5 is2 X6, sparx5 es2 X6 */ + VCAP_KFS_IP_7TUPLE, /* sparx5 is2 X12, sparx5 es2 X12 */ + VCAP_KFS_LL_FULL, /* sparx5 is0 X6 */ + VCAP_KFS_MAC_ETYPE, /* sparx5 is2 X6, sparx5 es2 X6 */ + VCAP_KFS_MLL, /* sparx5 is0 X3 */ + VCAP_KFS_NORMAL, /* sparx5 is0 X6 */ + VCAP_KFS_NORMAL_5TUPLE_IP4, /* sparx5 is0 X6 */ + VCAP_KFS_NORMAL_7TUPLE, /* sparx5 is0 X12 */ + VCAP_KFS_PURE_5TUPLE_IP4, /* sparx5 is0 X3 */ + VCAP_KFS_TRI_VID, /* sparx5 is0 X2 */ + VCAP_KFS_MAC_LLC, /* lan966x is2 X2 */ + VCAP_KFS_MAC_SNAP, /* lan966x is2 X2 */ + VCAP_KFS_OAM, /* lan966x is2 X2 */ + VCAP_KFS_IP6_TCP_UDP, /* lan966x is2 X4 */ + VCAP_KFS_IP6_OTHER, /* lan966x is2 X4 */ + VCAP_KFS_SMAC_SIP4, /* lan966x is2 X1 */ + VCAP_KFS_SMAC_SIP6, /* lan966x is2 X2 */ }; /* List of keyfields with 
description @@ -31,34 +50,93 @@ enum vcap_keyfield_set { * Keys ending in _IS are booleans derived from frame data * Keys ending in _CLS are classified frame data * - * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2 + * VCAP_KF_8021BR_ECID_BASE: W12, sparx5: is0 + * Used by 802.1BR Bridge Port Extension in an E-Tag + * VCAP_KF_8021BR_ECID_EXT: W8, sparx5: is0 + * Used by 802.1BR Bridge Port Extension in an E-Tag + * VCAP_KF_8021BR_E_TAGGED: W1, sparx5: is0 + * Set for frames containing an E-TAG (802.1BR Ethertype 893f) + * VCAP_KF_8021BR_GRP: W2, sparx5: is0 + * E-Tag group bits in 802.1BR Bridge Port Extension + * VCAP_KF_8021BR_IGR_ECID_BASE: W12, sparx5: is0 + * Used by 802.1BR Bridge Port Extension in an E-Tag + * VCAP_KF_8021BR_IGR_ECID_EXT: W8, sparx5: is0 + * Used by 802.1BR Bridge Port Extension in an E-Tag + * VCAP_KF_8021Q_DEI0: W1, sparx5: is0 + * First DEI in multiple vlan tags (outer tag or default port tag) + * VCAP_KF_8021Q_DEI1: W1, sparx5: is0 + * Second DEI in multiple vlan tags (inner tag) + * VCAP_KF_8021Q_DEI2: W1, sparx5: is0 + * Third DEI in multiple vlan tags (not always available) + * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2 * Classified DEI - * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2 + * VCAP_KF_8021Q_PCP0: W3, sparx5: is0 + * First PCP in multiple vlan tags (outer tag or default port tag) + * VCAP_KF_8021Q_PCP1: W3, sparx5: is0 + * Second PCP in multiple vlan tags (inner tag) + * VCAP_KF_8021Q_PCP2: W3, sparx5: is0 + * Third PCP in multiple vlan tags (not always available) + * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2 * Classified PCP - * VCAP_KF_8021Q_VID_CLS: W13, sparx5: is2 + * VCAP_KF_8021Q_TPID0: W3, sparx5: is0 + * First TPID in multiple vlan tags (outer tag or default port tag) + * VCAP_KF_8021Q_TPID1: W3, sparx5: is0 + * Second TPID in multiple vlan tags (inner tag) + * VCAP_KF_8021Q_TPID2: W3, sparx5: is0 + * Third TPID in multiple vlan tags (not always available) + * VCAP_KF_8021Q_VID0: W12, sparx5: is0 + * First VID in multiple vlan tags (outer tag or default port tag) + * VCAP_KF_8021Q_VID1: W12, sparx5: is0 + * Second VID in multiple vlan tags (inner tag) + * VCAP_KF_8021Q_VID2: W12, sparx5: is0 + * Third VID in multiple vlan tags (not always available) + * VCAP_KF_8021Q_VID_CLS: W13, sparx5: is2/es2, lan966x is2 W12 * Classified VID - * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2 + * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: Set if frame was received with a VLAN tag, LAN966x: Set if frame has * one or more Q-tags. Independent of port VLAN awareness - * VCAP_KF_ARP_ADDR_SPACE_OK_IS: W1, sparx5: is2 + * VCAP_KF_8021Q_VLAN_TAGS: W3, sparx5: is0 + * Number of VLAN tags in frame: 0: Untagged, 1: Single tagged, 3: Double + * tagged, 7: Triple tagged + * VCAP_KF_ACL_GRP_ID: W8, sparx5: es2 + * Used in interface map table + * VCAP_KF_ARP_ADDR_SPACE_OK_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if hardware address is Ethernet - * VCAP_KF_ARP_LEN_OK_IS: W1, sparx5: is2 + * VCAP_KF_ARP_LEN_OK_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if hardware address length = 6 (Ethernet) and IP address length = 4 (IP). 
- * VCAP_KF_ARP_OPCODE: W2, sparx5: is2
+ * VCAP_KF_ARP_OPCODE: W2, sparx5: is2/es2, lan966x: is2
 * ARP opcode
- * VCAP_KF_ARP_OPCODE_UNKNOWN_IS: W1, sparx5: is2
+ * VCAP_KF_ARP_OPCODE_UNKNOWN_IS: W1, sparx5: is2/es2, lan966x: is2
 * Set if not one of the codes defined in VCAP_KF_ARP_OPCODE
- * VCAP_KF_ARP_PROTO_SPACE_OK_IS: W1, sparx5: is2
+ * VCAP_KF_ARP_PROTO_SPACE_OK_IS: W1, sparx5: is2/es2, lan966x: is2
 * Set if protocol address space is 0x0800
- * VCAP_KF_ARP_SENDER_MATCH_IS: W1, sparx5: is2
+ * VCAP_KF_ARP_SENDER_MATCH_IS: W1, sparx5: is2/es2, lan966x: is2
 * Sender Hardware Address = SMAC (ARP)
- * VCAP_KF_ARP_TGT_MATCH_IS: W1, sparx5: is2
+ * VCAP_KF_ARP_TGT_MATCH_IS: W1, sparx5: is2/es2, lan966x: is2
 * Target Hardware Address = SMAC (RARP)
- * VCAP_KF_ETYPE: W16, sparx5: is2
+ * VCAP_KF_COSID_CLS: W3, sparx5: es2
+ * Class of service
+ * VCAP_KF_DST_ENTRY: W1, sparx5: is0
+ * Selects whether the frame’s destination or source information is used for
+ * fields L2_SMAC and L3_IP4_SIP
+ * VCAP_KF_ES0_ISDX_KEY_ENA: W1, sparx5: es2
+ * The value taken from the IFH.FWD.ES0_ISDX_KEY_ENA
+ * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2, lan966x: is2
 * Ethernet type
- * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is2
+ * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2
 * Set if frame has EtherType >= 0x600
- * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is2 W32, sparx5 is2 W65
+ * VCAP_KF_ETYPE_MPLS: W2, sparx5: is0
+ * Type of MPLS Ethertype (or not)
+ * VCAP_KF_IF_EGR_PORT_MASK: W32, sparx5: es2
+ * Egress port mask, one bit per port
+ * VCAP_KF_IF_EGR_PORT_MASK_RNG: W3, sparx5: es2
+ * Select which 32 port group is available in IF_EGR_PORT (or virtual ports or
+ * CPU queue)
+ * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9
+ * Sparx5: Logical ingress port number retrieved from
+ * ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port number
+ * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65,
+ * lan966x is2 W9
 * Ingress port mask, one bit per port/erleg
 * VCAP_KF_IF_IGR_PORT_MASK_L3: W1, sparx5: is2
 * If set, IF_IGR_PORT_MASK, IF_IGR_PORT_MASK_RNG, and IF_IGR_PORT_MASK_SEL are
@@ -66,123 +144,206 @@ enum vcap_keyfield_set {
 * VCAP_KF_IF_IGR_PORT_MASK_RNG: W4, sparx5: is2
 * Range selector for IF_IGR_PORT_MASK. Specifies which group of 32 ports are
 * available in IF_IGR_PORT_MASK
- * VCAP_KF_IF_IGR_PORT_MASK_SEL: W2, sparx5: is2
+ * VCAP_KF_IF_IGR_PORT_MASK_SEL: W2, sparx5: is0/is2
 * Mode selector for IF_IGR_PORT_MASK, applicable when IF_IGR_PORT_MASK_L3 == 0.
 * Mapping: 0: DEFAULT 1: LOOPBACK 2: MASQUERADE 3: CPU_VD
- * VCAP_KF_IP4_IS: W1, sparx5: is2
+ * VCAP_KF_IF_IGR_PORT_SEL: W1, sparx5: es2
+ * Selector for IF_IGR_PORT: physical port number or ERLEG
+ * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2, lan966x: is2
 * Set if frame has EtherType = 0x800 and IP version = 4
- * VCAP_KF_ISDX_CLS: W12, sparx5: is2
+ * VCAP_KF_IP_MC_IS: W1, sparx5: is0
+ * Set if frame is IPv4 frame and frame’s destination MAC address is an IPv4
+ * multicast address (0x01005E0 /25). Set if frame is IPv6 frame and frame’s
+ * destination MAC address is an IPv6 multicast address (0x3333/16).
+ * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0
+ * Payload bytes after IP header
+ * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0
+ * Set if frame is IPv4, IPv6, or SNAP frame
+ * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es2
 * Classified ISDX
- * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2
+ * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es2, lan966x: is2
 * Set if classified ISDX > 0
- * VCAP_KF_L2_BC_IS: W1, sparx5: is2
+ * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is2
 * Set if frame’s destination MAC address is the broadcast address
 * (FF-FF-FF-FF-FF-FF).
- * VCAP_KF_L2_DMAC: W48, sparx5: is2
+ * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is2
 * Destination MAC address
 * VCAP_KF_L2_FWD_IS: W1, sparx5: is2
 * Set if the frame is allowed to be forwarded to front ports
- * VCAP_KF_L2_MC_IS: W1, sparx5: is2
+ * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is2
 * Set if frame’s destination MAC address is a multicast address (bit 40 = 1).
- * VCAP_KF_L2_PAYLOAD_ETYPE: W64, sparx5: is2
+ * VCAP_KF_L2_PAYLOAD_ETYPE: W64, sparx5: is2/es2
 * Byte 0-7 of L2 payload after Type/Len field and overloading for OAM
- * VCAP_KF_L2_SMAC: W48, sparx5: is2
+ * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2, lan966x: is2
 * Source MAC address
- * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2
+ * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2/es2, lan966x: is2
 * Set if Src IP matches Dst IP address
+ * VCAP_KF_L3_DMAC_DIP_MATCH: W1, sparx5: is2
+ * Match found in DIP security lookup in ANA_L3
+ * VCAP_KF_L3_DPL_CLS: W1, sparx5: es2
+ * The frames drop precedence level
+ * VCAP_KF_L3_DSCP: W6, sparx5: is0
+ * Frame’s DSCP value
 * VCAP_KF_L3_DST_IS: W1, sparx5: is2
 * Set if lookup is done for egress router leg
- * VCAP_KF_L3_FRAGMENT_TYPE: W2, sparx5: is2
+ * VCAP_KF_L3_FRAGMENT_TYPE: W2, sparx5: is0/is2/es2
 * L3 Fragmentation type (none, initial, suspicious, valid follow up)
- * VCAP_KF_L3_FRAG_INVLD_L4_LEN: W1, sparx5: is2
+ * VCAP_KF_L3_FRAG_INVLD_L4_LEN: W1, sparx5: is0/is2
 * Set if frame's L4 length is less than ANA_CL:COMMON:CLM_FRAGMENT_CFG.L4_MIN_L
 * EN
- * VCAP_KF_L3_IP4_DIP: W32, sparx5: is2
+ * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2, lan966x: is2
 * Destination IPv4 Address
- * VCAP_KF_L3_IP4_SIP: W32, sparx5: is2
+ * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2, lan966x: is2
 * Source IPv4 Address
- * VCAP_KF_L3_IP6_DIP: W128, sparx5: is2
+ * VCAP_KF_L3_IP6_DIP: W128, sparx5: is0/is2/es2, lan966x: is2
 * Sparx5: Full IPv6 DIP, LAN966x: Either Full IPv6 DIP or a subset depending on
 * frame type
- * VCAP_KF_L3_IP6_SIP: W128, sparx5: is2
+ * VCAP_KF_L3_IP6_SIP: W128, sparx5: is0/is2/es2, lan966x: is2
 * Sparx5: Full IPv6 SIP, LAN966x: Either Full IPv6 SIP or a subset depending on
 * frame type
- * VCAP_KF_L3_IP_PROTO: W8, sparx5: is2
+ * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2, lan966x: is2
 * IPv4 frames: IP protocol. IPv6 frames: Next header, same as for IPV4
- * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is2
+ * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2, lan966x: is2
 * Set if IPv4 frame contains options (IP len > 5)
- * VCAP_KF_L3_PAYLOAD: sparx5 is2 W96, sparx5 is2 W40
+ * VCAP_KF_L3_PAYLOAD: sparx5 is2 W96, sparx5 is2 W40, sparx5 es2 W96,
+ * lan966x is2 W56
 * Sparx5: Payload bytes after IP header.
IPv4: IPv4 options are not parsed so * payload is always taken 20 bytes after the start of the IPv4 header, LAN966x: * Bytes 0-6 after IP header - * VCAP_KF_L3_RT_IS: W1, sparx5: is2 + * VCAP_KF_L3_RT_IS: W1, sparx5: is2/es2 * Set if frame has hit a router leg - * VCAP_KF_L3_TOS: W8, sparx5: is2 + * VCAP_KF_L3_SMAC_SIP_MATCH: W1, sparx5: is2 + * Match found in SIP security lookup in ANA_L3 + * VCAP_KF_L3_TOS: W8, sparx5: is2/es2, lan966x: is2 * Sparx5: Frame's IPv4/IPv6 DSCP and ECN fields, LAN966x: IP TOS field - * VCAP_KF_L3_TTL_GT0: W1, sparx5: is2 + * VCAP_KF_L3_TTL_GT0: W1, sparx5: is2/es2, lan966x: is2 * Set if IPv4 TTL / IPv6 hop limit is greater than 0 - * VCAP_KF_L4_ACK: W1, sparx5: is2 + * VCAP_KF_L4_ACK: W1, sparx5: is2/es2, lan966x: is2 * Sparx5 and LAN966x: TCP flag ACK, LAN966x only: PTP over UDP: flagField bit 2 * (unicastFlag) - * VCAP_KF_L4_DPORT: W16, sparx5: is2 + * VCAP_KF_L4_DPORT: W16, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP/UDP destination port. Overloading for IP_7TUPLE: Non-TCP/UDP IP * frames: L4_DPORT = L3_IP_PROTO, LAN966x: TCP/UDP destination port - * VCAP_KF_L4_FIN: W1, sparx5: is2 + * VCAP_KF_L4_FIN: W1, sparx5: is2/es2 * TCP flag FIN, LAN966x: TCP flag FIN, and for PTP over UDP: messageType bit 1 - * VCAP_KF_L4_PAYLOAD: W64, sparx5: is2 + * VCAP_KF_L4_PAYLOAD: W64, sparx5: is2/es2 * Payload bytes after TCP/UDP header Overloading for IP_7TUPLE: Non TCP/UDP * frames: Payload bytes 0–7 after IP header. IPv4 options are not parsed so * payload is always taken 20 bytes after the start of the IPv4 header for non * TCP/UDP IPv4 frames - * VCAP_KF_L4_PSH: W1, sparx5: is2 + * VCAP_KF_L4_PSH: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag PSH, LAN966x: TCP: TCP flag PSH. PTP over UDP: flagField bit * 1 (twoStepFlag) - * VCAP_KF_L4_RNG: W16, sparx5: is2 + * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16, lan966x: is2 * Range checker bitmask (one for each range checker). Input into range checkers * is taken from classified results (VID, DSCP) and frame (SPORT, DPORT, ETYPE, * outer VID, inner VID) - * VCAP_KF_L4_RST: W1, sparx5: is2 + * VCAP_KF_L4_RST: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag RST , LAN966x: TCP: TCP flag RST. PTP over UDP: messageType * bit 3 - * VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2 + * VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if TCP sequence number is 0, LAN966x: Overlayed with PTP over UDP: * messageType bit 0 - * VCAP_KF_L4_SPORT: W16, sparx5: is2 + * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is2 * TCP/UDP source port - * VCAP_KF_L4_SPORT_EQ_DPORT_IS: W1, sparx5: is2 + * VCAP_KF_L4_SPORT_EQ_DPORT_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if UDP or TCP source port equals UDP or TCP destination port - * VCAP_KF_L4_SYN: W1, sparx5: is2 + * VCAP_KF_L4_SYN: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag SYN, LAN966x: TCP: TCP flag SYN. PTP over UDP: messageType * bit 2 - * VCAP_KF_L4_URG: W1, sparx5: is2 + * VCAP_KF_L4_URG: W1, sparx5: is2/es2, lan966x: is2 * Sparx5: TCP flag URG, LAN966x: TCP: TCP flag URG. PTP over UDP: flagField bit * 7 (reserved) - * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is2 + * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2, lan966x: is2 * Selects between entries relevant for first and second lookup. Set for first * lookup, cleared for second lookup. 
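For illustration, a minimal sketch of how a client consumes these keyfield and actionfield ids when building an IS2 rule. Only vcap_val_rule() and vcap_add_rule() are visible in this patch; the other helpers (vcap_alloc_rule(), vcap_rule_add_key_bit(), vcap_rule_add_key_u32(), vcap_rule_add_action_bit(), vcap_rule_add_action_u32(), vcap_free_rule()) plus the user, priority and chain id values are assumed here:

/* Hypothetical client rule: in the first IS2 lookup, copy TCP frames
 * with destination port 80 to CPU extraction queue 0.
 */
static int example_add_dport_rule(struct vcap_control *vctrl,
				  struct net_device *ndev, int chain_id)
{
	struct vcap_rule *rule;
	int err;

	rule = vcap_alloc_rule(vctrl, ndev, chain_id, VCAP_USER_VCAP_UTIL,
			       10 /* priority */, 0 /* auto-assign rule id */);
	if (IS_ERR(rule))
		return PTR_ERR(rule);

	/* Keyfields: first lookup, IPv4/IPv6 TCP, L4 destination port 80 */
	vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, VCAP_BIT_1);
	vcap_rule_add_key_bit(rule, VCAP_KF_TCP_IS, VCAP_BIT_1);
	vcap_rule_add_key_u32(rule, VCAP_KF_L4_DPORT, 80, 0xffff);

	/* Actionfields: copy the frame to CPU queue 0 */
	vcap_rule_add_action_bit(rule, VCAP_AF_CPU_COPY_ENA, VCAP_BIT_1);
	vcap_rule_add_action_u32(rule, VCAP_AF_CPU_QUEUE_NUM, 0);

	/* Pick a matching keyset, then encode and write the rule */
	err = vcap_val_rule(rule, ETH_P_ALL);
	if (!err)
		err = vcap_add_rule(rule);
	vcap_free_rule(rule);
	return err;
}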
- * VCAP_KF_LOOKUP_PAG: W8, sparx5: is2 + * VCAP_KF_LOOKUP_GEN_IDX: W12, sparx5: is0 + * Generic index - for chaining CLM instances + * VCAP_KF_LOOKUP_GEN_IDX_SEL: W2, sparx5: is0 + * Select the mode of the Generic Index + * VCAP_KF_LOOKUP_PAG: W8, sparx5: is2, lan966x: is2 * Classified Policy Association Group: chains rules from IS1/CLM to IS2 - * VCAP_KF_OAM_CCM_CNTS_EQ0: W1, sparx5: is2 + * VCAP_KF_OAM_CCM_CNTS_EQ0: W1, sparx5: is2/es2, lan966x: is2 * Dual-ended loss measurement counters in CCM frames are all zero - * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is2 + * VCAP_KF_OAM_MEL_FLAGS: W7, sparx5: is0, lan966x: is2 + * Encoding of MD level/MEG level (MEL) + * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is0/is2/es2, lan966x: is2 * Set if frame’s EtherType = 0x8902 - * VCAP_KF_TCP_IS: W1, sparx5: is2 + * VCAP_KF_PROT_ACTIVE: W1, sparx5: es2 + * Protection is active + * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2, lan966x: is2 * Set if frame is IPv4 TCP frame (IP protocol = 6) or IPv6 TCP frames (Next * header = 6) - * VCAP_KF_TCP_UDP_IS: W1, sparx5: is2 + * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2, lan966x: is2 * Set if frame is IPv4/IPv6 TCP or UDP frame (IP protocol/next header equals 6 * or 17) - * VCAP_KF_TYPE: sparx5 is2 W4, sparx5 is2 W2 + * VCAP_KF_TYPE: sparx5 is0 W2, sparx5 is0 W1, sparx5 is2 W4, sparx5 is2 W2, + * sparx5 es2 W3, lan966x: is2 * Keyset type id - set by the API + * VCAP_KF_HOST_MATCH: W1, lan966x: is2 + * The action from the SMAC_SIP4 or SMAC_SIP6 lookups. Used for IP source + * guarding. + * VCAP_KF_L2_FRM_TYPE: W4, lan966x: is2 + * Frame subtype for specific EtherTypes (MRP, DLR) + * VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2 + * Payload bytes 0-1 after the frame’s EtherType + * VCAP_KF_L2_PAYLOAD1: W8, lan966x: is2 + * Payload byte 4 after the frame’s EtherType. This is specifically for PTP + * frames. + * VCAP_KF_L2_PAYLOAD2: W3, lan966x: is2 + * Bits 7, 2, and 1 from payload byte 6 after the frame’s EtherType. This is + * specifically for PTP frames. 
+ * VCAP_KF_L2_LLC: W40, lan966x: is2 + * LLC header and data after up to two VLAN tags and the type/length field + * VCAP_KF_L3_FRAGMENT: W1, lan966x: is2 + * Set if IPv4 frame is fragmented + * VCAP_KF_L3_FRAG_OFS_GT0: W1, lan966x: is2 + * Set if IPv4 frame is fragmented and it is not the first fragment + * VCAP_KF_L2_SNAP: W40, lan966x: is2 + * SNAP header after LLC header (AA-AA-03) + * VCAP_KF_L4_1588_DOM: W8, lan966x: is2 + * PTP over UDP: domainNumber + * VCAP_KF_L4_1588_VER: W4, lan966x: is2 + * PTP over UDP: version + * VCAP_KF_OAM_MEPID: W16, lan966x: is2 + * CCM frame’s OAM MEP ID + * VCAP_KF_OAM_OPCODE: W8, lan966x: is2 + * Frame’s OAM opcode + * VCAP_KF_OAM_VER: W5, lan966x: is2 + * Frame’s OAM version + * VCAP_KF_OAM_FLAGS: W8, lan966x: is2 + * Frame’s OAM flags + * VCAP_KF_OAM_DETECTED: W1, lan966x: is2 + * This is missing in the datasheet, but present in the OAM keyset in XML */ /* Keyfield names */ enum vcap_key_field { VCAP_KF_NO_VALUE, /* initial value */ + VCAP_KF_8021BR_ECID_BASE, + VCAP_KF_8021BR_ECID_EXT, + VCAP_KF_8021BR_E_TAGGED, + VCAP_KF_8021BR_GRP, + VCAP_KF_8021BR_IGR_ECID_BASE, + VCAP_KF_8021BR_IGR_ECID_EXT, + VCAP_KF_8021Q_DEI0, + VCAP_KF_8021Q_DEI1, + VCAP_KF_8021Q_DEI2, VCAP_KF_8021Q_DEI_CLS, + VCAP_KF_8021Q_PCP0, + VCAP_KF_8021Q_PCP1, + VCAP_KF_8021Q_PCP2, VCAP_KF_8021Q_PCP_CLS, + VCAP_KF_8021Q_TPID0, + VCAP_KF_8021Q_TPID1, + VCAP_KF_8021Q_TPID2, + VCAP_KF_8021Q_VID0, + VCAP_KF_8021Q_VID1, + VCAP_KF_8021Q_VID2, VCAP_KF_8021Q_VID_CLS, VCAP_KF_8021Q_VLAN_TAGGED_IS, + VCAP_KF_8021Q_VLAN_TAGS, + VCAP_KF_ACL_GRP_ID, VCAP_KF_ARP_ADDR_SPACE_OK_IS, VCAP_KF_ARP_LEN_OK_IS, VCAP_KF_ARP_OPCODE, @@ -190,13 +351,24 @@ enum vcap_key_field { VCAP_KF_ARP_PROTO_SPACE_OK_IS, VCAP_KF_ARP_SENDER_MATCH_IS, VCAP_KF_ARP_TGT_MATCH_IS, + VCAP_KF_COSID_CLS, + VCAP_KF_DST_ENTRY, + VCAP_KF_ES0_ISDX_KEY_ENA, VCAP_KF_ETYPE, VCAP_KF_ETYPE_LEN_IS, + VCAP_KF_ETYPE_MPLS, + VCAP_KF_IF_EGR_PORT_MASK, + VCAP_KF_IF_EGR_PORT_MASK_RNG, + VCAP_KF_IF_IGR_PORT, VCAP_KF_IF_IGR_PORT_MASK, VCAP_KF_IF_IGR_PORT_MASK_L3, VCAP_KF_IF_IGR_PORT_MASK_RNG, VCAP_KF_IF_IGR_PORT_MASK_SEL, + VCAP_KF_IF_IGR_PORT_SEL, VCAP_KF_IP4_IS, + VCAP_KF_IP_MC_IS, + VCAP_KF_IP_PAYLOAD_5TUPLE, + VCAP_KF_IP_SNAP_IS, VCAP_KF_ISDX_CLS, VCAP_KF_ISDX_GT0_IS, VCAP_KF_L2_BC_IS, @@ -206,6 +378,9 @@ enum vcap_key_field { VCAP_KF_L2_PAYLOAD_ETYPE, VCAP_KF_L2_SMAC, VCAP_KF_L3_DIP_EQ_SIP_IS, + VCAP_KF_L3_DMAC_DIP_MATCH, + VCAP_KF_L3_DPL_CLS, + VCAP_KF_L3_DSCP, VCAP_KF_L3_DST_IS, VCAP_KF_L3_FRAGMENT_TYPE, VCAP_KF_L3_FRAG_INVLD_L4_LEN, @@ -217,6 +392,7 @@ enum vcap_key_field { VCAP_KF_L3_OPTIONS_IS, VCAP_KF_L3_PAYLOAD, VCAP_KF_L3_RT_IS, + VCAP_KF_L3_SMAC_SIP_MATCH, VCAP_KF_L3_TOS, VCAP_KF_L3_TTL_GT0, VCAP_KF_L4_ACK, @@ -232,95 +408,328 @@ enum vcap_key_field { VCAP_KF_L4_SYN, VCAP_KF_L4_URG, VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_LOOKUP_GEN_IDX, + VCAP_KF_LOOKUP_GEN_IDX_SEL, VCAP_KF_LOOKUP_PAG, + VCAP_KF_MIRROR_ENA, VCAP_KF_OAM_CCM_CNTS_EQ0, + VCAP_KF_OAM_MEL_FLAGS, VCAP_KF_OAM_Y1731_IS, + VCAP_KF_PROT_ACTIVE, VCAP_KF_TCP_IS, VCAP_KF_TCP_UDP_IS, VCAP_KF_TYPE, + VCAP_KF_HOST_MATCH, + VCAP_KF_L2_FRM_TYPE, + VCAP_KF_L2_PAYLOAD0, + VCAP_KF_L2_PAYLOAD1, + VCAP_KF_L2_PAYLOAD2, + VCAP_KF_L2_LLC, + VCAP_KF_L3_FRAGMENT, + VCAP_KF_L3_FRAG_OFS_GT0, + VCAP_KF_L2_SNAP, + VCAP_KF_L4_1588_DOM, + VCAP_KF_L4_1588_VER, + VCAP_KF_OAM_MEPID, + VCAP_KF_OAM_OPCODE, + VCAP_KF_OAM_VER, + VCAP_KF_OAM_FLAGS, + VCAP_KF_OAM_DETECTED, }; /* Actionset names with origin information */ enum vcap_actionfield_set { VCAP_AFS_NO_VALUE, /* initial value */ - VCAP_AFS_BASE_TYPE, /* 
sparx5 is2 X3 */
+ VCAP_AFS_BASE_TYPE, /* sparx5 is2 X3, sparx5 es2 X3, lan966x is2 X2 */
+ VCAP_AFS_CLASSIFICATION, /* sparx5 is0 X2 */
+ VCAP_AFS_CLASS_REDUCED, /* sparx5 is0 X1 */
+ VCAP_AFS_FULL, /* sparx5 is0 X3 */
+ VCAP_AFS_MLBS, /* sparx5 is0 X2 */
+ VCAP_AFS_MLBS_REDUCED, /* sparx5 is0 X1 */
+ VCAP_AFS_SMAC_SIP, /* lan966x is2 X1 */
 };

 /* List of actionfields with description
 *
- * VCAP_AF_CNT_ID: W12, sparx5: is2
+ * VCAP_AF_CLS_VID_SEL: W3, sparx5: is0
+ * Controls the classified VID: 0: VID_NONE: No action. 1: VID_ADD: New VID =
+ * old VID + VID_VAL. 2: VID_REPLACE: New VID = VID_VAL. 3: VID_FIRST_TAG: New
+ * VID = VID from frame's first tag (outer tag) if available, otherwise VID_VAL.
+ * 4: VID_SECOND_TAG: New VID = VID from frame's second tag (middle tag) if
+ * available, otherwise VID_VAL. 5: VID_THIRD_TAG: New VID = VID from frame's
+ * third tag (inner tag) if available, otherwise VID_VAL.
+ * VCAP_AF_CNT_ID: sparx5 is2 W12, sparx5 es2 W11
 * Counter ID, used per lookup to index the 4K frame counters (ANA_ACL:CNT_TBL).
 * Multiple VCAP IS2 entries can use the same counter.
- * VCAP_AF_CPU_COPY_ENA: W1, sparx5: is2
+ * VCAP_AF_COPY_PORT_NUM: W7, sparx5: es2
+ * QSYS port number when FWD_MODE is redirect or copy
+ * VCAP_AF_COPY_QUEUE_NUM: W16, sparx5: es2
+ * QSYS queue number when FWD_MODE is redirect or copy
+ * VCAP_AF_CPU_COPY_ENA: W1, sparx5: is2/es2, lan966x: is2
 * Setting this bit to 1 causes all frames that hit this action to be copied to
 * the CPU extraction queue specified in CPU_QUEUE_NUM.
- * VCAP_AF_CPU_QUEUE_NUM: W3, sparx5: is2
+ * VCAP_AF_CPU_QUEUE_NUM: W3, sparx5: is2/es2, lan966x: is2
 * CPU queue number. Used when CPU_COPY_ENA is set.
- * VCAP_AF_HIT_ME_ONCE: W1, sparx5: is2
+ * VCAP_AF_DEI_ENA: W1, sparx5: is0
+ * If set, use DEI_VAL as classified DEI value. Otherwise, DEI from basic
+ * classification is used
+ * VCAP_AF_DEI_VAL: W1, sparx5: is0
+ * See DEI_ENA
+ * VCAP_AF_DP_ENA: W1, sparx5: is0
+ * If set, use DP_VAL as classified drop precedence level. Otherwise, drop
+ * precedence level from basic classification is used.
+ * VCAP_AF_DP_VAL: W2, sparx5: is0
+ * See DP_ENA.
+ * VCAP_AF_DSCP_ENA: W1, sparx5: is0
+ * If set, use DSCP_VAL as classified DSCP value. Otherwise, DSCP value from
+ * basic classification is used.
+ * VCAP_AF_DSCP_VAL: W6, sparx5: is0
+ * See DSCP_ENA.
+ * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2
+ * Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP
+ * DMAC translation when entering or leaving a tunnel.
+ * VCAP_AF_FWD_MODE: W2, sparx5: es2
+ * Forward selector: 0: Forward. 1: Discard. 2: Redirect. 3: Copy.
+ * VCAP_AF_HIT_ME_ONCE: W1, sparx5: is2/es2, lan966x: is2
 * Setting this bit to 1 causes the first frame that hits this action where the
 * HIT_CNT counter is zero to be copied to the CPU extraction queue specified in
 * CPU_QUEUE_NUM. The HIT_CNT counter is then incremented and any frames that
 * hit this action later are not copied to the CPU. To re-enable the HIT_ME_ONCE
 * functionality, the HIT_CNT counter must be cleared.
- * VCAP_AF_IGNORE_PIPELINE_CTRL: W1, sparx5: is2
+ * VCAP_AF_IGNORE_PIPELINE_CTRL: W1, sparx5: is2/es2
 * Ignore ingress pipeline control. This enforces the use of the VCAP IS2 action
 * even when the pipeline control has terminated the frame before VCAP IS2.
- * VCAP_AF_INTR_ENA: W1, sparx5: is2 + * VCAP_AF_INTR_ENA: W1, sparx5: is2/es2 * If set, an interrupt is triggered when this rule is hit - * VCAP_AF_LRN_DIS: W1, sparx5: is2 + * VCAP_AF_ISDX_ADD_REPLACE_SEL: W1, sparx5: is0 + * Controls the classified ISDX. 0: New ISDX = old ISDX + ISDX_VAL. 1: New ISDX + * = ISDX_VAL. + * VCAP_AF_ISDX_VAL: W12, sparx5: is0 + * See isdx_add_replace_sel + * VCAP_AF_LRN_DIS: W1, sparx5: is2, lan966x: is2 * Setting this bit to 1 disables learning of frames hitting this action. - * VCAP_AF_MASK_MODE: W3, sparx5: is2 + * VCAP_AF_MAP_IDX: W9, sparx5: is0 + * Index for QoS mapping table lookup + * VCAP_AF_MAP_KEY: W3, sparx5: is0 + * Key type for QoS mapping table lookup. 0: DEI0, PCP0 (outer tag). 1: DEI1, + * PCP1 (middle tag). 2: DEI2, PCP2 (inner tag). 3: MPLS TC. 4: PCP0 (outer + * tag). 5: E-DEI, E-PCP (E-TAG). 6: DSCP if available, otherwise none. 7: DSCP + * if available, otherwise DEI0, PCP0 (outer tag) if available using MAP_IDX+8, + * otherwise none + * VCAP_AF_MAP_LOOKUP_SEL: W2, sparx5: is0 + * Selects which of the two QoS Mapping Table lookups that MAP_KEY and MAP_IDX + * are applied to. 0: No changes to the QoS Mapping Table lookup. 1: Update key + * type and index for QoS Mapping Table lookup #0. 2: Update key type and index + * for QoS Mapping Table lookup #1. 3: Reserved. + * VCAP_AF_MASK_MODE: W3, sparx5: is0/is2, lan966x is2 W2 * Controls the PORT_MASK use. Sparx5: 0: OR_DSTMASK, 1: AND_VLANMASK, 2: * REPLACE_PGID, 3: REPLACE_ALL, 4: REDIR_PGID, 5: OR_PGID_MASK, 6: VSTAX, 7: * Not applicable. LAN966X: 0: No action, 1: Permit/deny (AND), 2: Policy * forwarding (DMAC lookup), 3: Redirect. The CPU port is untouched by * MASK_MODE. - * VCAP_AF_MATCH_ID: W16, sparx5: is2 + * VCAP_AF_MATCH_ID: W16, sparx5: is0/is2 * Logical ID for the entry. The MATCH_ID is extracted together with the frame * if the frame is forwarded to the CPU (CPU_COPY_ENA). The result is placed in * IFH.CL_RSLT. - * VCAP_AF_MATCH_ID_MASK: W16, sparx5: is2 + * VCAP_AF_MATCH_ID_MASK: W16, sparx5: is0/is2 * Mask used by MATCH_ID. * VCAP_AF_MIRROR_PROBE: W2, sparx5: is2 * Mirroring performed according to configuration of a mirror probe. 0: No * mirroring. 1: Mirror probe 0. 2: Mirror probe 1. 3: Mirror probe 2 - * VCAP_AF_PIPELINE_FORCE_ENA: W1, sparx5: is2 + * VCAP_AF_MIRROR_PROBE_ID: W2, sparx5: es2 + * Signals a mirror probe to be placed in the IFH. Only possible when FWD_MODE + * is copy. 0: No mirroring. 1–3: Use mirror probe 0-2. + * VCAP_AF_NXT_IDX: W12, sparx5: is0 + * Index used as part of key (field G_IDX) in the next lookup. + * VCAP_AF_NXT_IDX_CTRL: W3, sparx5: is0 + * Controls the generation of the G_IDX used in the VCAP CLM next lookup + * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0 + * Bits set in this mask will override PAG_VAL from port profile. New PAG = + * (PAG (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK) + * VCAP_AF_PAG_VAL: W8, sparx5: is0 + * See PAG_OVERRIDE_MASK. + * VCAP_AF_PCP_ENA: W1, sparx5: is0 + * If set, use PCP_VAL as classified PCP value. Otherwise, PCP from basic + * classification is used. + * VCAP_AF_PCP_VAL: W3, sparx5: is0 + * See PCP_ENA. + * VCAP_AF_PIPELINE_FORCE_ENA: sparx5 is0 W2, sparx5 is2 W1 * If set, use PIPELINE_PT unconditionally and set PIPELINE_ACT = NONE if * PIPELINE_PT == NONE. Overrules previous settings of pipeline point. 
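The PAG_OVERRIDE_MASK description above gives an explicit formula; as code it is a plain masked merge. A minimal sketch, with the helper name invented for illustration:

/* New PAG per the VCAP_AF_PAG_OVERRIDE_MASK description: bits set in
 * the override mask come from PAG_VAL, the rest keep the port profile
 * PAG. E.g. pag 0x12, pag_val 0x0f, mask 0x0f yields 0x1f.
 */
static inline u8 vcap_example_new_pag(u8 pag, u8 pag_val, u8 override_mask)
{
	return (pag & ~override_mask) | (pag_val & override_mask);
}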
- * VCAP_AF_PIPELINE_PT: W5, sparx5: is2
+ * VCAP_AF_PIPELINE_PT: W5, sparx5: is0/is2
 * Pipeline point used if PIPELINE_FORCE_ENA is set
- * VCAP_AF_POLICE_ENA: W1, sparx5: is2
+ * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2, lan966x: is2
 * Setting this bit to 1 causes frames that hit this action to be policed by the
 * ACL policer specified in POLICE_IDX. Only applies to the first lookup.
- * VCAP_AF_POLICE_IDX: W6, sparx5: is2
+ * VCAP_AF_POLICE_IDX: W6, sparx5: is2/es2, lan966x: is2 W9
 * Selects VCAP policer used when policing frames (POLICE_ENA)
- * VCAP_AF_PORT_MASK: W68, sparx5: is2
+ * VCAP_AF_POLICE_REMARK: W1, sparx5: es2
+ * If set, frames exceeding policer rates are marked as yellow but not
+ * discarded.
+ * VCAP_AF_PORT_MASK: sparx5 is0 W65, sparx5 is2 W68, lan966x is2 W8
 * Port mask applied to the forwarding decision based on MASK_MODE.
+ * VCAP_AF_QOS_ENA: W1, sparx5: is0
+ * If set, use QOS_VAL as classified QoS class. Otherwise, QoS class from basic
+ * classification is used.
+ * VCAP_AF_QOS_VAL: W3, sparx5: is0
+ * See QOS_ENA.
 * VCAP_AF_RT_DIS: W1, sparx5: is2
 * If set, routing is disallowed. Only applies when IS_INNER_ACL is 0. See also
 * IGR_ACL_ENA, EGR_ACL_ENA, and RLEG_STAT_IDX.
+ * VCAP_AF_TYPE: W1, sparx5: is0
+ * Actionset type id - Set by the API
+ * VCAP_AF_VID_VAL: W13, sparx5: is0
+ * New VID Value
+ * VCAP_AF_MIRROR_ENA: W1, lan966x: is2
+ * Setting this bit to 1 causes frames to be mirrored to the mirror target
+ * port (ANA::MIRRORPORTS).
+ * VCAP_AF_POLICE_VCAP_ONLY: W1, lan966x: is2
+ * Disable policing from QoS and port policers. Only the VCAP policer
+ * selected by POLICE_IDX is active. Only applies to the second lookup.
+ * VCAP_AF_REW_OP: W16, lan966x: is2
+ * Rewriter operation command.
+ * VCAP_AF_ISDX_ENA: W1, lan966x: is2
+ * Setting this bit to 1 causes the classified ISDX to be set to the value of
+ * POLICE_IDX[8:0].
+ * VCAP_AF_ACL_ID: W6, lan966x: is2
+ * Logical ID for the entry. This ID is extracted together with the frame in
+ * the CPU extraction header. Only applicable to actions with CPU_COPY_ENA or
+ * HIT_ME_ONCE set.
+ * VCAP_AF_FWD_KILL_ENA: W1, lan966x: is2
+ * Setting this bit to 1 denies forwarding of the frame to any front port.
+ * The frame can still be copied to the CPU by other actions.
+ * VCAP_AF_HOST_MATCH: W1, lan966x: is2
+ * Used for IP source guarding. If set, it signals that the host is valid
+ * (for instance a valid combination of source MAC address and source IP
+ * address). HOST_MATCH is input to the IS2 keys.
*/ /* Actionfield names */ enum vcap_action_field { VCAP_AF_NO_VALUE, /* initial value */ + VCAP_AF_ACL_MAC, + VCAP_AF_ACL_RT_MODE, + VCAP_AF_CLS_VID_SEL, VCAP_AF_CNT_ID, + VCAP_AF_COPY_PORT_NUM, + VCAP_AF_COPY_QUEUE_NUM, + VCAP_AF_COSID_ENA, + VCAP_AF_COSID_VAL, VCAP_AF_CPU_COPY_ENA, + VCAP_AF_CPU_DIS, + VCAP_AF_CPU_ENA, + VCAP_AF_CPU_Q, VCAP_AF_CPU_QUEUE_NUM, + VCAP_AF_CUSTOM_ACE_ENA, + VCAP_AF_CUSTOM_ACE_OFFSET, + VCAP_AF_DEI_ENA, + VCAP_AF_DEI_VAL, + VCAP_AF_DLB_OFFSET, + VCAP_AF_DMAC_OFFSET_ENA, + VCAP_AF_DP_ENA, + VCAP_AF_DP_VAL, + VCAP_AF_DSCP_ENA, + VCAP_AF_DSCP_VAL, + VCAP_AF_EGR_ACL_ENA, + VCAP_AF_ES2_REW_CMD, + VCAP_AF_FWD_DIS, + VCAP_AF_FWD_MODE, + VCAP_AF_FWD_TYPE, + VCAP_AF_GVID_ADD_REPLACE_SEL, VCAP_AF_HIT_ME_ONCE, VCAP_AF_IGNORE_PIPELINE_CTRL, + VCAP_AF_IGR_ACL_ENA, + VCAP_AF_INJ_MASQ_ENA, + VCAP_AF_INJ_MASQ_LPORT, + VCAP_AF_INJ_MASQ_PORT, VCAP_AF_INTR_ENA, + VCAP_AF_ISDX_ADD_REPLACE_SEL, + VCAP_AF_ISDX_VAL, + VCAP_AF_IS_INNER_ACL, + VCAP_AF_L3_MAC_UPDATE_DIS, + VCAP_AF_LOG_MSG_INTERVAL, + VCAP_AF_LPM_AFFIX_ENA, + VCAP_AF_LPM_AFFIX_VAL, + VCAP_AF_LPORT_ENA, VCAP_AF_LRN_DIS, + VCAP_AF_MAP_IDX, + VCAP_AF_MAP_KEY, + VCAP_AF_MAP_LOOKUP_SEL, VCAP_AF_MASK_MODE, VCAP_AF_MATCH_ID, VCAP_AF_MATCH_ID_MASK, + VCAP_AF_MIP_SEL, VCAP_AF_MIRROR_PROBE, + VCAP_AF_MIRROR_PROBE_ID, + VCAP_AF_MPLS_IP_CTRL_ENA, + VCAP_AF_MPLS_MEP_ENA, + VCAP_AF_MPLS_MIP_ENA, + VCAP_AF_MPLS_OAM_FLAVOR, + VCAP_AF_MPLS_OAM_TYPE, + VCAP_AF_NUM_VLD_LABELS, + VCAP_AF_NXT_IDX, + VCAP_AF_NXT_IDX_CTRL, + VCAP_AF_NXT_KEY_TYPE, + VCAP_AF_NXT_NORMALIZE, + VCAP_AF_NXT_NORM_W16_OFFSET, + VCAP_AF_NXT_NORM_W32_OFFSET, + VCAP_AF_NXT_OFFSET_FROM_TYPE, + VCAP_AF_NXT_TYPE_AFTER_OFFSET, + VCAP_AF_OAM_IP_BFD_ENA, + VCAP_AF_OAM_TWAMP_ENA, + VCAP_AF_OAM_Y1731_SEL, + VCAP_AF_PAG_OVERRIDE_MASK, + VCAP_AF_PAG_VAL, + VCAP_AF_PCP_ENA, + VCAP_AF_PCP_VAL, + VCAP_AF_PIPELINE_ACT_SEL, VCAP_AF_PIPELINE_FORCE_ENA, VCAP_AF_PIPELINE_PT, + VCAP_AF_PIPELINE_PT_REDUCED, VCAP_AF_POLICE_ENA, VCAP_AF_POLICE_IDX, + VCAP_AF_POLICE_REMARK, VCAP_AF_PORT_MASK, + VCAP_AF_PTP_MASTER_SEL, + VCAP_AF_QOS_ENA, + VCAP_AF_QOS_VAL, + VCAP_AF_REW_CMD, + VCAP_AF_RLEG_DMAC_CHK_DIS, + VCAP_AF_RLEG_STAT_IDX, + VCAP_AF_RSDX_ENA, + VCAP_AF_RSDX_VAL, + VCAP_AF_RSVD_LBL_VAL, VCAP_AF_RT_DIS, + VCAP_AF_RT_SEL, + VCAP_AF_S2_KEY_SEL_ENA, + VCAP_AF_S2_KEY_SEL_IDX, + VCAP_AF_SAM_SEQ_ENA, + VCAP_AF_SIP_IDX, + VCAP_AF_SWAP_MAC_ENA, + VCAP_AF_TCP_UDP_DPORT, + VCAP_AF_TCP_UDP_ENA, + VCAP_AF_TCP_UDP_SPORT, + VCAP_AF_TC_ENA, + VCAP_AF_TC_LABEL, + VCAP_AF_TPID_SEL, + VCAP_AF_TTL_DECR_DIS, + VCAP_AF_TTL_ENA, + VCAP_AF_TTL_LABEL, + VCAP_AF_TTL_UPDATE_ENA, + VCAP_AF_TYPE, + VCAP_AF_VID_VAL, + VCAP_AF_VLAN_POP_CNT, + VCAP_AF_VLAN_POP_CNT_ENA, + VCAP_AF_VLAN_PUSH_CNT, + VCAP_AF_VLAN_PUSH_CNT_ENA, + VCAP_AF_VLAN_WAS_TAGGED, + VCAP_AF_MIRROR_ENA, + VCAP_AF_POLICE_VCAP_ONLY, + VCAP_AF_REW_OP, + VCAP_AF_ISDX_ENA, + VCAP_AF_ACL_ID, + VCAP_AF_FWD_KILL_ENA, + VCAP_AF_HOST_MATCH, }; #endif /* __VCAP_AG_API__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api_kunit.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api_kunit.h deleted file mode 100644 index e538ca725687..000000000000 --- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api_kunit.h +++ /dev/null @@ -1,643 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause */ -/* Copyright (C) 2022 Microchip Technology Inc. and its subsidiaries. 
- * Microchip VCAP API interface for kunit testing - * This is a different interface, to be able to include different VCAPs - */ - -/* Use same include guard as the official API to be able to override it */ -#ifndef __VCAP_AG_API__ -#define __VCAP_AG_API__ - -enum vcap_type { - VCAP_TYPE_ES2, - VCAP_TYPE_IS0, - VCAP_TYPE_IS2, - VCAP_TYPE_MAX -}; - -/* Keyfieldset names with origin information */ -enum vcap_keyfield_set { - VCAP_KFS_NO_VALUE, /* initial value */ - VCAP_KFS_ARP, /* sparx5 is2 X6, sparx5 es2 X6 */ - VCAP_KFS_ETAG, /* sparx5 is0 X2 */ - VCAP_KFS_IP4_OTHER, /* sparx5 is2 X6, sparx5 es2 X6 */ - VCAP_KFS_IP4_TCP_UDP, /* sparx5 is2 X6, sparx5 es2 X6 */ - VCAP_KFS_IP4_VID, /* sparx5 es2 X3 */ - VCAP_KFS_IP6_STD, /* sparx5 is2 X6 */ - VCAP_KFS_IP6_VID, /* sparx5 is2 X6, sparx5 es2 X6 */ - VCAP_KFS_IP_7TUPLE, /* sparx5 is2 X12, sparx5 es2 X12 */ - VCAP_KFS_LL_FULL, /* sparx5 is0 X6 */ - VCAP_KFS_MAC_ETYPE, /* sparx5 is2 X6, sparx5 es2 X6 */ - VCAP_KFS_MLL, /* sparx5 is0 X3 */ - VCAP_KFS_NORMAL, /* sparx5 is0 X6 */ - VCAP_KFS_NORMAL_5TUPLE_IP4, /* sparx5 is0 X6 */ - VCAP_KFS_NORMAL_7TUPLE, /* sparx5 is0 X12 */ - VCAP_KFS_PURE_5TUPLE_IP4, /* sparx5 is0 X3 */ - VCAP_KFS_TRI_VID, /* sparx5 is0 X2 */ -}; - -/* List of keyfields with description - * - * Keys ending in _IS are booleans derived from frame data - * Keys ending in _CLS are classified frame data - * - * VCAP_KF_8021BR_ECID_BASE: W12, sparx5: is0 - * Used by 802.1BR Bridge Port Extension in an E-Tag - * VCAP_KF_8021BR_ECID_EXT: W8, sparx5: is0 - * Used by 802.1BR Bridge Port Extension in an E-Tag - * VCAP_KF_8021BR_E_TAGGED: W1, sparx5: is0 - * Set for frames containing an E-TAG (802.1BR Ethertype 893f) - * VCAP_KF_8021BR_GRP: W2, sparx5: is0 - * E-Tag group bits in 802.1BR Bridge Port Extension - * VCAP_KF_8021BR_IGR_ECID_BASE: W12, sparx5: is0 - * Used by 802.1BR Bridge Port Extension in an E-Tag - * VCAP_KF_8021BR_IGR_ECID_EXT: W8, sparx5: is0 - * Used by 802.1BR Bridge Port Extension in an E-Tag - * VCAP_KF_8021Q_DEI0: W1, sparx5: is0 - * First DEI in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_DEI1: W1, sparx5: is0 - * Second DEI in multiple vlan tags (inner tag) - * VCAP_KF_8021Q_DEI2: W1, sparx5: is0 - * Third DEI in multiple vlan tags (not always available) - * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2 - * Classified DEI - * VCAP_KF_8021Q_PCP0: W3, sparx5: is0 - * First PCP in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_PCP1: W3, sparx5: is0 - * Second PCP in multiple vlan tags (inner tag) - * VCAP_KF_8021Q_PCP2: W3, sparx5: is0 - * Third PCP in multiple vlan tags (not always available) - * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2 - * Classified PCP - * VCAP_KF_8021Q_TPID0: W3, sparx5: is0 - * First TPIC in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_TPID1: W3, sparx5: is0 - * Second TPID in multiple vlan tags (inner tag) - * VCAP_KF_8021Q_TPID2: W3, sparx5: is0 - * Third TPID in multiple vlan tags (not always available) - * VCAP_KF_8021Q_VID0: W12, sparx5: is0 - * First VID in multiple vlan tags (outer tag or default port tag) - * VCAP_KF_8021Q_VID1: W12, sparx5: is0 - * Second VID in multiple vlan tags (inner tag) - * VCAP_KF_8021Q_VID2: W12, sparx5: is0 - * Third VID in multiple vlan tags (not always available) - * VCAP_KF_8021Q_VID_CLS: W13, sparx5: is2/es2 - * Classified VID - * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2 - * Sparx5: Set if frame was received with a VLAN tag, LAN966x: Set if frame has - * one or more Q-tags. 
Independent of port VLAN awareness - * VCAP_KF_8021Q_VLAN_TAGS: W3, sparx5: is0 - * Number of VLAN tags in frame: 0: Untagged, 1: Single tagged, 3: Double - * tagged, 7: Triple tagged - * VCAP_KF_ACL_GRP_ID: W8, sparx5: es2 - * Used in interface map table - * VCAP_KF_ARP_ADDR_SPACE_OK_IS: W1, sparx5: is2/es2 - * Set if hardware address is Ethernet - * VCAP_KF_ARP_LEN_OK_IS: W1, sparx5: is2/es2 - * Set if hardware address length = 6 (Ethernet) and IP address length = 4 (IP). - * VCAP_KF_ARP_OPCODE: W2, sparx5: is2/es2 - * ARP opcode - * VCAP_KF_ARP_OPCODE_UNKNOWN_IS: W1, sparx5: is2/es2 - * Set if not one of the codes defined in VCAP_KF_ARP_OPCODE - * VCAP_KF_ARP_PROTO_SPACE_OK_IS: W1, sparx5: is2/es2 - * Set if protocol address space is 0x0800 - * VCAP_KF_ARP_SENDER_MATCH_IS: W1, sparx5: is2/es2 - * Sender Hardware Address = SMAC (ARP) - * VCAP_KF_ARP_TGT_MATCH_IS: W1, sparx5: is2/es2 - * Target Hardware Address = SMAC (RARP) - * VCAP_KF_COSID_CLS: W3, sparx5: es2 - * Class of service - * VCAP_KF_DST_ENTRY: W1, sparx5: is0 - * Selects whether the frame’s destination or source information is used for - * fields L2_SMAC and L3_IP4_SIP - * VCAP_KF_ES0_ISDX_KEY_ENA: W1, sparx5: es2 - * The value taken from the IFH .FWD.ES0_ISDX_KEY_ENA - * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2 - * Ethernet type - * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2 - * Set if frame has EtherType >= 0x600 - * VCAP_KF_ETYPE_MPLS: W2, sparx5: is0 - * Type of MPLS Ethertype (or not) - * VCAP_KF_IF_EGR_PORT_MASK: W32, sparx5: es2 - * Egress port mask, one bit per port - * VCAP_KF_IF_EGR_PORT_MASK_RNG: W3, sparx5: es2 - * Select which 32 port group is available in IF_EGR_PORT (or virtual ports or - * CPU queue) - * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9 - * Sparx5: Logical ingress port number retrieved from - * ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber - * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65 - * Ingress port mask, one bit per port/erleg - * VCAP_KF_IF_IGR_PORT_MASK_L3: W1, sparx5: is2 - * If set, IF_IGR_PORT_MASK, IF_IGR_PORT_MASK_RNG, and IF_IGR_PORT_MASK_SEL are - * used to specify L3 interfaces - * VCAP_KF_IF_IGR_PORT_MASK_RNG: W4, sparx5: is2 - * Range selector for IF_IGR_PORT_MASK. Specifies which group of 32 ports are - * available in IF_IGR_PORT_MASK - * VCAP_KF_IF_IGR_PORT_MASK_SEL: W2, sparx5: is0/is2 - * Mode selector for IF_IGR_PORT_MASK, applicable when IF_IGR_PORT_MASK_L3 == 0. - * Mapping: 0: DEFAULT 1: LOOPBACK 2: MASQUERADE 3: CPU_VD - * VCAP_KF_IF_IGR_PORT_SEL: W1, sparx5: es2 - * Selector for IF_IGR_PORT: physical port number or ERLEG - * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2 - * Set if frame has EtherType = 0x800 and IP version = 4 - * VCAP_KF_IP_MC_IS: W1, sparx5: is0 - * Set if frame is IPv4 frame and frame’s destination MAC address is an IPv4 - * multicast address (0x01005E0 /25). Set if frame is IPv6 frame and frame’s - * destination MAC address is an IPv6 multicast address (0x3333/16). - * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0 - * Payload bytes after IP header - * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0 - * Set if frame is IPv4, IPv6, or SNAP frame - * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es2 - * Classified ISDX - * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es2 - * Set if classified ISDX > 0 - * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2 - * Set if frame’s destination MAC address is the broadcast address - * (FF-FF-FF-FF-FF-FF). 
- * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2 - * Destination MAC address - * VCAP_KF_L2_FWD_IS: W1, sparx5: is2 - * Set if the frame is allowed to be forwarded to front ports - * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2 - * Set if frame’s destination MAC address is a multicast address (bit 40 = 1). - * VCAP_KF_L2_PAYLOAD_ETYPE: W64, sparx5: is2/es2 - * Byte 0-7 of L2 payload after Type/Len field and overloading for OAM - * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2 - * Source MAC address - * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2/es2 - * Set if Src IP matches Dst IP address - * VCAP_KF_L3_DMAC_DIP_MATCH: W1, sparx5: is2 - * Match found in DIP security lookup in ANA_L3 - * VCAP_KF_L3_DPL_CLS: W1, sparx5: es2 - * The frames drop precedence level - * VCAP_KF_L3_DSCP: W6, sparx5: is0 - * Frame’s DSCP value - * VCAP_KF_L3_DST_IS: W1, sparx5: is2 - * Set if lookup is done for egress router leg - * VCAP_KF_L3_FRAGMENT_TYPE: W2, sparx5: is0/is2/es2 - * L3 Fragmentation type (none, initial, suspicious, valid follow up) - * VCAP_KF_L3_FRAG_INVLD_L4_LEN: W1, sparx5: is0/is2 - * Set if frame's L4 length is less than ANA_CL:COMMON:CLM_FRAGMENT_CFG.L4_MIN_L - * EN - * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2 - * Destination IPv4 Address - * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2 - * Source IPv4 Address - * VCAP_KF_L3_IP6_DIP: W128, sparx5: is0/is2/es2 - * Sparx5: Full IPv6 DIP, LAN966x: Either Full IPv6 DIP or a subset depending on - * frame type - * VCAP_KF_L3_IP6_SIP: W128, sparx5: is0/is2/es2 - * Sparx5: Full IPv6 SIP, LAN966x: Either Full IPv6 SIP or a subset depending on - * frame type - * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2 - * IPv4 frames: IP protocol. IPv6 frames: Next header, same as for IPV4 - * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2 - * Set if IPv4 frame contains options (IP len > 5) - * VCAP_KF_L3_PAYLOAD: sparx5 is2 W96, sparx5 is2 W40, sparx5 es2 W96 - * Sparx5: Payload bytes after IP header. IPv4: IPv4 options are not parsed so - * payload is always taken 20 bytes after the start of the IPv4 header, LAN966x: - * Bytes 0-6 after IP header - * VCAP_KF_L3_RT_IS: W1, sparx5: is2/es2 - * Set if frame has hit a router leg - * VCAP_KF_L3_SMAC_SIP_MATCH: W1, sparx5: is2 - * Match found in SIP security lookup in ANA_L3 - * VCAP_KF_L3_TOS: W8, sparx5: is2/es2 - * Sparx5: Frame's IPv4/IPv6 DSCP and ECN fields, LAN966x: IP TOS field - * VCAP_KF_L3_TTL_GT0: W1, sparx5: is2/es2 - * Set if IPv4 TTL / IPv6 hop limit is greater than 0 - * VCAP_KF_L4_ACK: W1, sparx5: is2/es2 - * Sparx5 and LAN966x: TCP flag ACK, LAN966x only: PTP over UDP: flagField bit 2 - * (unicastFlag) - * VCAP_KF_L4_DPORT: W16, sparx5: is2/es2 - * Sparx5: TCP/UDP destination port. Overloading for IP_7TUPLE: Non-TCP/UDP IP - * frames: L4_DPORT = L3_IP_PROTO, LAN966x: TCP/UDP destination port - * VCAP_KF_L4_FIN: W1, sparx5: is2/es2 - * TCP flag FIN, LAN966x: TCP flag FIN, and for PTP over UDP: messageType bit 1 - * VCAP_KF_L4_PAYLOAD: W64, sparx5: is2/es2 - * Payload bytes after TCP/UDP header Overloading for IP_7TUPLE: Non TCP/UDP - * frames: Payload bytes 0–7 after IP header. IPv4 options are not parsed so - * payload is always taken 20 bytes after the start of the IPv4 header for non - * TCP/UDP IPv4 frames - * VCAP_KF_L4_PSH: W1, sparx5: is2/es2 - * Sparx5: TCP flag PSH, LAN966x: TCP: TCP flag PSH. PTP over UDP: flagField bit - * 1 (twoStepFlag) - * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16 - * Range checker bitmask (one for each range checker). 
Input into range checkers - * is taken from classified results (VID, DSCP) and frame (SPORT, DPORT, ETYPE, - * outer VID, inner VID) - * VCAP_KF_L4_RST: W1, sparx5: is2/es2 - * Sparx5: TCP flag RST , LAN966x: TCP: TCP flag RST. PTP over UDP: messageType - * bit 3 - * VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2/es2 - * Set if TCP sequence number is 0, LAN966x: Overlayed with PTP over UDP: - * messageType bit 0 - * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2 - * TCP/UDP source port - * VCAP_KF_L4_SPORT_EQ_DPORT_IS: W1, sparx5: is2/es2 - * Set if UDP or TCP source port equals UDP or TCP destination port - * VCAP_KF_L4_SYN: W1, sparx5: is2/es2 - * Sparx5: TCP flag SYN, LAN966x: TCP: TCP flag SYN. PTP over UDP: messageType - * bit 2 - * VCAP_KF_L4_URG: W1, sparx5: is2/es2 - * Sparx5: TCP flag URG, LAN966x: TCP: TCP flag URG. PTP over UDP: flagField bit - * 7 (reserved) - * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2 - * Selects between entries relevant for first and second lookup. Set for first - * lookup, cleared for second lookup. - * VCAP_KF_LOOKUP_GEN_IDX: W12, sparx5: is0 - * Generic index - for chaining CLM instances - * VCAP_KF_LOOKUP_GEN_IDX_SEL: W2, sparx5: is0 - * Select the mode of the Generic Index - * VCAP_KF_LOOKUP_PAG: W8, sparx5: is2 - * Classified Policy Association Group: chains rules from IS1/CLM to IS2 - * VCAP_KF_OAM_CCM_CNTS_EQ0: W1, sparx5: is2/es2 - * Dual-ended loss measurement counters in CCM frames are all zero - * VCAP_KF_OAM_MEL_FLAGS: W7, sparx5: is0 - * Encoding of MD level/MEG level (MEL) - * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is0/is2/es2 - * Set if frame’s EtherType = 0x8902 - * VCAP_KF_PROT_ACTIVE: W1, sparx5: es2 - * Protection is active - * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2 - * Set if frame is IPv4 TCP frame (IP protocol = 6) or IPv6 TCP frames (Next - * header = 6) - * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2 - * Set if frame is IPv4/IPv6 TCP or UDP frame (IP protocol/next header equals 6 - * or 17) - * VCAP_KF_TYPE: sparx5 is0 W2, sparx5 is0 W1, sparx5 is2 W4, sparx5 is2 W2, - * sparx5 es2 W3 - * Keyset type id - set by the API - */ - -/* Keyfield names */ -enum vcap_key_field { - VCAP_KF_NO_VALUE, /* initial value */ - VCAP_KF_8021BR_ECID_BASE, - VCAP_KF_8021BR_ECID_EXT, - VCAP_KF_8021BR_E_TAGGED, - VCAP_KF_8021BR_GRP, - VCAP_KF_8021BR_IGR_ECID_BASE, - VCAP_KF_8021BR_IGR_ECID_EXT, - VCAP_KF_8021Q_DEI0, - VCAP_KF_8021Q_DEI1, - VCAP_KF_8021Q_DEI2, - VCAP_KF_8021Q_DEI_CLS, - VCAP_KF_8021Q_PCP0, - VCAP_KF_8021Q_PCP1, - VCAP_KF_8021Q_PCP2, - VCAP_KF_8021Q_PCP_CLS, - VCAP_KF_8021Q_TPID0, - VCAP_KF_8021Q_TPID1, - VCAP_KF_8021Q_TPID2, - VCAP_KF_8021Q_VID0, - VCAP_KF_8021Q_VID1, - VCAP_KF_8021Q_VID2, - VCAP_KF_8021Q_VID_CLS, - VCAP_KF_8021Q_VLAN_TAGGED_IS, - VCAP_KF_8021Q_VLAN_TAGS, - VCAP_KF_ACL_GRP_ID, - VCAP_KF_ARP_ADDR_SPACE_OK_IS, - VCAP_KF_ARP_LEN_OK_IS, - VCAP_KF_ARP_OPCODE, - VCAP_KF_ARP_OPCODE_UNKNOWN_IS, - VCAP_KF_ARP_PROTO_SPACE_OK_IS, - VCAP_KF_ARP_SENDER_MATCH_IS, - VCAP_KF_ARP_TGT_MATCH_IS, - VCAP_KF_COSID_CLS, - VCAP_KF_DST_ENTRY, - VCAP_KF_ES0_ISDX_KEY_ENA, - VCAP_KF_ETYPE, - VCAP_KF_ETYPE_LEN_IS, - VCAP_KF_ETYPE_MPLS, - VCAP_KF_IF_EGR_PORT_MASK, - VCAP_KF_IF_EGR_PORT_MASK_RNG, - VCAP_KF_IF_IGR_PORT, - VCAP_KF_IF_IGR_PORT_MASK, - VCAP_KF_IF_IGR_PORT_MASK_L3, - VCAP_KF_IF_IGR_PORT_MASK_RNG, - VCAP_KF_IF_IGR_PORT_MASK_SEL, - VCAP_KF_IF_IGR_PORT_SEL, - VCAP_KF_IP4_IS, - VCAP_KF_IP_MC_IS, - VCAP_KF_IP_PAYLOAD_5TUPLE, - VCAP_KF_IP_SNAP_IS, - VCAP_KF_ISDX_CLS, - VCAP_KF_ISDX_GT0_IS, - VCAP_KF_L2_BC_IS, - VCAP_KF_L2_DMAC, - VCAP_KF_L2_FWD_IS, - 
VCAP_KF_L2_MC_IS, - VCAP_KF_L2_PAYLOAD_ETYPE, - VCAP_KF_L2_SMAC, - VCAP_KF_L3_DIP_EQ_SIP_IS, - VCAP_KF_L3_DMAC_DIP_MATCH, - VCAP_KF_L3_DPL_CLS, - VCAP_KF_L3_DSCP, - VCAP_KF_L3_DST_IS, - VCAP_KF_L3_FRAGMENT_TYPE, - VCAP_KF_L3_FRAG_INVLD_L4_LEN, - VCAP_KF_L3_IP4_DIP, - VCAP_KF_L3_IP4_SIP, - VCAP_KF_L3_IP6_DIP, - VCAP_KF_L3_IP6_SIP, - VCAP_KF_L3_IP_PROTO, - VCAP_KF_L3_OPTIONS_IS, - VCAP_KF_L3_PAYLOAD, - VCAP_KF_L3_RT_IS, - VCAP_KF_L3_SMAC_SIP_MATCH, - VCAP_KF_L3_TOS, - VCAP_KF_L3_TTL_GT0, - VCAP_KF_L4_ACK, - VCAP_KF_L4_DPORT, - VCAP_KF_L4_FIN, - VCAP_KF_L4_PAYLOAD, - VCAP_KF_L4_PSH, - VCAP_KF_L4_RNG, - VCAP_KF_L4_RST, - VCAP_KF_L4_SEQUENCE_EQ0_IS, - VCAP_KF_L4_SPORT, - VCAP_KF_L4_SPORT_EQ_DPORT_IS, - VCAP_KF_L4_SYN, - VCAP_KF_L4_URG, - VCAP_KF_LOOKUP_FIRST_IS, - VCAP_KF_LOOKUP_GEN_IDX, - VCAP_KF_LOOKUP_GEN_IDX_SEL, - VCAP_KF_LOOKUP_PAG, - VCAP_KF_MIRROR_ENA, - VCAP_KF_OAM_CCM_CNTS_EQ0, - VCAP_KF_OAM_MEL_FLAGS, - VCAP_KF_OAM_Y1731_IS, - VCAP_KF_PROT_ACTIVE, - VCAP_KF_TCP_IS, - VCAP_KF_TCP_UDP_IS, - VCAP_KF_TYPE, -}; - -/* Actionset names with origin information */ -enum vcap_actionfield_set { - VCAP_AFS_NO_VALUE, /* initial value */ - VCAP_AFS_BASE_TYPE, /* sparx5 is2 X3, sparx5 es2 X3 */ - VCAP_AFS_CLASSIFICATION, /* sparx5 is0 X2 */ - VCAP_AFS_CLASS_REDUCED, /* sparx5 is0 X1 */ - VCAP_AFS_FULL, /* sparx5 is0 X3 */ - VCAP_AFS_MLBS, /* sparx5 is0 X2 */ - VCAP_AFS_MLBS_REDUCED, /* sparx5 is0 X1 */ -}; - -/* List of actionfields with description - * - * VCAP_AF_CLS_VID_SEL: W3, sparx5: is0 - * Controls the classified VID: 0: VID_NONE: No action. 1: VID_ADD: New VID = - * old VID + VID_VAL. 2: VID_REPLACE: New VID = VID_VAL. 3: VID_FIRST_TAG: New - * VID = VID from frame's first tag (outer tag) if available, otherwise VID_VAL. - * 4: VID_SECOND_TAG: New VID = VID from frame's second tag (middle tag) if - * available, otherwise VID_VAL. 5: VID_THIRD_TAG: New VID = VID from frame's - * third tag (inner tag) if available, otherwise VID_VAL. - * VCAP_AF_CNT_ID: sparx5 is2 W12, sparx5 es2 W11 - * Counter ID, used per lookup to index the 4K frame counters (ANA_ACL:CNT_TBL). - * Multiple VCAP IS2 entries can use the same counter. - * VCAP_AF_COPY_PORT_NUM: W7, sparx5: es2 - * QSYS port number when FWD_MODE is redirect or copy - * VCAP_AF_COPY_QUEUE_NUM: W16, sparx5: es2 - * QSYS queue number when FWD_MODE is redirect or copy - * VCAP_AF_CPU_COPY_ENA: W1, sparx5: is2/es2 - * Setting this bit to 1 causes all frames that hit this action to be copied to - * the CPU extraction queue specified in CPU_QUEUE_NUM. - * VCAP_AF_CPU_QUEUE_NUM: W3, sparx5: is2/es2 - * CPU queue number. Used when CPU_COPY_ENA is set. - * VCAP_AF_DEI_ENA: W1, sparx5: is0 - * If set, use DEI_VAL as classified DEI value. Otherwise, DEI from basic - * classification is used - * VCAP_AF_DEI_VAL: W1, sparx5: is0 - * See DEI_ENA - * VCAP_AF_DP_ENA: W1, sparx5: is0 - * If set, use DP_VAL as classified drop precedence level. Otherwise, drop - * precedence level from basic classification is used. - * VCAP_AF_DP_VAL: W2, sparx5: is0 - * See DP_ENA. - * VCAP_AF_DSCP_ENA: W1, sparx5: is0 - * If set, use DSCP_VAL as classified DSCP value. Otherwise, DSCP value from - * basic classification is used. - * VCAP_AF_DSCP_VAL: W6, sparx5: is0 - * See DSCP_ENA. - * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2 - * Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP - * DMAC translation when entering or leaving a tunnel. - * VCAP_AF_FWD_MODE: W2, sparx5: es2 - * Forward selector: 0: Forward. 1: Discard. 2: Redirect. 3: Copy. 
- * VCAP_AF_HIT_ME_ONCE: W1, sparx5: is2/es2 - * Setting this bit to 1 causes the first frame that hits this action where the - * HIT_CNT counter is zero to be copied to the CPU extraction queue specified in - * CPU_QUEUE_NUM. The HIT_CNT counter is then incremented and any frames that - * hit this action later are not copied to the CPU. To re-enable the HIT_ME_ONCE - * functionality, the HIT_CNT counter must be cleared. - * VCAP_AF_IGNORE_PIPELINE_CTRL: W1, sparx5: is2/es2 - * Ignore ingress pipeline control. This enforces the use of the VCAP IS2 action - * even when the pipeline control has terminated the frame before VCAP IS2. - * VCAP_AF_INTR_ENA: W1, sparx5: is2/es2 - * If set, an interrupt is triggered when this rule is hit - * VCAP_AF_ISDX_ADD_REPLACE_SEL: W1, sparx5: is0 - * Controls the classified ISDX. 0: New ISDX = old ISDX + ISDX_VAL. 1: New ISDX - * = ISDX_VAL. - * VCAP_AF_ISDX_VAL: W12, sparx5: is0 - * See isdx_add_replace_sel - * VCAP_AF_LRN_DIS: W1, sparx5: is2 - * Setting this bit to 1 disables learning of frames hitting this action. - * VCAP_AF_MAP_IDX: W9, sparx5: is0 - * Index for QoS mapping table lookup - * VCAP_AF_MAP_KEY: W3, sparx5: is0 - * Key type for QoS mapping table lookup. 0: DEI0, PCP0 (outer tag). 1: DEI1, - * PCP1 (middle tag). 2: DEI2, PCP2 (inner tag). 3: MPLS TC. 4: PCP0 (outer - * tag). 5: E-DEI, E-PCP (E-TAG). 6: DSCP if available, otherwise none. 7: DSCP - * if available, otherwise DEI0, PCP0 (outer tag) if available using MAP_IDX+8, - * otherwise none - * VCAP_AF_MAP_LOOKUP_SEL: W2, sparx5: is0 - * Selects which of the two QoS Mapping Table lookups that MAP_KEY and MAP_IDX - * are applied to. 0: No changes to the QoS Mapping Table lookup. 1: Update key - * type and index for QoS Mapping Table lookup #0. 2: Update key type and index - * for QoS Mapping Table lookup #1. 3: Reserved. - * VCAP_AF_MASK_MODE: W3, sparx5: is0/is2 - * Controls the PORT_MASK use. Sparx5: 0: OR_DSTMASK, 1: AND_VLANMASK, 2: - * REPLACE_PGID, 3: REPLACE_ALL, 4: REDIR_PGID, 5: OR_PGID_MASK, 6: VSTAX, 7: - * Not applicable. LAN966X: 0: No action, 1: Permit/deny (AND), 2: Policy - * forwarding (DMAC lookup), 3: Redirect. The CPU port is untouched by - * MASK_MODE. - * VCAP_AF_MATCH_ID: W16, sparx5: is0/is2 - * Logical ID for the entry. The MATCH_ID is extracted together with the frame - * if the frame is forwarded to the CPU (CPU_COPY_ENA). The result is placed in - * IFH.CL_RSLT. - * VCAP_AF_MATCH_ID_MASK: W16, sparx5: is0/is2 - * Mask used by MATCH_ID. - * VCAP_AF_MIRROR_PROBE: W2, sparx5: is2 - * Mirroring performed according to configuration of a mirror probe. 0: No - * mirroring. 1: Mirror probe 0. 2: Mirror probe 1. 3: Mirror probe 2 - * VCAP_AF_MIRROR_PROBE_ID: W2, sparx5: es2 - * Signals a mirror probe to be placed in the IFH. Only possible when FWD_MODE - * is copy. 0: No mirroring. 1–3: Use mirror probe 0-2. - * VCAP_AF_NXT_IDX: W12, sparx5: is0 - * Index used as part of key (field G_IDX) in the next lookup. - * VCAP_AF_NXT_IDX_CTRL: W3, sparx5: is0 - * Controls the generation of the G_IDX used in the VCAP CLM next lookup - * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0 - * Bits set in this mask will override PAG_VAL from port profile. New PAG = - * (PAG (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK) - * VCAP_AF_PAG_VAL: W8, sparx5: is0 - * See PAG_OVERRIDE_MASK. - * VCAP_AF_PCP_ENA: W1, sparx5: is0 - * If set, use PCP_VAL as classified PCP value. Otherwise, PCP from basic - * classification is used. - * VCAP_AF_PCP_VAL: W3, sparx5: is0 - * See PCP_ENA. 
- * VCAP_AF_PIPELINE_FORCE_ENA: sparx5 is0 W2, sparx5 is2 W1 - * If set, use PIPELINE_PT unconditionally and set PIPELINE_ACT = NONE if - * PIPELINE_PT == NONE. Overrules previous settings of pipeline point. - * VCAP_AF_PIPELINE_PT: W5, sparx5: is0/is2 - * Pipeline point used if PIPELINE_FORCE_ENA is set - * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2 - * Setting this bit to 1 causes frames that hit this action to be policed by the - * ACL policer specified in POLICE_IDX. Only applies to the first lookup. - * VCAP_AF_POLICE_IDX: W6, sparx5: is2/es2 - * Selects VCAP policer used when policing frames (POLICE_ENA) - * VCAP_AF_POLICE_REMARK: W1, sparx5: es2 - * If set, frames exceeding policer rates are marked as yellow but not - * discarded. - * VCAP_AF_PORT_MASK: sparx5 is0 W65, sparx5 is2 W68 - * Port mask applied to the forwarding decision based on MASK_MODE. - * VCAP_AF_QOS_ENA: W1, sparx5: is0 - * If set, use QOS_VAL as classified QoS class. Otherwise, QoS class from basic - * classification is used. - * VCAP_AF_QOS_VAL: W3, sparx5: is0 - * See QOS_ENA. - * VCAP_AF_RT_DIS: W1, sparx5: is2 - * If set, routing is disallowed. Only applies when IS_INNER_ACL is 0. See also - * IGR_ACL_ENA, EGR_ACL_ENA, and RLEG_STAT_IDX. - * VCAP_AF_TYPE: W1, sparx5: is0 - * Actionset type id - Set by the API - * VCAP_AF_VID_VAL: W13, sparx5: is0 - * New VID Value - */ - -/* Actionfield names */ -enum vcap_action_field { - VCAP_AF_NO_VALUE, /* initial value */ - VCAP_AF_ACL_MAC, - VCAP_AF_ACL_RT_MODE, - VCAP_AF_CLS_VID_SEL, - VCAP_AF_CNT_ID, - VCAP_AF_COPY_PORT_NUM, - VCAP_AF_COPY_QUEUE_NUM, - VCAP_AF_COSID_ENA, - VCAP_AF_COSID_VAL, - VCAP_AF_CPU_COPY_ENA, - VCAP_AF_CPU_DIS, - VCAP_AF_CPU_ENA, - VCAP_AF_CPU_Q, - VCAP_AF_CPU_QUEUE_NUM, - VCAP_AF_CUSTOM_ACE_ENA, - VCAP_AF_CUSTOM_ACE_OFFSET, - VCAP_AF_DEI_ENA, - VCAP_AF_DEI_VAL, - VCAP_AF_DLB_OFFSET, - VCAP_AF_DMAC_OFFSET_ENA, - VCAP_AF_DP_ENA, - VCAP_AF_DP_VAL, - VCAP_AF_DSCP_ENA, - VCAP_AF_DSCP_VAL, - VCAP_AF_EGR_ACL_ENA, - VCAP_AF_ES2_REW_CMD, - VCAP_AF_FWD_DIS, - VCAP_AF_FWD_MODE, - VCAP_AF_FWD_TYPE, - VCAP_AF_GVID_ADD_REPLACE_SEL, - VCAP_AF_HIT_ME_ONCE, - VCAP_AF_IGNORE_PIPELINE_CTRL, - VCAP_AF_IGR_ACL_ENA, - VCAP_AF_INJ_MASQ_ENA, - VCAP_AF_INJ_MASQ_LPORT, - VCAP_AF_INJ_MASQ_PORT, - VCAP_AF_INTR_ENA, - VCAP_AF_ISDX_ADD_REPLACE_SEL, - VCAP_AF_ISDX_VAL, - VCAP_AF_IS_INNER_ACL, - VCAP_AF_L3_MAC_UPDATE_DIS, - VCAP_AF_LOG_MSG_INTERVAL, - VCAP_AF_LPM_AFFIX_ENA, - VCAP_AF_LPM_AFFIX_VAL, - VCAP_AF_LPORT_ENA, - VCAP_AF_LRN_DIS, - VCAP_AF_MAP_IDX, - VCAP_AF_MAP_KEY, - VCAP_AF_MAP_LOOKUP_SEL, - VCAP_AF_MASK_MODE, - VCAP_AF_MATCH_ID, - VCAP_AF_MATCH_ID_MASK, - VCAP_AF_MIP_SEL, - VCAP_AF_MIRROR_PROBE, - VCAP_AF_MIRROR_PROBE_ID, - VCAP_AF_MPLS_IP_CTRL_ENA, - VCAP_AF_MPLS_MEP_ENA, - VCAP_AF_MPLS_MIP_ENA, - VCAP_AF_MPLS_OAM_FLAVOR, - VCAP_AF_MPLS_OAM_TYPE, - VCAP_AF_NUM_VLD_LABELS, - VCAP_AF_NXT_IDX, - VCAP_AF_NXT_IDX_CTRL, - VCAP_AF_NXT_KEY_TYPE, - VCAP_AF_NXT_NORMALIZE, - VCAP_AF_NXT_NORM_W16_OFFSET, - VCAP_AF_NXT_NORM_W32_OFFSET, - VCAP_AF_NXT_OFFSET_FROM_TYPE, - VCAP_AF_NXT_TYPE_AFTER_OFFSET, - VCAP_AF_OAM_IP_BFD_ENA, - VCAP_AF_OAM_TWAMP_ENA, - VCAP_AF_OAM_Y1731_SEL, - VCAP_AF_PAG_OVERRIDE_MASK, - VCAP_AF_PAG_VAL, - VCAP_AF_PCP_ENA, - VCAP_AF_PCP_VAL, - VCAP_AF_PIPELINE_ACT_SEL, - VCAP_AF_PIPELINE_FORCE_ENA, - VCAP_AF_PIPELINE_PT, - VCAP_AF_PIPELINE_PT_REDUCED, - VCAP_AF_POLICE_ENA, - VCAP_AF_POLICE_IDX, - VCAP_AF_POLICE_REMARK, - VCAP_AF_PORT_MASK, - VCAP_AF_PTP_MASTER_SEL, - VCAP_AF_QOS_ENA, - VCAP_AF_QOS_VAL, - VCAP_AF_REW_CMD, - VCAP_AF_RLEG_DMAC_CHK_DIS, - 
VCAP_AF_RLEG_STAT_IDX, - VCAP_AF_RSDX_ENA, - VCAP_AF_RSDX_VAL, - VCAP_AF_RSVD_LBL_VAL, - VCAP_AF_RT_DIS, - VCAP_AF_RT_SEL, - VCAP_AF_S2_KEY_SEL_ENA, - VCAP_AF_S2_KEY_SEL_IDX, - VCAP_AF_SAM_SEQ_ENA, - VCAP_AF_SIP_IDX, - VCAP_AF_SWAP_MAC_ENA, - VCAP_AF_TCP_UDP_DPORT, - VCAP_AF_TCP_UDP_ENA, - VCAP_AF_TCP_UDP_SPORT, - VCAP_AF_TC_ENA, - VCAP_AF_TC_LABEL, - VCAP_AF_TPID_SEL, - VCAP_AF_TTL_DECR_DIS, - VCAP_AF_TTL_ENA, - VCAP_AF_TTL_LABEL, - VCAP_AF_TTL_UPDATE_ENA, - VCAP_AF_TYPE, - VCAP_AF_VID_VAL, - VCAP_AF_VLAN_POP_CNT, - VCAP_AF_VLAN_POP_CNT_ENA, - VCAP_AF_VLAN_PUSH_CNT, - VCAP_AF_VLAN_PUSH_CNT_ENA, - VCAP_AF_VLAN_WAS_TAGGED, -}; - -#endif /* __VCAP_AG_API__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index d12c8ec40fe2..f2435d7ab515 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -6,27 +6,28 @@ #include <linux/types.h> -#include "vcap_api.h" -#include "vcap_api_client.h" - -#define to_intrule(rule) container_of((rule), struct vcap_rule_internal, data) - -/* Private VCAP API rule data */ -struct vcap_rule_internal { - struct vcap_rule data; /* provided by the client */ - struct list_head list; /* for insertion in the vcap admin list of rules */ - struct vcap_admin *admin; /* vcap hw instance */ - struct net_device *ndev; /* the interface that the rule applies to */ - struct vcap_control *vctrl; /* the client control */ - u32 sort_key; /* defines the position in the VCAP */ - int keyset_sw; /* subwords in a keyset */ - int actionset_sw; /* subwords in an actionset */ - int keyset_sw_regs; /* registers in a subword in an keyset */ - int actionset_sw_regs; /* registers in a subword in an actionset */ - int size; /* the size of the rule: max(entry, action) */ - u32 addr; /* address in the VCAP at insertion */ - u32 counter_id; /* counter id (if a dedicated counter is available) */ - struct vcap_counter counter; /* last read counter value */ +#include "vcap_api_private.h" + +static int keyfield_size_table[] = { + [VCAP_FIELD_BIT] = sizeof(struct vcap_u1_key), + [VCAP_FIELD_U32] = sizeof(struct vcap_u32_key), + [VCAP_FIELD_U48] = sizeof(struct vcap_u48_key), + [VCAP_FIELD_U56] = sizeof(struct vcap_u56_key), + [VCAP_FIELD_U64] = sizeof(struct vcap_u64_key), + [VCAP_FIELD_U72] = sizeof(struct vcap_u72_key), + [VCAP_FIELD_U112] = sizeof(struct vcap_u112_key), + [VCAP_FIELD_U128] = sizeof(struct vcap_u128_key), +}; + +static int actionfield_size_table[] = { + [VCAP_FIELD_BIT] = sizeof(struct vcap_u1_action), + [VCAP_FIELD_U32] = sizeof(struct vcap_u32_action), + [VCAP_FIELD_U48] = sizeof(struct vcap_u48_action), + [VCAP_FIELD_U56] = sizeof(struct vcap_u56_action), + [VCAP_FIELD_U64] = sizeof(struct vcap_u64_action), + [VCAP_FIELD_U72] = sizeof(struct vcap_u72_action), + [VCAP_FIELD_U112] = sizeof(struct vcap_u112_action), + [VCAP_FIELD_U128] = sizeof(struct vcap_u128_action), }; /* Moving a rule in the VCAP address space */ @@ -36,16 +37,6 @@ struct vcap_rule_move { int count; /* blocksize of addresses to move */ }; -/* Bit iterator for the VCAP cache streams */ -struct vcap_stream_iter { - u32 offset; /* bit offset from the stream start */ - u32 sw_width; /* subword width in bits */ - u32 regs_per_sw; /* registers per subword */ - u32 reg_idx; /* current register index */ - u32 reg_bitpos; /* bit offset in current register */ - const struct vcap_typegroup *tg; /* current typegroup */ -}; - /* Stores the filter cookie that enabled the port */ struct vcap_enabled_port { 
struct list_head list; /* for insertion in enabled ports list */ @@ -53,8 +44,8 @@ struct vcap_enabled_port { unsigned long cookie; /* filter that enabled the port */ }; -static void vcap_iter_set(struct vcap_stream_iter *itr, int sw_width, - const struct vcap_typegroup *tg, u32 offset) +void vcap_iter_set(struct vcap_stream_iter *itr, int sw_width, + const struct vcap_typegroup *tg, u32 offset) { memset(itr, 0, sizeof(*itr)); itr->offset = offset; @@ -74,7 +65,7 @@ static void vcap_iter_skip_tg(struct vcap_stream_iter *itr) } } -static void vcap_iter_update(struct vcap_stream_iter *itr) +void vcap_iter_update(struct vcap_stream_iter *itr) { int sw_idx, sw_bitpos; @@ -86,15 +77,15 @@ static void vcap_iter_update(struct vcap_stream_iter *itr) itr->reg_bitpos = sw_bitpos % 32; } -static void vcap_iter_init(struct vcap_stream_iter *itr, int sw_width, - const struct vcap_typegroup *tg, u32 offset) +void vcap_iter_init(struct vcap_stream_iter *itr, int sw_width, + const struct vcap_typegroup *tg, u32 offset) { vcap_iter_set(itr, sw_width, tg, offset); vcap_iter_skip_tg(itr); vcap_iter_update(itr); } -static void vcap_iter_next(struct vcap_stream_iter *itr) +void vcap_iter_next(struct vcap_stream_iter *itr) { itr->offset++; vcap_iter_skip_tg(itr); @@ -179,9 +170,9 @@ static void vcap_encode_typegroups(u32 *stream, int sw_width, } /* Return the list of keyfields for the keyset */ -static const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl, - enum vcap_type vt, - enum vcap_keyfield_set keyset) +const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl, + enum vcap_type vt, + enum vcap_keyfield_set keyset) { /* Check that the keyset exists in the vcap keyset list */ if (keyset >= vctrl->vcaps[vt].keyfield_set_size) @@ -190,9 +181,9 @@ static const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl, } /* Return the keyset information for the keyset */ -static const struct vcap_set *vcap_keyfieldset(struct vcap_control *vctrl, - enum vcap_type vt, - enum vcap_keyfield_set keyset) +const struct vcap_set *vcap_keyfieldset(struct vcap_control *vctrl, + enum vcap_type vt, + enum vcap_keyfield_set keyset) { const struct vcap_set *kset; @@ -204,9 +195,10 @@ static const struct vcap_set *vcap_keyfieldset(struct vcap_control *vctrl, return NULL; return kset; } +EXPORT_SYMBOL_GPL(vcap_keyfieldset); /* Return the typegroup table for the matching keyset (using subword size) */ -static const struct vcap_typegroup * +const struct vcap_typegroup * vcap_keyfield_typegroup(struct vcap_control *vctrl, enum vcap_type vt, enum vcap_keyfield_set keyset) { @@ -219,8 +211,8 @@ vcap_keyfield_typegroup(struct vcap_control *vctrl, } /* Return the number of keyfields in the keyset */ -static int vcap_keyfield_count(struct vcap_control *vctrl, - enum vcap_type vt, enum vcap_keyfield_set keyset) +int vcap_keyfield_count(struct vcap_control *vctrl, + enum vcap_type vt, enum vcap_keyfield_set keyset) { /* Check that the keyset exists in the vcap keyset list */ if (keyset >= vctrl->vcaps[vt].keyfield_set_size) @@ -347,7 +339,7 @@ static int vcap_encode_rule_keyset(struct vcap_rule_internal *ri) } /* Return the list of actionfields for the actionset */ -static const struct vcap_field * +const struct vcap_field * vcap_actionfields(struct vcap_control *vctrl, enum vcap_type vt, enum vcap_actionfield_set actionset) { @@ -357,7 +349,7 @@ vcap_actionfields(struct vcap_control *vctrl, return vctrl->vcaps[vt].actionfield_set_map[actionset]; } -static const struct vcap_set * +const struct vcap_set * 
vcap_actionfieldset(struct vcap_control *vctrl, enum vcap_type vt, enum vcap_actionfield_set actionset) { @@ -373,7 +365,7 @@ vcap_actionfieldset(struct vcap_control *vctrl, } /* Return the typegroup table for the matching actionset (using subword size) */ -static const struct vcap_typegroup * +const struct vcap_typegroup * vcap_actionfield_typegroup(struct vcap_control *vctrl, enum vcap_type vt, enum vcap_actionfield_set actionset) { @@ -386,9 +378,9 @@ vcap_actionfield_typegroup(struct vcap_control *vctrl, } /* Return the number of actionfields in the actionset */ -static int vcap_actionfield_count(struct vcap_control *vctrl, - enum vcap_type vt, - enum vcap_actionfield_set actionset) +int vcap_actionfield_count(struct vcap_control *vctrl, + enum vcap_type vt, + enum vcap_actionfield_set actionset) { /* Check that the actionset exists in the vcap actionset list */ if (actionset >= vctrl->vcaps[vt].actionfield_set_size) @@ -515,7 +507,7 @@ static int vcap_encode_rule(struct vcap_rule_internal *ri) return 0; } -static int vcap_api_check(struct vcap_control *ctrl) +int vcap_api_check(struct vcap_control *ctrl) { if (!ctrl) { pr_err("%s:%d: vcap control is missing\n", __func__, __LINE__); @@ -533,7 +525,7 @@ static int vcap_api_check(struct vcap_control *ctrl) return 0; } -static void vcap_erase_cache(struct vcap_rule_internal *ri) +void vcap_erase_cache(struct vcap_rule_internal *ri) { ri->vctrl->ops->cache_erase(ri->admin); } @@ -609,7 +601,7 @@ int vcap_lookup_rule_by_cookie(struct vcap_control *vctrl, u64 cookie) EXPORT_SYMBOL_GPL(vcap_lookup_rule_by_cookie); /* Make a shallow copy of the rule without the fields */ -static struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri) +struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri) { struct vcap_rule_internal *duprule; @@ -813,9 +805,16 @@ const char *vcap_keyfield_name(struct vcap_control *vctrl, } EXPORT_SYMBOL_GPL(vcap_keyfield_name); +/* map actionset id to a string with the actionset name */ +const char *vcap_actionset_name(struct vcap_control *vctrl, + enum vcap_actionfield_set actionset) +{ + return vctrl->stats->actionfield_set_names[actionset]; +} + /* map action field id to a string with the action name */ -static const char *vcap_actionfield_name(struct vcap_control *vctrl, - enum vcap_action_field action) +const char *vcap_actionfield_name(struct vcap_control *vctrl, + enum vcap_action_field action) { return vctrl->stats->actionfield_names[action]; } @@ -848,8 +847,8 @@ vcap_find_keyset_keyfield(struct vcap_control *vctrl, } /* Match a list of keys against the keysets available in a vcap type */ -static bool vcap_rule_find_keysets(struct vcap_rule_internal *ri, - struct vcap_keyset_list *matches) +static bool _vcap_rule_find_keysets(struct vcap_rule_internal *ri, + struct vcap_keyset_list *matches) { const struct vcap_client_keyfield *ckf; int keyset, found, keycount, map_size; @@ -888,6 +887,16 @@ static bool vcap_rule_find_keysets(struct vcap_rule_internal *ri, return matches->cnt > 0; } +/* Match a list of keys against the keysets available in a vcap type */ +bool vcap_rule_find_keysets(struct vcap_rule *rule, + struct vcap_keyset_list *matches) +{ + struct vcap_rule_internal *ri = to_intrule(rule); + + return _vcap_rule_find_keysets(ri, matches); +} +EXPORT_SYMBOL_GPL(vcap_rule_find_keysets); + /* Validate a rule with respect to available port keys */ int vcap_val_rule(struct vcap_rule *rule, u16 l3_proto) { @@ -912,7 +921,7 @@ int vcap_val_rule(struct vcap_rule *rule, u16 l3_proto) 
matches.max = ARRAY_SIZE(keysets); if (ri->data.keyset == VCAP_KFS_NO_VALUE) { /* Iterate over rule keyfields and select keysets that fits */ - if (!vcap_rule_find_keysets(ri, &matches)) { + if (!_vcap_rule_find_keysets(ri, &matches)) { ri->data.exterr = VCAP_ERR_NO_KEYSET_MATCH; return -EINVAL; } @@ -976,17 +985,12 @@ static u32 vcap_next_rule_addr(u32 addr, struct vcap_rule_internal *ri) /* Assign a unique rule id and autogenerate one if id == 0 */ static u32 vcap_set_rule_id(struct vcap_rule_internal *ri) { - u32 next_id; - if (ri->data.id != 0) return ri->data.id; - next_id = ri->vctrl->rule_id + 1; - - for (next_id = ri->vctrl->rule_id + 1; next_id < ~0; ++next_id) { + for (u32 next_id = 1; next_id < ~0; ++next_id) { if (!vcap_lookup_rule(ri->vctrl, next_id)) { ri->data.id = next_id; - ri->vctrl->rule_id = next_id; break; } } @@ -1078,6 +1082,7 @@ int vcap_add_rule(struct vcap_rule *rule) if (ret) return ret; /* Insert the new rule in the list of vcap rules */ + mutex_lock(&ri->admin->lock); ret = vcap_insert_rule(ri, &move); if (ret < 0) { pr_err("%s:%d: could not insert rule in vcap list: %d\n", @@ -1096,6 +1101,7 @@ int vcap_add_rule(struct vcap_rule *rule) if (ret) pr_err("%s:%d: rule write error: %d\n", __func__, __LINE__, ret); out: + mutex_unlock(&ri->admin->lock); return ret; } EXPORT_SYMBOL_GPL(vcap_add_rule); @@ -1245,13 +1251,15 @@ int vcap_del_rule(struct vcap_control *vctrl, struct net_device *ndev, u32 id) gap = vcap_fill_rule_gap(ri); /* Delete the rule from the list of rules and the cache */ + mutex_lock(&admin->lock); list_del(&ri->list); vctrl->ops->init(ndev, admin, admin->last_used_addr, ri->size + gap); kfree(ri); + mutex_unlock(&admin->lock); - /* Update the last used address */ + /* Update the last used address, set to default when no rules */ if (list_empty(&admin->rules)) { - admin->last_used_addr = admin->last_valid_addr; + admin->last_used_addr = admin->last_valid_addr + 1; } else { elem = list_last_entry(&admin->rules, struct vcap_rule_internal, list); @@ -1270,6 +1278,8 @@ int vcap_del_rules(struct vcap_control *vctrl, struct vcap_admin *admin) if (ret) return ret; + + mutex_lock(&admin->lock); list_for_each_entry_safe(ri, next_ri, &admin->rules, list) { vctrl->ops->init(ri->ndev, admin, ri->addr, ri->size); list_del(&ri->list); @@ -1282,11 +1292,25 @@ int vcap_del_rules(struct vcap_control *vctrl, struct vcap_admin *admin) list_del(&eport->list); kfree(eport); } + mutex_unlock(&admin->lock); return 0; } EXPORT_SYMBOL_GPL(vcap_del_rules); +/* Find a client key field in a rule */ +static struct vcap_client_keyfield * +vcap_find_keyfield(struct vcap_rule *rule, enum vcap_key_field key) +{ + struct vcap_rule_internal *ri = to_intrule(rule); + struct vcap_client_keyfield *ckf; + + list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) + if (ckf->ctrl.key == key) + return ckf; + return NULL; +} + /* Find information on a key field in a rule */ const struct vcap_field *vcap_lookup_keyfield(struct vcap_rule *rule, enum vcap_key_field key) @@ -1305,12 +1329,67 @@ const struct vcap_field *vcap_lookup_keyfield(struct vcap_rule *rule, } EXPORT_SYMBOL_GPL(vcap_lookup_keyfield); +/* Copy data from src to dst but reverse the data in chunks of 32bits. + * For example if src is 00:11:22:33:44:55 where 55 is LSB the dst will + * have the value 22:33:44:55:00:11. 
+ */ +static void vcap_copy_to_w32be(u8 *dst, u8 *src, int size) +{ + for (int idx = 0; idx < size; ++idx) { + int first_byte_index = 0; + int nidx; + + first_byte_index = size - (((idx >> 2) + 1) << 2); + if (first_byte_index < 0) + first_byte_index = 0; + nidx = idx + first_byte_index - (idx & ~0x3); + dst[nidx] = src[idx]; + } +} + static void vcap_copy_from_client_keyfield(struct vcap_rule *rule, struct vcap_client_keyfield *field, struct vcap_client_keyfield_data *data) { - /* This will be expanded later to handle different vcap memory layouts */ - memcpy(&field->data, data, sizeof(field->data)); + struct vcap_rule_internal *ri = to_intrule(rule); + int size; + + if (!ri->admin->w32be) { + memcpy(&field->data, data, sizeof(field->data)); + return; + } + + size = keyfield_size_table[field->ctrl.type] / 2; + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + case VCAP_FIELD_U32: + memcpy(&field->data, data, sizeof(field->data)); + break; + case VCAP_FIELD_U48: + vcap_copy_to_w32be(field->data.u48.value, data->u48.value, size); + vcap_copy_to_w32be(field->data.u48.mask, data->u48.mask, size); + break; + case VCAP_FIELD_U56: + vcap_copy_to_w32be(field->data.u56.value, data->u56.value, size); + vcap_copy_to_w32be(field->data.u56.mask, data->u56.mask, size); + break; + case VCAP_FIELD_U64: + vcap_copy_to_w32be(field->data.u64.value, data->u64.value, size); + vcap_copy_to_w32be(field->data.u64.mask, data->u64.mask, size); + break; + case VCAP_FIELD_U72: + vcap_copy_to_w32be(field->data.u72.value, data->u72.value, size); + vcap_copy_to_w32be(field->data.u72.mask, data->u72.mask, size); + break; + case VCAP_FIELD_U112: + vcap_copy_to_w32be(field->data.u112.value, data->u112.value, size); + vcap_copy_to_w32be(field->data.u112.mask, data->u112.mask, size); + break; + case VCAP_FIELD_U128: + vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size); + vcap_copy_to_w32be(field->data.u128.mask, data->u128.mask, size); + break; + }; } /* Check if the keyfield is already in the rule */ @@ -1451,12 +1530,56 @@ int vcap_rule_add_key_u128(struct vcap_rule *rule, enum vcap_key_field key, } EXPORT_SYMBOL_GPL(vcap_rule_add_key_u128); +/* Find a client action field in a rule */ +static struct vcap_client_actionfield * +vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act) +{ + struct vcap_rule_internal *ri = (struct vcap_rule_internal *)rule; + struct vcap_client_actionfield *caf; + + list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) + if (caf->ctrl.action == act) + return caf; + return NULL; +} + static void vcap_copy_from_client_actionfield(struct vcap_rule *rule, struct vcap_client_actionfield *field, struct vcap_client_actionfield_data *data) { - /* This will be expanded later to handle different vcap memory layouts */ - memcpy(&field->data, data, sizeof(field->data)); + struct vcap_rule_internal *ri = to_intrule(rule); + int size; + + if (!ri->admin->w32be) { + memcpy(&field->data, data, sizeof(field->data)); + return; + } + + size = actionfield_size_table[field->ctrl.type]; + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + case VCAP_FIELD_U32: + memcpy(&field->data, data, sizeof(field->data)); + break; + case VCAP_FIELD_U48: + vcap_copy_to_w32be(field->data.u48.value, data->u48.value, size); + break; + case VCAP_FIELD_U56: + vcap_copy_to_w32be(field->data.u56.value, data->u56.value, size); + break; + case VCAP_FIELD_U64: + vcap_copy_to_w32be(field->data.u64.value, data->u64.value, size); + break; + case VCAP_FIELD_U72: + 
vcap_copy_to_w32be(field->data.u72.value, data->u72.value, size); + break; + case VCAP_FIELD_U112: + vcap_copy_to_w32be(field->data.u112.value, data->u112.value, size); + break; + case VCAP_FIELD_U128: + vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size); + break; + }; } /* Check if the actionfield is already in the rule */ @@ -1711,10 +1834,13 @@ int vcap_enable_lookups(struct vcap_control *vctrl, struct net_device *ndev, if (chain_id) { if (vcap_is_enabled(admin, ndev, cookie)) return -EADDRINUSE; + mutex_lock(&admin->lock); vcap_enable(admin, ndev, cookie); } else { + mutex_lock(&admin->lock); vcap_disable(admin, ndev, cookie); } + mutex_unlock(&admin->lock); return 0; } @@ -1786,6 +1912,148 @@ int vcap_rule_get_counter(struct vcap_rule *rule, struct vcap_counter *ctr) } EXPORT_SYMBOL_GPL(vcap_rule_get_counter); +static int vcap_rule_mod_key(struct vcap_rule *rule, + enum vcap_key_field key, + enum vcap_field_type ftype, + struct vcap_client_keyfield_data *data) +{ + struct vcap_client_keyfield *field; + + field = vcap_find_keyfield(rule, key); + if (!field) + return vcap_rule_add_key(rule, key, ftype, data); + vcap_copy_from_client_keyfield(rule, field, data); + return 0; +} + +/* Modify a 32 bit key field with value and mask in the rule */ +int vcap_rule_mod_key_u32(struct vcap_rule *rule, enum vcap_key_field key, + u32 value, u32 mask) +{ + struct vcap_client_keyfield_data data; + + data.u32.value = value; + data.u32.mask = mask; + return vcap_rule_mod_key(rule, key, VCAP_FIELD_U32, &data); +} +EXPORT_SYMBOL_GPL(vcap_rule_mod_key_u32); + +static int vcap_rule_mod_action(struct vcap_rule *rule, + enum vcap_action_field action, + enum vcap_field_type ftype, + struct vcap_client_actionfield_data *data) +{ + struct vcap_client_actionfield *field; + + field = vcap_find_actionfield(rule, action); + if (!field) + return vcap_rule_add_action(rule, action, ftype, data); + vcap_copy_from_client_actionfield(rule, field, data); + return 0; +} + +/* Modify a 32 bit action field with value in the rule */ +int vcap_rule_mod_action_u32(struct vcap_rule *rule, + enum vcap_action_field action, + u32 value) +{ + struct vcap_client_actionfield_data data; + + data.u32.value = value; + return vcap_rule_mod_action(rule, action, VCAP_FIELD_U32, &data); +} +EXPORT_SYMBOL_GPL(vcap_rule_mod_action_u32); + +/* Drop keys in a keylist and any keys that are not supported by the keyset */ +int vcap_filter_rule_keys(struct vcap_rule *rule, + enum vcap_key_field keylist[], int length, + bool drop_unsupported) +{ + struct vcap_rule_internal *ri = to_intrule(rule); + struct vcap_client_keyfield *ckf, *next_ckf; + const struct vcap_field *fields; + enum vcap_key_field key; + int err = 0; + int idx; + + if (length > 0) { + err = -EEXIST; + list_for_each_entry_safe(ckf, next_ckf, + &ri->data.keyfields, ctrl.list) { + key = ckf->ctrl.key; + for (idx = 0; idx < length; ++idx) + if (key == keylist[idx]) { + list_del(&ckf->ctrl.list); + kfree(ckf); + idx++; + err = 0; + } + } + } + if (drop_unsupported) { + err = -EEXIST; + fields = vcap_keyfields(ri->vctrl, ri->admin->vtype, + rule->keyset); + if (!fields) + return err; + list_for_each_entry_safe(ckf, next_ckf, + &ri->data.keyfields, ctrl.list) { + key = ckf->ctrl.key; + if (fields[key].width == 0) { + list_del(&ckf->ctrl.list); + kfree(ckf); + err = 0; + } + } + } + return err; +} +EXPORT_SYMBOL_GPL(vcap_filter_rule_keys); + +/* Make a full copy of an existing rule with a new rule id */ +struct vcap_rule *vcap_copy_rule(struct vcap_rule *erule) +{ + struct 
vcap_rule_internal *ri = to_intrule(erule); + struct vcap_client_actionfield *caf; + struct vcap_client_keyfield *ckf; + struct vcap_rule *rule; + int err; + + err = vcap_api_check(ri->vctrl); + if (err) + return ERR_PTR(err); + + rule = vcap_alloc_rule(ri->vctrl, ri->ndev, ri->data.vcap_chain_id, + ri->data.user, ri->data.priority, 0); + if (IS_ERR(rule)) + return rule; + + list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) { + /* Add a key duplicate in the new rule */ + err = vcap_rule_add_key(rule, + ckf->ctrl.key, + ckf->ctrl.type, + &ckf->data); + if (err) + goto err; + } + + list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) { + /* Add an action duplicate in the new rule */ + err = vcap_rule_add_action(rule, + caf->ctrl.action, + caf->ctrl.type, + &caf->data); + if (err) + goto err; + } + return rule; +err: + vcap_free_rule(rule); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(vcap_copy_rule); + #ifdef CONFIG_VCAP_KUNIT_TEST #include "vcap_api_kunit.c" #endif diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.h b/drivers/net/ethernet/microchip/vcap/vcap_api.h index bfb8ad535074..689c7270f2a8 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.h @@ -11,9 +11,6 @@ #include <linux/netdevice.h> /* Use the generated API model */ -#ifdef CONFIG_VCAP_KUNIT_TEST -#include "vcap_ag_api_kunit.h" -#endif #include "vcap_ag_api.h" #define VCAP_CID_LOOKUP_SIZE 100000 /* Chains in a lookup */ @@ -167,6 +164,7 @@ struct vcap_admin { struct list_head list; /* for insertion in vcap_control */ struct list_head rules; /* list of rules */ struct list_head enabled; /* list of enabled ports */ + struct mutex lock; /* control access to rules */ enum vcap_type vtype; /* type of vcap */ int vinst; /* instance number within the same type */ int first_cid; /* first chain id in this vcap */ @@ -203,6 +201,13 @@ struct vcap_keyset_list { enum vcap_keyfield_set *keysets; /* the list of keysets */ }; +/* Client output printf-like function with destination */ +struct vcap_output_print { + __printf(2, 3) + void (*prf)(void *out, const char *fmt, ...); + void *dst; +}; + /* Client supplied VCAP callback operations */ struct vcap_operations { /* validate port keyset operation */ @@ -252,10 +257,8 @@ struct vcap_operations { /* informational */ int (*port_info) (struct net_device *ndev, - enum vcap_type vtype, - int (*pf)(void *out, int arg, const char *fmt, ...), - void *out, - int arg); + struct vcap_admin *admin, + struct vcap_output_print *out); /* enable/disable the lookups in a vcap instance */ int (*enable) (struct net_device *ndev, @@ -265,7 +268,6 @@ struct vcap_operations { /* VCAP API Client control interface */ struct vcap_control { - u32 rule_id; /* last used rule id (unique across VCAP instances) */ struct vcap_operations *ops; /* client supplied operations */ const struct vcap_info *vcaps; /* client supplied vcap models */ const struct vcap_statistics *stats; /* client supplied vcap stats */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h index 654ef8fa6d62..93a0fcb12a81 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h @@ -168,6 +168,8 @@ int vcap_val_rule(struct vcap_rule *rule, u16 l3_proto); int vcap_add_rule(struct vcap_rule *rule); /* Delete rule in a VCAP instance */ int vcap_del_rule(struct vcap_control *vctrl, struct net_device *ndev, u32 id); +/* Make a full copy of an 
existing rule with a new rule id */ +struct vcap_rule *vcap_copy_rule(struct vcap_rule *rule); /* Update the keyset for the rule */ int vcap_set_rule_set_keyset(struct vcap_rule *rule, @@ -213,7 +215,13 @@ bool vcap_is_next_lookup(struct vcap_control *vctrl, int cur_cid, int next_cid); /* Provide all rules via a callback interface */ int vcap_rule_iter(struct vcap_control *vctrl, int (*callback)(void *, struct vcap_rule *), void *arg); - +/* Match a list of keys against the keysets available in a vcap type */ +bool vcap_rule_find_keysets(struct vcap_rule *rule, + struct vcap_keyset_list *matches); +/* Return the keyset information for the keyset */ +const struct vcap_set *vcap_keyfieldset(struct vcap_control *vctrl, + enum vcap_type vt, + enum vcap_keyfield_set keyset); /* Copy to host byte order */ void vcap_netbytes_copy(u8 *dst, u8 *src, int count); @@ -226,6 +234,10 @@ int vcap_del_rules(struct vcap_control *vctrl, struct vcap_admin *admin); /* Add a keyset to a keyset list */ bool vcap_keyset_list_add(struct vcap_keyset_list *keysetlist, enum vcap_keyfield_set keyset); +/* Drop keys in a keylist and any keys that are not supported by the keyset */ +int vcap_filter_rule_keys(struct vcap_rule *rule, + enum vcap_key_field keylist[], int length, + bool drop_unsupported); /* map keyset id to a string with the keyset name */ const char *vcap_keyset_name(struct vcap_control *vctrl, @@ -234,4 +246,12 @@ const char *vcap_keyset_name(struct vcap_control *vctrl, const char *vcap_keyfield_name(struct vcap_control *vctrl, enum vcap_key_field key); +/* Modify a 32 bit key field with value and mask in the rule */ +int vcap_rule_mod_key_u32(struct vcap_rule *rule, enum vcap_key_field key, + u32 value, u32 mask); +/* Modify a 32 bit action field with value in the rule */ +int vcap_rule_mod_action_u32(struct vcap_rule *rule, + enum vcap_action_field action, + u32 value); + #endif /* __VCAP_API_CLIENT__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c new file mode 100644 index 000000000000..3b8d165dc832 --- /dev/null +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c @@ -0,0 +1,809 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip VCAP API debug file system support + * + * Copyright (c) 2022 Microchip Technology Inc. and its subsidiaries. 
+ * + */ + +#include "vcap_api_private.h" +#include "vcap_api_debugfs.h" + +struct vcap_admin_debugfs_info { + struct vcap_control *vctrl; + struct vcap_admin *admin; +}; + +struct vcap_port_debugfs_info { + struct vcap_control *vctrl; + struct net_device *ndev; +}; + +static bool vcap_bitarray_zero(int width, u8 *value) +{ + int bytes = DIV_ROUND_UP(width, BITS_PER_BYTE); + u8 total = 0, bmask = 0xff; + int rwidth = width; + int idx; + + for (idx = 0; idx < bytes; ++idx, rwidth -= BITS_PER_BYTE) { + if (rwidth && rwidth < BITS_PER_BYTE) + bmask = (1 << rwidth) - 1; + total += value[idx] & bmask; + } + return total == 0; +} + +static bool vcap_get_bit(u32 *stream, struct vcap_stream_iter *itr) +{ + u32 mask = BIT(itr->reg_bitpos); + u32 *p = &stream[itr->reg_idx]; + + return !!(*p & mask); +} + +static void vcap_decode_field(u32 *stream, struct vcap_stream_iter *itr, + int width, u8 *value) +{ + int idx; + + /* Loop over the field value bits and get the field bits and + * set them in the output value byte array + */ + for (idx = 0; idx < width; idx++) { + u8 bidx = idx & 0x7; + + /* Decode one field value bit */ + if (vcap_get_bit(stream, itr)) + *value |= 1 << bidx; + vcap_iter_next(itr); + if (bidx == 7) + value++; + } +} + +/* Verify that the typegroup bits have the correct values */ +static int vcap_verify_typegroups(u32 *stream, int sw_width, + const struct vcap_typegroup *tgt, bool mask, + int sw_max) +{ + struct vcap_stream_iter iter; + int sw_cnt, idx; + + vcap_iter_set(&iter, sw_width, tgt, 0); + sw_cnt = 0; + while (iter.tg->width) { + u32 value = 0; + u32 tg_value = iter.tg->value; + + if (mask) + tg_value = (1 << iter.tg->width) - 1; + /* Set position to current typegroup bit */ + iter.offset = iter.tg->offset; + vcap_iter_update(&iter); + for (idx = 0; idx < iter.tg->width; idx++) { + /* Decode one typegroup bit */ + if (vcap_get_bit(stream, &iter)) + value |= 1 << idx; + iter.offset++; + vcap_iter_update(&iter); + } + if (value != tg_value) + return -EINVAL; + iter.tg++; /* next typegroup */ + sw_cnt++; + /* Stop checking more typegroups */ + if (sw_max && sw_cnt >= sw_max) + break; + } + return 0; +} + +/* Find the subword width of the key typegroup that matches the stream data */ +static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl, + enum vcap_type vt, u32 *stream, + bool mask, int sw_max) +{ + const struct vcap_typegroup **tgt; + int sw_idx, res; + + tgt = vctrl->vcaps[vt].keyfield_set_typegroups; + /* Try the longest subword match first */ + for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { + if (!tgt[sw_idx]) + continue; + + res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].sw_width, + tgt[sw_idx], mask, sw_max); + if (res == 0) + return sw_idx; + } + return -EINVAL; +} + +/* Find the subword width of the action typegroup that matches the stream data + */ +static int vcap_find_actionstream_typegroup_sw(struct vcap_control *vctrl, + enum vcap_type vt, u32 *stream, + int sw_max) +{ + const struct vcap_typegroup **tgt; + int sw_idx, res; + + tgt = vctrl->vcaps[vt].actionfield_set_typegroups; + /* Try the longest subword match first */ + for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { + if (!tgt[sw_idx]) + continue; + res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].act_width, + tgt[sw_idx], false, sw_max); + if (res == 0) + return sw_idx; + } + return -EINVAL; +} + +/* Verify that the type id in the stream matches the type id of the keyset */ +static bool vcap_verify_keystream_keyset(struct vcap_control *vctrl, 
+ enum vcap_type vt, + u32 *keystream, + u32 *mskstream, + enum vcap_keyfield_set keyset) +{ + const struct vcap_info *vcap = &vctrl->vcaps[vt]; + const struct vcap_field *typefld; + const struct vcap_typegroup *tgt; + const struct vcap_field *fields; + struct vcap_stream_iter iter; + const struct vcap_set *info; + u32 value = 0; + u32 mask = 0; + + if (vcap_keyfield_count(vctrl, vt, keyset) == 0) + return false; + + info = vcap_keyfieldset(vctrl, vt, keyset); + /* Check that the keyset is valid */ + if (!info) + return false; + + /* a type_id of value -1 means that there is no type field */ + if (info->type_id == (u8)-1) + return true; + + /* Get a valid typegroup for the specific keyset */ + tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); + if (!tgt) + return false; + + fields = vcap_keyfields(vctrl, vt, keyset); + if (!fields) + return false; + + typefld = &fields[VCAP_KF_TYPE]; + vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); + vcap_decode_field(mskstream, &iter, typefld->width, (u8 *)&mask); + /* no type info if there are no mask bits */ + if (vcap_bitarray_zero(typefld->width, (u8 *)&mask)) + return false; + + /* Get the value of the type field in the stream and compare to the + * one defined in the vcap keyset + */ + vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); + vcap_decode_field(keystream, &iter, typefld->width, (u8 *)&value); + + return (value & mask) == (info->type_id & mask); +} + +/* Verify that the typegroup information, subword count, keyset and type id + * are in sync and correct, return the list of matching keysets + */ +static int +vcap_find_keystream_keysets(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *keystream, + u32 *mskstream, + bool mask, int sw_max, + struct vcap_keyset_list *kslist) +{ + const struct vcap_set *keyfield_set; + int sw_count, idx; + + sw_count = vcap_find_keystream_typegroup_sw(vctrl, vt, keystream, mask, + sw_max); + if (sw_count < 0) + return sw_count; + + keyfield_set = vctrl->vcaps[vt].keyfield_set; + for (idx = 0; idx < vctrl->vcaps[vt].keyfield_set_size; ++idx) { + if (keyfield_set[idx].sw_per_item != sw_count) + continue; + + if (vcap_verify_keystream_keyset(vctrl, vt, keystream, + mskstream, idx)) + vcap_keyset_list_add(kslist, idx); + } + if (kslist->cnt > 0) + return 0; + return -EINVAL; +} + +/* Verify that the actionset is valid and can be used to decode the stream */ +static bool +vcap_verify_actionstream_actionset(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *actionstream, + enum vcap_actionfield_set actionset) +{ + const struct vcap_typegroup *tgt; + const struct vcap_field *fields; + const struct vcap_set *info; + + if (vcap_actionfield_count(vctrl, vt, actionset) == 0) + return false; + + info = vcap_actionfieldset(vctrl, vt, actionset); + /* Check that the actionset is valid */ + if (!info) + return false; + + /* a type_id of value -1 means that there is no type field */ + if (info->type_id == (u8)-1) + return true; + + /* Get a valid typegroup for the specific actionset */ + tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); + if (!tgt) + return false; + + fields = vcap_actionfields(vctrl, vt, actionset); + if (!fields) + return false; + + /* Later this will be expanded with a check of the type id */ + return true; +} + +/* Verify that the typegroup information, subword count, actionset and type id + * are in sync and correct, return the actionset + */ +static enum vcap_actionfield_set +vcap_find_actionstream_actionset(struct vcap_control *vctrl, + enum vcap_type 
vt, + u32 *stream, + int sw_max) +{ + const struct vcap_set *actionfield_set; + int sw_count, idx; + bool res; + + sw_count = vcap_find_actionstream_typegroup_sw(vctrl, vt, stream, + sw_max); + if (sw_count < 0) + return sw_count; + + actionfield_set = vctrl->vcaps[vt].actionfield_set; + for (idx = 0; idx < vctrl->vcaps[vt].actionfield_set_size; ++idx) { + if (actionfield_set[idx].sw_per_item != sw_count) + continue; + + res = vcap_verify_actionstream_actionset(vctrl, vt, + stream, idx); + if (res) + return idx; + } + return -EINVAL; +} + +/* Read key data from a VCAP address and discover if there are any rule keysets + * here + */ +static int vcap_addr_keysets(struct vcap_control *vctrl, + struct net_device *ndev, + struct vcap_admin *admin, + int addr, + struct vcap_keyset_list *kslist) +{ + enum vcap_type vt = admin->vtype; + int keyset_sw_regs, idx; + u32 key = 0, mask = 0; + + /* Read the cache at the specified address */ + keyset_sw_regs = DIV_ROUND_UP(vctrl->vcaps[vt].sw_width, 32); + vctrl->ops->update(ndev, admin, VCAP_CMD_READ, VCAP_SEL_ALL, addr); + vctrl->ops->cache_read(ndev, admin, VCAP_SEL_ENTRY, 0, + keyset_sw_regs); + /* Skip uninitialized key/mask entries */ + for (idx = 0; idx < keyset_sw_regs; ++idx) { + key |= ~admin->cache.keystream[idx]; + mask |= admin->cache.maskstream[idx]; + } + if (key == 0 && mask == 0) + return -EINVAL; + /* Decode and locate the keysets */ + return vcap_find_keystream_keysets(vctrl, vt, admin->cache.keystream, + admin->cache.maskstream, false, 0, + kslist); +} + +static int vcap_read_rule(struct vcap_rule_internal *ri) +{ + struct vcap_admin *admin = ri->admin; + int sw_idx, ent_idx = 0, act_idx = 0; + u32 addr = ri->addr; + + if (!ri->size || !ri->keyset_sw_regs || !ri->actionset_sw_regs) { + pr_err("%s:%d: rule is empty\n", __func__, __LINE__); + return -EINVAL; + } + vcap_erase_cache(ri); + /* Use the values in the streams to read the VCAP cache */ + for (sw_idx = 0; sw_idx < ri->size; sw_idx++, addr++) { + ri->vctrl->ops->update(ri->ndev, admin, VCAP_CMD_READ, + VCAP_SEL_ALL, addr); + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_ENTRY, ent_idx, + ri->keyset_sw_regs); + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_ACTION, act_idx, + ri->actionset_sw_regs); + if (sw_idx == 0) + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_COUNTER, + ri->counter_id, 0); + ent_idx += ri->keyset_sw_regs; + act_idx += ri->actionset_sw_regs; + } + return 0; +} + +/* Dump the keyfields value and mask values */ +static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl, + struct vcap_output_print *out, + enum vcap_key_field key, + const struct vcap_field *keyfield, + u8 *value, u8 *mask) +{ + bool hex = false; + int idx, bytes; + + out->prf(out->dst, " %s: W%d: ", vcap_keyfield_name(vctrl, key), + keyfield[key].width); + + switch (keyfield[key].type) { + case VCAP_FIELD_BIT: + out->prf(out->dst, "%d/%d", value[0], mask[0]); + break; + case VCAP_FIELD_U32: + if (key == VCAP_KF_L3_IP4_SIP || key == VCAP_KF_L3_IP4_DIP) { + out->prf(out->dst, "%pI4h/%pI4h", value, mask); + } else if (key == VCAP_KF_ETYPE || + key == VCAP_KF_IF_IGR_PORT_MASK) { + hex = true; + } else { + u32 fmsk = (1 << keyfield[key].width) - 1; + u32 val = *(u32 *)value; + u32 msk = *(u32 *)mask; + + out->prf(out->dst, "%u/%u", val & fmsk, msk & fmsk); + } + break; + case VCAP_FIELD_U48: + if (key == VCAP_KF_L2_SMAC || key == VCAP_KF_L2_DMAC) + out->prf(out->dst, "%pMR/%pMR", value, mask); + else + hex = true; + break; + case VCAP_FIELD_U56: + case 
VCAP_FIELD_U64: + case VCAP_FIELD_U72: + case VCAP_FIELD_U112: + hex = true; + break; + case VCAP_FIELD_U128: + if (key == VCAP_KF_L3_IP6_SIP || key == VCAP_KF_L3_IP6_DIP) { + u8 nvalue[16], nmask[16]; + + vcap_netbytes_copy(nvalue, value, sizeof(nvalue)); + vcap_netbytes_copy(nmask, mask, sizeof(nmask)); + out->prf(out->dst, "%pI6/%pI6", nvalue, nmask); + } else { + hex = true; + } + break; + } + if (hex) { + bytes = DIV_ROUND_UP(keyfield[key].width, BITS_PER_BYTE); + out->prf(out->dst, "0x"); + for (idx = 0; idx < bytes; ++idx) + out->prf(out->dst, "%02x", value[bytes - idx - 1]); + out->prf(out->dst, "/0x"); + for (idx = 0; idx < bytes; ++idx) + out->prf(out->dst, "%02x", mask[bytes - idx - 1]); + } + out->prf(out->dst, "\n"); +} + +static void +vcap_debugfs_show_rule_actionfield(struct vcap_control *vctrl, + struct vcap_output_print *out, + enum vcap_action_field action, + const struct vcap_field *actionfield, + u8 *value) +{ + bool hex = false; + int idx, bytes; + u32 fmsk, val; + + out->prf(out->dst, " %s: W%d: ", + vcap_actionfield_name(vctrl, action), + actionfield[action].width); + + switch (actionfield[action].type) { + case VCAP_FIELD_BIT: + out->prf(out->dst, "%d", value[0]); + break; + case VCAP_FIELD_U32: + fmsk = (1 << actionfield[action].width) - 1; + val = *(u32 *)value; + out->prf(out->dst, "%u", val & fmsk); + break; + case VCAP_FIELD_U48: + case VCAP_FIELD_U56: + case VCAP_FIELD_U64: + case VCAP_FIELD_U72: + case VCAP_FIELD_U112: + case VCAP_FIELD_U128: + hex = true; + break; + } + if (hex) { + bytes = DIV_ROUND_UP(actionfield[action].width, BITS_PER_BYTE); + out->prf(out->dst, "0x"); + for (idx = 0; idx < bytes; ++idx) + out->prf(out->dst, "%02x", value[bytes - idx - 1]); + } + out->prf(out->dst, "\n"); +} + +static int vcap_debugfs_show_rule_keyset(struct vcap_rule_internal *ri, + struct vcap_output_print *out) +{ + struct vcap_control *vctrl = ri->vctrl; + struct vcap_stream_iter kiter, miter; + struct vcap_admin *admin = ri->admin; + enum vcap_keyfield_set keysets[10]; + const struct vcap_field *keyfield; + enum vcap_type vt = admin->vtype; + const struct vcap_typegroup *tgt; + struct vcap_keyset_list matches; + enum vcap_keyfield_set keyset; + int idx, res, keyfield_count; + u32 *maskstream; + u32 *keystream; + u8 value[16]; + u8 mask[16]; + + keystream = admin->cache.keystream; + maskstream = admin->cache.maskstream; + matches.keysets = keysets; + matches.cnt = 0; + matches.max = ARRAY_SIZE(keysets); + res = vcap_find_keystream_keysets(vctrl, vt, keystream, maskstream, + false, 0, &matches); + if (res < 0) { + pr_err("%s:%d: could not find valid keysets: %d\n", + __func__, __LINE__, res); + return -EINVAL; + } + keyset = matches.keysets[0]; + out->prf(out->dst, " keysets:"); + for (idx = 0; idx < matches.cnt; ++idx) + out->prf(out->dst, " %s", + vcap_keyset_name(vctrl, matches.keysets[idx])); + out->prf(out->dst, "\n"); + out->prf(out->dst, " keyset_sw: %d\n", ri->keyset_sw); + out->prf(out->dst, " keyset_sw_regs: %d\n", ri->keyset_sw_regs); + keyfield_count = vcap_keyfield_count(vctrl, vt, keyset); + keyfield = vcap_keyfields(vctrl, vt, keyset); + tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); + /* Start decoding the streams */ + for (idx = 0; idx < keyfield_count; ++idx) { + if (keyfield[idx].width <= 0) + continue; + /* First get the mask */ + memset(mask, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); + vcap_iter_init(&miter, vctrl->vcaps[vt].sw_width, tgt, + keyfield[idx].offset); + vcap_decode_field(maskstream, &miter, keyfield[idx].width, + mask); + /* Skip 
if no mask bits are set */ + if (vcap_bitarray_zero(keyfield[idx].width, mask)) + continue; + /* Get the key */ + memset(value, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); + vcap_iter_init(&kiter, vctrl->vcaps[vt].sw_width, tgt, + keyfield[idx].offset); + vcap_decode_field(keystream, &kiter, keyfield[idx].width, + value); + vcap_debugfs_show_rule_keyfield(vctrl, out, idx, keyfield, + value, mask); + } + return 0; +} + +static int vcap_debugfs_show_rule_actionset(struct vcap_rule_internal *ri, + struct vcap_output_print *out) +{ + struct vcap_control *vctrl = ri->vctrl; + struct vcap_admin *admin = ri->admin; + const struct vcap_field *actionfield; + enum vcap_actionfield_set actionset; + enum vcap_type vt = admin->vtype; + const struct vcap_typegroup *tgt; + struct vcap_stream_iter iter; + int idx, res, actfield_count; + u32 *actstream; + u8 value[16]; + bool no_bits; + + actstream = admin->cache.actionstream; + res = vcap_find_actionstream_actionset(vctrl, vt, actstream, 0); + if (res < 0) { + pr_err("%s:%d: could not find valid actionset: %d\n", + __func__, __LINE__, res); + return -EINVAL; + } + actionset = res; + out->prf(out->dst, " actionset: %s\n", + vcap_actionset_name(vctrl, ri->data.actionset)); + out->prf(out->dst, " actionset_sw: %d\n", ri->actionset_sw); + out->prf(out->dst, " actionset_sw_regs: %d\n", ri->actionset_sw_regs); + actfield_count = vcap_actionfield_count(vctrl, vt, actionset); + actionfield = vcap_actionfields(vctrl, vt, actionset); + tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); + /* Start decoding the stream */ + for (idx = 0; idx < actfield_count; ++idx) { + if (actionfield[idx].width <= 0) + continue; + /* Get the action */ + memset(value, 0, DIV_ROUND_UP(actionfield[idx].width, 8)); + vcap_iter_init(&iter, vctrl->vcaps[vt].act_width, tgt, + actionfield[idx].offset); + vcap_decode_field(actstream, &iter, actionfield[idx].width, + value); + /* Skip if no bits are set */ + no_bits = vcap_bitarray_zero(actionfield[idx].width, value); + if (no_bits) + continue; + /* Later the action id will also be checked */ + vcap_debugfs_show_rule_actionfield(vctrl, out, idx, actionfield, + value); + } + return 0; +} + +static void vcap_show_admin_rule(struct vcap_control *vctrl, + struct vcap_admin *admin, + struct vcap_output_print *out, + struct vcap_rule_internal *ri) +{ + ri->counter.value = admin->cache.counter; + ri->counter.sticky = admin->cache.sticky; + out->prf(out->dst, + "rule: %u, addr: [%d,%d], X%d, ctr[%d]: %d, hit: %d\n", + ri->data.id, ri->addr, ri->addr + ri->size - 1, ri->size, + ri->counter_id, ri->counter.value, ri->counter.sticky); + out->prf(out->dst, " chain_id: %d\n", ri->data.vcap_chain_id); + out->prf(out->dst, " user: %d\n", ri->data.user); + out->prf(out->dst, " priority: %d\n", ri->data.priority); + vcap_debugfs_show_rule_keyset(ri, out); + vcap_debugfs_show_rule_actionset(ri, out); +} + +static void vcap_show_admin_info(struct vcap_control *vctrl, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + const struct vcap_info *vcap = &vctrl->vcaps[admin->vtype]; + + out->prf(out->dst, "name: %s\n", vcap->name); + out->prf(out->dst, "rows: %d\n", vcap->rows); + out->prf(out->dst, "sw_count: %d\n", vcap->sw_count); + out->prf(out->dst, "sw_width: %d\n", vcap->sw_width); + out->prf(out->dst, "sticky_width: %d\n", vcap->sticky_width); + out->prf(out->dst, "act_width: %d\n", vcap->act_width); + out->prf(out->dst, "default_cnt: %d\n", vcap->default_cnt); + out->prf(out->dst, "require_cnt_dis: %d\n", vcap->require_cnt_dis); + 
out->prf(out->dst, "version: %d\n", vcap->version); + out->prf(out->dst, "vtype: %d\n", admin->vtype); + out->prf(out->dst, "vinst: %d\n", admin->vinst); + out->prf(out->dst, "first_cid: %d\n", admin->first_cid); + out->prf(out->dst, "last_cid: %d\n", admin->last_cid); + out->prf(out->dst, "lookups: %d\n", admin->lookups); + out->prf(out->dst, "first_valid_addr: %d\n", admin->first_valid_addr); + out->prf(out->dst, "last_valid_addr: %d\n", admin->last_valid_addr); + out->prf(out->dst, "last_used_addr: %d\n", admin->last_used_addr); +} + +static int vcap_show_admin(struct vcap_control *vctrl, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + struct vcap_rule_internal *elem, *ri; + int ret = 0; + + vcap_show_admin_info(vctrl, admin, out); + mutex_lock(&admin->lock); + list_for_each_entry(elem, &admin->rules, list) { + ri = vcap_dup_rule(elem); + if (IS_ERR(ri)) { + ret = PTR_ERR(ri); + goto err_unlock; + } + /* Read data from VCAP */ + ret = vcap_read_rule(ri); + if (ret) + goto err_free_rule; + out->prf(out->dst, "\n"); + vcap_show_admin_rule(vctrl, admin, out, ri); + vcap_free_rule((struct vcap_rule *)ri); + } + mutex_unlock(&admin->lock); + return 0; + +err_free_rule: + vcap_free_rule((struct vcap_rule *)ri); +err_unlock: + mutex_unlock(&admin->lock); + return ret; +} + +static int vcap_show_admin_raw(struct vcap_control *vctrl, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + enum vcap_keyfield_set keysets[10]; + enum vcap_type vt = admin->vtype; + struct vcap_keyset_list kslist; + struct vcap_rule_internal *ri; + const struct vcap_set *info; + int addr, idx; + int ret; + + if (list_empty(&admin->rules)) + return 0; + + ret = vcap_api_check(vctrl); + if (ret) + return ret; + + ri = list_first_entry(&admin->rules, struct vcap_rule_internal, list); + + /* Go from higher to lower addresses searching for a keyset */ + kslist.keysets = keysets; + kslist.max = ARRAY_SIZE(keysets); + for (addr = admin->last_valid_addr; addr >= admin->first_valid_addr; + --addr) { + kslist.cnt = 0; + ret = vcap_addr_keysets(vctrl, ri->ndev, admin, addr, &kslist); + if (ret < 0) + continue; + info = vcap_keyfieldset(vctrl, vt, kslist.keysets[0]); + if (!info) + continue; + if (addr % info->sw_per_item) { + pr_info("addr: %d X%d error rule, keyset: %s\n", + addr, + info->sw_per_item, + vcap_keyset_name(vctrl, kslist.keysets[0])); + } else { + out->prf(out->dst, " addr: %d, X%d rule, keysets:", + addr, + info->sw_per_item); + for (idx = 0; idx < kslist.cnt; ++idx) + out->prf(out->dst, " %s", + vcap_keyset_name(vctrl, + kslist.keysets[idx])); + out->prf(out->dst, "\n"); + } + } + return 0; +} + +/* Show the port configuration and status */ +static int vcap_port_debugfs_show(struct seq_file *m, void *unused) +{ + struct vcap_port_debugfs_info *info = m->private; + struct vcap_admin *admin; + struct vcap_output_print out = { + .prf = (void *)seq_printf, + .dst = m, + }; + + list_for_each_entry(admin, &info->vctrl->list, list) { + if (admin->vinst) + continue; + info->vctrl->ops->port_info(info->ndev, admin, &out); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vcap_port_debugfs); + +void vcap_port_debugfs(struct device *dev, struct dentry *parent, + struct vcap_control *vctrl, + struct net_device *ndev) +{ + struct vcap_port_debugfs_info *info; + + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return; + + info->vctrl = vctrl; + info->ndev = ndev; + debugfs_create_file(netdev_name(ndev), 0444, parent, info, + &vcap_port_debugfs_fops); +} 
+EXPORT_SYMBOL_GPL(vcap_port_debugfs); + +/* Show the full VCAP instance data (rules with all fields) */ +static int vcap_debugfs_show(struct seq_file *m, void *unused) +{ + struct vcap_admin_debugfs_info *info = m->private; + struct vcap_output_print out = { + .prf = (void *)seq_printf, + .dst = m, + }; + + return vcap_show_admin(info->vctrl, info->admin, &out); +} +DEFINE_SHOW_ATTRIBUTE(vcap_debugfs); + +/* Show the raw VCAP instance data (rules with address info) */ +static int vcap_raw_debugfs_show(struct seq_file *m, void *unused) +{ + struct vcap_admin_debugfs_info *info = m->private; + struct vcap_output_print out = { + .prf = (void *)seq_printf, + .dst = m, + }; + + return vcap_show_admin_raw(info->vctrl, info->admin, &out); +} +DEFINE_SHOW_ATTRIBUTE(vcap_raw_debugfs); + +struct dentry *vcap_debugfs(struct device *dev, struct dentry *parent, + struct vcap_control *vctrl) +{ + struct vcap_admin_debugfs_info *info; + struct vcap_admin *admin; + struct dentry *dir; + char name[50]; + + dir = debugfs_create_dir("vcaps", parent); + if (PTR_ERR_OR_ZERO(dir)) + return NULL; + list_for_each_entry(admin, &vctrl->list, list) { + sprintf(name, "raw_%s_%d", vctrl->vcaps[admin->vtype].name, + admin->vinst); + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return NULL; + info->vctrl = vctrl; + info->admin = admin; + debugfs_create_file(name, 0444, dir, info, + &vcap_raw_debugfs_fops); + sprintf(name, "%s_%d", vctrl->vcaps[admin->vtype].name, + admin->vinst); + debugfs_create_file(name, 0444, dir, info, &vcap_debugfs_fops); + } + return dir; +} +EXPORT_SYMBOL_GPL(vcap_debugfs); + +#ifdef CONFIG_VCAP_KUNIT_TEST +#include "vcap_api_debugfs_kunit.c" +#endif diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.h b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.h new file mode 100644 index 000000000000..9f2c59b5f6f5 --- /dev/null +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (C) 2022 Microchip Technology Inc. and its subsidiaries. + * Microchip VCAP API + */ + +#ifndef __VCAP_API_DEBUGFS__ +#define __VCAP_API_DEBUGFS__ + +#include <linux/types.h> +#include <linux/debugfs.h> +#include <linux/netdevice.h> + +#include "vcap_api.h" + +#if defined(CONFIG_DEBUG_FS) + +void vcap_port_debugfs(struct device *dev, struct dentry *parent, + struct vcap_control *vctrl, + struct net_device *ndev); + +/* Create a debugFS entry for a vcap instance */ +struct dentry *vcap_debugfs(struct device *dev, struct dentry *parent, + struct vcap_control *vctrl); + +#else + +static inline void vcap_port_debugfs(struct device *dev, struct dentry *parent, + struct vcap_control *vctrl, + struct net_device *ndev) +{ +} + +static inline struct dentry *vcap_debugfs(struct device *dev, + struct dentry *parent, + struct vcap_control *vctrl) +{ + return NULL; +} + +#endif +#endif /* __VCAP_API_DEBUGFS__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c new file mode 100644 index 000000000000..cf594668d5d9 --- /dev/null +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright (C) 2022 Microchip Technology Inc. and its subsidiaries. 
+ * Microchip VCAP API kunit test suite + */ + +#include <kunit/test.h> +#include "vcap_api.h" +#include "vcap_api_client.h" +#include "vcap_api_debugfs.h" +#include "vcap_model_kunit.h" + +/* First we have the test infrastructure that emulates the platform + * implementation + */ +#define TEST_BUF_CNT 100 +#define TEST_BUF_SZ 350 +#define STREAMWSIZE 64 + +static u32 test_updateaddr[STREAMWSIZE] = {}; +static int test_updateaddridx; +static int test_cache_erase_count; +static u32 test_init_start; +static u32 test_init_count; +static u32 test_hw_counter_id; +static struct vcap_cache_data test_hw_cache; +static struct net_device test_netdev = {}; +static int test_move_addr; +static int test_move_offset; +static int test_move_count; +static char test_pr_buffer[TEST_BUF_CNT][TEST_BUF_SZ]; +static int test_pr_bufferidx; +static int test_pr_idx; + +/* Callback used by the VCAP API */ +static enum vcap_keyfield_set test_val_keyset(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_rule *rule, + struct vcap_keyset_list *kslist, + u16 l3_proto) +{ + int idx; + + if (kslist->cnt > 0) { + switch (admin->vtype) { + case VCAP_TYPE_IS0: + for (idx = 0; idx < kslist->cnt; idx++) { + if (kslist->keysets[idx] == VCAP_KFS_ETAG) + return kslist->keysets[idx]; + if (kslist->keysets[idx] == + VCAP_KFS_PURE_5TUPLE_IP4) + return kslist->keysets[idx]; + if (kslist->keysets[idx] == + VCAP_KFS_NORMAL_5TUPLE_IP4) + return kslist->keysets[idx]; + if (kslist->keysets[idx] == + VCAP_KFS_NORMAL_7TUPLE) + return kslist->keysets[idx]; + } + break; + case VCAP_TYPE_IS2: + for (idx = 0; idx < kslist->cnt; idx++) { + if (kslist->keysets[idx] == VCAP_KFS_MAC_ETYPE) + return kslist->keysets[idx]; + if (kslist->keysets[idx] == VCAP_KFS_ARP) + return kslist->keysets[idx]; + if (kslist->keysets[idx] == VCAP_KFS_IP_7TUPLE) + return kslist->keysets[idx]; + } + break; + default: + pr_info("%s:%d: no validation for VCAP %d\n", + __func__, __LINE__, admin->vtype); + break; + } + } + return -EINVAL; +} + +/* Callback used by the VCAP API */ +static void test_add_def_fields(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + if (admin->vinst == 0 || admin->vinst == 2) + vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, + VCAP_BIT_1); + else + vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, + VCAP_BIT_0); +} + +/* Callback used by the VCAP API */ +static void test_cache_erase(struct vcap_admin *admin) +{ + if (test_cache_erase_count) { + memset(admin->cache.keystream, 0, test_cache_erase_count); + memset(admin->cache.maskstream, 0, test_cache_erase_count); + memset(admin->cache.actionstream, 0, test_cache_erase_count); + test_cache_erase_count = 0; + } +} + +/* Callback used by the VCAP API */ +static void test_cache_init(struct net_device *ndev, struct vcap_admin *admin, + u32 start, u32 count) +{ + test_init_start = start; + test_init_count = count; +} + +/* Callback used by the VCAP API */ +static void test_cache_read(struct net_device *ndev, struct vcap_admin *admin, + enum vcap_selection sel, u32 start, u32 count) +{ + u32 *keystr, *mskstr, *actstr; + int idx; + + pr_debug("%s:%d: %d %d\n", __func__, __LINE__, start, count); + switch (sel) { + case VCAP_SEL_ENTRY: + keystr = &admin->cache.keystream[start]; + mskstr = &admin->cache.maskstream[start]; + for (idx = 0; idx < count; ++idx) { + pr_debug("%s:%d: keydata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, keystr[idx]); + } + for (idx = 0; idx < count; ++idx) { + /* Invert the mask before decoding starts */ + 
mskstr[idx] = ~mskstr[idx]; + pr_debug("%s:%d: mskdata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, mskstr[idx]); + } + break; + case VCAP_SEL_ACTION: + actstr = &admin->cache.actionstream[start]; + for (idx = 0; idx < count; ++idx) { + pr_debug("%s:%d: actdata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, actstr[idx]); + } + break; + case VCAP_SEL_COUNTER: + pr_debug("%s:%d\n", __func__, __LINE__); + test_hw_counter_id = start; + admin->cache.counter = test_hw_cache.counter; + admin->cache.sticky = test_hw_cache.sticky; + break; + case VCAP_SEL_ALL: + pr_debug("%s:%d\n", __func__, __LINE__); + break; + } +} + +/* Callback used by the VCAP API */ +static void test_cache_write(struct net_device *ndev, struct vcap_admin *admin, + enum vcap_selection sel, u32 start, u32 count) +{ + u32 *keystr, *mskstr, *actstr; + int idx; + + switch (sel) { + case VCAP_SEL_ENTRY: + keystr = &admin->cache.keystream[start]; + mskstr = &admin->cache.maskstream[start]; + for (idx = 0; idx < count; ++idx) { + pr_debug("%s:%d: keydata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, keystr[idx]); + } + for (idx = 0; idx < count; ++idx) { + /* Invert the mask before encoding starts */ + mskstr[idx] = ~mskstr[idx]; + pr_debug("%s:%d: mskdata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, mskstr[idx]); + } + break; + case VCAP_SEL_ACTION: + actstr = &admin->cache.actionstream[start]; + for (idx = 0; idx < count; ++idx) { + pr_debug("%s:%d: actdata[%02d]: 0x%08x\n", __func__, + __LINE__, start + idx, actstr[idx]); + } + break; + case VCAP_SEL_COUNTER: + pr_debug("%s:%d\n", __func__, __LINE__); + test_hw_counter_id = start; + test_hw_cache.counter = admin->cache.counter; + test_hw_cache.sticky = admin->cache.sticky; + break; + case VCAP_SEL_ALL: + pr_err("%s:%d: cannot write all streams at once\n", + __func__, __LINE__); + break; + } +} + +/* Callback used by the VCAP API */ +static void test_cache_update(struct net_device *ndev, struct vcap_admin *admin, + enum vcap_command cmd, + enum vcap_selection sel, u32 addr) +{ + if (test_updateaddridx < ARRAY_SIZE(test_updateaddr)) + test_updateaddr[test_updateaddridx] = addr; + else + pr_err("%s:%d: overflow: %d\n", __func__, __LINE__, + test_updateaddridx); + test_updateaddridx++; +} + +static void test_cache_move(struct net_device *ndev, struct vcap_admin *admin, + u32 addr, int offset, int count) +{ + test_move_addr = addr; + test_move_offset = offset; + test_move_count = count; +} + +/* Provide port information via a callback interface */ +static int vcap_test_port_info(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + return 0; +} + +static int vcap_test_enable(struct net_device *ndev, + struct vcap_admin *admin, + bool enable) +{ + return 0; +} + +static struct vcap_operations test_callbacks = { + .validate_keyset = test_val_keyset, + .add_default_fields = test_add_def_fields, + .cache_erase = test_cache_erase, + .cache_write = test_cache_write, + .cache_read = test_cache_read, + .init = test_cache_init, + .update = test_cache_update, + .move = test_cache_move, + .port_info = vcap_test_port_info, + .enable = vcap_test_enable, +}; + +static struct vcap_control test_vctrl = { + .vcaps = kunit_test_vcaps, + .stats = &kunit_test_vcap_stats, + .ops = &test_callbacks, +}; + +static void vcap_test_api_init(struct vcap_admin *admin) +{ + /* Initialize the shared objects */ + INIT_LIST_HEAD(&test_vctrl.list); + INIT_LIST_HEAD(&admin->list); + INIT_LIST_HEAD(&admin->rules); + list_add_tail(&admin->list, 
&test_vctrl.list); + memset(test_updateaddr, 0, sizeof(test_updateaddr)); + test_updateaddridx = 0; + test_pr_bufferidx = 0; + test_pr_idx = 0; +} + +/* callback used by the show_admin function */ +static __printf(2, 3) +int test_prf(void *out, const char *fmt, ...) +{ + static char test_buffer[TEST_BUF_SZ]; + va_list args; + int idx, cnt; + + if (test_pr_bufferidx >= TEST_BUF_CNT) { + pr_err("%s:%d: overflow: %d\n", __func__, __LINE__, + test_pr_bufferidx); + return 0; + } + + va_start(args, fmt); + cnt = vscnprintf(test_buffer, TEST_BUF_SZ, fmt, args); + va_end(args); + + for (idx = 0; idx < cnt; ++idx) { + test_pr_buffer[test_pr_bufferidx][test_pr_idx] = + test_buffer[idx]; + if (test_buffer[idx] == '\n') { + test_pr_buffer[test_pr_bufferidx][++test_pr_idx] = 0; + test_pr_idx = 0; + test_pr_bufferidx++; + } else { + ++test_pr_idx; + } + } + + return cnt; +} + +/* Define the test cases. */ + +static void vcap_api_addr_keyset_test(struct kunit *test) +{ + u32 keydata[12] = { + 0x40450042, 0x000feaf3, 0x00000003, 0x00050600, + 0x10203040, 0x00075880, 0x633c6864, 0x00040003, + 0x00000020, 0x00000008, 0x00000240, 0x00000000, + }; + u32 mskdata[12] = { + 0x0030ff80, 0xfff00000, 0xfffffffc, 0xfff000ff, + 0x00000000, 0xfff00000, 0x00000000, 0xfff3fffc, + 0xffffffc0, 0xffffffff, 0xfffffc03, 0xffffffff, + }; + u32 actdata[12] = {}; + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS2, + .cache = { + .keystream = keydata, + .maskstream = mskdata, + .actionstream = actdata, + }, + }; + enum vcap_keyfield_set keysets[10]; + struct vcap_keyset_list matches; + int ret, idx, addr; + + vcap_test_api_init(&admin); + + /* Go from higher to lower addresses searching for a keyset */ + matches.keysets = keysets; + matches.cnt = 0; + matches.max = ARRAY_SIZE(keysets); + for (idx = ARRAY_SIZE(keydata) - 1, addr = 799; idx > 0; + --idx, --addr) { + admin.cache.keystream = &keydata[idx]; + admin.cache.maskstream = &mskdata[idx]; + ret = vcap_addr_keysets(&test_vctrl, &test_netdev, &admin, + addr, &matches); + KUNIT_EXPECT_EQ(test, -EINVAL, ret); + } + + /* Finally we hit the start of the rule */ + admin.cache.keystream = &keydata[idx]; + admin.cache.maskstream = &mskdata[idx]; + matches.cnt = 0; + ret = vcap_addr_keysets(&test_vctrl, &test_netdev, &admin, + addr, &matches); + KUNIT_EXPECT_EQ(test, 0, ret); + KUNIT_EXPECT_EQ(test, matches.cnt, 1); + KUNIT_EXPECT_EQ(test, matches.keysets[0], VCAP_KFS_MAC_ETYPE); +} + +static void vcap_api_show_admin_raw_test(struct kunit *test) +{ + u32 keydata[4] = { + 0x40450042, 0x000feaf3, 0x00000003, 0x00050600, + }; + u32 mskdata[4] = { + 0x0030ff80, 0xfff00000, 0xfffffffc, 0xfff000ff, + }; + u32 actdata[12] = {}; + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS2, + .cache = { + .keystream = keydata, + .maskstream = mskdata, + .actionstream = actdata, + }, + .first_valid_addr = 786, + .last_valid_addr = 788, + }; + struct vcap_rule_internal ri = { + .ndev = &test_netdev, + }; + struct vcap_output_print out = { + .prf = (void *)test_prf, + }; + const char *test_expected = + " addr: 786, X6 rule, keysets: VCAP_KFS_MAC_ETYPE\n"; + int ret; + + vcap_test_api_init(&admin); + list_add_tail(&ri.list, &admin.rules); + + ret = vcap_show_admin_raw(&test_vctrl, &admin, &out); + KUNIT_EXPECT_EQ(test, 0, ret); + KUNIT_EXPECT_STREQ(test, test_expected, test_pr_buffer[0]); +} + +static const char * const test_admin_info_expect[] = { + "name: is2\n", + "rows: 256\n", + "sw_count: 12\n", + "sw_width: 52\n", + "sticky_width: 1\n", + "act_width: 110\n", + "default_cnt: 73\n", + 
"require_cnt_dis: 0\n", + "version: 1\n", + "vtype: 2\n", + "vinst: 0\n", + "first_cid: 10000\n", + "last_cid: 19999\n", + "lookups: 4\n", + "first_valid_addr: 0\n", + "last_valid_addr: 3071\n", + "last_used_addr: 794\n", +}; + +static void vcap_api_show_admin_test(struct kunit *test) +{ + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS2, + .first_cid = 10000, + .last_cid = 19999, + .lookups = 4, + .last_valid_addr = 3071, + .first_valid_addr = 0, + .last_used_addr = 794, + }; + struct vcap_output_print out = { + .prf = (void *)test_prf, + }; + int idx; + + vcap_test_api_init(&admin); + + vcap_show_admin_info(&test_vctrl, &admin, &out); + for (idx = 0; idx < test_pr_bufferidx; ++idx) { + /* pr_info("log[%02d]: %s", idx, test_pr_buffer[idx]); */ + KUNIT_EXPECT_STREQ(test, test_admin_info_expect[idx], + test_pr_buffer[idx]); + } +} + +static const char * const test_admin_expect[] = { + "name: is2\n", + "rows: 256\n", + "sw_count: 12\n", + "sw_width: 52\n", + "sticky_width: 1\n", + "act_width: 110\n", + "default_cnt: 73\n", + "require_cnt_dis: 0\n", + "version: 1\n", + "vtype: 2\n", + "vinst: 0\n", + "first_cid: 8000000\n", + "last_cid: 8199999\n", + "lookups: 4\n", + "first_valid_addr: 0\n", + "last_valid_addr: 3071\n", + "last_used_addr: 794\n", + "\n", + "rule: 100, addr: [794,799], X6, ctr[0]: 0, hit: 0\n", + " chain_id: 0\n", + " user: 0\n", + " priority: 0\n", + " keysets: VCAP_KFS_MAC_ETYPE\n", + " keyset_sw: 6\n", + " keyset_sw_regs: 2\n", + " ETYPE_LEN_IS: W1: 1/1\n", + " IF_IGR_PORT_MASK: W32: 0xffabcd01/0xffffffff\n", + " IF_IGR_PORT_MASK_RNG: W4: 5/15\n", + " L2_DMAC: W48: 01:02:03:04:05:06/ff:ff:ff:ff:ff:ff\n", + " L2_PAYLOAD_ETYPE: W64: 0x9000002000000081/0xff000000000000ff\n", + " L2_SMAC: W48: b1:9e:34:32:75:88/ff:ff:ff:ff:ff:ff\n", + " LOOKUP_FIRST_IS: W1: 1/1\n", + " TYPE: W4: 0/15\n", + " actionset: VCAP_AFS_BASE_TYPE\n", + " actionset_sw: 3\n", + " actionset_sw_regs: 4\n", + " CNT_ID: W12: 100\n", + " MATCH_ID: W16: 1\n", + " MATCH_ID_MASK: W16: 1\n", + " POLICE_ENA: W1: 1\n", + " PORT_MASK: W68: 0x0514670115f3324589\n", +}; + +static void vcap_api_show_admin_rule_test(struct kunit *test) +{ + u32 keydata[] = { + 0x40450042, 0x000feaf3, 0x00000003, 0x00050600, + 0x10203040, 0x00075880, 0x633c6864, 0x00040003, + 0x00000020, 0x00000008, 0x00000240, 0x00000000, + }; + u32 mskdata[] = { + 0x0030ff80, 0xfff00000, 0xfffffffc, 0xfff000ff, + 0x00000000, 0xfff00000, 0x00000000, 0xfff3fffc, + 0xffffffc0, 0xffffffff, 0xfffffc03, 0xffffffff, + }; + u32 actdata[] = { + 0x00040002, 0xf3324589, 0x14670115, 0x00000005, + 0x00000000, 0x00100000, 0x06400010, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + }; + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS2, + .first_cid = 8000000, + .last_cid = 8199999, + .lookups = 4, + .last_valid_addr = 3071, + .first_valid_addr = 0, + .last_used_addr = 794, + .cache = { + .keystream = keydata, + .maskstream = mskdata, + .actionstream = actdata, + }, + }; + struct vcap_rule_internal ri = { + .admin = &admin, + .data = { + .id = 100, + .keyset = VCAP_KFS_MAC_ETYPE, + .actionset = VCAP_AFS_BASE_TYPE, + }, + .size = 6, + .keyset_sw = 6, + .keyset_sw_regs = 2, + .actionset_sw = 3, + .actionset_sw_regs = 4, + .addr = 794, + .vctrl = &test_vctrl, + }; + struct vcap_output_print out = { + .prf = (void *)test_prf, + }; + int ret, idx; + + vcap_test_api_init(&admin); + 
list_add_tail(&ri.list, &admin.rules); + + ret = vcap_show_admin(&test_vctrl, &admin, &out); + KUNIT_EXPECT_EQ(test, 0, ret); + for (idx = 0; idx < test_pr_bufferidx; ++idx) { + /* pr_info("log[%02d]: %s", idx, test_pr_buffer[idx]); */ + KUNIT_EXPECT_STREQ(test, test_admin_expect[idx], + test_pr_buffer[idx]); + } +} + +static struct kunit_case vcap_api_debugfs_test_cases[] = { + KUNIT_CASE(vcap_api_addr_keyset_test), + KUNIT_CASE(vcap_api_show_admin_raw_test), + KUNIT_CASE(vcap_api_show_admin_test), + KUNIT_CASE(vcap_api_show_admin_rule_test), + {} +}; + +static struct kunit_suite vcap_api_debugfs_test_suite = { + .name = "VCAP_API_DebugFS_Testsuite", + .test_cases = vcap_api_debugfs_test_cases, +}; + +kunit_test_suite(vcap_api_debugfs_test_suite); diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 6858e44ce4a5..76a31215ebfb 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -204,9 +204,9 @@ static void test_cache_move(struct net_device *ndev, struct vcap_admin *admin, } /* Provide port information via a callback interface */ -static int vcap_test_port_info(struct net_device *ndev, enum vcap_type vtype, - int (*pf)(void *out, int arg, const char *fmt, ...), - void *out, int arg) +static int vcap_test_port_info(struct net_device *ndev, + struct vcap_admin *admin, + struct vcap_output_print *out) { return 0; } @@ -254,8 +254,8 @@ test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user, u16 priority, int id, int size, int expected_addr) { - struct vcap_rule *rule = 0; - struct vcap_rule_internal *ri = 0; + struct vcap_rule *rule; + struct vcap_rule_internal *ri; enum vcap_keyfield_set keyset = VCAP_KFS_NO_VALUE; enum vcap_actionfield_set actionset = VCAP_AFS_NO_VALUE; int ret; @@ -339,7 +339,7 @@ static void vcap_api_set_bit_1_test(struct kunit *test) .sw_width = 52, .reg_idx = 1, .reg_bitpos = 20, - .tg = 0 + .tg = NULL, }; u32 stream[2] = {0}; @@ -356,7 +356,7 @@ static void vcap_api_set_bit_0_test(struct kunit *test) .sw_width = 52, .reg_idx = 2, .reg_bitpos = 11, - .tg = 0 + .tg = NULL, }; u32 stream[3] = {~0, ~0, ~0}; @@ -1197,7 +1197,7 @@ static void vcap_api_rule_find_keyset_basic_test(struct kunit *test) for (idx = 0; idx < ARRAY_SIZE(ckf); idx++) list_add_tail(&ckf[idx].ctrl.list, &ri.data.keyfields); - ret = vcap_rule_find_keysets(&ri, &matches); + ret = vcap_rule_find_keysets(&ri.data, &matches); KUNIT_EXPECT_EQ(test, true, ret); KUNIT_EXPECT_EQ(test, 1, matches.cnt); @@ -1244,7 +1244,7 @@ static void vcap_api_rule_find_keyset_failed_test(struct kunit *test) for (idx = 0; idx < ARRAY_SIZE(ckf); idx++) list_add_tail(&ckf[idx].ctrl.list, &ri.data.keyfields); - ret = vcap_rule_find_keysets(&ri, &matches); + ret = vcap_rule_find_keysets(&ri.data, &matches); KUNIT_EXPECT_EQ(test, false, ret); KUNIT_EXPECT_EQ(test, 0, matches.cnt); @@ -1291,7 +1291,7 @@ static void vcap_api_rule_find_keyset_many_test(struct kunit *test) for (idx = 0; idx < ARRAY_SIZE(ckf); idx++) list_add_tail(&ckf[idx].ctrl.list, &ri.data.keyfields); - ret = vcap_rule_find_keysets(&ri, &matches); + ret = vcap_rule_find_keysets(&ri.data, &matches); KUNIT_EXPECT_EQ(test, true, ret); KUNIT_EXPECT_EQ(test, 6, matches.cnt); @@ -1324,8 +1324,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) .actionstream = actdata, }, }; - struct vcap_rule *rule = 0; - struct vcap_rule_internal *ri = 0; + struct vcap_rule *rule; + struct vcap_rule_internal *ri; 
int vcap_chain_id = 10005; enum vcap_user user = VCAP_USER_VCAP_UTIL; u16 priority = 10; @@ -1691,7 +1691,7 @@ static void vcap_api_rule_remove_at_end_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, test_move_count); KUNIT_EXPECT_EQ(test, 780, test_init_start); KUNIT_EXPECT_EQ(test, 12, test_init_count); - KUNIT_EXPECT_EQ(test, 3071, admin.last_used_addr); + KUNIT_EXPECT_EQ(test, 3072, admin.last_used_addr); } static void vcap_api_rule_remove_in_middle_test(struct kunit *test) @@ -1766,7 +1766,7 @@ static void vcap_api_rule_remove_in_middle_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, test_move_count); KUNIT_EXPECT_EQ(test, 798, test_init_start); KUNIT_EXPECT_EQ(test, 2, test_init_count); - KUNIT_EXPECT_EQ(test, 799, admin.last_used_addr); + KUNIT_EXPECT_EQ(test, 800, admin.last_used_addr); } static void vcap_api_rule_remove_in_front_test(struct kunit *test) @@ -1805,7 +1805,7 @@ static void vcap_api_rule_remove_in_front_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, test_move_count); KUNIT_EXPECT_EQ(test, 780, test_init_start); KUNIT_EXPECT_EQ(test, 12, test_init_count); - KUNIT_EXPECT_EQ(test, 799, admin.last_used_addr); + KUNIT_EXPECT_EQ(test, 800, admin.last_used_addr); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 20, 400, 6, 792); test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 30, 300, 3, 789); @@ -1954,6 +1954,198 @@ static void vcap_api_next_lookup_advanced_test(struct kunit *test) KUNIT_EXPECT_EQ(test, true, ret); } +static void vcap_api_filter_unsupported_keys_test(struct kunit *test) +{ + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS2, + }; + struct vcap_rule_internal ri = { + .admin = &admin, + .vctrl = &test_vctrl, + .data.keyset = VCAP_KFS_MAC_ETYPE, + }; + enum vcap_key_field keylist[] = { + VCAP_KF_TYPE, + VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_ARP_ADDR_SPACE_OK_IS, /* arp keys are not in keyset */ + VCAP_KF_ARP_PROTO_SPACE_OK_IS, + VCAP_KF_ARP_LEN_OK_IS, + VCAP_KF_ARP_TGT_MATCH_IS, + VCAP_KF_ARP_SENDER_MATCH_IS, + VCAP_KF_ARP_OPCODE_UNKNOWN_IS, + VCAP_KF_ARP_OPCODE, + VCAP_KF_8021Q_DEI_CLS, + VCAP_KF_8021Q_PCP_CLS, + VCAP_KF_8021Q_VID_CLS, + VCAP_KF_L2_MC_IS, + VCAP_KF_L2_BC_IS, + }; + enum vcap_key_field expected[] = { + VCAP_KF_TYPE, + VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_8021Q_DEI_CLS, + VCAP_KF_8021Q_PCP_CLS, + VCAP_KF_8021Q_VID_CLS, + VCAP_KF_L2_MC_IS, + VCAP_KF_L2_BC_IS, + }; + struct vcap_client_keyfield *ckf, *next; + bool ret; + int idx; + + /* Add all keys to the rule */ + INIT_LIST_HEAD(&ri.data.keyfields); + for (idx = 0; idx < ARRAY_SIZE(keylist); idx++) { + ckf = kzalloc(sizeof(*ckf), GFP_KERNEL); + if (ckf) { + ckf->ctrl.key = keylist[idx]; + list_add_tail(&ckf->ctrl.list, &ri.data.keyfields); + } + } + + KUNIT_EXPECT_EQ(test, 14, ARRAY_SIZE(keylist)); + + /* Drop unsupported keys from the rule */ + ret = vcap_filter_rule_keys(&ri.data, NULL, 0, true); + + KUNIT_EXPECT_EQ(test, 0, ret); + + /* Check remaining keys in the rule */ + idx = 0; + list_for_each_entry_safe(ckf, next, &ri.data.keyfields, ctrl.list) { + KUNIT_EXPECT_EQ(test, expected[idx], ckf->ctrl.key); + list_del(&ckf->ctrl.list); + kfree(ckf); + ++idx; + } + KUNIT_EXPECT_EQ(test, 7, idx); +} + +static void vcap_api_filter_keylist_test(struct kunit *test) +{ + struct vcap_admin admin = { + .vtype = VCAP_TYPE_IS0, + }; + struct vcap_rule_internal ri = { + .admin = &admin, + .vctrl = &test_vctrl, + .data.keyset = VCAP_KFS_NORMAL_7TUPLE, + }; + enum vcap_key_field keylist[] = { + VCAP_KF_TYPE, + VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_LOOKUP_GEN_IDX_SEL, + VCAP_KF_LOOKUP_GEN_IDX, 
+ VCAP_KF_IF_IGR_PORT_MASK_SEL, + VCAP_KF_IF_IGR_PORT_MASK, + VCAP_KF_L2_MC_IS, + VCAP_KF_L2_BC_IS, + VCAP_KF_8021Q_VLAN_TAGS, + VCAP_KF_8021Q_TPID0, + VCAP_KF_8021Q_PCP0, + VCAP_KF_8021Q_DEI0, + VCAP_KF_8021Q_VID0, + VCAP_KF_8021Q_TPID1, + VCAP_KF_8021Q_PCP1, + VCAP_KF_8021Q_DEI1, + VCAP_KF_8021Q_VID1, + VCAP_KF_8021Q_TPID2, + VCAP_KF_8021Q_PCP2, + VCAP_KF_8021Q_DEI2, + VCAP_KF_8021Q_VID2, + VCAP_KF_L2_DMAC, + VCAP_KF_L2_SMAC, + VCAP_KF_IP_MC_IS, + VCAP_KF_ETYPE_LEN_IS, + VCAP_KF_ETYPE, + VCAP_KF_IP_SNAP_IS, + VCAP_KF_IP4_IS, + VCAP_KF_L3_FRAGMENT_TYPE, + VCAP_KF_L3_FRAG_INVLD_L4_LEN, + VCAP_KF_L3_OPTIONS_IS, + VCAP_KF_L3_DSCP, + VCAP_KF_L3_IP6_DIP, + VCAP_KF_L3_IP6_SIP, + VCAP_KF_TCP_UDP_IS, + VCAP_KF_TCP_IS, + VCAP_KF_L4_SPORT, + VCAP_KF_L4_RNG, + }; + enum vcap_key_field droplist[] = { + VCAP_KF_8021Q_TPID1, + VCAP_KF_8021Q_PCP1, + VCAP_KF_8021Q_DEI1, + VCAP_KF_8021Q_VID1, + VCAP_KF_8021Q_TPID2, + VCAP_KF_8021Q_PCP2, + VCAP_KF_8021Q_DEI2, + VCAP_KF_8021Q_VID2, + VCAP_KF_L3_IP6_DIP, + VCAP_KF_L3_IP6_SIP, + VCAP_KF_L4_SPORT, + VCAP_KF_L4_RNG, + }; + enum vcap_key_field expected[] = { + VCAP_KF_TYPE, + VCAP_KF_LOOKUP_FIRST_IS, + VCAP_KF_LOOKUP_GEN_IDX_SEL, + VCAP_KF_LOOKUP_GEN_IDX, + VCAP_KF_IF_IGR_PORT_MASK_SEL, + VCAP_KF_IF_IGR_PORT_MASK, + VCAP_KF_L2_MC_IS, + VCAP_KF_L2_BC_IS, + VCAP_KF_8021Q_VLAN_TAGS, + VCAP_KF_8021Q_TPID0, + VCAP_KF_8021Q_PCP0, + VCAP_KF_8021Q_DEI0, + VCAP_KF_8021Q_VID0, + VCAP_KF_L2_DMAC, + VCAP_KF_L2_SMAC, + VCAP_KF_IP_MC_IS, + VCAP_KF_ETYPE_LEN_IS, + VCAP_KF_ETYPE, + VCAP_KF_IP_SNAP_IS, + VCAP_KF_IP4_IS, + VCAP_KF_L3_FRAGMENT_TYPE, + VCAP_KF_L3_FRAG_INVLD_L4_LEN, + VCAP_KF_L3_OPTIONS_IS, + VCAP_KF_L3_DSCP, + VCAP_KF_TCP_UDP_IS, + VCAP_KF_TCP_IS, + }; + struct vcap_client_keyfield *ckf, *next; + bool ret; + int idx; + + /* Add all keys to the rule */ + INIT_LIST_HEAD(&ri.data.keyfields); + for (idx = 0; idx < ARRAY_SIZE(keylist); idx++) { + ckf = kzalloc(sizeof(*ckf), GFP_KERNEL); + if (ckf) { + ckf->ctrl.key = keylist[idx]; + list_add_tail(&ckf->ctrl.list, &ri.data.keyfields); + } + } + + KUNIT_EXPECT_EQ(test, 38, ARRAY_SIZE(keylist)); + + /* Drop listed keys from the rule */ + ret = vcap_filter_rule_keys(&ri.data, droplist, ARRAY_SIZE(droplist), + false); + + KUNIT_EXPECT_EQ(test, 0, ret); + + /* Check remaining keys in the rule */ + idx = 0; + list_for_each_entry_safe(ckf, next, &ri.data.keyfields, ctrl.list) { + KUNIT_EXPECT_EQ(test, expected[idx], ckf->ctrl.key); + list_del(&ckf->ctrl.list); + kfree(ckf); + ++idx; + } + KUNIT_EXPECT_EQ(test, 26, idx); +} + static struct kunit_suite vcap_api_rule_remove_test_suite = { .name = "VCAP_API_Rule_Remove_Testsuite", .test_cases = vcap_api_rule_remove_test_cases, @@ -1984,6 +2176,8 @@ static struct kunit_suite vcap_api_rule_counter_test_suite = { static struct kunit_case vcap_api_support_test_cases[] = { KUNIT_CASE(vcap_api_next_lookup_basic_test), KUNIT_CASE(vcap_api_next_lookup_advanced_test), + KUNIT_CASE(vcap_api_filter_unsupported_keys_test), + KUNIT_CASE(vcap_api_filter_keylist_test), {} }; diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h new file mode 100644 index 000000000000..9ac1b1d55f22 --- /dev/null +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (C) 2022 Microchip Technology Inc. and its subsidiaries. 
+ * Microchip VCAP API
+ */
+
+#ifndef __VCAP_API_PRIVATE__
+#define __VCAP_API_PRIVATE__
+
+#include <linux/types.h>
+
+#include "vcap_api.h"
+#include "vcap_api_client.h"
+
+#define to_intrule(rule) container_of((rule), struct vcap_rule_internal, data)
+
+/* Private VCAP API rule data */
+struct vcap_rule_internal {
+ struct vcap_rule data; /* provided by the client */
+ struct list_head list; /* the vcap admin list of rules */
+ struct vcap_admin *admin; /* vcap hw instance */
+ struct net_device *ndev; /* the interface that the rule applies to */
+ struct vcap_control *vctrl; /* the client control */
+ u32 sort_key; /* defines the position in the VCAP */
+ int keyset_sw; /* subwords in a keyset */
+ int actionset_sw; /* subwords in an actionset */
+ int keyset_sw_regs; /* registers in a subword in a keyset */
+ int actionset_sw_regs; /* registers in a subword in an actionset */
+ int size; /* the size of the rule: max(entry, action) */
+ u32 addr; /* address in the VCAP at insertion */
+ u32 counter_id; /* counter id (if a dedicated counter is available) */
+ struct vcap_counter counter; /* last read counter value */
+};
+
+/* Bit iterator for the VCAP cache streams */
+struct vcap_stream_iter {
+ u32 offset; /* bit offset from the stream start */
+ u32 sw_width; /* subword width in bits */
+ u32 regs_per_sw; /* registers per subword */
+ u32 reg_idx; /* current register index */
+ u32 reg_bitpos; /* bit offset in current register */
+ const struct vcap_typegroup *tg; /* current typegroup */
+};
+
+/* Check that the control has a valid set of callbacks */
+int vcap_api_check(struct vcap_control *ctrl);
+/* Make a shallow copy of the rule without the fields */
+struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri);
+/* Erase the VCAP cache area used for encoding and decoding */
+void vcap_erase_cache(struct vcap_rule_internal *ri);
+
+/* Iterator functionality */
+
+void vcap_iter_init(struct vcap_stream_iter *itr, int sw_width,
+ const struct vcap_typegroup *tg, u32 offset);
+void vcap_iter_next(struct vcap_stream_iter *itr);
+void vcap_iter_set(struct vcap_stream_iter *itr, int sw_width,
+ const struct vcap_typegroup *tg, u32 offset);
+void vcap_iter_update(struct vcap_stream_iter *itr);
+
+/* Keyset and keyfield functionality */
+
+/* Return the number of keyfields in the keyset */
+int vcap_keyfield_count(struct vcap_control *vctrl,
+ enum vcap_type vt, enum vcap_keyfield_set keyset);
+/* Return the typegroup table for the matching keyset (using subword size) */
+const struct vcap_typegroup *
+vcap_keyfield_typegroup(struct vcap_control *vctrl,
+ enum vcap_type vt, enum vcap_keyfield_set keyset);
+/* Return the list of keyfields for the keyset */
+const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ enum vcap_keyfield_set keyset);
+
+/* Actionset and actionfield functionality */
+
+/* Return the actionset information for the actionset */
+const struct vcap_set *
+vcap_actionfieldset(struct vcap_control *vctrl,
+ enum vcap_type vt, enum vcap_actionfield_set actionset);
+/* Return the number of actionfields in the actionset */
+int vcap_actionfield_count(struct vcap_control *vctrl,
+ enum vcap_type vt,
+ enum vcap_actionfield_set actionset);
+/* Return the typegroup table for the matching actionset (using subword size) */
+const struct vcap_typegroup *
+vcap_actionfield_typegroup(struct vcap_control *vctrl, enum vcap_type vt,
+ enum vcap_actionfield_set actionset);
+/* Return the list of actionfields for the actionset */
+const struct vcap_field *
+vcap_actionfields(struct vcap_control *vctrl,
+ enum vcap_type vt, enum vcap_actionfield_set actionset);
+/* Map actionset id to a string with the actionset name */
+const char *vcap_actionset_name(struct vcap_control *vctrl,
+ enum vcap_actionfield_set actionset);
+/* Map action field id to a string with the action field name */
+const char *vcap_actionfield_name(struct vcap_control *vctrl,
+ enum vcap_action_field action);
+
+#endif /* __VCAP_API_PRIVATE__ */
diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c
index dbd20b125cea..1478c3b21af1 100644
--- a/drivers/net/ethernet/mscc/ocelot_stats.c
+++ b/drivers/net/ethernet/mscc/ocelot_stats.c
@@ -9,6 +9,225 @@
 #include <linux/workqueue.h>
 #include "ocelot.h"
+enum ocelot_stat {
+ OCELOT_STAT_RX_OCTETS,
+ OCELOT_STAT_RX_UNICAST,
+ OCELOT_STAT_RX_MULTICAST,
+ OCELOT_STAT_RX_BROADCAST,
+ OCELOT_STAT_RX_SHORTS,
+ OCELOT_STAT_RX_FRAGMENTS,
+ OCELOT_STAT_RX_JABBERS,
+ OCELOT_STAT_RX_CRC_ALIGN_ERRS,
+ OCELOT_STAT_RX_SYM_ERRS,
+ OCELOT_STAT_RX_64,
+ OCELOT_STAT_RX_65_127,
+ OCELOT_STAT_RX_128_255,
+ OCELOT_STAT_RX_256_511,
+ OCELOT_STAT_RX_512_1023,
+ OCELOT_STAT_RX_1024_1526,
+ OCELOT_STAT_RX_1527_MAX,
+ OCELOT_STAT_RX_PAUSE,
+ OCELOT_STAT_RX_CONTROL,
+ OCELOT_STAT_RX_LONGS,
+ OCELOT_STAT_RX_CLASSIFIED_DROPS,
+ OCELOT_STAT_RX_RED_PRIO_0,
+ OCELOT_STAT_RX_RED_PRIO_1,
+ OCELOT_STAT_RX_RED_PRIO_2,
+ OCELOT_STAT_RX_RED_PRIO_3,
+ OCELOT_STAT_RX_RED_PRIO_4,
+ OCELOT_STAT_RX_RED_PRIO_5,
+ OCELOT_STAT_RX_RED_PRIO_6,
+ OCELOT_STAT_RX_RED_PRIO_7,
+ OCELOT_STAT_RX_YELLOW_PRIO_0,
+ OCELOT_STAT_RX_YELLOW_PRIO_1,
+ OCELOT_STAT_RX_YELLOW_PRIO_2,
+ OCELOT_STAT_RX_YELLOW_PRIO_3,
+ OCELOT_STAT_RX_YELLOW_PRIO_4,
+ OCELOT_STAT_RX_YELLOW_PRIO_5,
+ OCELOT_STAT_RX_YELLOW_PRIO_6,
+ OCELOT_STAT_RX_YELLOW_PRIO_7,
+ OCELOT_STAT_RX_GREEN_PRIO_0,
+ OCELOT_STAT_RX_GREEN_PRIO_1,
+ OCELOT_STAT_RX_GREEN_PRIO_2,
+ OCELOT_STAT_RX_GREEN_PRIO_3,
+ OCELOT_STAT_RX_GREEN_PRIO_4,
+ OCELOT_STAT_RX_GREEN_PRIO_5,
+ OCELOT_STAT_RX_GREEN_PRIO_6,
+ OCELOT_STAT_RX_GREEN_PRIO_7,
+ OCELOT_STAT_TX_OCTETS,
+ OCELOT_STAT_TX_UNICAST,
+ OCELOT_STAT_TX_MULTICAST,
+ OCELOT_STAT_TX_BROADCAST,
+ OCELOT_STAT_TX_COLLISION,
+ OCELOT_STAT_TX_DROPS,
+ OCELOT_STAT_TX_PAUSE,
+ OCELOT_STAT_TX_64,
+ OCELOT_STAT_TX_65_127,
+ OCELOT_STAT_TX_128_255,
+ OCELOT_STAT_TX_256_511,
+ OCELOT_STAT_TX_512_1023,
+ OCELOT_STAT_TX_1024_1526,
+ OCELOT_STAT_TX_1527_MAX,
+ OCELOT_STAT_TX_YELLOW_PRIO_0,
+ OCELOT_STAT_TX_YELLOW_PRIO_1,
+ OCELOT_STAT_TX_YELLOW_PRIO_2,
+ OCELOT_STAT_TX_YELLOW_PRIO_3,
+ OCELOT_STAT_TX_YELLOW_PRIO_4,
+ OCELOT_STAT_TX_YELLOW_PRIO_5,
+ OCELOT_STAT_TX_YELLOW_PRIO_6,
+ OCELOT_STAT_TX_YELLOW_PRIO_7,
+ OCELOT_STAT_TX_GREEN_PRIO_0,
+ OCELOT_STAT_TX_GREEN_PRIO_1,
+ OCELOT_STAT_TX_GREEN_PRIO_2,
+ OCELOT_STAT_TX_GREEN_PRIO_3,
+ OCELOT_STAT_TX_GREEN_PRIO_4,
+ OCELOT_STAT_TX_GREEN_PRIO_5,
+ OCELOT_STAT_TX_GREEN_PRIO_6,
+ OCELOT_STAT_TX_GREEN_PRIO_7,
+ OCELOT_STAT_TX_AGED,
+ OCELOT_STAT_DROP_LOCAL,
+ OCELOT_STAT_DROP_TAIL,
+ OCELOT_STAT_DROP_YELLOW_PRIO_0,
+ OCELOT_STAT_DROP_YELLOW_PRIO_1,
+ OCELOT_STAT_DROP_YELLOW_PRIO_2,
+ OCELOT_STAT_DROP_YELLOW_PRIO_3,
+ OCELOT_STAT_DROP_YELLOW_PRIO_4,
+ OCELOT_STAT_DROP_YELLOW_PRIO_5,
+ OCELOT_STAT_DROP_YELLOW_PRIO_6,
+ OCELOT_STAT_DROP_YELLOW_PRIO_7,
+ OCELOT_STAT_DROP_GREEN_PRIO_0,
+ OCELOT_STAT_DROP_GREEN_PRIO_1,
+ OCELOT_STAT_DROP_GREEN_PRIO_2,
+ OCELOT_STAT_DROP_GREEN_PRIO_3,
+ OCELOT_STAT_DROP_GREEN_PRIO_4,
+ OCELOT_STAT_DROP_GREEN_PRIO_5,
+ OCELOT_STAT_DROP_GREEN_PRIO_6,
+ OCELOT_STAT_DROP_GREEN_PRIO_7,
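+ /* must be last: OCELOT_NUM_STATS sizes ocelot_stats_layout[] below */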
+ OCELOT_NUM_STATS,
+};
+
+struct ocelot_stat_layout {
+ u32 reg;
+ char name[ETH_GSTRING_LEN];
+};
+
+/* 32-bit counter checked for wraparound by ocelot_port_update_stats()
+ * and copied to ocelot->stats.
+ */
+#define OCELOT_STAT(kind) \
+ [OCELOT_STAT_ ## kind] = { .reg = SYS_COUNT_ ## kind }
+/* Same as above, except also exported to ethtool -S. Standard counters should
+ * only be exposed through more specific interfaces rather than by their
+ * string name.
+ */
+#define OCELOT_STAT_ETHTOOL(kind, ethtool_name) \
+ [OCELOT_STAT_ ## kind] = { .reg = SYS_COUNT_ ## kind, .name = ethtool_name }
+
+#define OCELOT_COMMON_STATS \
+ OCELOT_STAT_ETHTOOL(RX_OCTETS, "rx_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_UNICAST, "rx_unicast"), \
+ OCELOT_STAT_ETHTOOL(RX_MULTICAST, "rx_multicast"), \
+ OCELOT_STAT_ETHTOOL(RX_BROADCAST, "rx_broadcast"), \
+ OCELOT_STAT_ETHTOOL(RX_SHORTS, "rx_shorts"), \
+ OCELOT_STAT_ETHTOOL(RX_FRAGMENTS, "rx_fragments"), \
+ OCELOT_STAT_ETHTOOL(RX_JABBERS, "rx_jabbers"), \
+ OCELOT_STAT_ETHTOOL(RX_CRC_ALIGN_ERRS, "rx_crc_align_errs"), \
+ OCELOT_STAT_ETHTOOL(RX_SYM_ERRS, "rx_sym_errs"), \
+ OCELOT_STAT_ETHTOOL(RX_64, "rx_frames_below_65_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_65_127, "rx_frames_65_to_127_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_128_255, "rx_frames_128_to_255_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_256_511, "rx_frames_256_to_511_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_512_1023, "rx_frames_512_to_1023_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_1024_1526, "rx_frames_1024_to_1526_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_1527_MAX, "rx_frames_over_1526_octets"), \
+ OCELOT_STAT_ETHTOOL(RX_PAUSE, "rx_pause"), \
+ OCELOT_STAT_ETHTOOL(RX_CONTROL, "rx_control"), \
+ OCELOT_STAT_ETHTOOL(RX_LONGS, "rx_longs"), \
+ OCELOT_STAT_ETHTOOL(RX_CLASSIFIED_DROPS, "rx_classified_drops"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_0, "rx_red_prio_0"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_1, "rx_red_prio_1"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_2, "rx_red_prio_2"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_3, "rx_red_prio_3"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_4, "rx_red_prio_4"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_5, "rx_red_prio_5"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_6, "rx_red_prio_6"), \
+ OCELOT_STAT_ETHTOOL(RX_RED_PRIO_7, "rx_red_prio_7"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_0, "rx_yellow_prio_0"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_1, "rx_yellow_prio_1"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_2, "rx_yellow_prio_2"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_3, "rx_yellow_prio_3"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_4, "rx_yellow_prio_4"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_5, "rx_yellow_prio_5"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_6, "rx_yellow_prio_6"), \
+ OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_7, "rx_yellow_prio_7"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_0, "rx_green_prio_0"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_1, "rx_green_prio_1"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_2, "rx_green_prio_2"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_3, "rx_green_prio_3"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_4, "rx_green_prio_4"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_5, "rx_green_prio_5"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_6, "rx_green_prio_6"), \
+ OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_7, "rx_green_prio_7"), \
+ OCELOT_STAT_ETHTOOL(TX_OCTETS, "tx_octets"), \
+ OCELOT_STAT_ETHTOOL(TX_UNICAST, "tx_unicast"), \
+ OCELOT_STAT_ETHTOOL(TX_MULTICAST, "tx_multicast"), \
+ OCELOT_STAT_ETHTOOL(TX_BROADCAST, "tx_broadcast"), \
+ OCELOT_STAT_ETHTOOL(TX_COLLISION, "tx_collision"), \
+ OCELOT_STAT_ETHTOOL(TX_DROPS, "tx_drops"),
\ + OCELOT_STAT_ETHTOOL(TX_PAUSE, "tx_pause"), \ + OCELOT_STAT_ETHTOOL(TX_64, "tx_frames_below_65_octets"), \ + OCELOT_STAT_ETHTOOL(TX_65_127, "tx_frames_65_to_127_octets"), \ + OCELOT_STAT_ETHTOOL(TX_128_255, "tx_frames_128_255_octets"), \ + OCELOT_STAT_ETHTOOL(TX_256_511, "tx_frames_256_511_octets"), \ + OCELOT_STAT_ETHTOOL(TX_512_1023, "tx_frames_512_1023_octets"), \ + OCELOT_STAT_ETHTOOL(TX_1024_1526, "tx_frames_1024_1526_octets"), \ + OCELOT_STAT_ETHTOOL(TX_1527_MAX, "tx_frames_over_1526_octets"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_0, "tx_yellow_prio_0"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_1, "tx_yellow_prio_1"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_2, "tx_yellow_prio_2"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_3, "tx_yellow_prio_3"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_4, "tx_yellow_prio_4"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_5, "tx_yellow_prio_5"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_6, "tx_yellow_prio_6"), \ + OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_7, "tx_yellow_prio_7"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_0, "tx_green_prio_0"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_1, "tx_green_prio_1"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_2, "tx_green_prio_2"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_3, "tx_green_prio_3"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_4, "tx_green_prio_4"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_5, "tx_green_prio_5"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_6, "tx_green_prio_6"), \ + OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_7, "tx_green_prio_7"), \ + OCELOT_STAT_ETHTOOL(TX_AGED, "tx_aged"), \ + OCELOT_STAT_ETHTOOL(DROP_LOCAL, "drop_local"), \ + OCELOT_STAT_ETHTOOL(DROP_TAIL, "drop_tail"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_0, "drop_yellow_prio_0"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_1, "drop_yellow_prio_1"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_2, "drop_yellow_prio_2"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_3, "drop_yellow_prio_3"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_4, "drop_yellow_prio_4"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_5, "drop_yellow_prio_5"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_6, "drop_yellow_prio_6"), \ + OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_7, "drop_yellow_prio_7"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_0, "drop_green_prio_0"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_1, "drop_green_prio_1"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_2, "drop_green_prio_2"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_3, "drop_green_prio_3"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_4, "drop_green_prio_4"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_5, "drop_green_prio_5"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_6, "drop_green_prio_6"), \ + OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_7, "drop_green_prio_7") + +struct ocelot_stats_region { + struct list_head node; + u32 base; + int count; + u32 *buf; +}; + +static const struct ocelot_stat_layout ocelot_stats_layout[OCELOT_NUM_STATS] = { + OCELOT_COMMON_STATS, +}; + /* Read the counters from hardware and keep them in region->buf. * Caller must hold &ocelot->stat_view_lock. 
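 * The counters are grouped into contiguous register regions (built by
 * ocelot_prepare_stats_regions()) so that they can be read in bulk.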
*/ @@ -93,10 +312,10 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) return; for (i = 0; i < OCELOT_NUM_STATS; i++) { - if (ocelot->stats_layout[i].name[0] == '\0') + if (ocelot_stats_layout[i].name[0] == '\0') continue; - memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name, + memcpy(data + i * ETH_GSTRING_LEN, ocelot_stats_layout[i].name, ETH_GSTRING_LEN); } } @@ -137,7 +356,7 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset) return -EOPNOTSUPP; for (i = 0; i < OCELOT_NUM_STATS; i++) - if (ocelot->stats_layout[i].name[0] != '\0') + if (ocelot_stats_layout[i].name[0] != '\0') num_stats++; return num_stats; @@ -154,7 +373,7 @@ static void ocelot_port_ethtool_stats_cb(struct ocelot *ocelot, int port, for (i = 0; i < OCELOT_NUM_STATS; i++) { int index = port * OCELOT_NUM_STATS + i; - if (ocelot->stats_layout[i].name[0] == '\0') + if (ocelot_stats_layout[i].name[0] == '\0') continue; *data++ = ocelot->stats[index]; @@ -383,16 +602,16 @@ EXPORT_SYMBOL(ocelot_port_get_stats64); static int ocelot_prepare_stats_regions(struct ocelot *ocelot) { struct ocelot_stats_region *region = NULL; - unsigned int last; + unsigned int last = 0; int i; INIT_LIST_HEAD(&ocelot->stats_regions); for (i = 0; i < OCELOT_NUM_STATS; i++) { - if (!ocelot->stats_layout[i].reg) + if (!ocelot_stats_layout[i].reg) continue; - if (region && ocelot->stats_layout[i].reg == last + 4) { + if (region && ocelot_stats_layout[i].reg == last + 4) { region->count++; } else { region = devm_kzalloc(ocelot->dev, sizeof(*region), @@ -400,12 +619,18 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (!region) return -ENOMEM; - region->base = ocelot->stats_layout[i].reg; + /* enum ocelot_stat must be kept sorted in the same + * order as ocelot_stats_layout[i].reg in order to have + * efficient bulking + */ + WARN_ON(last >= ocelot_stats_layout[i].reg); + + region->base = ocelot_stats_layout[i].reg; region->count = 1; list_add_tail(®ion->node, &ocelot->stats_regions); } - last = ocelot->stats_layout[i].reg; + last = ocelot_stats_layout[i].reg; } list_for_each_entry(region, &ocelot->stats_regions, node) { @@ -456,3 +681,4 @@ void ocelot_stats_deinit(struct ocelot *ocelot) cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); } + diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 93431d2ff4f1..b097fd4a4061 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -100,10 +100,6 @@ static const struct reg_field ocelot_regfields[REGFIELD_MAX] = { [SYS_PAUSE_CFG_PAUSE_ENA] = REG_FIELD_ID(SYS_PAUSE_CFG, 0, 1, 12, 4), }; -static const struct ocelot_stat_layout ocelot_stats_layout[OCELOT_NUM_STATS] = { - OCELOT_COMMON_STATS, -}; - static void ocelot_pll5_init(struct ocelot *ocelot) { /* Configure PLL5. This will need a proper CCF driver @@ -138,7 +134,6 @@ static int ocelot_chip_init(struct ocelot *ocelot, const struct ocelot_ops *ops) int ret; ocelot->map = ocelot_regmap; - ocelot->stats_layout = ocelot_stats_layout; ocelot->num_mact_rows = 1024; ocelot->ops = ops; diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 8844d1ac053a..e785c00b5845 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -54,6 +54,17 @@ config NFP_APP_ABM_NIC functionality. Code will be built into the nfp.ko driver. 
+config NFP_NET_IPSEC
+ bool "NFP IPsec crypto offload support"
+ depends on NFP
+ depends on XFRM_OFFLOAD
+ default y
+ help
+ Enable driver support for IPsec crypto offload on NFP NICs.
+ Say Y if you are planning to make use of IPsec crypto
+ offload. NOTE that IPsec crypto offload on NFP NICs
+ requires specific firmware to work.
+
 config NFP_DEBUG
 bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
 depends on NFP
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 9c0861d03634..8a250214e289 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -80,4 +80,6 @@ nfp-objs += \
 abm/main.o
 endif
+nfp-$(CONFIG_NFP_NET_IPSEC) += crypto/ipsec.o nfd3/ipsec.o
+
 nfp-$(CONFIG_NFP_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
index bffe58bb2f27..1df73d658938 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
+++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
@@ -39,4 +39,27 @@ nfp_net_tls_rx_resync_req(struct net_device *netdev,
 }
 #endif
+/* IPsec related structures and functions */
+struct nfp_ipsec_offload {
+ u32 seq_hi;
+ u32 seq_low;
+ u32 handle;
+};
+
+#ifndef CONFIG_NFP_NET_IPSEC
+static inline void nfp_net_ipsec_init(struct nfp_net *nn)
+{
+}
+
+static inline void nfp_net_ipsec_clean(struct nfp_net *nn)
+{
+}
+#else
+void nfp_net_ipsec_init(struct nfp_net *nn);
+void nfp_net_ipsec_clean(struct nfp_net *nn);
+bool nfp_net_ipsec_tx_prep(struct nfp_net_dp *dp, struct sk_buff *skb,
+ struct nfp_ipsec_offload *offload_info);
+int nfp_net_ipsec_rx(struct nfp_meta_parsed *meta, struct sk_buff *skb);
+#endif
+
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
new file mode 100644
index 000000000000..3728870d8e9c
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c
@@ -0,0 +1,587 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc */
+/* Copyright (C) 2021 Corigine, Inc */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <asm/unaligned.h>
+#include <linux/ktime.h>
+#include <net/xfrm.h>
+
+#include "../nfp_net_ctrl.h"
+#include "../nfp_net.h"
+#include "crypto.h"
+
+#define NFP_NET_IPSEC_MAX_SA_CNT (16 * 1024) /* Firmware supports a maximum of 16K offloaded SAs */
+
+/* IPsec config message cmd codes */
+enum nfp_ipsec_cfg_mssg_cmd_codes {
+ NFP_IPSEC_CFG_MSSG_ADD_SA, /* Add a new SA */
+ NFP_IPSEC_CFG_MSSG_INV_SA /* Invalidate an existing SA */
+};
+
+/* IPsec config message response codes */
+enum nfp_ipsec_cfg_mssg_rsp_codes {
+ NFP_IPSEC_CFG_MSSG_OK,
+ NFP_IPSEC_CFG_MSSG_FAILED,
+ NFP_IPSEC_CFG_MSSG_SA_VALID,
+ NFP_IPSEC_CFG_MSSG_SA_HASH_ADD_FAILED,
+ NFP_IPSEC_CFG_MSSG_SA_HASH_DEL_FAILED,
+ NFP_IPSEC_CFG_MSSG_SA_INVALID_CMD
+};
+
+/* Protocol */
+enum nfp_ipsec_sa_prot {
+ NFP_IPSEC_PROTOCOL_AH = 0,
+ NFP_IPSEC_PROTOCOL_ESP = 1
+};
+
+/* Mode */
+enum nfp_ipsec_sa_mode {
+ NFP_IPSEC_PROTMODE_TRANSPORT = 0,
+ NFP_IPSEC_PROTMODE_TUNNEL = 1
+};
+
+/* Cipher types */
+enum nfp_ipsec_sa_cipher {
+ NFP_IPSEC_CIPHER_NULL,
+ NFP_IPSEC_CIPHER_3DES,
+ NFP_IPSEC_CIPHER_AES128,
+ NFP_IPSEC_CIPHER_AES192,
+ NFP_IPSEC_CIPHER_AES256,
+ NFP_IPSEC_CIPHER_AES128_NULL,
+ NFP_IPSEC_CIPHER_AES192_NULL,
+ NFP_IPSEC_CIPHER_AES256_NULL,
+ NFP_IPSEC_CIPHER_CHACHA20
+};
+
+/* 
Cipher modes */ +enum nfp_ipsec_sa_cipher_mode { + NFP_IPSEC_CIMODE_ECB, + NFP_IPSEC_CIMODE_CBC, + NFP_IPSEC_CIMODE_CFB, + NFP_IPSEC_CIMODE_OFB, + NFP_IPSEC_CIMODE_CTR +}; + +/* Hash types */ +enum nfp_ipsec_sa_hash_type { + NFP_IPSEC_HASH_NONE, + NFP_IPSEC_HASH_MD5_96, + NFP_IPSEC_HASH_SHA1_96, + NFP_IPSEC_HASH_SHA256_96, + NFP_IPSEC_HASH_SHA384_96, + NFP_IPSEC_HASH_SHA512_96, + NFP_IPSEC_HASH_MD5_128, + NFP_IPSEC_HASH_SHA1_80, + NFP_IPSEC_HASH_SHA256_128, + NFP_IPSEC_HASH_SHA384_192, + NFP_IPSEC_HASH_SHA512_256, + NFP_IPSEC_HASH_GF128_128, + NFP_IPSEC_HASH_POLY1305_128 +}; + +/* IPSEC_CFG_MSSG_ADD_SA */ +struct nfp_ipsec_cfg_add_sa { + u32 ciph_key[8]; /* Cipher Key */ + union { + u32 auth_key[16]; /* Authentication Key */ + struct nfp_ipsec_aesgcm { /* AES-GCM-ESP fields */ + u32 salt; /* Initialized with SA */ + u32 resv[15]; + } aesgcm_fields; + }; + struct sa_ctrl_word { + uint32_t hash :4; /* From nfp_ipsec_sa_hash_type */ + uint32_t cimode :4; /* From nfp_ipsec_sa_cipher_mode */ + uint32_t cipher :4; /* From nfp_ipsec_sa_cipher */ + uint32_t mode :2; /* From nfp_ipsec_sa_mode */ + uint32_t proto :2; /* From nfp_ipsec_sa_prot */ + uint32_t dir :1; /* SA direction */ + uint32_t resv0 :12; + uint32_t encap_dsbl:1; /* Encap/Decap disable */ + uint32_t resv1 :2; /* Must be set to 0 */ + } ctrl_word; + u32 spi; /* SPI Value */ + uint32_t pmtu_limit :16; /* PMTU Limit */ + uint32_t resv0 :5; + uint32_t ipv6 :1; /* Outbound IPv6 addr format */ + uint32_t resv1 :10; + u32 resv2[2]; + u32 src_ip[4]; /* Src IP addr */ + u32 dst_ip[4]; /* Dst IP addr */ + u32 resv3[6]; +}; + +/* IPSEC_CFG_MSSG */ +struct nfp_ipsec_cfg_mssg { + union { + struct{ + uint32_t cmd:16; /* One of nfp_ipsec_cfg_mssg_cmd_codes */ + uint32_t rsp:16; /* One of nfp_ipsec_cfg_mssg_rsp_codes */ + uint32_t sa_idx:16; /* SA table index */ + uint32_t spare0:16; + struct nfp_ipsec_cfg_add_sa cfg_add_sa; + }; + u32 raw[64]; + }; +}; + +static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx, + struct nfp_ipsec_cfg_mssg *msg) +{ + int i, msg_size, ret; + + msg->cmd = type; + msg->sa_idx = saidx; + msg->rsp = 0; + msg_size = ARRAY_SIZE(msg->raw); + + for (i = 0; i < msg_size; i++) + nn_writel(nn, NFP_NET_CFG_MBOX_VAL + 4 * i, msg->raw[i]); + + ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC); + if (ret < 0) + return ret; + + /* For now we always read the whole message response back */ + for (i = 0; i < msg_size; i++) + msg->raw[i] = nn_readl(nn, NFP_NET_CFG_MBOX_VAL + 4 * i); + + switch (msg->rsp) { + case NFP_IPSEC_CFG_MSSG_OK: + return 0; + case NFP_IPSEC_CFG_MSSG_SA_INVALID_CMD: + return -EINVAL; + case NFP_IPSEC_CFG_MSSG_SA_VALID: + return -EEXIST; + case NFP_IPSEC_CFG_MSSG_FAILED: + case NFP_IPSEC_CFG_MSSG_SA_HASH_ADD_FAILED: + case NFP_IPSEC_CFG_MSSG_SA_HASH_DEL_FAILED: + return -EIO; + default: + return -EINVAL; + } +} + +static int set_aes_keylen(struct nfp_ipsec_cfg_add_sa *cfg, int alg, int keylen) +{ + bool aes_gmac = (alg == SADB_X_EALG_NULL_AES_GMAC); + + switch (keylen) { + case 128: + cfg->ctrl_word.cipher = aes_gmac ? NFP_IPSEC_CIPHER_AES128_NULL : + NFP_IPSEC_CIPHER_AES128; + break; + case 192: + cfg->ctrl_word.cipher = aes_gmac ? NFP_IPSEC_CIPHER_AES192_NULL : + NFP_IPSEC_CIPHER_AES192; + break; + case 256: + cfg->ctrl_word.cipher = aes_gmac ? 
NFP_IPSEC_CIPHER_AES256_NULL : + NFP_IPSEC_CIPHER_AES256; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void set_md5hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) +{ + switch (*trunc_len) { + case 96: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_MD5_96; + break; + case 128: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_MD5_128; + break; + default: + *trunc_len = 0; + } +} + +static void set_sha1hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) +{ + switch (*trunc_len) { + case 96: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA1_96; + break; + case 80: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA1_80; + break; + default: + *trunc_len = 0; + } +} + +static void set_sha2_256hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) +{ + switch (*trunc_len) { + case 96: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA256_96; + break; + case 128: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA256_128; + break; + default: + *trunc_len = 0; + } +} + +static void set_sha2_384hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) +{ + switch (*trunc_len) { + case 96: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA384_96; + break; + case 192: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA384_192; + break; + default: + *trunc_len = 0; + } +} + +static void set_sha2_512hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) +{ + switch (*trunc_len) { + case 96: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA512_96; + break; + case 256: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_SHA512_256; + break; + default: + *trunc_len = 0; + } +} + +static int nfp_net_xfrm_add_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.dev; + struct nfp_ipsec_cfg_mssg msg = {}; + int i, key_len, trunc_len, err = 0; + struct nfp_ipsec_cfg_add_sa *cfg; + struct nfp_net *nn; + unsigned int saidx; + + nn = netdev_priv(netdev); + cfg = &msg.cfg_add_sa; + + /* General */ + switch (x->props.mode) { + case XFRM_MODE_TUNNEL: + cfg->ctrl_word.mode = NFP_IPSEC_PROTMODE_TUNNEL; + break; + case XFRM_MODE_TRANSPORT: + cfg->ctrl_word.mode = NFP_IPSEC_PROTMODE_TRANSPORT; + break; + default: + nn_err(nn, "Unsupported mode for xfrm offload\n"); + return -EINVAL; + } + + switch (x->id.proto) { + case IPPROTO_ESP: + cfg->ctrl_word.proto = NFP_IPSEC_PROTOCOL_ESP; + break; + case IPPROTO_AH: + cfg->ctrl_word.proto = NFP_IPSEC_PROTOCOL_AH; + break; + default: + nn_err(nn, "Unsupported protocol for xfrm offload\n"); + return -EINVAL; + } + + if (x->props.flags & XFRM_STATE_ESN) { + nn_err(nn, "Unsupported XFRM_REPLAY_MODE_ESN for xfrm offload\n"); + return -EINVAL; + } + + cfg->spi = ntohl(x->id.spi); + + /* Hash/Authentication */ + if (x->aalg) + trunc_len = x->aalg->alg_trunc_len; + else + trunc_len = 0; + + switch (x->props.aalgo) { + case SADB_AALG_NONE: + if (x->aead) { + trunc_len = -1; + } else { + nn_err(nn, "Unsupported authentication algorithm\n"); + return -EINVAL; + } + break; + case SADB_X_AALG_NULL: + cfg->ctrl_word.hash = NFP_IPSEC_HASH_NONE; + trunc_len = -1; + break; + case SADB_AALG_MD5HMAC: + set_md5hmac(cfg, &trunc_len); + break; + case SADB_AALG_SHA1HMAC: + set_sha1hmac(cfg, &trunc_len); + break; + case SADB_X_AALG_SHA2_256HMAC: + set_sha2_256hmac(cfg, &trunc_len); + break; + case SADB_X_AALG_SHA2_384HMAC: + set_sha2_384hmac(cfg, &trunc_len); + break; + case SADB_X_AALG_SHA2_512HMAC: + set_sha2_512hmac(cfg, &trunc_len); + break; + default: + nn_err(nn, "Unsupported authentication algorithm\n"); + return -EINVAL; + } + + if (!trunc_len) { + nn_err(nn, "Unsupported authentication algorithm trunc length\n"); + return -EINVAL; + 
}
+
+ if (x->aalg) {
+ key_len = DIV_ROUND_UP(x->aalg->alg_key_len, BITS_PER_BYTE);
+ if (key_len > sizeof(cfg->auth_key)) {
+ nn_err(nn, "Insufficient space for offloaded auth key\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < key_len / sizeof(cfg->auth_key[0]); i++)
+ cfg->auth_key[i] = get_unaligned_be32(x->aalg->alg_key +
+ sizeof(cfg->auth_key[0]) * i);
+ }
+
+ /* Encryption */
+ switch (x->props.ealgo) {
+ case SADB_EALG_NONE:
+ case SADB_EALG_NULL:
+ cfg->ctrl_word.cimode = NFP_IPSEC_CIMODE_CBC;
+ cfg->ctrl_word.cipher = NFP_IPSEC_CIPHER_NULL;
+ break;
+ case SADB_EALG_3DESCBC:
+ cfg->ctrl_word.cimode = NFP_IPSEC_CIMODE_CBC;
+ cfg->ctrl_word.cipher = NFP_IPSEC_CIPHER_3DES;
+ break;
+ case SADB_X_EALG_AES_GCM_ICV16:
+ case SADB_X_EALG_NULL_AES_GMAC:
+ if (!x->aead) {
+ nn_err(nn, "Invalid AES key data\n");
+ return -EINVAL;
+ }
+
+ if (x->aead->alg_icv_len != 128) {
+ nn_err(nn, "ICV must be 128bit with SADB_X_EALG_AES_GCM_ICV16\n");
+ return -EINVAL;
+ }
+ cfg->ctrl_word.cimode = NFP_IPSEC_CIMODE_CTR;
+ cfg->ctrl_word.hash = NFP_IPSEC_HASH_GF128_128;
+
+ /* aead->alg_key_len includes the 32-bit salt */
+ if (set_aes_keylen(cfg, x->props.ealgo, x->aead->alg_key_len - 32)) {
+ nn_err(nn, "Unsupported AES key length %d\n", x->aead->alg_key_len);
+ return -EINVAL;
+ }
+ break;
+ case SADB_X_EALG_AESCBC:
+ cfg->ctrl_word.cimode = NFP_IPSEC_CIMODE_CBC;
+ if (!x->ealg) {
+ nn_err(nn, "Invalid AES key data\n");
+ return -EINVAL;
+ }
+ if (set_aes_keylen(cfg, x->props.ealgo, x->ealg->alg_key_len) < 0) {
+ nn_err(nn, "Unsupported AES key length %d\n", x->ealg->alg_key_len);
+ return -EINVAL;
+ }
+ break;
+ default:
+ nn_err(nn, "Unsupported encryption algorithm for offload\n");
+ return -EINVAL;
+ }
+
+ if (x->aead) {
+ int salt_len = 4;
+
+ key_len = DIV_ROUND_UP(x->aead->alg_key_len, BITS_PER_BYTE);
+ key_len -= salt_len;
+
+ if (key_len > sizeof(cfg->ciph_key)) {
+ nn_err(nn, "aead: Insufficient space for offloaded key\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < key_len / sizeof(cfg->ciph_key[0]); i++)
+ cfg->ciph_key[i] = get_unaligned_be32(x->aead->alg_key +
+ sizeof(cfg->ciph_key[0]) * i);
+
+ /* Load up the salt */
+ cfg->aesgcm_fields.salt = get_unaligned_be32(x->aead->alg_key + key_len);
+ }
+
+ if (x->ealg) {
+ key_len = DIV_ROUND_UP(x->ealg->alg_key_len, BITS_PER_BYTE);
+
+ if (key_len > sizeof(cfg->ciph_key)) {
+ nn_err(nn, "ealg: Insufficient space for offloaded key\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < key_len / sizeof(cfg->ciph_key[0]); i++)
+ cfg->ciph_key[i] = get_unaligned_be32(x->ealg->alg_key +
+ sizeof(cfg->ciph_key[0]) * i);
+ }
+
+ /* IP related info */
+ switch (x->props.family) {
+ case AF_INET:
+ cfg->ipv6 = 0;
+ cfg->src_ip[0] = ntohl(x->props.saddr.a4);
+ cfg->dst_ip[0] = ntohl(x->id.daddr.a4);
+ break;
+ case AF_INET6:
+ cfg->ipv6 = 1;
+ for (i = 0; i < 4; i++) {
+ cfg->src_ip[i] = ntohl(x->props.saddr.a6[i]);
+ cfg->dst_ip[i] = ntohl(x->id.daddr.a6[i]);
+ }
+ break;
+ default:
+ nn_err(nn, "Unsupported address family\n");
+ return -EINVAL;
+ }
+
+ /* Maximum PMTU the NIC IPsec code can handle; other limits may apply.
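+ * The 16-bit pmtu_limit field is set to its maximum (0xffff) below,
+ * i.e. the least restrictive value this interface can express.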
+ */
+ cfg->pmtu_limit = 0xffff;
+ cfg->ctrl_word.encap_dsbl = 1;
+
+ /* SA direction */
+ cfg->ctrl_word.dir = x->xso.dir;
+
+ /* Find unused SA data */
+ err = xa_alloc(&nn->xa_ipsec, &saidx, x,
+ XA_LIMIT(0, NFP_NET_IPSEC_MAX_SA_CNT - 1), GFP_KERNEL);
+ if (err < 0) {
+ nn_err(nn, "Unable to get sa_data number for IPsec\n");
+ return err;
+ }
+
+ /* Commit the SA at the allocated saidx */
+ err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_ADD_SA, saidx, &msg);
+ if (err) {
+ xa_erase(&nn->xa_ipsec, saidx);
+ nn_err(nn, "Failed to issue IPsec command err ret=%d\n", err);
+ return err;
+ }
+
+ /* 0 is an invalid offload_handle for the kernel, so store saidx + 1 */
+ x->xso.offload_handle = saidx + 1;
+ return 0;
+}
+
+static void nfp_net_xfrm_del_state(struct xfrm_state *x)
+{
+ struct net_device *netdev = x->xso.dev;
+ struct nfp_ipsec_cfg_mssg msg;
+ struct nfp_net *nn;
+ int err;
+
+ nn = netdev_priv(netdev);
+ err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_INV_SA,
+ x->xso.offload_handle - 1, &msg);
+ if (err)
+ nn_warn(nn, "Failed to invalidate SA in hardware\n");
+
+ xa_erase(&nn->xa_ipsec, x->xso.offload_handle - 1);
+}
+
+static bool nfp_net_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ if (x->props.family == AF_INET)
+ /* Offload with IPv4 options is not supported yet */
+ return ip_hdr(skb)->ihl == 5;
+
+ /* Offload with IPv6 extension headers is not supported yet */
+ return !(ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr));
+}
+
+static const struct xfrmdev_ops nfp_net_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = nfp_net_xfrm_add_state,
+ .xdo_dev_state_delete = nfp_net_xfrm_del_state,
+ .xdo_dev_offload_ok = nfp_net_ipsec_offload_ok,
+};
+
+void nfp_net_ipsec_init(struct nfp_net *nn)
+{
+ if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_IPSEC))
+ return;
+
+ xa_init_flags(&nn->xa_ipsec, XA_FLAGS_ALLOC);
+ nn->dp.netdev->xfrmdev_ops = &nfp_net_ipsec_xfrmdev_ops;
+}
+
+void nfp_net_ipsec_clean(struct nfp_net *nn)
+{
+ if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_IPSEC))
+ return;
+
+ WARN_ON(!xa_empty(&nn->xa_ipsec));
+ xa_destroy(&nn->xa_ipsec);
+}
+
+bool nfp_net_ipsec_tx_prep(struct nfp_net_dp *dp, struct sk_buff *skb,
+ struct nfp_ipsec_offload *offload_info)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_state *x;
+
+ x = xfrm_input_state(skb);
+ if (!x)
+ return false;
+
+ offload_info->seq_hi = xo->seq.hi;
+ offload_info->seq_low = xo->seq.low;
+ offload_info->handle = x->xso.offload_handle;
+
+ return true;
+}
+
+int nfp_net_ipsec_rx(struct nfp_meta_parsed *meta, struct sk_buff *skb)
+{
+ struct net_device *netdev = skb->dev;
+ struct xfrm_offload *xo;
+ struct xfrm_state *x;
+ struct sec_path *sp;
+ struct nfp_net *nn;
+ u32 saidx;
+
+ nn = netdev_priv(netdev);
+
+ saidx = meta->ipsec_saidx - 1;
+ if (saidx >= NFP_NET_IPSEC_MAX_SA_CNT)
+ return -EINVAL;
+
+ sp = secpath_set(skb);
+ if (unlikely(!sp))
+ return -ENOMEM;
+
+ xa_lock(&nn->xa_ipsec);
+ x = xa_load(&nn->xa_ipsec, saidx);
+ xa_unlock(&nn->xa_ipsec);
+ if (!x)
+ return -EINVAL;
+
+ xfrm_state_hold(x);
+ sp->xvec[sp->len++] = x;
+ sp->olen++;
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = CRYPTO_SUCCESS;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index 448c1c1afaee..861082c5dbff 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -4,6 +4,7 @@
 #include <linux/bpf_trace.h>
 #include <linux/netdevice.h>
 #include <linux/bitfield.h>
+#include <net/xfrm.h>
 #include "../nfp_app.h"
 #include "../nfp_net.h"
#include "../nfp_net.h" @@ -167,28 +168,34 @@ nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, u64_stats_update_end(&r_vec->tx_sync); } -static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, u64 tls_handle) +static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, + u64 tls_handle, bool *ipsec) { struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct nfp_ipsec_offload offload_info; unsigned char *data; bool vlan_insert; u32 meta_id = 0; int md_bytes; - if (unlikely(md_dst || tls_handle)) { - if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) - md_dst = NULL; - } +#ifdef CONFIG_NFP_NET_IPSEC + if (xfrm_offload(skb)) + *ipsec = nfp_net_ipsec_tx_prep(dp, skb, &offload_info); +#endif + + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) + md_dst = NULL; vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); - if (!(md_dst || tls_handle || vlan_insert)) + if (!(md_dst || tls_handle || vlan_insert || *ipsec)) return 0; md_bytes = sizeof(meta_id) + !!md_dst * NFP_NET_META_PORTID_SIZE + !!tls_handle * NFP_NET_META_CONN_HANDLE_SIZE + - vlan_insert * NFP_NET_META_VLAN_SIZE; + vlan_insert * NFP_NET_META_VLAN_SIZE + + *ipsec * NFP_NET_META_IPSEC_FIELD_SIZE; /* IPsec has 12 bytes of metadata */ if (unlikely(skb_cow_head(skb, md_bytes))) return -ENOMEM; @@ -218,6 +225,19 @@ static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, u64 meta_id <<= NFP_NET_META_FIELD_SIZE; meta_id |= NFP_NET_META_VLAN; } + if (*ipsec) { + /* IPsec has three consecutive 4-bit IPsec metadata types, + * so in total IPsec has three 4 bytes of metadata. + */ + data -= NFP_NET_META_IPSEC_SIZE; + put_unaligned_be32(offload_info.seq_hi, data); + data -= NFP_NET_META_IPSEC_SIZE; + put_unaligned_be32(offload_info.seq_low, data); + data -= NFP_NET_META_IPSEC_SIZE; + put_unaligned_be32(offload_info.handle - 1, data); + meta_id <<= NFP_NET_META_IPSEC_FIELD_SIZE; + meta_id |= NFP_NET_META_IPSEC << 8 | NFP_NET_META_IPSEC << 4 | NFP_NET_META_IPSEC; + } data -= sizeof(meta_id); put_unaligned_be32(meta_id, data); @@ -246,6 +266,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) dma_addr_t dma_addr; unsigned int fsize; u64 tls_handle = 0; + bool ipsec = false; u16 qidx; dp = &nn->dp; @@ -273,7 +294,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle); + md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle, &ipsec); if (unlikely(md_bytes < 0)) goto err_flush; @@ -312,6 +333,8 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } + if (ipsec) + nfp_nfd3_ipsec_tx(txd, skb); /* Gather DMA */ if (nr_frags > 0) { __le64 second_half; @@ -764,6 +787,15 @@ nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, return false; data += sizeof(struct nfp_net_tls_resync_req); break; +#ifdef CONFIG_NFP_NET_IPSEC + case NFP_NET_META_IPSEC: + /* Note: IPsec packet will have zero saidx, so need add 1 + * to indicate packet is IPsec packet within driver. 
+ */ + meta->ipsec_saidx = get_unaligned_be32(data) + 1; + data += 4; + break; +#endif default: return true; } @@ -876,12 +908,11 @@ static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_net_dp *dp = &r_vec->nfp_net->dp; struct nfp_net_tx_ring *tx_ring; struct bpf_prog *xdp_prog; + int idx, pkts_polled = 0; bool xdp_tx_cmpl = false; unsigned int true_bufsz; struct sk_buff *skb; - int pkts_polled = 0; struct xdp_buff xdp; - int idx; xdp_prog = READ_ONCE(dp->xdp_prog); true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; @@ -1081,6 +1112,13 @@ static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget) continue; } +#ifdef CONFIG_NFP_NET_IPSEC + if (meta.ipsec_saidx != 0 && unlikely(nfp_net_ipsec_rx(&meta, skb))) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb); + continue; + } +#endif + if (meta_len_xdp) skb_metadata_set(skb, meta_len_xdp); diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c new file mode 100644 index 000000000000..e90f8c975903 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/ipsec.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <net/xfrm.h> + +#include "../nfp_net.h" +#include "nfd3.h" + +void nfp_nfd3_ipsec_tx(struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb) +{ + struct xfrm_state *x = xfrm_input_state(skb); + + if (x->xso.dev && (x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)) { + txd->flags |= NFD3_DESC_TX_CSUM | NFD3_DESC_TX_IP4_CSUM | + NFD3_DESC_TX_TCP_CSUM | NFD3_DESC_TX_UDP_CSUM; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h index 7a0df9e6c3c4..9c1c10dcbaee 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h +++ b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h @@ -103,4 +103,12 @@ void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf); int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget); +#ifndef CONFIG_NFP_NET_IPSEC +static inline void nfp_nfd3_ipsec_tx(struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb) +{ +} +#else +void nfp_nfd3_ipsec_tx(struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb); +#endif + #endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 8bfd48d50ef0..bf6bae557158 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -239,10 +239,6 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char *buf = NULL; int err; - err = devlink_info_driver_name_put(req, "nfp"); - if (err) - return err; - vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); @@ -343,7 +339,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; attrs.split = eth_port.is_split; - attrs.splittable = !attrs.split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 5a18af672e6b..14a751bfe1fe 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -27,6 +27,7 @@ struct nfp_hwinfo;
 struct nfp_mip;
 struct nfp_net;
 struct nfp_nsp_identify;
+struct nfp_eth_media_buf;
 struct nfp_port;
 struct nfp_rtsym;
 struct nfp_rtsym_table;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index a101ff30a1ae..6c83e47d8b3d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -263,6 +263,10 @@ struct nfp_meta_parsed {
 u8 tpid;
 u16 tci;
 } vlan;
+
+#ifdef CONFIG_NFP_NET_IPSEC
+ u32 ipsec_saidx;
+#endif
 };
 struct nfp_net_rx_hash {
@@ -541,6 +545,7 @@ struct nfp_net_dp {
 * @id: vNIC id within the PF (0 for VFs)
 * @fw_ver: Firmware version
 * @cap: Capabilities advertised by the Firmware
+ * @cap_w1: Extended capabilities word advertised by the Firmware
 * @max_mtu: Maximum supported MTU advertised by the Firmware
 * @rss_hfunc: RSS selected hash function
 * @rss_cfg: RSS configuration
@@ -583,6 +588,7 @@ struct nfp_net_dp {
 * @qcp_cfg: Pointer to QCP queue used for configuration notification
 * @tx_bar: Pointer to mapped TX queues
 * @rx_bar: Pointer to mapped FL/RX queues
+ * @xa_ipsec: IPsec xarray SA data
 * @tlv_caps: Parsed TLV capabilities
 * @ktls_tx_conn_cnt: Number of offloaded kTLS TX connections
 * @ktls_rx_conn_cnt: Number of offloaded kTLS RX connections
@@ -617,6 +623,7 @@ struct nfp_net {
 u32 id;
 u32 cap;
+ u32 cap_w1;
 u32 max_mtu;
 u8 rss_hfunc;
@@ -670,6 +677,10 @@ struct nfp_net {
 u8 __iomem *tx_bar;
 u8 __iomem *rx_bar;
+#ifdef CONFIG_NFP_NET_IPSEC
+ struct xarray xa_ipsec;
+#endif
+
 struct nfp_net_tlv_caps tlv_caps;
 unsigned int ktls_tx_conn_cnt;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 5620faa63c7e..682a9198fb54 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2371,6 +2371,12 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
 }
 if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
 netdev->hw_features |= NETIF_F_RXHASH;
+
+#ifdef CONFIG_NFP_NET_IPSEC
+ if (nn->cap_w1 & NFP_NET_CFG_CTRL_IPSEC)
+ netdev->hw_features |= NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM;
+#endif
+
 if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
 if (nn->cap & NFP_NET_CFG_CTRL_LSO) {
 netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
@@ -2452,6 +2458,7 @@ static int nfp_net_read_caps(struct nfp_net *nn)
 {
 /* Get some of the read-only fields from the BAR */
 nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
+ nn->cap_w1 = nn_readq(nn, NFP_NET_CFG_CAP_WORD1);
 nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
 /* ABI 4.x and ctrl vNIC always use chained metadata, in other cases
@@ -2563,6 +2570,8 @@ int nfp_net_init(struct nfp_net *nn)
 err = nfp_net_tls_init(nn);
 if (err)
 goto err_clean_mbox;
+
+ nfp_net_ipsec_init(nn);
 }
 nfp_net_vecs_init(nn);
@@ -2586,6 +2595,7 @@ void nfp_net_clean(struct nfp_net *nn)
 return;
 unregister_netdev(nn->dp.netdev);
+ nfp_net_ipsec_clean(nn);
 nfp_ccm_mbox_clean(nn);
 nfp_net_reconfig_wait_posted(nn);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 6714d5e8fdab..cc11b3dc1252 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -48,6 +48,7 @@
 #define NFP_NET_META_CSUM 6 /* checksum complete type */
 #define NFP_NET_META_CONN_HANDLE 7
 #define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */
+#define NFP_NET_META_IPSEC 9 /* 
IPsec SA index for tx and rx */ #define NFP_META_PORT_ID_CTRL ~0U @@ -55,6 +56,8 @@ #define NFP_NET_META_VLAN_SIZE 4 #define NFP_NET_META_PORTID_SIZE 4 #define NFP_NET_META_CONN_HANDLE_SIZE 8 +#define NFP_NET_META_IPSEC_SIZE 4 +#define NFP_NET_META_IPSEC_FIELD_SIZE 12 /* Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 #define NFP_NET_RSS_IPV4 1 @@ -257,10 +260,19 @@ #define NFP_NET_CFG_BPF_CFG_MASK 7ULL #define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) -/* 40B reserved for future use (0x0098 - 0x00c0) +/* 3 words reserved for extended ctrl words (0x0098 - 0x00a4) + * 3 words reserved for extended cap words (0x00a4 - 0x00b0) + * Currently only one word is used, can be extended in future. */ -#define NFP_NET_CFG_RESERVED 0x0098 -#define NFP_NET_CFG_RESERVED_SZ 0x0028 +#define NFP_NET_CFG_CTRL_WORD1 0x0098 +#define NFP_NET_CFG_CTRL_PKT_TYPE (0x1 << 0) /* Pkttype offload */ +#define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */ + +#define NFP_NET_CFG_CAP_WORD1 0x00a4 + +/* 16B reserved for future use (0x00b0 - 0x00c0) */ +#define NFP_NET_CFG_RESERVED 0x00b0 +#define NFP_NET_CFG_RESERVED_SZ 0x0010 /* RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled @@ -390,14 +402,14 @@ */ #define NFP_NET_CFG_MBOX_BASE 0x1800 #define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 - +#define NFP_NET_CFG_MBOX_VAL 0x1808 #define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 #define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 #define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1 #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 - +#define NFP_NET_CFG_MBOX_CMD_IPSEC 3 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 0058ba2b3505..a4a89ef3f18b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -293,6 +293,76 @@ nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port, } } +static const u16 nfp_eth_media_table[] = { + [NFP_MEDIA_1000BASE_CX] = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + [NFP_MEDIA_1000BASE_KX] = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + [NFP_MEDIA_10GBASE_KX4] = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + [NFP_MEDIA_10GBASE_KR] = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + [NFP_MEDIA_10GBASE_CX4] = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + [NFP_MEDIA_10GBASE_CR] = ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + [NFP_MEDIA_10GBASE_SR] = ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + [NFP_MEDIA_10GBASE_ER] = ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + [NFP_MEDIA_25GBASE_KR] = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + [NFP_MEDIA_25GBASE_KR_S] = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + [NFP_MEDIA_25GBASE_CR] = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + [NFP_MEDIA_25GBASE_CR_S] = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + [NFP_MEDIA_25GBASE_SR] = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + [NFP_MEDIA_40GBASE_CR4] = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + [NFP_MEDIA_40GBASE_KR4] = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + [NFP_MEDIA_40GBASE_SR4] = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + [NFP_MEDIA_40GBASE_LR4] = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + [NFP_MEDIA_50GBASE_KR] = ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + [NFP_MEDIA_50GBASE_SR] = ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + [NFP_MEDIA_50GBASE_CR] = ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + [NFP_MEDIA_50GBASE_LR] = 
ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + [NFP_MEDIA_50GBASE_ER] = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + [NFP_MEDIA_50GBASE_FR] = ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + [NFP_MEDIA_100GBASE_KR4] = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + [NFP_MEDIA_100GBASE_SR4] = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + [NFP_MEDIA_100GBASE_CR4] = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + [NFP_MEDIA_100GBASE_KP4] = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + [NFP_MEDIA_100GBASE_CR10] = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, +}; + +static void nfp_add_media_link_mode(struct nfp_port *port, + struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *cmd) +{ + u64 supported_modes[2], advertised_modes[2]; + struct nfp_eth_media_buf ethm = { + .eth_index = eth_port->eth_index, + }; + struct nfp_cpp *cpp = port->app->cpp; + + if (nfp_eth_read_media(cpp, &ethm)) + return; + + for (u32 i = 0; i < 2; i++) { + supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]); + advertised_modes[i] = le64_to_cpu(ethm.advertised_modes[i]); + } + + for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) { + if (i < 64) { + if (supported_modes[0] & BIT_ULL(i)) + __set_bit(nfp_eth_media_table[i], + cmd->link_modes.supported); + + if (advertised_modes[0] & BIT_ULL(i)) + __set_bit(nfp_eth_media_table[i], + cmd->link_modes.advertising); + } else { + if (supported_modes[1] & BIT_ULL(i - 64)) + __set_bit(nfp_eth_media_table[i], + cmd->link_modes.supported); + + if (advertised_modes[1] & BIT_ULL(i - 64)) + __set_bit(nfp_eth_media_table[i], + cmd->link_modes.advertising); + } + } +} + /** * nfp_net_get_link_ksettings - Get Link Speed settings * @netdev: network interface device structure @@ -311,6 +381,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, u16 sts; /* Init to unknowns */ + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; @@ -321,6 +393,7 @@ nfp_net_get_link_ksettings(struct net_device *netdev, if (eth_port) { ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + nfp_add_media_link_mode(port, eth_port, cmd); if (eth_port->supp_aneg) { ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); if (eth_port->aneg == NFP_ANEG_AUTO) { @@ -1432,6 +1505,9 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + /* update port state to get latest interface */ set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 730fea214b8a..7136bc48530b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -100,6 +100,7 @@ enum nfp_nsp_cmd { SPCODE_FW_LOADED = 19, /* Is application firmware loaded */ SPCODE_VERSIONS = 21, /* Report FW versions */ SPCODE_READ_SFF_EEPROM = 22, /* Read module EEPROM */ + SPCODE_READ_MEDIA = 23, /* Get either the supported or advertised media for a port */ }; struct nfp_nsp_dma_buf { @@ -1100,4 +1101,20 @@ int nfp_nsp_read_module_eeprom(struct nfp_nsp *state, int eth_index, kfree(buf); return ret; +}; + +int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size) +{ + struct 
nfp_nsp_command_buf_arg media = { + { + .code = SPCODE_READ_MEDIA, + .option = size, + }, + .in_buf = buf, + .in_size = size, + .out_buf = buf, + .out_size = size, + }; + + return nfp_nsp_command_buf(state, &media); } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index 992d72ac98d3..8f5cab0032d0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -65,6 +65,11 @@ static inline bool nfp_nsp_has_read_module_eeprom(struct nfp_nsp *state) return nfp_nsp_get_abi_ver_minor(state) > 28; } +static inline bool nfp_nsp_has_read_media(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 33; +} + enum nfp_eth_interface { NFP_INTERFACE_NONE = 0, NFP_INTERFACE_SFP = 1, @@ -97,6 +102,47 @@ enum nfp_eth_fec { NFP_FEC_DISABLED_BIT, }; +/* link modes about RJ45 haven't been used, so there's no mapping to them */ +enum nfp_ethtool_link_mode_list { + NFP_MEDIA_W0_RJ45_10M, + NFP_MEDIA_W0_RJ45_10M_HD, + NFP_MEDIA_W0_RJ45_100M, + NFP_MEDIA_W0_RJ45_100M_HD, + NFP_MEDIA_W0_RJ45_1G, + NFP_MEDIA_W0_RJ45_2P5G, + NFP_MEDIA_W0_RJ45_5G, + NFP_MEDIA_W0_RJ45_10G, + NFP_MEDIA_1000BASE_CX, + NFP_MEDIA_1000BASE_KX, + NFP_MEDIA_10GBASE_KX4, + NFP_MEDIA_10GBASE_KR, + NFP_MEDIA_10GBASE_CX4, + NFP_MEDIA_10GBASE_CR, + NFP_MEDIA_10GBASE_SR, + NFP_MEDIA_10GBASE_ER, + NFP_MEDIA_25GBASE_KR, + NFP_MEDIA_25GBASE_KR_S, + NFP_MEDIA_25GBASE_CR, + NFP_MEDIA_25GBASE_CR_S, + NFP_MEDIA_25GBASE_SR, + NFP_MEDIA_40GBASE_CR4, + NFP_MEDIA_40GBASE_KR4, + NFP_MEDIA_40GBASE_SR4, + NFP_MEDIA_40GBASE_LR4, + NFP_MEDIA_50GBASE_KR, + NFP_MEDIA_50GBASE_SR, + NFP_MEDIA_50GBASE_CR, + NFP_MEDIA_50GBASE_LR, + NFP_MEDIA_50GBASE_ER, + NFP_MEDIA_50GBASE_FR, + NFP_MEDIA_100GBASE_KR4, + NFP_MEDIA_100GBASE_SR4, + NFP_MEDIA_100GBASE_CR4, + NFP_MEDIA_100GBASE_KP4, + NFP_MEDIA_100GBASE_CR10, + NFP_MEDIA_LINK_MODES_NUMBER +}; + #define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) #define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) #define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) @@ -256,6 +302,16 @@ enum nfp_nsp_sensor_id { int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id, long *val); +struct nfp_eth_media_buf { + u8 eth_index; + u8 reserved[7]; + __le64 supported_modes[2]; + __le64 advertised_modes[2]; +}; + +int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size); +int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm); + #define NFP_NSP_VERSION_BUFSZ 1024 /* reasonable size, not in the ABI */ enum nfp_nsp_versions { diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index bb64efec4c46..570ac1bb2122 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -647,3 +647,29 @@ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } + +int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm) +{ + struct nfp_nsp *nsp; + int ret; + + nsp = nfp_nsp_open(cpp); + if (IS_ERR(nsp)) { + nfp_err(cpp, "Failed to access the NSP: %pe\n", nsp); + return PTR_ERR(nsp); + } + + if (!nfp_nsp_has_read_media(nsp)) { + nfp_warn(cpp, "Reading media link modes not supported. 
Please update flash\n"); + ret = -EOPNOTSUPP; + goto exit_close_nsp; + } + + ret = nfp_nsp_read_media(nsp, ethm, sizeof(*ethm)); + if (ret) + nfp_err(cpp, "Reading media link modes failed: %pe\n", ERR_PTR(ret)); + +exit_close_nsp: + nfp_nsp_close(nsp); + return ret; +} diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 19d043b593cc..62320be4de5a 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); - - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); - - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); + + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3f2c30184752..28b7cec485ef 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1143,6 +1143,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, buffer_info->dma = 0; buffer_info->time_stamp = 0; tx_ring->next_to_use = ring_num; + dev_kfree_skb_any(skb); return; } buffer_info->mapped = true; @@ -2459,6 +2460,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); pch_gbe_phy_hw_reset(&adapter->hw); + pci_dev_put(adapter->ptp_pdev); free_netdev(netdev); } @@ -2533,7 +2535,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, /* setup the private structure */ ret = pch_gbe_sw_init(adapter); if (ret) - goto err_free_netdev; + goto err_put_dev; /* Initialize PHY */ ret = pch_gbe_init_phy(adapter); @@ -2591,6 +2593,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, err_free_adapter: pch_gbe_phy_hw_reset(&adapter->hw); +err_put_dev: + pci_dev_put(adapter->ptp_pdev); err_free_netdev: free_netdev(netdev); return ret; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index 567f778433e2..e6ff757895ab 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -26,10 +26,6 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, char buf[16]; int err = 0; - err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); - if (err) - return err; - err = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, idev->dev_info.fw_version); diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index 6bb4e165b592..922c47797af6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -162,10 +162,6 @@ static int qed_devlink_info_get(struct devlink *devlink, dev_info = 
&cdev->common_dev_info; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num)); buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9fb1fa479d4b..16e6bd466143 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn, return rc; } -#define CONFIG_QEDE_BITMAP_IDX BIT(0) -#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1) -#define CONFIG_QEDR_BITMAP_IDX BIT(2) -#define CONFIG_QEDF_BITMAP_IDX BIT(4) -#define CONFIG_QEDI_BITMAP_IDX BIT(5) -#define CONFIG_QED_LL2_BITMAP_IDX BIT(6) +#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0) +#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1) +#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2) +#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4) +#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5) +#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6) static u32 qed_get_config_bitmap(void) { u32 config_bitmap = 0x0; if (IS_ENABLED(CONFIG_QEDE)) - config_bitmap |= CONFIG_QEDE_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE; if (IS_ENABLED(CONFIG_QED_SRIOV)) - config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV; if (IS_ENABLED(CONFIG_QED_RDMA)) - config_bitmap |= CONFIG_QEDR_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR; if (IS_ENABLED(CONFIG_QED_FCOE)) - config_bitmap |= CONFIG_QEDF_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF; if (IS_ENABLED(CONFIG_QED_ISCSI)) - config_bitmap |= CONFIG_QEDI_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI; if (IS_ENABLED(CONFIG_QED_LL2)) - config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2; return config_bitmap; } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 76072f8c3d2f..0d57ffcedf0c 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2471,6 +2471,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, skb_shinfo(skb)->nr_frags); if (tx_cb->seg_count == -1) { netdev_err(ndev, "%s: invalid segment count!\n", __func__); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5bc1181f829b..ec157885da13 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4559,12 +4559,13 @@ static int 
rtl8169_poll(struct napi_struct *napi, int budget) static void r8169_phylink_handler(struct net_device *ndev) { struct rtl8169_private *tp = netdev_priv(ndev); + struct device *d = tp_to_dev(tp); if (netif_carrier_ok(ndev)) { rtl_link_chg_patch(tp); - pm_request_resume(&tp->pci_dev->dev); + pm_request_resume(d); } else { - pm_runtime_idle(&tp->pci_dev->dev); + pm_runtime_idle(d); } phy_print_status(tp->phydev); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36324126db6d..6bc923326268 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index c098b27093ea..e42ceaa0099f 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1714,7 +1714,7 @@ static int rswitch_init(struct rswitch_private *priv) } for (i = 0; i < RSWITCH_NUM_PORTS; i++) - netdev_info(priv->rdev[i]->ndev, "MAC address %pMn", + netdev_info(priv->rdev[i]->ndev, "MAC address %pM\n", priv->rdev[i]->ndev->dev_addr); return 0; diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 88fa29572e23..ddcc325ed570 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -218,6 +218,7 @@ netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, skb->len, skb->data_len, channel->channel); if (!efx->n_channels || !efx->n_tx_channels || !channel) { netif_stop_queue(net_dev); + dev_kfree_skb_any(skb); goto err; } diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index 2bba5d3a2fdb..d1a91d54c6bb 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -476,13 +476,15 @@ static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark) goto out; } - /* Update seen_gen unconditionally, to avoid a missed wakeup if - * we race with efx_mae_stop_counters(). - */ - efx->tc->seen_gen[type] = mark; - if (efx->tc->flush_counters && - (s32)(efx->tc->flush_gen[type] - mark) <= 0) - wake_up(&efx->tc->flush_wq); + if (type < EFX_TC_COUNTER_TYPE_MAX) { + /* Update seen_gen unconditionally, to avoid a missed wakeup if + * we race with efx_mae_stop_counters(). 
+ */ + efx->tc->seen_gen[type] = mark; + if (efx->tc->flush_counters && + (s32)(efx->tc->flush_gen[type] - mark) <= 0) + wake_up(&efx->tc->flush_wq); + } out: efx_free_rx_buffers(rx_queue, rx_buf, 1); channel->rx_pkt_n_frags = 0; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index c5f88f7a7a04..4ed4082836a9 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -207,11 +207,11 @@ static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb, skb_frag_t *f = &skb_shinfo(skb)->frags[i]; u8 *vaddr; - vaddr = kmap_atomic(skb_frag_page(f)); + vaddr = kmap_local_page(skb_frag_page(f)); efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f), skb_frag_size(f), copy_buf); - kunmap_atomic(vaddr); + kunmap_local(vaddr); } EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 513f6ea335a8..8c7a0b7c9952 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -739,6 +739,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0a9d13d7976f..18c7ca29da2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 0aca193d9550..4ef05bad4613 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -90,8 +90,6 @@ #include <linux/uaccess.h> #include <linux/jiffies.h> -#define cas_page_map(x) kmap_atomic((x)) -#define cas_page_unmap(x) kunmap_atomic((x)) #define CAS_NCPUS num_online_cpus() #define cas_skb_release(x) netif_rx(x) @@ -1915,7 +1913,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, int off, swivel = RX_SWIVEL_OFF_VAL; struct cas_page *page; struct sk_buff *skb; - void *addr, *crcaddr; + void *crcaddr; __sum16 csum; char *p; @@ -1936,7 +1934,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb_reserve(skb, swivel); p = skb->data; - addr = crcaddr = NULL; + crcaddr = NULL; if (hlen) { /* always copy header pages */ i = CAS_VAL(RX_COMP2_HDR_INDEX, words[1]); page = cp->rx_pages[CAS_VAL(RX_INDEX_RING, i)][CAS_VAL(RX_INDEX_NUM, i)]; @@ -1948,12 +1946,10 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, i += cp->crc_size; dma_sync_single_for_cpu(&cp->pdev->dev, page->dma_addr + off, i, DMA_FROM_DEVICE); - addr = cas_page_map(page->buffer); - memcpy(p, addr + off, i); + memcpy(p, page_address(page->buffer) + off, i); 
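/* Illustrative note, not part of the patch: this is the standard streaming
 * DMA ownership pattern. The dma_sync_single_for_cpu() above hands the
 * buffer to the CPU so the memcpy reads coherent data, and the
 * dma_sync_single_for_device() below returns ownership to the NIC for the
 * next receive. page_address() is safe here on the assumption that the
 * cassini RX pages are allocated without __GFP_HIGHMEM, which is what lets
 * the old cas_page_map()/kmap_atomic() wrappers be dropped.
 */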
dma_sync_single_for_device(&cp->pdev->dev, page->dma_addr + off, i, DMA_FROM_DEVICE); - cas_page_unmap(addr); RX_USED_ADD(page, 0x100); p += hlen; swivel = 0; @@ -1984,12 +1980,11 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, /* make sure we always copy a header */ swivel = 0; if (p == (char *) skb->data) { /* not split */ - addr = cas_page_map(page->buffer); - memcpy(p, addr + off, RX_COPY_MIN); + memcpy(p, page_address(page->buffer) + off, + RX_COPY_MIN); dma_sync_single_for_device(&cp->pdev->dev, page->dma_addr + off, i, DMA_FROM_DEVICE); - cas_page_unmap(addr); off += RX_COPY_MIN; swivel = RX_COPY_MIN; RX_USED_ADD(page, cp->mtu_stride); @@ -2036,10 +2031,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, RX_USED_ADD(page, hlen + cp->crc_size); } - if (cp->crc_size) { - addr = cas_page_map(page->buffer); - crcaddr = addr + off + hlen; - } + if (cp->crc_size) + crcaddr = page_address(page->buffer) + off + hlen; } else { /* copying packet */ @@ -2061,12 +2054,10 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, i += cp->crc_size; dma_sync_single_for_cpu(&cp->pdev->dev, page->dma_addr + off, i, DMA_FROM_DEVICE); - addr = cas_page_map(page->buffer); - memcpy(p, addr + off, i); + memcpy(p, page_address(page->buffer) + off, i); dma_sync_single_for_device(&cp->pdev->dev, page->dma_addr + off, i, DMA_FROM_DEVICE); - cas_page_unmap(addr); if (p == (char *) skb->data) /* not split */ RX_USED_ADD(page, cp->mtu_stride); else @@ -2081,20 +2072,17 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, page->dma_addr, dlen + cp->crc_size, DMA_FROM_DEVICE); - addr = cas_page_map(page->buffer); - memcpy(p, addr, dlen + cp->crc_size); + memcpy(p, page_address(page->buffer), dlen + cp->crc_size); dma_sync_single_for_device(&cp->pdev->dev, page->dma_addr, dlen + cp->crc_size, DMA_FROM_DEVICE); - cas_page_unmap(addr); RX_USED_ADD(page, dlen + cp->crc_size); } end_copy_pkt: - if (cp->crc_size) { - addr = NULL; + if (cp->crc_size) crcaddr = skb->data + alloclen; - } + skb_put(skb, alloclen); } @@ -2103,8 +2091,6 @@ end_copy_pkt: /* checksum includes FCS. strip it out. 
*/ csum = csum_fold(csum_partial(crcaddr, cp->crc_size, csum_unfold(csum))); - if (addr) - cas_page_unmap(addr); } skb->protocol = eth_type_trans(skb, cp->dev); if (skb->protocol == htons(ETH_P_IP)) { @@ -2793,18 +2779,14 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, tabort = cas_calc_tabort(cp, skb_frag_off(fragp), len); if (unlikely(tabort)) { - void *addr; - /* NOTE: len is always > tabort */ cas_write_txd(cp, ring, entry, mapping, len - tabort, ctrl, 0); entry = TX_DESC_NEXT(ring, entry); - - addr = cas_page_map(skb_frag_page(fragp)); - memcpy(tx_tiny_buf(cp, ring, entry), - addr + skb_frag_off(fragp) + len - tabort, - tabort); - cas_page_unmap(addr); + memcpy_from_page(tx_tiny_buf(cp, ring, entry), + skb_frag_page(fragp), + skb_frag_off(fragp) + len - tabort, + tabort); mapping = tx_tiny_map(cp, ring, entry, tentry); len = tabort; } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 80fde5f06fce..a6211b95ed17 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1085,13 +1085,13 @@ static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb, u8 *vaddr; if (nc < ncookies) { - vaddr = kmap_atomic(skb_frag_page(f)); + vaddr = kmap_local_page(skb_frag_page(f)); blen = skb_frag_size(f); blen += 8 - (blen & 7); err = ldc_map_single(lp, vaddr + skb_frag_off(f), blen, cookies + nc, ncookies - nc, map_perm); - kunmap_atomic(vaddr); + kunmap_local(vaddr); } else { err = -EMSGSIZE; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 6b0458df613a..51c37e99d086 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2132,7 +2132,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; - if (port->ndev) + if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) unregister_netdev(port->ndev); } } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 6370c447ac5c..575ff9de8985 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -611,8 +611,6 @@ static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg, #endif /* CONFIG_64BIT */ /* Function prototypes visible in xilinx_axienet_mdio.c for other files */ -int axienet_mdio_enable(struct axienet_local *lp); -void axienet_mdio_disable(struct axienet_local *lp); int axienet_mdio_setup(struct axienet_local *lp); void axienet_mdio_teardown(struct axienet_local *lp); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 0b3b6935c558..2f07fde361aa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -17,7 +17,7 @@ #include "xilinx_axienet.h" -#define MAX_MDIO_FREQ 2500000 /* 2.5 MHz */ +#define DEFAULT_MDIO_FREQ 2500000 /* 2.5 MHz */ #define DEFAULT_HOST_CLOCK 150000000 /* 150 MHz */ /* Wait till MDIO interface is ready to accept a new transaction.*/ @@ -147,15 +147,20 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, /** * axienet_mdio_enable - MDIO hardware setup function * @lp: Pointer to axienet local data structure. + * @np: Pointer to mdio device tree node. * - * Return: 0 on success, -ETIMEDOUT on a timeout. 
+ * Return: 0 on success, -ETIMEDOUT on a timeout, -EOVERFLOW on a clock + * divisor overflow. * * Sets up the MDIO interface by initializing the MDIO clock and enabling the * MDIO interface in hardware. **/ -int axienet_mdio_enable(struct axienet_local *lp) +static int axienet_mdio_enable(struct axienet_local *lp, struct device_node *np) { + u32 mdio_freq = DEFAULT_MDIO_FREQ; u32 host_clock; + u32 clk_div; + int ret; lp->mii_clk_div = 0; @@ -184,6 +189,12 @@ int axienet_mdio_enable(struct axienet_local *lp) host_clock); } + if (np) + of_property_read_u32(np, "clock-frequency", &mdio_freq); + if (mdio_freq != DEFAULT_MDIO_FREQ) + netdev_info(lp->ndev, "Setting non-standard mdio bus frequency to %u Hz\n", + mdio_freq); + /* clk_div can be calculated by deriving it from the equation: * fMDIO = fHOST / ((1 + clk_div) * 2) * @@ -209,40 +220,42 @@ int axienet_mdio_enable(struct axienet_local *lp) * "clock-frequency" from the CPU */ - lp->mii_clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1; + clk_div = (host_clock / (mdio_freq * 2)) - 1; /* If there is any remainder from the division of - * fHOST / (MAX_MDIO_FREQ * 2), then we need to add - * 1 to the clock divisor or we will surely be above 2.5 MHz + * fHOST / (mdio_freq * 2), then we need to add + * 1 to the clock divisor or we will surely be + * above the requested frequency */ - if (host_clock % (MAX_MDIO_FREQ * 2)) - lp->mii_clk_div++; + if (host_clock % (mdio_freq * 2)) + clk_div++; + + /* Check for overflow of mii_clk_div */ + if (clk_div & ~XAE_MDIO_MC_CLOCK_DIVIDE_MAX) { + netdev_warn(lp->ndev, "MDIO clock divisor overflow\n"); + return -EOVERFLOW; + } + lp->mii_clk_div = (u8)clk_div; netdev_dbg(lp->ndev, "Setting MDIO clock divisor to %u/%u Hz host clock.\n", lp->mii_clk_div, host_clock); - axienet_iow(lp, XAE_MDIO_MC_OFFSET, lp->mii_clk_div | XAE_MDIO_MC_MDIOEN_MASK); + axienet_mdio_mdc_enable(lp); - return axienet_mdio_wait_until_ready(lp); -} + ret = axienet_mdio_wait_until_ready(lp); + if (ret) + axienet_mdio_mdc_disable(lp); -/** - * axienet_mdio_disable - MDIO hardware disable function - * @lp: Pointer to axienet local data structure. - * - * Disable the MDIO interface in hardware. - **/ -void axienet_mdio_disable(struct axienet_local *lp) -{ - axienet_iow(lp, XAE_MDIO_MC_OFFSET, 0); + return ret; } /** * axienet_mdio_setup - MDIO setup function * @lp: Pointer to axienet local data structure. * - * Return: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when - * mdiobus_alloc (to allocate memory for mii bus structure) fails. + * Return: 0 on success, -ETIMEDOUT on a timeout, -EOVERFLOW on a clock + * divisor overflow, -ENOMEM when mdiobus_alloc (to allocate + * memory for mii bus structure) fails. * * Sets up the MDIO interface by initializing the MDIO clock. * Register the MDIO interface. 
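/* Worked example of the clk_div arithmetic above; an illustrative sketch
 * based on the driver's equation fMDIO = fHOST / ((1 + clk_div) * 2), not
 * code from the patch. With the default 150 MHz host clock and the default
 * 2.5 MHz MDIO frequency: 150000000 / (2500000 * 2) - 1 = 29, the remainder
 * of the division is 0 so no round-up is needed, and
 * 150 MHz / ((1 + 29) * 2) lands on exactly 2.5 MHz.
 */
static u32 example_mdio_clk_div(u32 host_clock, u32 mdio_freq)
{
	u32 clk_div = (host_clock / (mdio_freq * 2)) - 1;

	/* Round up whenever the division truncates, so the resulting bus
	 * frequency never exceeds the requested one.
	 */
	if (host_clock % (mdio_freq * 2))
		clk_div++;

	return clk_div;
}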
@@ -253,10 +266,6 @@ int axienet_mdio_setup(struct axienet_local *lp) struct mii_bus *bus; int ret; - ret = axienet_mdio_enable(lp); - if (ret < 0) - return ret; - bus = mdiobus_alloc(); if (!bus) return -ENOMEM; @@ -272,15 +281,23 @@ int axienet_mdio_setup(struct axienet_local *lp) lp->mii_bus = bus; mdio_node = of_get_child_by_name(lp->dev->of_node, "mdio"); + ret = axienet_mdio_enable(lp, mdio_node); + if (ret < 0) + goto unregister; ret = of_mdiobus_register(bus, mdio_node); + if (ret) + goto unregister_mdio_enabled; of_node_put(mdio_node); - if (ret) { - mdiobus_free(bus); - lp->mii_bus = NULL; - return ret; - } axienet_mdio_mdc_disable(lp); return 0; + +unregister_mdio_enabled: + axienet_mdio_mdc_disable(lp); +unregister: + of_node_put(mdio_node); + mdiobus_free(bus); + lp->mii_bus = NULL; + return ret; } /** diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index de94921cbef9..025e0c19ec25 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -98,6 +98,7 @@ struct ipvl_port { struct sk_buff_head backlog; int count; struct ida ida; + netdevice_tracker dev_tracker; }; struct ipvl_skb_cb { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index b6bfa9fdca62..b15dd9a3ad54 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -83,6 +83,7 @@ static int ipvlan_port_create(struct net_device *dev) if (err) goto err; + netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); return 0; err: @@ -95,6 +96,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; + netdev_put(dev, &port->dev_tracker); if (port->mode == IPVLAN_MODE_L3S) ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c4ad98d39ea6..f6d53e63ef4e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a7b46219bab7..d73b9d535b7a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3835,7 +3835,6 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; - int ret; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 689e728345ce..eb344f6d4a7b 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -148,7 +148,7 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* Associate the fwnode with the device structure so it * can be looked up later. 
*/ - phy->mdio.dev.fwnode = child; + phy->mdio.dev.fwnode = fwnode_handle_get(child); /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e14686594a71..b962fc8e1397 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -994,9 +994,6 @@ static int nsim_dev_info_get(struct devlink *devlink, { int err; - err = devlink_info_driver_name_put(req, DRV_NAME); - if (err) - return err; err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30", DEVLINK_INFO_VERSION_TYPE_COMPONENT); if (err) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 464d88ca8ab0..a4abea921046 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); diff --git a/drivers/net/pcs/pcs-altera-tse.c b/drivers/net/pcs/pcs-altera-tse.c index 97a7cabff962..d616749761f4 100644 --- a/drivers/net/pcs/pcs-altera-tse.c +++ b/drivers/net/pcs/pcs-altera-tse.c @@ -12,22 +12,13 @@ /* SGMII PCS register addresses */ -#define SGMII_PCS_SCRATCH 0x10 -#define SGMII_PCS_REV 0x11 #define SGMII_PCS_LINK_TIMER_0 0x12 -#define SGMII_PCS_LINK_TIMER_REG(x) (0x12 + (x)) #define SGMII_PCS_LINK_TIMER_1 0x13 #define SGMII_PCS_IF_MODE 0x14 #define PCS_IF_MODE_SGMII_ENA BIT(0) #define PCS_IF_MODE_USE_SGMII_AN BIT(1) -#define PCS_IF_MODE_SGMI_SPEED_MASK GENMASK(3, 2) -#define PCS_IF_MODE_SGMI_SPEED_10 (0 << 2) -#define PCS_IF_MODE_SGMI_SPEED_100 (1 << 2) -#define PCS_IF_MODE_SGMI_SPEED_1000 (2 << 2) #define PCS_IF_MODE_SGMI_HALF_DUPLEX BIT(4) #define PCS_IF_MODE_SGMI_PHY_AN BIT(5) -#define SGMII_PCS_DIS_READ_TO 0x15 -#define SGMII_PCS_READ_TO 0x16 #define SGMII_PCS_SW_RESET_TIMEOUT 100 /* usecs */ struct altera_tse_pcs { @@ -60,7 +51,6 @@ static void tse_pcs_write(struct altera_tse_pcs *tse_pcs, int regnum, static int tse_pcs_reset(struct altera_tse_pcs *tse_pcs) { - int i = 0; u16 bmcr; /* Reset PCS block */ @@ -68,13 +58,9 @@ static int tse_pcs_reset(struct altera_tse_pcs *tse_pcs) bmcr |= BMCR_RESET; tse_pcs_write(tse_pcs, MII_BMCR, bmcr); - for (i = 0; i < SGMII_PCS_SW_RESET_TIMEOUT; i++) { - if (!(tse_pcs_read(tse_pcs, MII_BMCR) & BMCR_RESET)) - return 0; - udelay(1); - } - - return -ETIMEDOUT; + return read_poll_timeout(tse_pcs_read, bmcr, (bmcr & BMCR_RESET), + 10, SGMII_PCS_SW_RESET_TIMEOUT, 1, + tse_pcs, MII_BMCR); } static int alt_tse_pcs_validate(struct phylink_pcs *pcs, @@ -107,7 +93,6 @@ static int alt_tse_pcs_config(struct phylink_pcs *pcs, unsigned int mode, if_mode |= PCS_IF_MODE_USE_SGMII_AN | PCS_IF_MODE_SGMII_ENA; } else if (interface == PHY_INTERFACE_MODE_1000BASEX) { if_mode &= ~(PCS_IF_MODE_USE_SGMII_AN | PCS_IF_MODE_SGMII_ENA); - if_mode |= PCS_IF_MODE_SGMI_SPEED_1000; } ctrl |= (BMCR_SPEED1000 | BMCR_FULLDPLX | BMCR_ANENABLE); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 040c8bf6d05b..af00cf44cd97 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -260,7 +260,7 @@ config MOTORCOMM_PHY tristate "Motorcomm PHYs" help Enables support for Motorcomm network PHYs. - Currently supports the YT8511, YT8521 Gigabit Ethernet PHYs. 
+ Currently supports the YT8511, YT8521, YT8531S Gigabit Ethernet PHYs. config NATIONAL_PHY tristate "National Semiconductor PHYs" diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 349b7b1dbbf2..d49965907561 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -870,8 +870,10 @@ static int at803x_probe(struct phy_device *phydev) .wolopts = 0, }; - if (ccr < 0) + if (ccr < 0) { + ret = ccr; goto err; + } mode_cfg = ccr & AT803X_MODE_CFG_MASK; switch (mode_cfg) { diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c index bd1ab5d0631f..685190db72de 100644 --- a/drivers/net/phy/motorcomm.c +++ b/drivers/net/phy/motorcomm.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Motorcomm 8511/8521 PHY driver. + * Motorcomm 8511/8521/8531S PHY driver. * * Author: Peter Geis <[email protected]> * Author: Frank <[email protected]> @@ -12,9 +12,10 @@ #include <linux/phy.h> #define PHY_ID_YT8511 0x0000010a -#define PHY_ID_YT8521 0x0000011A +#define PHY_ID_YT8521 0x0000011A +#define PHY_ID_YT8531S 0x4F51E91A -/* YT8521 Register Overview +/* YT8521/YT8531S Register Overview * UTP Register space | FIBER Register space * ------------------------------------------------------------ * | UTP MII | FIBER MII | @@ -147,7 +148,7 @@ #define YT8521_LINK_TIMER_CFG2_REG 0xA5 #define YT8521_LTCR_EN_AUTOSEN BIT(15) -/* 0xA000, 0xA001, 0xA003 ,and 0xA006 ~ 0xA00A are common ext registers +/* 0xA000, 0xA001, 0xA003, 0xA006 ~ 0xA00A and 0xA012 are common ext registers * of yt8521 phy. There is no need to switch reg space when operating these * registers. */ @@ -221,6 +222,9 @@ */ #define YTPHY_WCR_TYPE_PULSE BIT(0) +#define YT8531S_SYNCE_CFG_REG 0xA012 +#define YT8531S_SCR_SYNCE_ENABLE BIT(6) + /* Extended Register end */ struct yt8521_priv { @@ -648,6 +652,26 @@ static int yt8521_probe(struct phy_device *phydev) } /** + * yt8531s_probe() - read chip config then set suitable polling_mode + * @phydev: a pointer to a &struct phy_device + * + * returns 0 or negative errno code + */ +static int yt8531s_probe(struct phy_device *phydev) +{ + int ret; + + /* Disable SyncE clock output by default */ + ret = ytphy_modify_ext_with_lock(phydev, YT8531S_SYNCE_CFG_REG, + YT8531S_SCR_SYNCE_ENABLE, 0); + if (ret < 0) + return ret; + + /* same as yt8521_probe */ + return yt8521_probe(phydev); +} + +/** * ytphy_utp_read_lpa() - read LPA then setup lp_advertising for utp * @phydev: a pointer to a &struct phy_device * @@ -1750,11 +1774,28 @@ static struct phy_driver motorcomm_phy_drvs[] = { .suspend = yt8521_suspend, .resume = yt8521_resume, }, + { + PHY_ID_MATCH_EXACT(PHY_ID_YT8531S), + .name = "YT8531S Gigabit Ethernet", + .get_features = yt8521_get_features, + .probe = yt8531s_probe, + .read_page = yt8521_read_page, + .write_page = yt8521_write_page, + .get_wol = ytphy_get_wol, + .set_wol = ytphy_set_wol, + .config_aneg = yt8521_config_aneg, + .aneg_done = yt8521_aneg_done, + .config_init = yt8521_config_init, + .read_status = yt8521_read_status, + .soft_reset = yt8521_soft_reset, + .suspend = yt8521_suspend, + .resume = yt8521_resume, + }, }; module_phy_driver(motorcomm_phy_drvs); -MODULE_DESCRIPTION("Motorcomm 8511/8521 PHY driver"); +MODULE_DESCRIPTION("Motorcomm 8511/8521/8531S PHY driver"); MODULE_AUTHOR("Peter Geis"); MODULE_AUTHOR("Frank"); MODULE_LICENSE("GPL"); @@ -1762,6 +1803,7 @@ MODULE_LICENSE("GPL"); static const struct mdio_device_id __maybe_unused motorcomm_tbl[] = { { PHY_ID_MATCH_EXACT(PHY_ID_YT8511) }, { PHY_ID_MATCH_EXACT(PHY_ID_YT8521) }, + { 
PHY_ID_MATCH_EXACT(PHY_ID_YT8531S) }, { /* sentinel */ } }; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 57849ac0384e..716870a4499c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) static void phy_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } @@ -1511,6 +1512,15 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phy_resume(phydev); phy_led_triggers_register(phydev); + /* + * If the external phy used by the current mac interface is managed by + * another mac interface, we should create a device link between the + * phy dev and the mac dev. + */ + if (phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent) + phydev->devlink = device_link_add(dev->dev.parent, &phydev->mdio.dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + return err; error: @@ -1520,6 +1530,7 @@ error: error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) @@ -1748,6 +1759,9 @@ void phy_detach(struct phy_device *phydev) struct module *ndev_owner = NULL; struct mii_bus *bus; + if (phydev->devlink) + device_link_del(phydev->devlink); + if (phydev->sysfs_links) { if (dev) sysfs_remove_link(&dev->dev.kobj, "phydev"); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 25feab1802ee..09cc65c0da93 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1622,19 +1622,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - /* Clause 45 PHYs switch their Serdes lane between several different - * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G - * speeds. We really need to know which interface modes the PHY and - * MAC supports to properly work out which linkmodes can be supported. + /* Check whether we would use rate matching for the proposed interface + * mode. */ - if (phy->is_c45 && + config.rate_matching = phy_get_rate_matching(phy, interface); + + /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R, + * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching. + * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching + * their Serdes is either unnecessary or not reasonable. + * + * For those that switch interface modes, we really need to know which + * interface modes the PHY supports to properly work out which ethtool + * linkmodes can be supported. For now, as a work-around, we validate + * against all interface modes, which may lead to more ethtool link + * modes being advertised than are actually supported. 
+ */ + if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE && interface != PHY_INTERFACE_MODE_RXAUI && interface != PHY_INTERFACE_MODE_XAUI && interface != PHY_INTERFACE_MODE_USXGMII) config.interface = PHY_INTERFACE_MODE_NA; else config.interface = interface; - config.rate_matching = phy_get_rate_matching(phy, config.interface); ret = phylink_validate(pl, supported, &config); if (ret) { diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 9206c660a72e..d4c821c8cf57 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1743,6 +1743,8 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) int len; unsigned char *cp; + skb->dev = ppp->dev; + if (proto < 0x8000) { #ifdef CONFIG_PPP_FILTER /* check if we should pass this packet */ diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index a52ee2bf5575..4ed7f5b547e3 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -58,10 +58,10 @@ * supported then @frame_id is filled, otherwise it stays %0. */ struct thunderbolt_ip_frame_header { - u32 frame_size; - u16 frame_index; - u16 frame_id; - u32 frame_count; + __le32 frame_size; + __le16 frame_index; + __le16 frame_id; + __le32 frame_count; }; enum thunderbolt_ip_frame_pdf { @@ -1051,7 +1051,7 @@ static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num, const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num]; *len = skb_frag_size(frag); - return kmap_atomic(skb_frag_page(frag)) + skb_frag_off(frag); + return kmap_local_page(skb_frag_page(frag)) + skb_frag_off(frag); } static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, @@ -1109,7 +1109,7 @@ static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, dest += len; if (unmap) { - kunmap_atomic(src); + kunmap_local(src); unmap = false; } @@ -1147,7 +1147,7 @@ static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, dest += len; if (unmap) { - kunmap_atomic(src); + kunmap_local(src); unmap = false; } @@ -1162,7 +1162,7 @@ static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, memcpy(dest, src, data_len); if (unmap) - kunmap_atomic(src); + kunmap_local(src); if (!tbnet_xmit_csum_and_map(net, skb, frames, frame_index + 1)) goto err_drop; @@ -1319,7 +1319,7 @@ static void tbnet_shutdown(struct tb_service *svc) tbnet_tear_down(tb_service_get_drvdata(svc), true); } -static int __maybe_unused tbnet_suspend(struct device *dev) +static int tbnet_suspend(struct device *dev) { struct tb_service *svc = tb_to_service(dev); struct tbnet *net = tb_service_get_drvdata(svc); @@ -1334,7 +1334,7 @@ static int __maybe_unused tbnet_suspend(struct device *dev) return 0; } -static int __maybe_unused tbnet_resume(struct device *dev) +static int tbnet_resume(struct device *dev) { struct tb_service *svc = tb_to_service(dev); struct tbnet *net = tb_service_get_drvdata(svc); @@ -1350,9 +1350,7 @@ static int __maybe_unused tbnet_resume(struct device *dev) return 0; } -static const struct dev_pm_ops tbnet_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tbnet_suspend, tbnet_resume) -}; +static DEFINE_SIMPLE_DEV_PM_OPS(tbnet_pm_ops, tbnet_suspend, tbnet_resume); static const struct tb_service_id tbnet_ids[] = { { TB_SERVICE("network", 1) }, @@ -1364,7 +1362,7 @@ static struct tb_service_driver tbnet_driver = { .driver = { .owner = THIS_MODULE, .name = "thunderbolt-net", - .pm = &tbnet_pm_ops, + .pm = pm_sleep_ptr(&tbnet_pm_ops), }, .probe = tbnet_probe, .remove = tbnet_remove, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f5f412ef39b3..b4baa2001a63 100644 --- 
a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index e11f70911acc..8911cd2ed534 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -990,6 +990,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* U-blox LARA-L6 */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1343, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { /* Cinterion PLS8 modem by GEMALTO */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 6b5f24f28dd1..6ce8f4f0c70e 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1916,6 +1916,7 @@ static const struct driver_info cdc_ncm_zlp_info = { .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, + .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_WWAN */ diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c36caf9d6553..a808d718c012 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1424,6 +1424,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ + {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ /* 4. 
Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 2a4592780141..ac7c0653695f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1119,14 +1119,10 @@ static void veth_disable_xdp_range(struct net_device *dev, int start, int end, static int veth_enable_xdp(struct net_device *dev) { + bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); struct veth_priv *priv = netdev_priv(dev); - bool napi_already_on; - struct veth_rq *rq; int err, i; - rq = &priv->rq[0]; - napi_already_on = rcu_access_pointer(rq->napi); - if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); if (err) @@ -1327,28 +1323,18 @@ revert: static int veth_open(struct net_device *dev) { - struct veth_priv *peer_priv, *priv = netdev_priv(dev); + struct veth_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); - struct veth_rq *peer_rq; int err; if (!peer) return -ENOTCONN; - peer_priv = netdev_priv(peer); - peer_rq = &peer_priv->rq[0]; - if (priv->_xdp_prog) { err = veth_enable_xdp(dev); if (err) return err; - /* refer to the logic in veth_xdp_set() */ - if (!rtnl_dereference(peer_rq->napi)) { - err = veth_napi_enable(peer); - if (err) - return err; - } - } else if (veth_gro_requested(dev) || peer_priv->_xdp_prog) { + } else if (veth_gro_requested(dev)) { err = veth_napi_enable(dev); if (err) return err; @@ -1364,29 +1350,17 @@ static int veth_open(struct net_device *dev) static int veth_close(struct net_device *dev) { - struct veth_priv *peer_priv, *priv = netdev_priv(dev); + struct veth_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); - struct veth_rq *peer_rq; netif_carrier_off(dev); - if (peer) { - peer_priv = netdev_priv(peer); - peer_rq = &peer_priv->rq[0]; - } + if (peer) + netif_carrier_off(peer); - if (priv->_xdp_prog) { + if (priv->_xdp_prog) veth_disable_xdp(dev); - /* refer to the logic in veth_xdp_set */ - if (peer && rtnl_dereference(peer_rq->napi)) { - if (!veth_gro_requested(peer) && !peer_priv->_xdp_prog) - veth_napi_del(peer); - } - } else if (veth_gro_requested(dev) || (peer && peer_priv->_xdp_prog)) { + else if (veth_gro_requested(dev)) veth_napi_del(dev); - } - - if (peer) - netif_carrier_off(peer); return 0; } @@ -1496,21 +1470,17 @@ static int veth_set_features(struct net_device *dev, { netdev_features_t changed = features ^ dev->features; struct veth_priv *priv = netdev_priv(dev); - struct veth_rq *rq = &priv->rq[0]; int err; if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) return 0; if (features & NETIF_F_GRO) { - if (!rtnl_dereference(rq->napi)) { - err = veth_napi_enable(dev); - if (err) - return err; - } + err = veth_napi_enable(dev); + if (err) + return err; } else { - if (rtnl_dereference(rq->napi)) - veth_napi_del(dev); + veth_napi_del(dev); } return 0; } @@ -1542,19 +1512,14 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct veth_priv *priv = netdev_priv(dev); - struct veth_priv *peer_priv; struct bpf_prog *old_prog; - struct veth_rq *peer_rq; struct net_device *peer; - bool napi_already_off; unsigned int max_mtu; - bool noreq_napi; int err; old_prog = priv->_xdp_prog; priv->_xdp_prog = prog; peer = rtnl_dereference(priv->peer); - peer_priv = netdev_priv(peer); if (prog) { if (!peer) { @@ -1591,24 +1556,6 @@ static int veth_xdp_set(struct 
net_device *dev, struct bpf_prog *prog, } } - if (peer && (peer->flags & IFF_UP)) { - peer_rq = &peer_priv->rq[0]; - - /* If the peer hasn't enabled GRO and loaded xdp, - * then we enable napi automatically if its napi - * is not ready. - */ - napi_already_off = !rtnl_dereference(peer_rq->napi); - if (napi_already_off) { - err = veth_napi_enable(peer); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Failed to automatically enable napi of peer"); - goto err; - } - } - } - if (!old_prog) { peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; @@ -1623,17 +1570,6 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (peer) { peer->hw_features |= NETIF_F_GSO_SOFTWARE; peer->max_mtu = ETH_MAX_MTU; - peer_rq = &peer_priv->rq[0]; - - /* If the peer doesn't has its xdp and enabled - * GRO, then we disable napi if its napi is ready; - */ - if (rtnl_dereference(peer_rq->napi)) { - noreq_napi = !veth_gro_requested(peer) && - !peer_priv->_xdp_prog; - if (noreq_napi && (peer->flags & IFF_UP)) - veth_napi_del(peer); - } } } bpf_prog_put(old_prog); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56dbd645d7c8..19eee0655b99 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3949,12 +3949,11 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_unregister_netdev: - virtio_reset_device(vdev); - unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: + virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 8dca8d2447b7..3f8c0845fcca 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -632,7 +632,7 @@ mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q) switch (type) { case MT76_WED_Q_TX: - ret = mtk_wed_device_tx_ring_setup(wed, ring, q->regs); + ret = mtk_wed_device_tx_ring_setup(wed, ring, q->regs, false); if (!ret) q->wed_regs = wed->tx_ring[ring].reg_base; break; diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 9bbfff803357..b545d93c6e37 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -959,30 +959,51 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = 
le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (unsigned int i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index eb1d1ba3a443..67df8221b5ae 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d41e373f9c0a..d6b166fc5c0e 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) /* Pass the DL packet to the netif layer. */ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, struct iosm_wwan *wwan, u32 offset, - u8 service_class, struct sk_buff *skb) + u8 service_class, struct sk_buff *skb, + u32 pkt_len) { struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC); @@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, return -ENOMEM; skb_pull(dest_skb, offset); - skb_set_tail_pointer(dest_skb, dest_skb->len); + skb_trim(dest_skb, pkt_len); /* Pass the packet to the netif layer. 
*/ dest_skb->priority = service_class; @@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) { - u32 pad_len, packet_offset; + u32 pad_len, packet_offset, adgh_len; struct iosm_wwan *wwan; struct mux_adgh *adgh; u8 *block = skb->data; @@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, packet_offset = sizeof(*adgh) + pad_len; if_id += ipc_mux->wwan_q_offset; + adgh_len = le16_to_cpu(adgh->length); /* Pass the packet to the netif layer */ rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset, - adgh->service_class, skb); + adgh->service_class, skb, + adgh_len - packet_offset); if (rc) { dev_err(ipc_mux->dev, "mux adgh decoding error"); return; @@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, int if_id, int nr_of_dg) { u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len; - u32 packet_offset, i, rc; + u32 packet_offset, i, rc, dg_len; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) @@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, packet_offset = le32_to_cpu(dg->datagram_index) + dl_head_pad_len; + dg_len = le16_to_cpu(dg->datagram_length); /* Pass the packet to the netif layer. */ rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan, packet_offset, - dg->service_class, - skb); + dg->service_class, skb, + dg_len - dl_head_pad_len); if (rc) goto dg_error; } @@ -1207,10 +1211,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux, qlth_n_ql_size, ul_list); ipc_mux_ul_adb_finish(ipc_mux); if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed, - IOSM_AGGR_MUX_SIG_ADBH)) { - dev_kfree_skb(src_skb); + IOSM_AGGR_MUX_SIG_ADBH)) return -ENOMEM; - } + ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length); ipc_mux->size_needed += offsetof(struct mux_adth, dg); @@ -1471,8 +1474,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) ipc_mux->ul_data_pend_bytes); /* Reset the skb settings. */ - skb->tail = 0; - skb->len = 0; + skb_trim(skb, 0); /* Add the consumed ADB to the free list. 
*/ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index d3d34d1c4704..5bf5a93937c9 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -249,7 +249,7 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) if (object->integer.value == 3) sleep_state = IPC_PCIE_D3L2; - kfree(object); + ACPI_FREE(object); default_ret: return sleep_state; diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h index 9b3a6d86ece7..289397c4ea6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h @@ -122,7 +122,7 @@ struct iosm_protocol { struct iosm_imem *imem; struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; struct device *dev; - phys_addr_t phy_ap_shm; + dma_addr_t phy_ap_shm; u32 old_msg_tail; }; diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 3458af31e864..7d0f5e4f0a78 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -165,6 +165,8 @@ static int t7xx_acpi_reset(struct t7xx_pci_dev *t7xx_dev, char *fn_name) return -EFAULT; } + kfree(buffer.pointer); + #endif return 0; } diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index 5eaa18f81355..e72b358a2a12 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -231,8 +231,7 @@ static const struct nfc_phy_ops i2c_phy_ops = { .disable = microread_i2c_disable, }; -static int microread_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int microread_i2c_probe(struct i2c_client *client) { struct microread_i2c_phy *phy; int r; @@ -287,7 +286,7 @@ static struct i2c_driver microread_i2c_driver = { .driver = { .name = MICROREAD_I2C_DRIVER_NAME, }, - .probe = microread_i2c_probe, + .probe_new = microread_i2c_probe, .remove = microread_i2c_remove, .id_table = microread_i2c_id, }; diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 24436c9e54c9..e74342b0b728 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -112,8 +112,10 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; int ret; - if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) + if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) { + kfree_skb(skb); return -EREMOTEIO; + } ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); @@ -181,8 +183,7 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, return 0; } -static int nfcmrvl_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int nfcmrvl_i2c_probe(struct i2c_client *client) { const struct nfcmrvl_platform_data *pdata; struct nfcmrvl_i2c_drv_data *drv_data; @@ -257,7 +258,7 @@ static const struct i2c_device_id nfcmrvl_i2c_id_table[] = { MODULE_DEVICE_TABLE(i2c, nfcmrvl_i2c_id_table); static struct i2c_driver nfcmrvl_i2c_driver = { - .probe = nfcmrvl_i2c_probe, + .probe_new = nfcmrvl_i2c_probe, .id_table = nfcmrvl_i2c_id_table, .remove = nfcmrvl_i2c_remove, .driver = { diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 580cb6ecffee..66b198663387 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -73,11 +73,15 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if 
(!info->phy_ops->write) + if (!info->phy_ops->write) { + kfree_skb(skb); return -EOPNOTSUPP; + } - if (info->mode != NXP_NCI_MODE_NCI) + if (info->mode != NXP_NCI_MODE_NCI) { + kfree_skb(skb); return -EINVAL; + } r = info->phy_ops->write(info->phy_id, skb); if (r < 0) { diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index ec6446511984..d4c299be7949 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -263,8 +263,7 @@ static const struct acpi_gpio_mapping acpi_nxp_nci_gpios[] = { { } }; -static int nxp_nci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int nxp_nci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct nxp_nci_i2c_phy *phy; @@ -349,7 +348,7 @@ static struct i2c_driver nxp_nci_i2c_driver = { .acpi_match_table = ACPI_PTR(acpi_id), .of_match_table = of_nxp_nci_i2c_match, }, - .probe = nxp_nci_i2c_probe, + .probe_new = nxp_nci_i2c_probe, .id_table = nxp_nci_i2c_id_table, .remove = nxp_nci_i2c_remove, }; diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c index ddf3db286bad..1503a98f0405 100644 --- a/drivers/nfc/pn533/i2c.c +++ b/drivers/nfc/pn533/i2c.c @@ -163,8 +163,7 @@ static const struct pn533_phy_ops i2c_phy_ops = { }; -static int pn533_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pn533_i2c_probe(struct i2c_client *client) { struct pn533_i2c_phy *phy; struct pn533 *priv; @@ -260,7 +259,7 @@ static struct i2c_driver pn533_i2c_driver = { .name = PN533_I2C_DRIVER_NAME, .of_match_table = of_match_ptr(of_pn533_i2c_match), }, - .probe = pn533_i2c_probe, + .probe_new = pn533_i2c_probe, .id_table = pn533_i2c_id_table, .remove = pn533_i2c_remove, }; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 9e754abcfa2a..8b0d910bee06 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -866,8 +866,7 @@ static const struct acpi_gpio_mapping acpi_pn544_gpios[] = { { }, }; -static int pn544_hci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pn544_hci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct pn544_i2c_phy *phy; @@ -954,7 +953,7 @@ static struct i2c_driver pn544_hci_i2c_driver = { .of_match_table = of_match_ptr(of_pn544_i2c_match), .acpi_match_table = ACPI_PTR(pn544_hci_i2c_acpi_match), }, - .probe = pn544_hci_i2c_probe, + .probe_new = pn544_hci_i2c_probe, .id_table = pn544_hci_i2c_id_table, .remove = pn544_hci_i2c_remove, }; diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 0270e05b68df..aec356880adf 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -105,6 +105,7 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) mutex_lock(&info->mutex); if (s3fwrn5_get_mode(info) != S3FWRN5_MODE_NCI) { + kfree_skb(skb); mutex_unlock(&info->mutex); return -EINVAL; } diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index ecdee838d25d..2517ae71f9a4 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -177,8 +177,7 @@ static int s3fwrn5_i2c_parse_dt(struct i2c_client *client) return 0; } -static int s3fwrn5_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int s3fwrn5_i2c_probe(struct i2c_client *client) { struct s3fwrn5_i2c_phy *phy; int ret; @@ -262,7 +261,7 @@ static struct i2c_driver s3fwrn5_i2c_driver = { .name = S3FWRN5_I2C_DRIVER_NAME, .of_match_table = of_match_ptr(of_s3fwrn5_i2c_match), }, - .probe = 
s3fwrn5_i2c_probe, + .probe_new = s3fwrn5_i2c_probe, .remove = s3fwrn5_i2c_remove, .id_table = s3fwrn5_i2c_id_table, }; diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c index 89fa24d71bef..6b5eed8a1fbe 100644 --- a/drivers/nfc/st-nci/i2c.c +++ b/drivers/nfc/st-nci/i2c.c @@ -195,8 +195,7 @@ static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = { {}, }; -static int st_nci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st_nci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct st_nci_i2c_phy *phy; @@ -284,7 +283,7 @@ static struct i2c_driver st_nci_i2c_driver = { .of_match_table = of_match_ptr(of_st_nci_i2c_match), .acpi_match_table = ACPI_PTR(st_nci_i2c_acpi_match), }, - .probe = st_nci_i2c_probe, + .probe_new = st_nci_i2c_probe, .id_table = st_nci_i2c_id_table, .remove = st_nci_i2c_remove, }; diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 7764b1a4c3cf..ec87dd21e054 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, int r = 0; struct device *dev = &ndev->nfc_dev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -325,26 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in nfc_evt_transaction, and that + * the aid_len is u8 in the packet, but u32 in the structure, + * and the tags in the packet are not included in + * nfc_evt_transaction. + * + * size(b): 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); - if (!transaction) - return -ENOMEM; + aid_len = skb->data[1]; - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + if (skb->len < aid_len + 4 || + aid_len > sizeof(transaction->aid)) + return -EPROTO; - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + params_len = skb->data[aid_len + 3]; + + /* Verify PARAMETERS tag is (82), and final check that there is + * enough space in the packet to read everything. 
+ */ + if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG || + skb->len < aid_len + 4 + params_len) return -EPROTO; - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + transaction = devm_kzalloc(dev, sizeof(*transaction) + + params_len, GFP_KERNEL); + if (!transaction) + return -ENOMEM; + + transaction->aid_len = aid_len; + transaction->params_len = params_len; + + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], + params_len); r = nfc_se_transaction(ndev->nfc_dev, host, transaction); break; diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 76b55986bcf8..55f7a2391bb1 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -487,8 +487,7 @@ static const struct acpi_gpio_mapping acpi_st21nfca_gpios[] = { {}, }; -static int st21nfca_hci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int st21nfca_hci_i2c_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct st21nfca_i2c_phy *phy; @@ -598,7 +597,7 @@ static struct i2c_driver st21nfca_hci_i2c_driver = { .of_match_table = of_match_ptr(of_st21nfca_i2c_match), .acpi_match_table = ACPI_PTR(st21nfca_hci_i2c_acpi_match), }, - .probe = st21nfca_hci_i2c_probe, + .probe_new = st21nfca_hci_i2c_probe, .id_table = st21nfca_hci_i2c_id_table, .remove = st21nfca_hci_i2c_remove, }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02b5578773a1..f4335519399d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3489,6 +3489,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, + { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ @@ -3519,6 +3521,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. 
NV7000 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index c4113b43dbfe..4dcddcf95279 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -45,9 +45,11 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, if (!dhchap_secret) return -ENOMEM; if (set_ctrl) { + kfree(host->dhchap_ctrl_secret); host->dhchap_ctrl_secret = strim(dhchap_secret); host->dhchap_ctrl_key_hash = key_hash; } else { + kfree(host->dhchap_secret); host->dhchap_secret = strim(dhchap_secret); host->dhchap_key_hash = key_hash; } diff --git a/drivers/nvmem/lan9662-otpc.c b/drivers/nvmem/lan9662-otpc.c index f6732fd216d8..56fc19f092a7 100644 --- a/drivers/nvmem/lan9662-otpc.c +++ b/drivers/nvmem/lan9662-otpc.c @@ -36,7 +36,7 @@ struct lan9662_otp { void __iomem *base; }; -static bool lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) +static int lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) { u32 val; @@ -203,7 +203,7 @@ static int lan9662_otp_probe(struct platform_device *pdev) } static const struct of_device_id lan9662_otp_match[] = { - { .compatible = "microchip,lan9662-otp", }, + { .compatible = "microchip,lan9662-otpc", }, { }, }; MODULE_DEVICE_TABLE(of, lan9662_otp_match); diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index b11c3c974b3d..80cb187f1481 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset, * but as of Dec 2020 this isn't possible on arm64. */ addr = memremap(priv->mem->base, available, MEMREMAP_WB); - if (IS_ERR(addr)) { + if (!addr) { dev_err(priv->dev, "Failed to remap memory region\n"); - return PTR_ERR(addr); + return -ENOMEM; } count = memory_read_from_buffer(val, bytes, &off, addr, available); diff --git a/drivers/nvmem/u-boot-env.c b/drivers/nvmem/u-boot-env.c index 8e72d1bbd649..4fdbdccebda1 100644 --- a/drivers/nvmem/u-boot-env.c +++ b/drivers/nvmem/u-boot-env.c @@ -135,7 +135,7 @@ static int u_boot_env_parse(struct u_boot_env *priv) break; case U_BOOT_FORMAT_REDUNDANT: crc32_offset = offsetof(struct u_boot_env_image_redundant, crc32); - crc32_data_offset = offsetof(struct u_boot_env_image_redundant, mark); + crc32_data_offset = offsetof(struct u_boot_env_image_redundant, data); data_offset = offsetof(struct u_boot_env_image_redundant, data); break; } diff --git a/drivers/of/property.c b/drivers/of/property.c index 967f79b59016..134cfc980b70 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 7c45927e2131..5784dc20fb38 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -468,7 +468,7 @@ static size_t parport_pc_fifo_write_block_pio(struct parport *port, const unsigned char *bufp = buf; size_t left = length; unsigned long expire = jiffies + port->physport->cad->timeout; - const int fifo = FIFO(port); + const unsigned long fifo = FIFO(port); int poll_for = 8; /* 80 usecs */ const struct parport_pc_private *priv = port->physport->private_data; 
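	/*
	 * Editor's note -- illustrative sketch, not part of the patch:
	 * FIFO(port) resolves to the port's base_hi, an unsigned long I/O
	 * cookie, so narrowing it to int (as the removed declaration did)
	 * can truncate iomapped port cookies above INT_MAX on 64-bit
	 * systems. A minimal demonstration with a hypothetical cookie
	 * value:
	 *
	 *	unsigned long cookie = 0x100000400UL;	// hypothetical
	 *	int narrow = cookie;			// truncates to 0x400
	 *	unsigned long wide = cookie;		// value preserved
	 *
	 * outsb(wide, bufp, n) then targets the intended FIFO location,
	 * whereas the int copy would not; hence the unsigned long above.
	 */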
const int fifo_depth = priv->fifo_depth; diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..f1ec8931dfbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1613,7 +1613,7 @@ out: } static u32 hv_compose_msi_req_v1( - struct pci_create_interrupt *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt *int_pkt, u32 slot, u8 vector, u16 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; @@ -1632,6 +1632,35 @@ static u32 hv_compose_msi_req_v1( } /* + * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and + * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be + * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V + * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is + * not irrelevant because Hyper-V chooses the physical CPU to handle the + * interrupts based on the vCPU specified in message sent to the vPCI VSP in + * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest, + * but assigning too many vPCI device interrupts to the same pCPU can cause a + * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V + * to spread out the pCPUs that it selects. + * + * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu() + * to always return the same dummy vCPU, because a second call to + * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a + * new pCPU for the interrupt. But for the multi-MSI case, the second call to + * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the + * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that + * the pCPUs are spread out. All interrupts for a multi-MSI device end up using + * the same pCPU, even though the vCPUs will be spread out by later calls + * to hv_irq_unmask(), but that is the best we can do now. + * + * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not* + * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an + * enhancement is planned for a future version. With that enhancement, the + * dummy vCPU selection won't matter, and interrupts for the same multi-MSI + * device will be spread across multiple pCPUs. + */ + +/* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). */ @@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) return cpumask_first_and(affinity, cpu_online_mask); } -static u32 hv_compose_msi_req_v2( - struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity, - u32 slot, u8 vector, u16 vector_count) +/* + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0. 
+ */ +static int hv_compose_multi_msi_req_get_cpu(void) { + static DEFINE_SPINLOCK(multi_msi_cpu_lock); + + /* -1 means starting with CPU 0 */ + static int cpu_next = -1; + + unsigned long flags; int cpu; + spin_lock_irqsave(&multi_msi_cpu_lock, flags); + + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, + false); + cpu = cpu_next; + + spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); + + return cpu; +} + +static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, int cpu, + u32 slot, u8 vector, u16 vector_count) +{ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2( } static u32 hv_compose_msi_req_v3( - struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt3 *int_pkt, int cpu, u32 slot, u32 vector, u16 vector_count) { - int cpu; - int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_create_interrupt3 v3; } int_pkts; } __packed ctxt; + bool multi_msi; u64 trans_id; u32 size; int ret; + int cpu; + + msi_desc = irq_data_get_msi_desc(data); + multi_msi = !msi_desc->pci.msi_attrib.is_msix && + msi_desc->nvec_used > 1; /* Reuse the previous allocation */ - if (data->chip_data) { + if (data->chip_data && multi_msi) { int_desc = data->chip_data; msg->address_hi = int_desc->address >> 32; msg->address_lo = int_desc->address & 0xffffffff; @@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - msi_desc = irq_data_get_msi_desc(data); pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; + /* Free any previous message that might have already been composed. */ + if (data->chip_data && !multi_msi) { + int_desc = data->chip_data; + data->chip_data = NULL; + hv_int_desc_free(hpdev, int_desc); + } + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; - if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) { + if (multi_msi) { /* * If this is not the first MSI of Multi MSI, we already have * a mapping. Can exit early. 
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ vector = 32; vector_count = msi_desc->nvec_used; + cpu = hv_compose_multi_msi_req_get_cpu(); } else { vector = hv_msi_get_int_vector(data); vector_count = 1; + cpu = hv_compose_msi_req_get_cpu(dest); } /* @@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, - dest, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_2: case PCI_PROTOCOL_VERSION_1_3: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, - dest, + cpu, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, - dest, + cpu, hpdev->desc.win_slot.slot, vector, vector_count); diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c index 863fabe05bdc..307ee6f71042 100644 --- a/drivers/power/supply/ab8500_btemp.c +++ b/drivers/power/supply/ab8500_btemp.c @@ -725,7 +725,14 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Get thermal zone and ADC */ di->tz = thermal_zone_get_zone_by_name("battery-thermal"); if (IS_ERR(di->tz)) { - return dev_err_probe(dev, PTR_ERR(di->tz), + ret = PTR_ERR(di->tz); + /* + * This usually just means we are probing before the thermal + * zone, so just defer. + */ + if (ret == -ENODEV) + ret = -EPROBE_DEFER; + return dev_err_probe(dev, ret, "failed to get battery thermal zone\n"); } di->bat_ctrl = devm_iio_channel_get(dev, "bat_ctrl"); diff --git a/drivers/power/supply/ip5xxx_power.c b/drivers/power/supply/ip5xxx_power.c index 218e8e689a3f..00221e9c0bfc 100644 --- a/drivers/power/supply/ip5xxx_power.c +++ b/drivers/power/supply/ip5xxx_power.c @@ -352,7 +352,7 @@ static int ip5xxx_battery_get_property(struct power_supply *psy, ret = ip5xxx_battery_read_adc(ip5xxx, IP5XXX_BATIADC_DAT0, IP5XXX_BATIADC_DAT1, &raw); - val->intval = DIV_ROUND_CLOSEST(raw * 745985, 1000); + val->intval = DIV_ROUND_CLOSEST(raw * 149197, 200); return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 635f051b0821..f20a6ac584cc 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -121,7 +121,7 @@ struct rk817_charger { #define ADC_TO_CHARGE_UAH(adc_value, res_div) \ (adc_value / 3600 * 172 / res_div) -static u8 rk817_chg_cur_to_reg(u32 chg_cur_ma) +static int rk817_chg_cur_to_reg(u32 chg_cur_ma) { if (chg_cur_ma >= 3500) return CHG_3_5A; @@ -864,8 +864,8 @@ static int rk817_battery_init(struct rk817_charger *charger, { struct rk808 *rk808 = charger->rk808; u32 tmp, max_chg_vol_mv, max_chg_cur_ma; - u8 max_chg_vol_reg, chg_term_i_reg, max_chg_cur_reg; - int ret, chg_term_ma; + u8 max_chg_vol_reg, chg_term_i_reg; + int ret, chg_term_ma, max_chg_cur_reg; u8 bulk_reg[2]; /* Get initial plug state */ @@ -1116,14 +1116,12 @@ static int rk817_charger_probe(struct platform_device *pdev) charger->bat_ps = devm_power_supply_register(&pdev->dev, &rk817_bat_desc, &pscfg); - - charger->chg_ps = devm_power_supply_register(&pdev->dev, - &rk817_chg_desc, &pscfg); - - if (IS_ERR(charger->chg_ps)) + if 
(IS_ERR(charger->bat_ps)) return dev_err_probe(dev, -EINVAL, "Battery failed to probe\n"); + charger->chg_ps = devm_power_supply_register(&pdev->dev, + &rk817_chg_desc, &pscfg); if (IS_ERR(charger->chg_ps)) return dev_err_probe(dev, -EINVAL, "Charger failed to probe\n"); diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c index 97c1be44e323..afc76c22271a 100644 --- a/drivers/ptp/ptp_idt82p33.c +++ b/drivers/ptp/ptp_idt82p33.c @@ -27,6 +27,8 @@ MODULE_VERSION("1.0"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILENAME); +#define EXTTS_PERIOD_MS (95) + /* Module Parameters */ static u32 phase_snap_threshold = SNAP_THRESHOLD_NS; module_param(phase_snap_threshold, uint, 0); @@ -36,6 +38,8 @@ MODULE_PARM_DESC(phase_snap_threshold, static char *firmware; module_param(firmware, charp, 0); +static struct ptp_pin_desc pin_config[MAX_PHC_PLL][MAX_TRIG_CLK]; + static inline int idt82p33_read(struct idt82p33 *idt82p33, u16 regaddr, u8 *buf, u16 count) { @@ -121,24 +125,270 @@ static int idt82p33_dpll_set_mode(struct idt82p33_channel *channel, return 0; } -static int _idt82p33_gettime(struct idt82p33_channel *channel, - struct timespec64 *ts) +static int idt82p33_set_tod_trigger(struct idt82p33_channel *channel, + u8 trigger, bool write) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + int err; + u8 cfg; + + if (trigger > WR_TRIG_SEL_MAX) + return -EINVAL; + + err = idt82p33_read(idt82p33, channel->dpll_tod_trigger, + &cfg, sizeof(cfg)); + + if (err) + return err; + + if (write == true) + trigger = (trigger << WRITE_TRIGGER_SHIFT) | + (cfg & READ_TRIGGER_MASK); + else + trigger = (trigger << READ_TRIGGER_SHIFT) | + (cfg & WRITE_TRIGGER_MASK); + + return idt82p33_write(idt82p33, channel->dpll_tod_trigger, + &trigger, sizeof(trigger)); +} + +static int idt82p33_get_extts(struct idt82p33_channel *channel, + struct timespec64 *ts) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + u8 buf[TOD_BYTE_COUNT]; + int err; + + err = idt82p33_read(idt82p33, channel->dpll_tod_sts, buf, sizeof(buf)); + + if (err) + return err; + + /* Since trigger is not self clearing itself, we have to poll tod_sts */ + if (memcmp(buf, channel->extts_tod_sts, TOD_BYTE_COUNT) == 0) + return -EAGAIN; + + memcpy(channel->extts_tod_sts, buf, TOD_BYTE_COUNT); + + idt82p33_byte_array_to_timespec(ts, buf); + + if (channel->discard_next_extts) { + channel->discard_next_extts = false; + return -EAGAIN; + } + + return 0; +} + +static int map_ref_to_tod_trig_sel(int ref, u8 *trigger) +{ + int err = 0; + + switch (ref) { + case 0: + *trigger = HW_TOD_TRIG_SEL_IN12; + break; + case 1: + *trigger = HW_TOD_TRIG_SEL_IN13; + break; + case 2: + *trigger = HW_TOD_TRIG_SEL_IN14; + break; + default: + err = -EINVAL; + } + + return err; +} + +static bool is_one_shot(u8 mask) +{ + /* Treat single bit PLL masks as continuous trigger */ + if ((mask == 1) || (mask == 2)) + return false; + else + return true; +} + +static int arm_tod_read_with_trigger(struct idt82p33_channel *channel, u8 trigger) { struct idt82p33 *idt82p33 = channel->idt82p33; u8 buf[TOD_BYTE_COUNT]; + int err; + + /* Remember the current tod_sts before setting the trigger */ + err = idt82p33_read(idt82p33, channel->dpll_tod_sts, buf, sizeof(buf)); + + if (err) + return err; + + memcpy(channel->extts_tod_sts, buf, TOD_BYTE_COUNT); + + err = idt82p33_set_tod_trigger(channel, trigger, false); + + if (err) + dev_err(idt82p33->dev, "%s: err = %d", __func__, err); + + return err; +} + +static int idt82p33_extts_enable(struct idt82p33_channel *channel, + struct 
ptp_clock_request *rq, int on) +{ + u8 index = rq->extts.index; + struct idt82p33 *idt82p33; + u8 mask = 1 << index; + int err = 0; + u8 old_mask; u8 trigger; + int ref; + + idt82p33 = channel->idt82p33; + old_mask = idt82p33->extts_mask; + + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests to enable time stamping on falling edge */ + if ((rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_FALLING_EDGE)) + return -EOPNOTSUPP; + + if (index >= MAX_PHC_PLL) + return -EINVAL; + + if (on) { + /* Return if it was already enabled */ + if (idt82p33->extts_mask & mask) + return 0; + + /* Use the pin configured for the channel */ + ref = ptp_find_pin(channel->ptp_clock, PTP_PF_EXTTS, channel->plln); + + if (ref < 0) { + dev_err(idt82p33->dev, "%s: No valid pin found for Pll%d!\n", + __func__, channel->plln); + return -EBUSY; + } + + err = map_ref_to_tod_trig_sel(ref, &trigger); + + if (err) { + dev_err(idt82p33->dev, + "%s: Unsupported ref %d!\n", __func__, ref); + return err; + } + + err = arm_tod_read_with_trigger(&idt82p33->channel[index], trigger); + + if (err == 0) { + idt82p33->extts_mask |= mask; + idt82p33->channel[index].tod_trigger = trigger; + idt82p33->event_channel[index] = channel; + idt82p33->extts_single_shot = is_one_shot(idt82p33->extts_mask); + + if (old_mask) + return 0; + + schedule_delayed_work(&idt82p33->extts_work, + msecs_to_jiffies(EXTTS_PERIOD_MS)); + } + } else { + idt82p33->extts_mask &= ~mask; + idt82p33->extts_single_shot = is_one_shot(idt82p33->extts_mask); + + if (idt82p33->extts_mask == 0) + cancel_delayed_work(&idt82p33->extts_work); + } + + return err; +} + +static int idt82p33_extts_check_channel(struct idt82p33 *idt82p33, u8 todn) +{ + struct idt82p33_channel *event_channel; + struct ptp_clock_event event; + struct timespec64 ts; + int err; + + err = idt82p33_get_extts(&idt82p33->channel[todn], &ts); + if (err == 0) { + event_channel = idt82p33->event_channel[todn]; + event.type = PTP_CLOCK_EXTTS; + event.index = todn; + event.timestamp = timespec64_to_ns(&ts); + ptp_clock_event(event_channel->ptp_clock, + &event); + } + return err; +} + +static u8 idt82p33_extts_enable_mask(struct idt82p33_channel *channel, + u8 extts_mask, bool enable) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + u8 trigger = channel->tod_trigger; + u8 mask; int err; + int i; - trigger = TOD_TRIGGER(HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG, - HW_TOD_RD_TRIG_SEL_LSB_TOD_STS); + if (extts_mask == 0) + return 0; + if (enable == false) + cancel_delayed_work_sync(&idt82p33->extts_work); - err = idt82p33_write(idt82p33, channel->dpll_tod_trigger, - &trigger, sizeof(trigger)); + for (i = 0; i < MAX_PHC_PLL; i++) { + mask = 1 << i; + + if ((extts_mask & mask) == 0) + continue; + if (enable) { + err = arm_tod_read_with_trigger(&idt82p33->channel[i], trigger); + if (err) + dev_err(idt82p33->dev, + "%s: Arm ToD read trigger failed, err = %d", + __func__, err); + } else { + err = idt82p33_extts_check_channel(idt82p33, i); + if (err == 0 && idt82p33->extts_single_shot) + /* trigger happened so we won't re-enable it */ + extts_mask &= ~mask; + } + } + + if (enable) + schedule_delayed_work(&idt82p33->extts_work, + msecs_to_jiffies(EXTTS_PERIOD_MS)); + + return extts_mask; +} + +static int _idt82p33_gettime(struct idt82p33_channel *channel, + struct timespec64 *ts) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + u8 old_mask = 
idt82p33->extts_mask; + u8 buf[TOD_BYTE_COUNT]; + u8 new_mask = 0; + int err; + + /* Disable extts */ + if (old_mask) + new_mask = idt82p33_extts_enable_mask(channel, old_mask, false); + + err = idt82p33_set_tod_trigger(channel, HW_TOD_RD_TRIG_SEL_LSB_TOD_STS, + false); if (err) return err; + channel->discard_next_extts = true; + if (idt82p33->calculate_overhead_flag) idt82p33->start_time = ktime_get_raw(); @@ -147,6 +397,10 @@ static int _idt82p33_gettime(struct idt82p33_channel *channel, if (err) return err; + /* Re-enable extts */ + if (new_mask) + idt82p33_extts_enable_mask(channel, new_mask, true); + idt82p33_byte_array_to_timespec(ts, buf); return 0; @@ -165,19 +419,16 @@ static int _idt82p33_settime(struct idt82p33_channel *channel, struct timespec64 local_ts = *ts; char buf[TOD_BYTE_COUNT]; s64 dynamic_overhead_ns; - unsigned char trigger; int err; u8 i; - trigger = TOD_TRIGGER(HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG, - HW_TOD_RD_TRIG_SEL_LSB_TOD_STS); - - err = idt82p33_write(idt82p33, channel->dpll_tod_trigger, - &trigger, sizeof(trigger)); - + err = idt82p33_set_tod_trigger(channel, HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG, + true); if (err) return err; + channel->discard_next_extts = true; + if (idt82p33->calculate_overhead_flag) { dynamic_overhead_ns = ktime_to_ns(ktime_get_raw()) - ktime_to_ns(idt82p33->start_time); @@ -202,7 +453,8 @@ static int _idt82p33_settime(struct idt82p33_channel *channel, return err; } -static int _idt82p33_adjtime(struct idt82p33_channel *channel, s64 delta_ns) +static int _idt82p33_adjtime_immediate(struct idt82p33_channel *channel, + s64 delta_ns) { struct idt82p33 *idt82p33 = channel->idt82p33; struct timespec64 ts; @@ -226,6 +478,60 @@ static int _idt82p33_adjtime(struct idt82p33_channel *channel, s64 delta_ns) return err; } +static int _idt82p33_adjtime_internal_triggered(struct idt82p33_channel *channel, + s64 delta_ns) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + char buf[TOD_BYTE_COUNT]; + struct timespec64 ts; + const u8 delay_ns = 32; + s32 remainder; + s64 ns; + int err; + + err = _idt82p33_gettime(channel, &ts); + + if (err) + return err; + + if (ts.tv_nsec > (NSEC_PER_SEC - 5 * NSEC_PER_MSEC)) { + /* Too close to miss next trigger, so skip it */ + mdelay(6); + ns = (ts.tv_sec + 2) * NSEC_PER_SEC + delta_ns + delay_ns; + } else + ns = (ts.tv_sec + 1) * NSEC_PER_SEC + delta_ns + delay_ns; + + ts = ns_to_timespec64(ns); + idt82p33_timespec_to_byte_array(&ts, buf); + + /* + * Store the new time value. 
+ */ + err = idt82p33_write(idt82p33, channel->dpll_tod_cnfg, buf, sizeof(buf)); + if (err) + return err; + + /* Schedule to implement the workaround in one second */ + (void)div_s64_rem(delta_ns, NSEC_PER_SEC, &remainder); + if (remainder != 0) + schedule_delayed_work(&channel->adjtime_work, HZ); + + return idt82p33_set_tod_trigger(channel, HW_TOD_TRIG_SEL_TOD_PPS, true); +} + +static void idt82p33_adjtime_workaround(struct work_struct *work) +{ + struct idt82p33_channel *channel = container_of(work, + struct idt82p33_channel, + adjtime_work.work); + struct idt82p33 *idt82p33 = channel->idt82p33; + + mutex_lock(idt82p33->lock); + /* Workaround for TOD-to-output alignment issue */ + _idt82p33_adjtime_internal_triggered(channel, 0); + mutex_unlock(idt82p33->lock); +} + static int _idt82p33_adjfine(struct idt82p33_channel *channel, long scaled_ppm) { struct idt82p33 *idt82p33 = channel->idt82p33; @@ -233,25 +539,22 @@ static int _idt82p33_adjfine(struct idt82p33_channel *channel, long scaled_ppm) int err, i; s64 fcw; - if (scaled_ppm == channel->current_freq_ppb) - return 0; - /* - * Frequency Control Word unit is: 1.68 * 10^-10 ppm + * Frequency Control Word unit is: 1.6861512 * 10^-10 ppm * * adjfreq: - * ppb * 10^9 - * FCW = ---------- - * 168 + * ppb * 10^14 + * FCW = ----------- + * 16861512 * * adjfine: - * scaled_ppm * 5^12 - * FCW = ------------- - * 168 * 2^4 + * scaled_ppm * 5^12 * 10^5 + * FCW = ------------------------ + * 16861512 * 2^4 */ - fcw = scaled_ppm * 244140625ULL; - fcw = div_s64(fcw, 2688); + fcw = scaled_ppm * 762939453125ULL; + fcw = div_s64(fcw, 8430756LL); for (i = 0; i < 5; i++) { buf[i] = fcw & 0xff; @@ -266,26 +569,84 @@ static int _idt82p33_adjfine(struct idt82p33_channel *channel, long scaled_ppm) err = idt82p33_write(idt82p33, channel->dpll_freq_cnfg, buf, sizeof(buf)); - if (err == 0) - channel->current_freq_ppb = scaled_ppm; - return err; } +/* ppb = scaled_ppm * 125 / 2^13 */ +static s32 idt82p33_ddco_scaled_ppm(long current_ppm, s32 ddco_ppb) +{ + s64 scaled_ppm = div_s64(((s64)ddco_ppb << 13), 125); + s64 max_scaled_ppm = div_s64(((s64)DCO_MAX_PPB << 13), 125); + + current_ppm += scaled_ppm; + + if (current_ppm > max_scaled_ppm) + current_ppm = max_scaled_ppm; + else if (current_ppm < -max_scaled_ppm) + current_ppm = -max_scaled_ppm; + + return (s32)current_ppm; +} + +static int idt82p33_stop_ddco(struct idt82p33_channel *channel) +{ + int err; + + err = _idt82p33_adjfine(channel, channel->current_freq); + if (err) + return err; + + channel->ddco = false; + + return 0; +} + +static int idt82p33_start_ddco(struct idt82p33_channel *channel, s32 delta_ns) +{ + s32 current_ppm = channel->current_freq; + u32 duration_ms = MSEC_PER_SEC; + s32 ppb; + int err; + + /* If the ToD correction is less than 5 nanoseconds, then skip it. 
+ * The error introduced by the ToD adjustment procedure would be bigger + * than the required ToD correction + */ + if (abs(delta_ns) < DDCO_THRESHOLD_NS) + return 0; + + /* For most cases, keep ddco duration 1 second */ + ppb = delta_ns; + while (abs(ppb) > DCO_MAX_PPB) { + duration_ms *= 2; + ppb /= 2; + } + + err = _idt82p33_adjfine(channel, + idt82p33_ddco_scaled_ppm(current_ppm, ppb)); + if (err) + return err; + + /* schedule the worker to cancel ddco */ + ptp_schedule_worker(channel->ptp_clock, + msecs_to_jiffies(duration_ms) - 1); + channel->ddco = true; + + return 0; +} + static int idt82p33_measure_one_byte_write_overhead( struct idt82p33_channel *channel, s64 *overhead_ns) { struct idt82p33 *idt82p33 = channel->idt82p33; ktime_t start, stop; + u8 trigger = 0; s64 total_ns; - u8 trigger; int err; u8 i; total_ns = 0; *overhead_ns = 0; - trigger = TOD_TRIGGER(HW_TOD_WR_TRIG_SEL_MSB_TOD_CNFG, - HW_TOD_RD_TRIG_SEL_LSB_TOD_STS); for (i = 0; i < MAX_MEASURMENT_COUNT; i++) { @@ -307,8 +668,41 @@ static int idt82p33_measure_one_byte_write_overhead( return err; } +static int idt82p33_measure_one_byte_read_overhead( + struct idt82p33_channel *channel, s64 *overhead_ns) +{ + struct idt82p33 *idt82p33 = channel->idt82p33; + ktime_t start, stop; + u8 trigger = 0; + s64 total_ns; + int err; + u8 i; + + total_ns = 0; + *overhead_ns = 0; + + for (i = 0; i < MAX_MEASURMENT_COUNT; i++) { + + start = ktime_get_raw(); + + err = idt82p33_read(idt82p33, channel->dpll_tod_trigger, + &trigger, sizeof(trigger)); + + stop = ktime_get_raw(); + + if (err) + return err; + + total_ns += ktime_to_ns(stop) - ktime_to_ns(start); + } + + *overhead_ns = div_s64(total_ns, MAX_MEASURMENT_COUNT); + + return err; +} + static int idt82p33_measure_tod_write_9_byte_overhead( - struct idt82p33_channel *channel) + struct idt82p33_channel *channel) { struct idt82p33 *idt82p33 = channel->idt82p33; u8 buf[TOD_BYTE_COUNT]; @@ -368,7 +762,7 @@ static int idt82p33_measure_settime_gettime_gap_overhead( static int idt82p33_measure_tod_write_overhead(struct idt82p33_channel *channel) { - s64 trailing_overhead_ns, one_byte_write_ns, gap_ns; + s64 trailing_overhead_ns, one_byte_write_ns, gap_ns, one_byte_read_ns; struct idt82p33 *idt82p33 = channel->idt82p33; int err; @@ -388,12 +782,19 @@ static int idt82p33_measure_tod_write_overhead(struct idt82p33_channel *channel) if (err) return err; + err = idt82p33_measure_one_byte_read_overhead(channel, + &one_byte_read_ns); + + if (err) + return err; + err = idt82p33_measure_tod_write_9_byte_overhead(channel); if (err) return err; - trailing_overhead_ns = gap_ns - (2 * one_byte_write_ns); + trailing_overhead_ns = gap_ns - 2 * one_byte_write_ns + - one_byte_read_ns; idt82p33->tod_write_overhead_ns -= trailing_overhead_ns; @@ -462,6 +863,20 @@ static int idt82p33_sync_tod(struct idt82p33_channel *channel, bool enable) &sync_cnfg, sizeof(sync_cnfg)); } +static long idt82p33_work_handler(struct ptp_clock_info *ptp) +{ + struct idt82p33_channel *channel = + container_of(ptp, struct idt82p33_channel, caps); + struct idt82p33 *idt82p33 = channel->idt82p33; + + mutex_lock(idt82p33->lock); + (void)idt82p33_stop_ddco(channel); + mutex_unlock(idt82p33->lock); + + /* Return a negative value here to not reschedule */ + return -1; +} + static int idt82p33_output_enable(struct idt82p33_channel *channel, bool enable, unsigned int outn) { @@ -480,40 +895,10 @@ static int idt82p33_output_enable(struct idt82p33_channel *channel, return idt82p33_write(idt82p33, OUT_MUX_CNFG(outn), &val, sizeof(val)); } -static 
int idt82p33_output_mask_enable(struct idt82p33_channel *channel, - bool enable) -{ - u16 mask; - int err; - u8 outn; - - mask = channel->output_mask; - outn = 0; - - while (mask) { - if (mask & 0x1) { - err = idt82p33_output_enable(channel, enable, outn); - if (err) - return err; - } - - mask >>= 0x1; - outn++; - } - - return 0; -} - static int idt82p33_perout_enable(struct idt82p33_channel *channel, bool enable, struct ptp_perout_request *perout) { - unsigned int flags = perout->flags; - - /* Enable/disable output based on output_mask */ - if (flags == PEROUT_ENABLE_OUTPUT_MASK) - return idt82p33_output_mask_enable(channel, enable); - /* Enable/disable individual output instead */ return idt82p33_output_enable(channel, enable, perout->index); } @@ -546,14 +931,15 @@ static void idt82p33_ptp_clock_unregister_all(struct idt82p33 *idt82p33) u8 i; for (i = 0; i < MAX_PHC_PLL; i++) { - channel = &idt82p33->channel[i]; - + cancel_delayed_work_sync(&channel->adjtime_work); if (channel->ptp_clock) ptp_clock_unregister(channel->ptp_clock); } } + + static int idt82p33_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { @@ -564,7 +950,8 @@ static int idt82p33_enable(struct ptp_clock_info *ptp, mutex_lock(idt82p33->lock); - if (rq->type == PTP_CLK_REQ_PEROUT) { + switch (rq->type) { + case PTP_CLK_REQ_PEROUT: if (!on) err = idt82p33_perout_enable(channel, false, &rq->perout); @@ -575,6 +962,12 @@ static int idt82p33_enable(struct ptp_clock_info *ptp, else err = idt82p33_perout_enable(channel, true, &rq->perout); + break; + case PTP_CLK_REQ_EXTTS: + err = idt82p33_extts_enable(channel, rq, on); + break; + default: + break; } mutex_unlock(idt82p33->lock); @@ -634,13 +1027,22 @@ static int idt82p33_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct idt82p33 *idt82p33 = channel->idt82p33; int err; + if (channel->ddco == true) + return 0; + + if (scaled_ppm == channel->current_freq) + return 0; + mutex_lock(idt82p33->lock); err = _idt82p33_adjfine(channel, scaled_ppm); + + if (err == 0) + channel->current_freq = scaled_ppm; mutex_unlock(idt82p33->lock); + if (err) dev_err(idt82p33->dev, "Failed in %s with err %d!\n", __func__, err); - return err; } @@ -651,14 +1053,21 @@ static int idt82p33_adjtime(struct ptp_clock_info *ptp, s64 delta_ns) struct idt82p33 *idt82p33 = channel->idt82p33; int err; + if (channel->ddco == true) + return -EBUSY; + mutex_lock(idt82p33->lock); if (abs(delta_ns) < phase_snap_threshold) { + err = idt82p33_start_ddco(channel, delta_ns); mutex_unlock(idt82p33->lock); - return 0; + return err; } - err = _idt82p33_adjtime(channel, delta_ns); + /* Use more accurate internal 1pps triggered write first */ + err = _idt82p33_adjtime_internal_triggered(channel, delta_ns); + if (err && delta_ns > IMMEDIATE_SNAP_THRESHOLD_NS) + err = _idt82p33_adjtime_immediate(channel, delta_ns); mutex_unlock(idt82p33->lock); @@ -703,8 +1112,10 @@ static int idt82p33_settime(struct ptp_clock_info *ptp, return err; } -static int idt82p33_channel_init(struct idt82p33_channel *channel, int index) +static int idt82p33_channel_init(struct idt82p33 *idt82p33, u32 index) { + struct idt82p33_channel *channel = &idt82p33->channel[index]; + switch (index) { case 0: channel->dpll_tod_cnfg = DPLL1_TOD_CNFG; @@ -730,22 +1141,60 @@ static int idt82p33_channel_init(struct idt82p33_channel *channel, int index) return -EINVAL; } - channel->current_freq_ppb = 0; + channel->plln = index; + channel->current_freq = 0; + channel->idt82p33 = idt82p33; + INIT_DELAYED_WORK(&channel->adjtime_work, 
idt82p33_adjtime_workaround); return 0; } -static void idt82p33_caps_init(struct ptp_clock_info *caps) +static int idt82p33_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) { + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + break; + case PTP_PF_PEROUT: + case PTP_PF_PHYSYNC: + return -1; + } + return 0; +} + +static void idt82p33_caps_init(u32 index, struct ptp_clock_info *caps, + struct ptp_pin_desc *pin_cfg, u8 max_pins) +{ + struct ptp_pin_desc *ppd; + int i; + caps->owner = THIS_MODULE; caps->max_adj = DCO_MAX_PPB; - caps->n_per_out = 11; - caps->adjphase = idt82p33_adjwritephase; + caps->n_per_out = MAX_PER_OUT; + caps->n_ext_ts = MAX_PHC_PLL, + caps->n_pins = max_pins, + caps->adjphase = idt82p33_adjwritephase, caps->adjfine = idt82p33_adjfine; caps->adjtime = idt82p33_adjtime; caps->gettime64 = idt82p33_gettime; caps->settime64 = idt82p33_settime; caps->enable = idt82p33_enable; + caps->verify = idt82p33_verify_pin; + caps->do_aux_work = idt82p33_work_handler; + + snprintf(caps->name, sizeof(caps->name), "IDT 82P33 PLL%u", index); + + caps->pin_config = pin_cfg; + + for (i = 0; i < max_pins; ++i) { + ppd = &pin_cfg[i]; + + ppd->index = i; + ppd->func = PTP_PF_NONE; + ppd->chan = index; + snprintf(ppd->name, sizeof(ppd->name), "in%d", 12 + i); + } } static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) @@ -758,7 +1207,7 @@ static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) channel = &idt82p33->channel[index]; - err = idt82p33_channel_init(channel, index); + err = idt82p33_channel_init(idt82p33, index); if (err) { dev_err(idt82p33->dev, "Channel_init failed in %s with err %d!\n", @@ -766,11 +1215,8 @@ static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) return err; } - channel->idt82p33 = idt82p33; - - idt82p33_caps_init(&channel->caps); - snprintf(channel->caps.name, sizeof(channel->caps.name), - "IDT 82P33 PLL%u", index); + idt82p33_caps_init(index, &channel->caps, + pin_config[index], MAX_TRIG_CLK); channel->ptp_clock = ptp_clock_register(&channel->caps, NULL); @@ -805,17 +1251,46 @@ static int idt82p33_enable_channel(struct idt82p33 *idt82p33, u32 index) return 0; } +static int idt82p33_reset(struct idt82p33 *idt82p33, bool cold) +{ + int err; + u8 cfg = SOFT_RESET_EN; + + if (cold == true) + goto cold_reset; + + err = idt82p33_read(idt82p33, REG_SOFT_RESET, &cfg, sizeof(cfg)); + if (err) { + dev_err(idt82p33->dev, + "Soft reset failed with err %d!\n", err); + return err; + } + + cfg |= SOFT_RESET_EN; + +cold_reset: + err = idt82p33_write(idt82p33, REG_SOFT_RESET, &cfg, sizeof(cfg)); + if (err) + dev_err(idt82p33->dev, + "Cold reset failed with err %d!\n", err); + return err; +} + static int idt82p33_load_firmware(struct idt82p33 *idt82p33) { + char fname[128] = FW_FILENAME; const struct firmware *fw; struct idt82p33_fwrc *rec; u8 loaddr, page, val; int err; s32 len; - dev_dbg(idt82p33->dev, "requesting firmware '%s'\n", FW_FILENAME); + if (firmware) /* module parameter */ + snprintf(fname, sizeof(fname), "%s", firmware); - err = request_firmware(&fw, FW_FILENAME, idt82p33->dev); + dev_info(idt82p33->dev, "requesting firmware '%s'\n", fname); + + err = request_firmware(&fw, fname, idt82p33->dev); if (err) { dev_err(idt82p33->dev, @@ -863,6 +1338,46 @@ out: return err; } +static void idt82p33_extts_check(struct work_struct *work) +{ + struct idt82p33 *idt82p33 = container_of(work, struct idt82p33, + extts_work.work); + struct idt82p33_channel *channel; + 
int err; + u8 mask; + int i; + + if (idt82p33->extts_mask == 0) + return; + + mutex_lock(idt82p33->lock); + + for (i = 0; i < MAX_PHC_PLL; i++) { + mask = 1 << i; + + if ((idt82p33->extts_mask & mask) == 0) + continue; + + err = idt82p33_extts_check_channel(idt82p33, i); + + if (err == 0) { + /* trigger clears itself, so clear the mask */ + if (idt82p33->extts_single_shot) { + idt82p33->extts_mask &= ~mask; + } else { + /* Re-arm */ + channel = &idt82p33->channel[i]; + arm_tod_read_with_trigger(channel, channel->tod_trigger); + } + } + } + + if (idt82p33->extts_mask) + schedule_delayed_work(&idt82p33->extts_work, + msecs_to_jiffies(EXTTS_PERIOD_MS)); + + mutex_unlock(idt82p33->lock); +} static int idt82p33_probe(struct platform_device *pdev) { @@ -885,25 +1400,33 @@ static int idt82p33_probe(struct platform_device *pdev) idt82p33->pll_mask = DEFAULT_PLL_MASK; idt82p33->channel[0].output_mask = DEFAULT_OUTPUT_MASK_PLL0; idt82p33->channel[1].output_mask = DEFAULT_OUTPUT_MASK_PLL1; + idt82p33->extts_mask = 0; + INIT_DELAYED_WORK(&idt82p33->extts_work, idt82p33_extts_check); mutex_lock(idt82p33->lock); - err = idt82p33_load_firmware(idt82p33); + /* cold reset before loading firmware */ + idt82p33_reset(idt82p33, true); + err = idt82p33_load_firmware(idt82p33); if (err) dev_warn(idt82p33->dev, "loading firmware failed with %d\n", err); + /* soft reset after loading firmware */ + idt82p33_reset(idt82p33, false); + if (idt82p33->pll_mask) { for (i = 0; i < MAX_PHC_PLL; i++) { - if (idt82p33->pll_mask & (1 << i)) { + if (idt82p33->pll_mask & (1 << i)) err = idt82p33_enable_channel(idt82p33, i); - if (err) { - dev_err(idt82p33->dev, - "Failed in %s with err %d!\n", - __func__, err); - break; - } + else + err = idt82p33_channel_init(idt82p33, i); + if (err) { + dev_err(idt82p33->dev, + "Failed in %s with err %d!\n", + __func__, err); + break; } } } else { @@ -928,6 +1451,8 @@ static int idt82p33_remove(struct platform_device *pdev) { struct idt82p33 *idt82p33 = platform_get_drvdata(pdev); + cancel_delayed_work_sync(&idt82p33->extts_work); + idt82p33_ptp_clock_unregister_all(idt82p33); return 0; diff --git a/drivers/ptp/ptp_idt82p33.h b/drivers/ptp/ptp_idt82p33.h index 0ea1c35c0f9f..8fcb0b17d207 100644 --- a/drivers/ptp/ptp_idt82p33.h +++ b/drivers/ptp/ptp_idt82p33.h @@ -13,6 +13,8 @@ #define FW_FILENAME "idt82p33xxx.bin" #define MAX_PHC_PLL (2) +#define MAX_TRIG_CLK (3) +#define MAX_PER_OUT (11) #define TOD_BYTE_COUNT (10) #define DCO_MAX_PPB (92000) #define MAX_MEASURMENT_COUNT (5) @@ -20,7 +22,6 @@ #define IMMEDIATE_SNAP_THRESHOLD_NS (50000) #define DDCO_THRESHOLD_NS (5) #define IDT82P33_MAX_WRITE_COUNT (512) -#define PEROUT_ENABLE_OUTPUT_MASK (0xdeadbeef) #define PLLMASK_ADDR_HI 0xFF #define PLLMASK_ADDR_LO 0xA5 @@ -60,8 +61,18 @@ struct idt82p33_channel { struct ptp_clock *ptp_clock; struct idt82p33 *idt82p33; enum pll_mode pll_mode; - s32 current_freq_ppb; + /* Workaround for TOD-to-output alignment issue */ + struct delayed_work adjtime_work; + s32 current_freq; + /* double dco mode */ + bool ddco; u8 output_mask; + /* last input trigger for extts */ + u8 tod_trigger; + bool discard_next_extts; + u8 plln; + /* remember last tod_sts for extts */ + u8 extts_tod_sts[TOD_BYTE_COUNT]; u16 dpll_tod_cnfg; u16 dpll_tod_trigger; u16 dpll_tod_sts; @@ -76,6 +87,12 @@ struct idt82p33 { struct idt82p33_channel channel[MAX_PHC_PLL]; struct device *dev; u8 pll_mask; + /* Polls for external time stamps */ + u8 extts_mask; + bool extts_single_shot; + struct delayed_work extts_work; + /* Remember the ptp 
channel to report extts */ + struct idt82p33_channel *event_channel[MAX_PHC_PLL]; /* Mutex to protect operations from being interrupted */ struct mutex *lock; struct regmap *regmap; diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 154d58cbd9ce..4bbaccd543ad 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1647,10 +1647,6 @@ ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - fw_image = bp->fw_loader ? "loader" : "fw"; sprintf(buf, "%d.%d", bp->fw_tag, bp->fw_version); err = devlink_info_version_running_put(req, fw_image, buf); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index bcccad8f7516..e8c00a884f1f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5154,6 +5154,7 @@ static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); + debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); @@ -5644,11 +5645,15 @@ wash: mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); + put_device(&rdev->dev); + rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); + if (rdev && rdev->dev.of_node) + of_node_put(rdev->dev.of_node); + kfree(rdev); kfree(config); - put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); @@ -5677,7 +5682,6 @@ void regulator_unregister(struct regulator_dev *rdev) mutex_lock(®ulator_list_mutex); - debugfs_remove_recursive(rdev->debugfs); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c index 6b96899eb27e..8488417f4b2c 100644 --- a/drivers/regulator/rt5759-regulator.c +++ b/drivers/regulator/rt5759-regulator.c @@ -243,6 +243,7 @@ static int rt5759_regulator_register(struct rt5759_priv *priv) if (priv->chip_type == CHIP_TYPE_RT5759A) reg_desc->uV_step = RT5759A_STEP_UV; + memset(®_cfg, 0, sizeof(reg_cfg)); reg_cfg.dev = priv->dev; reg_cfg.of_node = np; reg_cfg.init_data = of_get_regulator_init_data(priv->dev, np, reg_desc); diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 75a941fb3c2b..1b2eee95ad3f 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) chip->cs_gpiod = cs_gpiod; } + usleep_range(10000, 11000); + i2c_set_clientdata(client, chip); chip->chip_irq = client->irq; chip->dev = dev; diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 430265c404d6..f3856750944f 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -67,6 +67,7 @@ struct twlreg_info { #define TWL6030_CFG_STATE_SLEEP 0x03 #define TWL6030_CFG_STATE_GRP_SHIFT 5 #define TWL6030_CFG_STATE_APP_SHIFT 2 +#define TWL6030_CFG_STATE_MASK 0x03 #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ TWL6030_CFG_STATE_APP_SHIFT) @@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) if (grp < 0) return grp; grp &= P1_GRP_6030; + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val = TWL6030_CFG_STATE_APP(val); } 
else { + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val &= TWL6030_CFG_STATE_MASK; grp = 1; } - val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - val = TWL6030_CFG_STATE_APP(val); - return grp && (val == TWL6030_CFG_STATE_ON); } @@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - switch (TWL6030_CFG_STATE_APP(val)) { + if (info->features & TWL6032_SUBCLASS) + val &= TWL6030_CFG_STATE_MASK; + else + val = TWL6030_CFG_STATE_APP(val); + + switch (val) { case TWL6030_CFG_STATE_ON: return REGULATOR_STATUS_NORMAL; @@ -530,6 +537,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ #define TWL6032_ADJUSTABLE_LDO(label, offset) \ static const struct twlreg_info TWL6032_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ @@ -562,6 +570,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index cb83f81da416..df17f0f9cb0f 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1954,7 +1954,7 @@ dasd_copy_pair_show(struct device *dev, break; } } - if (!copy->entry[i].primary) + if (i == DASD_CP_ENTRIES) goto out; /* print all secondary */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 662730f3b027..5d0b9991e91a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4722,7 +4722,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, struct dasd_device *basedev; struct req_iterator iter; struct dasd_ccw_req *cqr; - unsigned int first_offs; unsigned int trkcount; unsigned long *idaws; unsigned int size; @@ -4756,7 +4755,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; - first_offs = 0; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -4800,13 +4798,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if (use_prefix) { prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, - startdev, 1, first_offs + 1, trkcount, 0, 0); + startdev, 1, 0, trkcount, 0, 0); } else { define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); ccw[-1].flags |= CCW_FLAG_CC; data += sizeof(struct DE_eckd_data); - locate_record_ext(ccw++, data, first_trk, first_offs + 1, + locate_record_ext(ccw++, data, first_trk, 0, trkcount, cmd, basedev, 0, 0); } @@ -5500,7 +5498,7 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) * Dump the range of CCWs into 'page' buffer * and return number of printed chars. 
*/ -static int +static void dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) { int len, count; @@ -5518,16 +5516,21 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) else datap = (char *) ((addr_t) from->cda); - /* dump data (max 32 bytes) */ - for (count = 0; count < from->count && count < 32; count++) { - if (count % 8 == 0) len += sprintf(page + len, " "); - if (count % 4 == 0) len += sprintf(page + len, " "); + /* dump data (max 128 bytes) */ + for (count = 0; count < from->count && count < 128; count++) { + if (count % 32 == 0) + len += sprintf(page + len, "\n"); + if (count % 8 == 0) + len += sprintf(page + len, " "); + if (count % 4 == 0) + len += sprintf(page + len, " "); len += sprintf(page + len, "%02x", datap[count]); } len += sprintf(page + len, "\n"); from++; } - return len; + if (len > 0) + printk(KERN_ERR "%s", page); } static void @@ -5619,37 +5622,33 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, if (req) { /* req == NULL for unsolicited interrupts */ /* dump the Channel Program (max 140 Bytes per line) */ - /* Count CCW and print first CCWs (maximum 1024 % 140 = 7) */ + /* Count CCW and print first CCWs (maximum 7) */ first = req->cpaddr; for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); to = min(first + 6, last); - len = sprintf(page, PRINTK_HEADER - " Related CP in req: %p\n", req); - dasd_eckd_dump_ccw_range(first, to, page + len); - printk(KERN_ERR "%s", page); + printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req); + dasd_eckd_dump_ccw_range(first, to, page); /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ - len = 0; from = ++to; fail = (struct ccw1 *)(addr_t) irb->scsw.cmd.cpa; /* failing CCW */ if (from < fail - 2) { from = fail - 2; /* there is a gap - print header */ - len += sprintf(page, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } to = min(fail + 1, last); - len += dasd_eckd_dump_ccw_range(from, to, page + len); + dasd_eckd_dump_ccw_range(from, to, page + len); /* print last CCWs (maximum 2) */ + len = 0; from = max(from, ++to); if (from < last - 1) { from = last - 1; /* there is a gap - print header */ - len += sprintf(page + len, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } - len += dasd_eckd_dump_ccw_range(from, last, page + len); - if (len > 0) - printk(KERN_ERR "%s", page); + dasd_eckd_dump_ccw_range(from, last, page + len); } free_page((unsigned long) page); } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index d0ddf2cc9786..9327dcdd6e5e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -401,7 +401,7 @@ dasd_ioctl_copy_pair_swap(struct block_device *bdev, void __user *argp) return -EFAULT; } if (memchr_inv(data.reserved, 0, sizeof(data.reserved))) { - pr_warn("%s: Ivalid swap data specified.\n", + pr_warn("%s: Invalid swap data specified\n", dev_name(&device->cdev->dev)); dasd_put_device(device); return DASD_COPYPAIRSWAP_INVALID; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 93b80da60277..b392b9f5482e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -636,6 +636,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->minors = DCSSBLK_MINORS_PER_DISK; dev_info->gd->fops = &dcssblk_devops; dev_info->gd->private_data = dev_info; + dev_info->gd->flags |= GENHD_FL_NO_PART; 
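+	/* DCSS segments are whole-disk devices; GENHD_FL_NO_PART disables partition scanning and partitioned access for this gendisk. */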
blk_queue_logical_block_size(dev_info->gd->queue, 4096); blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 59ac98f2bd27..b02c631f3b71 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -233,8 +233,11 @@ static void __init ap_init_qci_info(void) if (!ap_qci_info) return; ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); - if (!ap_qci_info_old) + if (!ap_qci_info_old) { + kfree(ap_qci_info); + ap_qci_info = NULL; return; + } if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); kfree(ap_qci_info_old); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 19223b075568..ab3ea529cca7 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; - int req_id = req->req_id; + unsigned long req_id = req->req_id; zfcp_reqlist_add(adapter->req_list, req); diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index f77ee4051b00..3306de7170f6 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3265,7 +3265,8 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc, } if (scmd->result != (DID_OK << 16) && (scmd->cmnd[0] != ATA_12) && - (scmd->cmnd[0] != ATA_16)) { + (scmd->cmnd[0] != ATA_16) && + mrioc->logging_level & MPI3_DEBUG_SCSI_ERROR) { ioc_info(mrioc, "%s :scmd->result 0x%x\n", __func__, scmd->result); scsi_print_command(scmd); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 629853662b82..bebda917b138 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7323,8 +7323,12 @@ static int sdebug_add_host_helper(int per_host_idx) dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); - if (error) + if (error) { + spin_lock(&sdebug_host_list_lock); + list_del(&sdbg_host->host_list); + spin_unlock(&sdebug_host_list_lock); goto clean; + } ++sdebug_num_hosts; return 0; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index cd3db9684e52..f473c002fa4d 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -231,7 +231,7 @@ iscsi_create_endpoint(int dd_size) dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) - goto free_id; + goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) @@ -245,10 +245,12 @@ unregister_dev: device_unregister(&ep->dev); return NULL; -free_id: +put_dev: mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, id); mutex_unlock(&iscsi_ep_idr_mutex); + put_device(&ep->dev); + return NULL; free_ep: kfree(ep); return NULL; @@ -766,7 +768,7 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, err = device_register(&iface->dev); if (err) - goto free_iface; + goto put_dev; err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); if (err) @@ -780,9 +782,8 @@ unreg_iface: device_unregister(&iface->dev); return NULL; -free_iface: - put_device(iface->dev.parent); - kfree(iface); +put_dev: + put_device(&iface->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_iface); @@ -1251,15 +1252,15 @@ iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index, err = 
device_register(&fnode_sess->dev); if (err) - goto free_fnode_sess; + goto put_dev; if (dd_size) fnode_sess->dd_data = &fnode_sess[1]; return fnode_sess; -free_fnode_sess: - kfree(fnode_sess); +put_dev: + put_device(&fnode_sess->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess); @@ -1299,15 +1300,15 @@ iscsi_create_flashnode_conn(struct Scsi_Host *shost, err = device_register(&fnode_conn->dev); if (err) - goto free_fnode_conn; + goto put_dev; if (dd_size) fnode_conn->dd_data = &fnode_conn[1]; return fnode_conn; -free_fnode_conn: - kfree(fnode_conn); +put_dev: + put_device(&fnode_conn->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); @@ -4815,7 +4816,7 @@ iscsi_register_transport(struct iscsi_transport *tt) dev_set_name(&priv->dev, "%s", tt->name); err = device_register(&priv->dev); if (err) - goto free_priv; + goto put_dev; err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group); if (err) @@ -4850,8 +4851,8 @@ iscsi_register_transport(struct iscsi_transport *tt) unregister_dev: device_unregister(&priv->dev); return NULL; -free_priv: - kfree(priv); +put_dev: + put_device(&priv->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_register_transport); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..3c5b7e4227b2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -303,16 +303,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. */ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ - - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. 
+ * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. - */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; diff --git a/drivers/siox/siox-core.c b/drivers/siox/siox-core.c index 7c4f32d76966..561408583b2b 100644 --- a/drivers/siox/siox-core.c +++ b/drivers/siox/siox-core.c @@ -839,6 +839,8 @@ static struct siox_device *siox_device_add(struct siox_master *smaster, err_device_register: /* don't care to make the buffer smaller again */ + put_device(&sdevice->dev); + sdevice = NULL; err_buf_alloc: siox_master_unlock(smaster); diff --git a/drivers/slimbus/Kconfig b/drivers/slimbus/Kconfig index 2ed821f75816..a0fdf9d792cb 100644 --- a/drivers/slimbus/Kconfig +++ b/drivers/slimbus/Kconfig @@ -23,7 +23,7 @@ config SLIM_QCOM_CTRL config SLIM_QCOM_NGD_CTRL tristate "Qualcomm SLIMbus Satellite Non-Generic Device Component" depends on HAS_IOMEM && DMA_ENGINE && NET - depends on QCOM_RPROC_COMMON || COMPILE_TEST + depends on QCOM_RPROC_COMMON || (COMPILE_TEST && !QCOM_RPROC_COMMON) depends on ARCH_QCOM || COMPILE_TEST select QCOM_QMI_HELPERS select QCOM_PDR_HELPERS diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c index 75f87b3d8b95..73a2aa362957 100644 --- a/drivers/slimbus/stream.c +++ b/drivers/slimbus/stream.c @@ -67,10 +67,10 @@ static const int slim_presence_rate_table[] = { 384000, 768000, 0, /* Reserved */ - 110250, - 220500, - 441000, - 882000, + 11025, + 22050, + 44100, + 88200, 176400, 352800, 705600, diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index cc57a384d74d..28144c699b0c 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -11,6 +11,7 @@ #include <linux/platform_device.h> #include <linux/arm-smccc.h> #include <linux/of.h> +#include <linux/clk.h> #define REV_B1 0x21 @@ -56,6 +57,7 @@ static u32 __init imx8mq_soc_revision(void) void __iomem *ocotp_base; u32 magic; u32 rev; + struct clk *clk; np = of_find_compatible_node(NULL, NULL, "fsl,imx8mq-ocotp"); if (!np) @@ -63,6 +65,13 @@ static u32 __init imx8mq_soc_revision(void) ocotp_base = of_iomap(np, 0); WARN_ON(!ocotp_base); + clk = of_clk_get_by_name(np, NULL); + if (!clk) { + WARN_ON(!clk); + return 0; + } + + clk_prepare_enable(clk); /* * SOC revision on older imx8mq is not available in fuses so query @@ -79,6 +88,8 @@ static u32 __init imx8mq_soc_revision(void) soc_uid <<= 32; soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); + clk_disable_unprepare(clk); + clk_put(clk); iounmap(ocotp_base); of_node_put(np); diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 1322b8cce5b7..ababb910b391 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -128,12 +128,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) dw_spi_dma_sg_burst_init(dws); + pci_dev_put(dma_dev); + return 0; free_rxchan: dma_release_channel(dws->rxchan); dws->rxchan = NULL; err_exit: + pci_dev_put(dma_dev); return -EBUSY; } diff --git a/drivers/spi/spi-imx.c 
b/drivers/spi/spi-imx.c index 30d82cc7300b..d209930069cf 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -444,8 +444,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, unsigned int pre, post; unsigned int fin = spi_imx->spi_clk; - if (unlikely(fspi > fin)) - return 0; + fspi = min(fspi, fin); post = fls(fin) - fls(fspi); if (fin > fspi << post) @@ -1608,6 +1607,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller, return spi_imx_pio_transfer_slave(spi, transfer); /* + * If we decided in spi_imx_can_dma() that we want to do a DMA + * transfer, the SPI transfer has already been mapped, so we + * have to do the DMA transfer here. + */ + if (spi_imx->usedma) + return spi_imx_dma_transfer(spi_imx, transfer); + /* * Calculate the estimated time in us the transfer runs. Find * the number of Hz per byte per polling limit. */ @@ -1618,9 +1624,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller, if (transfer->len < byte_limit) return spi_imx_poll_transfer(spi, transfer); - if (spi_imx->usedma) - return spi_imx_dma_transfer(spi_imx, transfer); - return spi_imx_pio_transfer(spi, transfer); } diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a33c9a3de395..d6aff909fc36 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1273,8 +1273,11 @@ static int mtk_spi_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct mtk_spi *mdata = spi_master_get_devdata(master); + int ret; - pm_runtime_disable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) + return ret; mtk_spi_reset(mdata); @@ -1283,6 +1286,9 @@ static int mtk_spi_remove(struct platform_device *pdev) clk_unprepare(mdata->spi_hclk); } + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 10f0c5a6e0dc..9f356612ba7e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -924,8 +924,9 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi, static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi) { struct tegra_qspi_client_data *cdata; + struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master); - cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL); + cdata = devm_kzalloc(tqspi->dev, sizeof(*cdata), GFP_KERNEL); if (!cdata) return NULL; diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index f9589c5b62ba..1e5ad3b476ef 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -439,7 +439,7 @@ int rtllib_wx_set_essid(struct rtllib_device *ieee, union iwreq_data *wrqu, char *extra) { - int ret = 0, len, i; + int ret = 0, len; short proto_started; unsigned long flags; @@ -455,13 +455,6 @@ int rtllib_wx_set_essid(struct rtllib_device *ieee, goto out; } - for (i = 0; i < len; i++) { - if (extra[i] < 0) { - ret = -1; - goto out; - } - } - if (proto_started) rtllib_stop_protocol(ieee, true); diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 4407b56aa6d1..139031ccb700 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -397,6 +397,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host ret = device_register(&tl_hba->dev); if (ret) { pr_err("device_register() 
failed for tl_hba->dev: %d\n", ret); + put_device(&tl_hba->dev); return -ENODEV; } @@ -1073,7 +1074,7 @@ check_len: */ ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt); if (ret) - goto out; + return ERR_PTR(ret); sh = tl_hba->sh; tcm_loop_hba_no_cnt++; diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index f3947be13e2e..64f0e047c23d 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) rc = device_register(&optee_device->dev); if (rc) { pr_err("device registration failed, err: %d\n", rc); - kfree(optee_device); + put_device(&optee_device->dev); } return rc; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 5e516f5cac5a..b6e0cc4571ea 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -264,7 +264,7 @@ struct gsm_mux { bool constipated; /* Asked by remote to shut up */ bool has_devices; /* Devices were registered */ - struct mutex tx_mutex; + spinlock_t tx_lock; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 @@ -272,7 +272,7 @@ struct gsm_mux { struct list_head tx_data_list; /* Pending data packets */ /* Control messages */ - struct delayed_work kick_timeout; /* Kick TX queuing on timeout */ + struct timer_list kick_timer; /* Kick TX queuing on timeout */ struct timer_list t2_timer; /* Retransmit timer for commands */ int cretries; /* Command retry counter */ struct gsm_control *pending_cmd;/* Our current pending command */ @@ -700,6 +700,7 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) struct gsm_msg *msg; u8 *dp; int ocr; + unsigned long flags; msg = gsm_data_alloc(gsm, addr, 0, control); if (!msg) @@ -721,10 +722,10 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) gsm_print_packet("Q->", addr, cr, control, NULL, 0); - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); list_add_tail(&msg->list, &gsm->tx_ctrl_list); gsm->tx_bytes += msg->len; - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); gsmld_write_trigger(gsm); return 0; @@ -749,7 +750,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) spin_unlock_irqrestore(&dlci->lock, flags); /* Clear data packets in MUX write queue */ - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { if (msg->addr != addr) continue; @@ -757,7 +758,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) list_del(&msg->list); kfree(msg); } - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** @@ -1028,7 +1029,7 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) gsm->tx_bytes += msg->len; gsmld_write_trigger(gsm); - schedule_delayed_work(&gsm->kick_timeout, 10 * gsm->t1 * HZ / 100); + mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); } /** @@ -1043,9 +1044,10 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { - mutex_lock(&dlci->gsm->tx_mutex); + unsigned long flags; + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); __gsm_data_queue(dlci, msg); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /** @@ -1057,7 +1059,7 @@ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) * is data. 
Keep to the MRU of the mux. This path handles the usual tty * interface which is a byte stream with optional modem data. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) @@ -1117,7 +1119,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) * is data. Keep to the MRU of the mux. This path handles framed data * queued as skbuffs to the DLCI. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, @@ -1133,7 +1135,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, if (dlci->adaption == 4) overhead = 1; - /* dlci->skb is locked by tx_mutex */ + /* dlci->skb is locked by tx_lock */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) @@ -1187,7 +1189,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, * Push an empty frame in to the transmit queue to update the modem status * bits and to transmit an optional break. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, @@ -1301,12 +1303,13 @@ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { + unsigned long flags; int sweep; if (dlci->constipated) return; - mutex_lock(&dlci->gsm->tx_mutex); + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { @@ -1317,7 +1320,7 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) } if (sweep) gsm_dlci_data_sweep(dlci->gsm); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /* @@ -1708,7 +1711,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, unsigned int command, u8 *data, int clen) { struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), - GFP_KERNEL); + GFP_ATOMIC); unsigned long flags; if (ctrl == NULL) return NULL; @@ -2019,23 +2022,24 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) } /** - * gsm_kick_timeout - transmit if possible - * @work: work contained in our gsm object + * gsm_kick_timer - transmit if possible + * @t: timer contained in our gsm object * * Transmit data from DLCIs if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. 
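 * Runs in atomic (timer) context: the TX queue is protected by the tx_lock spinlock rather than a mutex, and nothing on this path may sleep.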
*/ -static void gsm_kick_timeout(struct work_struct *work) +static void gsm_kick_timer(struct timer_list *t) { - struct gsm_mux *gsm = container_of(work, struct gsm_mux, kick_timeout.work); + struct gsm_mux *gsm = from_timer(gsm, t, kick_timer); + unsigned long flags; int sent = 0; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ if (gsm->tx_bytes < TX_THRESH_LO) sent = gsm_dlci_data_sweep(gsm); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); if (sent && debug & DBG_DATA) pr_info("%s TX queue stalled\n", __func__); @@ -2492,7 +2496,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) } /* Finish outstanding timers, making sure they are done */ - cancel_delayed_work_sync(&gsm->kick_timeout); + del_timer_sync(&gsm->kick_timer); del_timer_sync(&gsm->t2_timer); /* Finish writing to ldisc */ @@ -2565,7 +2569,6 @@ static void gsm_free_mux(struct gsm_mux *gsm) break; } } - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2637,15 +2640,15 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); - mutex_init(&gsm->tx_mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); - INIT_DELAYED_WORK(&gsm->kick_timeout, gsm_kick_timeout); + timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); + spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; @@ -2670,7 +2673,6 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2826,16 +2828,17 @@ static void gsmld_write_trigger(struct gsm_mux *gsm) static void gsmld_write_task(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); + unsigned long flags; int i, ret; /* All outstanding control channel and control messages and one data * frame is sent. 
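 * gsm_data_kick() is called with tx_lock held, so this path must not sleep.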
*/ ret = -ENODEV; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); if (gsm->tty) ret = gsm_data_kick(gsm); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); if (ret >= 0) for (i = 0; i < NUM_DLCI; i++) @@ -3042,6 +3045,7 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; + unsigned long flags; int space; int ret; @@ -3049,13 +3053,13 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, return -ENODEV; ret = -ENOBUFS; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); return ret; } @@ -3352,13 +3356,14 @@ static struct tty_ldisc_ops tty_ldisc_packet = { static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { struct gsm_mux *gsm = dlci->gsm; + unsigned long flags; if (dlci->state != DLCI_OPEN || dlci->adaption != 2) return; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); gsm_dlci_modem_output(gsm, dlci, brk); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 44cc755b1a29..0e43bdfb7459 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -174,6 +174,8 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) */ up->dma = dma; + lpss->dma_maxburst = 16; + port->set_termios = dw8250_do_set_termios; return 0; @@ -277,8 +279,13 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port struct dw_dma_slave *rx_param, *tx_param; struct device *dev = port->port.dev; - if (!lpss->dma_param.dma_dev) + if (!lpss->dma_param.dma_dev) { + dma = port->dma; + if (dma) + goto out_configuration_only; + return 0; + } rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL); if (!rx_param) @@ -289,16 +296,18 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port return -ENOMEM; *rx_param = lpss->dma_param; - dma->rxconf.src_maxburst = lpss->dma_maxburst; - *tx_param = lpss->dma_param; - dma->txconf.dst_maxburst = lpss->dma_maxburst; dma->fn = lpss8250_dma_filter; dma->rx_param = rx_param; dma->tx_param = tx_param; port->dma = dma; + +out_configuration_only: + dma->rxconf.src_maxburst = lpss->dma_maxburst; + dma->txconf.dst_maxburst = lpss->dma_maxburst; + return 0; } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 41b8c6b27136..3f33014022f0 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -157,7 +157,11 @@ static u32 uart_read(struct uart_8250_port *up, u32 reg) return readl(up->port.membase + (reg << up->port.regshift)); } -static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +/* + * Called on runtime PM resume path from omap8250_restore_regs(), and + * omap8250_set_mctrl(). 
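+ * Both paths run with the port runtime-PM active: omap8250_set_mctrl() takes a usage reference via pm_runtime_resume_and_get() before calling in here.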
+ */ +static void __omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_8250_port *up = up_to_u8250p(port); struct omap8250_priv *priv = up->port.private_data; @@ -181,6 +185,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) } } +static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + int err; + + err = pm_runtime_resume_and_get(port->dev); + if (err) + return; + + __omap8250_set_mctrl(port, mctrl); + + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); +} + /* * Work Around for Errata i202 (2430, 3430, 3630, 4430 and 4460) * The access to uart register after MDR1 Access @@ -193,27 +211,10 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) static void omap_8250_mdr1_errataset(struct uart_8250_port *up, struct omap8250_priv *priv) { - u8 timeout = 255; - serial_out(up, UART_OMAP_MDR1, priv->mdr1); udelay(2); serial_out(up, UART_FCR, up->fcr | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); - /* - * Wait for FIFO to empty: when empty, RX_FIFO_E bit is 0 and - * TX_FIFO_E bit is 1. - */ - while (UART_LSR_THRE != (serial_in(up, UART_LSR) & - (UART_LSR_THRE | UART_LSR_DR))) { - timeout--; - if (!timeout) { - /* Should *never* happen. we warn and carry on */ - dev_crit(up->port.dev, "Errata i202: timedout %x\n", - serial_in(up, UART_LSR)); - break; - } - udelay(1); - } } static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud, @@ -292,6 +293,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) { struct omap8250_priv *priv = up->port.private_data; struct uart_8250_dma *dma = up->dma; + u8 mcr = serial8250_in_MCR(up); if (dma && dma->tx_running) { /* @@ -308,7 +310,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial8250_out_MCR(up, UART_MCR_TCRTLR); + serial8250_out_MCR(up, mcr | UART_MCR_TCRTLR); serial_out(up, UART_FCR, up->fcr); omap8250_update_scr(up, priv); @@ -324,7 +326,8 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); /* drop TCR + TLR access, we setup XON/XOFF later */ - serial8250_out_MCR(up, up->mcr); + serial8250_out_MCR(up, mcr); + serial_out(up, UART_IER, up->ier); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); @@ -341,7 +344,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) omap8250_update_mdr1(up, priv); - up->port.ops->set_mctrl(&up->port, up->port.mctrl); + __omap8250_set_mctrl(&up->port, up->port.mctrl); if (up->port.rs485.flags & SER_RS485_ENABLED) serial8250_em485_stop_tx(up); @@ -669,7 +672,6 @@ static int omap_8250_startup(struct uart_port *port) pm_runtime_get_sync(port->dev); - up->mcr = 0; serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_out(up, UART_LCR, UART_LCR_WLEN8); @@ -1458,9 +1460,15 @@ err: static int omap8250_remove(struct platform_device *pdev) { struct omap8250_priv *priv = platform_get_drvdata(pdev); + int err; + + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + return err; pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + flush_work(&priv->qos_work); pm_runtime_disable(&pdev->dev); serial8250_unregister_port(priv->line); cpu_latency_qos_remove_request(&priv->pm_qos_request); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fe8662cd9402..388172289627 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ 
b/drivers/tty/serial/8250/8250_port.c @@ -1897,10 +1897,13 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { - case UART_IIR_RX_TIMEOUT: - serial8250_rx_dma_flush(up); + case UART_IIR_RDI: + if (!up->dma->rx_running) + break; fallthrough; case UART_IIR_RLSI: + case UART_IIR_RX_TIMEOUT: + serial8250_rx_dma_flush(up); return true; } return up->dma->rx_dma(up); diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 67fa113f77d4..888e01fbd9c5 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -12,6 +12,7 @@ #include <linux/dmaengine.h> #include <linux/dmapool.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/irq.h> #include <linux/module.h> #include <linux/of.h> @@ -404,33 +405,6 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport) #define lpuart_enable_clks(x) __lpuart_enable_clks(x, true) #define lpuart_disable_clks(x) __lpuart_enable_clks(x, false) -static int lpuart_global_reset(struct lpuart_port *sport) -{ - struct uart_port *port = &sport->port; - void __iomem *global_addr; - int ret; - - if (uart_console(port)) - return 0; - - ret = clk_prepare_enable(sport->ipg_clk); - if (ret) { - dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); - return ret; - } - - if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { - global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; - writel(UART_GLOBAL_RST, global_addr); - usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); - writel(0, global_addr); - usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); - } - - clk_disable_unprepare(sport->ipg_clk); - return 0; -} - static void lpuart_stop_tx(struct uart_port *port) { unsigned char temp; @@ -2636,6 +2610,54 @@ static const struct serial_rs485 lpuart_rs485_supported = { /* delay_rts_* and RX_DURING_TX are not supported */ }; +static int lpuart_global_reset(struct lpuart_port *sport) +{ + struct uart_port *port = &sport->port; + void __iomem *global_addr; + unsigned long ctrl, bd; + unsigned int val = 0; + int ret; + + ret = clk_prepare_enable(sport->ipg_clk); + if (ret) { + dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); + return ret; + } + + if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { + /* + * If the transmitter is used by earlycon, wait for transmit engine to + * complete and then reset. + */ + ctrl = lpuart32_read(port, UARTCTRL); + if (ctrl & UARTCTRL_TE) { + bd = lpuart32_read(&sport->port, UARTBAUD); + if (read_poll_timeout(lpuart32_tx_empty, val, val, 1, 100000, false, + port)) { + dev_warn(sport->port.dev, + "timeout waiting for transmit engine to complete\n"); + clk_disable_unprepare(sport->ipg_clk); + return 0; + } + } + + global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; + writel(UART_GLOBAL_RST, global_addr); + usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); + writel(0, global_addr); + usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); + + /* Recover the transmitter for earlycon. 
*/ + if (ctrl & UARTCTRL_TE) { + lpuart32_write(port, bd, UARTBAUD); + lpuart32_write(port, ctrl, UARTCTRL); + } + } + + clk_disable_unprepare(sport->ipg_clk); + return 0; +} + static int lpuart_probe(struct platform_device *pdev) { const struct lpuart_soc_data *sdata = of_device_get_match_data(&pdev->dev); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 05b432dc7a85..aadda66405b4 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2594,6 +2594,7 @@ static const struct dev_pm_ops imx_uart_pm_ops = { .suspend_noirq = imx_uart_suspend_noirq, .resume_noirq = imx_uart_resume_noirq, .freeze_noirq = imx_uart_suspend_noirq, + .thaw_noirq = imx_uart_resume_noirq, .restore_noirq = imx_uart_resume_noirq, .suspend = imx_uart_suspend, .resume = imx_uart_resume, diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c67715f6f756..f9aa50ff14d4 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, trace_cdnsp_ep_halt(value ? "Set" : "Clear"); - if (value) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; + ret = cdnsp_cmd_stop_ep(pdev, pep); + if (ret) + return ret; + if (value) { if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) { cdnsp_queue_halt_endpoint(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); @@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, pep->ep_state |= EP_HALTED; } else { - /* - * In device mode driver can call reset endpoint command - * from any endpoint state. - */ cdnsp_queue_reset_ep(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); ret = cdnsp_wait_for_cmd_compl(pdev); diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 794e413800ae..2f29431f612e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1763,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev, int trb_buff_len, unsigned int td_total_len, struct cdnsp_request *preq, - bool more_trbs_coming) + bool more_trbs_coming, + bool zlp) { u32 maxp, total_packet_count; + /* Before ZLP driver needs set TD_SIZE = 1. */ + if (zlp) + return 1; + /* One TRB with a zero-length data packet. */ if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) || trb_buff_len == td_total_len) @@ -1960,7 +1965,8 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) /* Set the TRB length, TD size, and interrupter fields. 
*/ remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len, full_len, preq, - more_trbs_coming); + more_trbs_coming, + zero_len_trb); length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2025,7 +2031,7 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) if (preq->request.length > 0) { remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, - preq->request.length, preq, 1); + preq->request.length, preq, 1, 0); length_field = TRB_LEN(preq->request.length) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2076,7 +2082,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep) u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx); int ret = 0; - if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) { + if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED || + ep_state == EP_STATE_HALTED) { trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx); goto ep_stopped; } @@ -2225,7 +2232,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, /* Set the TRB length, TD size, & interrupter fields. */ remainder = cdnsp_td_remainder(pdev, running_total, trb_buff_len, td_len, preq, - more_trbs_coming); + more_trbs_coming, 0); length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0); diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 9643b905e2d8..6164fc4c96a4 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -24,11 +24,37 @@ #define CFG_RXDET_P3_EN BIT(15) #define LPM_2_STB_SWITCH_EN BIT(25) -static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd); +static void xhci_cdns3_plat_start(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + u32 value; + + /* set usbcmd.EU3S */ + value = readl(&xhci->op_regs->command); + value |= CMD_PM_INDEX; + writel(value, &xhci->op_regs->command); + + if (hcd->regs) { + value = readl(hcd->regs + XECP_AUX_CTRL_REG1); + value |= CFG_RXDET_P3_EN; + writel(value, hcd->regs + XECP_AUX_CTRL_REG1); + + value = readl(hcd->regs + XECP_PORT_CAP_REG); + value |= LPM_2_STB_SWITCH_EN; + writel(value, hcd->regs + XECP_PORT_CAP_REG); + } +} + +static int xhci_cdns3_resume_quirk(struct usb_hcd *hcd) +{ + xhci_cdns3_plat_start(hcd); + return 0; +} static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .quirks = XHCI_SKIP_PHY_INIT | XHCI_AVOID_BEI, - .suspend_quirk = xhci_cdns3_suspend_quirk, + .plat_start = xhci_cdns3_plat_start, + .resume_quirk = xhci_cdns3_resume_quirk, }; static int __cdns_host_init(struct cdns *cdns) @@ -90,32 +116,6 @@ err1: return ret; } -static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd) -{ - struct xhci_hcd *xhci = hcd_to_xhci(hcd); - u32 value; - - if (pm_runtime_status_suspended(hcd->self.controller)) - return 0; - - /* set usbcmd.EU3S */ - value = readl(&xhci->op_regs->command); - value |= CMD_PM_INDEX; - writel(value, &xhci->op_regs->command); - - if (hcd->regs) { - value = readl(hcd->regs + XECP_AUX_CTRL_REG1); - value |= CFG_RXDET_P3_EN; - writel(value, hcd->regs + XECP_AUX_CTRL_REG1); - - value = readl(hcd->regs + XECP_PORT_CAP_REG); - value |= LPM_2_STB_SWITCH_EN; - writel(value, hcd->regs + XECP_PORT_CAP_REG); - } - - return 0; -} - static void cdns_host_exit(struct cdns *cdns) { kfree(cdns->xhci_plat_data); diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c index ada78daba6df..c17516c29b63 100644 --- a/drivers/usb/chipidea/otg_fsm.c +++ b/drivers/usb/chipidea/otg_fsm.c @@ -256,8 +256,10 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum 
otg_fsm_timer t) ci->enabled_otg_timer_bits &= ~(1 << t); if (ci->next_otg_timer == t) { if (ci->enabled_otg_timer_bits == 0) { + spin_unlock_irqrestore(&ci->lock, flags); /* No enabled timers after delete it */ hrtimer_cancel(&ci->otg_fsm_hrtimer); + spin_lock_irqsave(&ci->lock, flags); ci->next_otg_timer = NUM_OTG_FSM_TIMERS; } else { /* Find the next timer */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0722d2131305..079e183cf3bf 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -362,6 +362,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* Realforce 87U Keyboard */ + { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, + /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c0e7c76dc5c8..1f348bc867c2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1711,6 +1711,16 @@ static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) return extcon_get_extcon_dev(name); /* + * Check explicitly if "usb-role-switch" is used since + * extcon_find_edev_by_node() can not be used to check the absence of + * an extcon device. In the absence of an device it will always return + * EPROBE_DEFER. + */ + if (IS_ENABLED(CONFIG_USB_ROLE_SWITCH) && + device_property_read_bool(dev, "usb-role-switch")) + return NULL; + + /* * Try to get an extcon device from the USB PHY controller's "port" * node. Check if it has the "port" node first, to avoid printing the * error message from underlying code, as it's a valid case: extcon diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 0ecf20eeceee..4be6a873bd07 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -37,15 +37,6 @@ struct dwc3_exynos { struct regulator *vdd10; }; -static int dwc3_exynos_remove_child(struct device *dev, void *unused) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int dwc3_exynos_probe(struct platform_device *pdev) { struct dwc3_exynos *exynos; @@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) struct dwc3_exynos *exynos = platform_get_drvdata(pdev); int i; - device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); + of_platform_depopulate(&pdev->dev); for (i = exynos->num_clks - 1; i >= 0; i--) clk_disable_unprepare(exynos->clks[i]); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5fe2d136dff5..6d524fa76443 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, * * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ - if (dwc->gadget->speed <= USB_SPEED_HIGH) { + if (dwc->gadget->speed <= USB_SPEED_HIGH || + DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; @@ -1023,13 +1024,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); - /* Clear out the ep descriptors for non-ep0 */ - if (dep->number > 1) { - dep->endpoint.comp_desc = NULL; - 
dep->endpoint.desc = NULL; - } - - dwc3_remove_requests(dwc, dep, -ECONNRESET); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; dep->type = 0; @@ -1043,6 +1038,12 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) mask |= (DWC3_EP_DELAY_STOP | DWC3_EP_TRANSFER_STARTED); dep->flags &= mask; + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + dep->endpoint.desc = NULL; + } + return 0; } diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index a7154fe8206d..f6f13e7f1ba1 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -11,13 +11,8 @@ #include <linux/of.h> #include <linux/platform_device.h> -#include "../host/xhci-plat.h" #include "core.h" -static const struct xhci_plat_priv dwc3_xhci_plat_priv = { - .quirks = XHCI_SKIP_PHY_INIT, -}; - static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc, int irq, char *name) { @@ -97,11 +92,6 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } - ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv, - sizeof(dwc3_xhci_plat_priv)); - if (ret) - goto err; - memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); if (dwc->usb3_lpm_capable) diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index c4ed48d6b8a4..a189b08bba80 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -199,16 +199,6 @@ uvc_send_response(struct uvc_device *uvc, struct uvc_request_data *data) * V4L2 ioctls */ -struct uvc_format { - u8 bpp; - u32 fcc; -}; - -static struct uvc_format uvc_formats[] = { - { 16, V4L2_PIX_FMT_YUYV }, - { 0, V4L2_PIX_FMT_MJPEG }, -}; - static int uvc_v4l2_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { @@ -243,47 +233,6 @@ uvc_v4l2_get_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int -uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) -{ - struct video_device *vdev = video_devdata(file); - struct uvc_device *uvc = video_get_drvdata(vdev); - struct uvc_video *video = &uvc->video; - struct uvc_format *format; - unsigned int imagesize; - unsigned int bpl; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(uvc_formats); ++i) { - format = &uvc_formats[i]; - if (format->fcc == fmt->fmt.pix.pixelformat) - break; - } - - if (i == ARRAY_SIZE(uvc_formats)) { - uvcg_info(&uvc->func, "Unsupported format 0x%08x.\n", - fmt->fmt.pix.pixelformat); - return -EINVAL; - } - - bpl = format->bpp * fmt->fmt.pix.width / 8; - imagesize = bpl ? 
bpl * fmt->fmt.pix.height : fmt->fmt.pix.sizeimage; - - video->fcc = format->fcc; - video->bpp = format->bpp; - video->width = fmt->fmt.pix.width; - video->height = fmt->fmt.pix.height; - video->imagesize = imagesize; - - fmt->fmt.pix.field = V4L2_FIELD_NONE; - fmt->fmt.pix.bytesperline = bpl; - fmt->fmt.pix.sizeimage = imagesize; - fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; - fmt->fmt.pix.priv = 0; - - return 0; -} - -static int uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) { struct video_device *vdev = video_devdata(file); @@ -324,6 +273,27 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) } static int +uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_device *uvc = video_get_drvdata(vdev); + struct uvc_video *video = &uvc->video; + int ret; + + ret = uvc_v4l2_try_format(file, fh, fmt); + if (ret) + return ret; + + video->fcc = fmt->fmt.pix.pixelformat; + video->bpp = fmt->fmt.pix.bytesperline * 8 / video->width; + video->width = fmt->fmt.pix.width; + video->height = fmt->fmt.pix.height; + video->imagesize = fmt->fmt.pix.sizeimage; + + return ret; +} + +static int uvc_v4l2_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *fival) { diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 2df52f75f6b3..7558cc4d90cc 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -285,7 +285,7 @@ static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val) { struct bcma_hcd_device *usb_dev = bcma_get_drvdata(dev); - if (IS_ERR_OR_NULL(usb_dev->gpio_desc)) + if (!usb_dev->gpio_desc) return; gpiod_set_value(usb_dev->gpio_desc, val); @@ -406,9 +406,11 @@ static int bcma_hcd_probe(struct bcma_device *core) return -ENOMEM; usb_dev->core = core; - if (core->dev.of_node) - usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc", - GPIOD_OUT_HIGH); + usb_dev->gpio_desc = devm_gpiod_get_optional(&core->dev, "vcc", + GPIOD_OUT_HIGH); + if (IS_ERR(usb_dev->gpio_desc)) + return dev_err_probe(&core->dev, PTR_ERR(usb_dev->gpio_desc), + "error obtaining VCC GPIO"); switch (core->id.id) { case BCMA_CORE_USB20_HOST: diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 697683e3fbff..c3b7f1d98e78 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,8 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 +#define UBLOX_VENDOR_ID 0x1546 + /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 #define AMOI_PRODUCT_H01 0x0800 @@ -240,7 +242,6 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 -#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -581,6 +582,9 @@ static void option_instat_callback(struct urb *urb); #define OPPO_VENDOR_ID 0x22d9 #define OPPO_PRODUCT_R11 0x276c +/* Sierra Wireless products */ +#define SIERRA_VENDOR_ID 0x1199 +#define SIERRA_PRODUCT_EM9191 0x90d3 /* Device flags */ @@ -1124,8 +1128,16 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, - { USB_DEVICE(QUALCOMM_VENDOR_ID, 
UBLOX_PRODUCT_R6XX), + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */ + .driver_info = RSVD(4) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, + /* u-blox products */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ + .driver_info = RSVD(4) }, + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), /* u-blox LARA-L6 (ECM) */ + .driver_info = RSVD(4) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, @@ -2167,6 +2179,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ @@ -2176,6 +2189,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index e1f4df7238bf..fdbf3694e21f 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -369,13 +369,24 @@ pmc_usb_mux_usb4(struct pmc_usb_port *port, struct typec_mux_state *state) return pmc_usb_command(port, (void *)&req, sizeof(req)); } -static int pmc_usb_mux_safe_state(struct pmc_usb_port *port) +static int pmc_usb_mux_safe_state(struct pmc_usb_port *port, + struct typec_mux_state *state) { u8 msg; if (IOM_PORT_ACTIVITY_IS(port->iom_status, SAFE_MODE)) return 0; + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, DP) || + IOM_PORT_ACTIVITY_IS(port->iom_status, DP_MFD)) && + state->alt && state->alt->svid == USB_TYPEC_DP_SID) + return 0; + + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, TBT) || + IOM_PORT_ACTIVITY_IS(port->iom_status, ALT_MODE_TBT_USB)) && + state->alt && state->alt->svid == USB_TYPEC_TBT_SID) + return 0; + msg = PMC_USB_SAFE_MODE; msg |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; @@ -443,7 +454,7 @@ pmc_usb_mux_set(struct typec_mux_dev *mux, struct typec_mux_state *state) return 0; if (state->mode == TYPEC_STATE_SAFE) - return pmc_usb_mux_safe_state(port); + return pmc_usb_mux_safe_state(port, state); if (state->mode == TYPEC_STATE_USB) return pmc_usb_connect(port, port->role); diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index b637e8b378b3..2a77bab948f5 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -474,7 +474,7 @@ static void tps6598x_handle_plug_event(struct tps6598x *tps, u32 status) static 
irqreturn_t cd321x_interrupt(int irq, void *data) { struct tps6598x *tps = data; - u64 event; + u64 event = 0; u32 status; int ret; @@ -519,8 +519,8 @@ err_unlock: static irqreturn_t tps6598x_interrupt(int irq, void *data) { struct tps6598x *tps = data; - u64 event1; - u64 event2; + u64 event1 = 0; + u64 event2 = 0; u32 status; int ret; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 098b62f7b701..c0143d38df83 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -577,7 +577,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (scr_readw(r) != vc->vc_video_erase_char) break; if (r != q && new_rows >= rows + logo_lines) { - save = kmalloc(array3_size(logo_lines, new_cols, 2), + save = kzalloc(array3_size(logo_lines, new_cols, 2), GFP_KERNEL); if (save) { int i = min(cols, new_cols); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index f422f9c58ba7..1ea6d2e5b218 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev) return true; } +/* + * If an error is received from the host or AMD Secure Processor (ASP) there + * are two options. Either retry the exact same encrypted request or discontinue + * using the VMPCK. + * + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to + * encrypt the requests. The IV for this scheme is the sequence number. GCM + * cannot tolerate IV reuse. + * + * The ASP FW v1.51 only increments the sequence numbers on a successful + * guest<->ASP back and forth and only accepts messages at its exact sequence + * number. + * + * So if the sequence number were to be reused the encryption scheme is + * vulnerable. If the sequence number were incremented for a fresh IV the ASP + * will reject the request. + */ static void snp_disable_vmpck(struct snp_guest_dev *snp_dev) { + dev_alert(snp_dev->dev, "Disabling vmpck_id %d to prevent IV reuse.\n", + vmpck_id); memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN); snp_dev->vmpck = NULL; } @@ -321,34 +340,71 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc) return rc; - /* Call firmware to process the request */ + /* + * Call firmware to process the request. In this function the encrypted + * message enters shared memory with the host. So after this call the + * sequence number must be incremented or the VMPCK must be deleted to + * prevent reuse of the IV. + */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * If the extended guest request fails due to having too small of a + * certificate data buffer, retry the same guest request without the + * extended data request in order to increment the sequence number + * and thus avoid IV reuse. + */ + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && + err == SNP_GUEST_REQ_INVALID_LEN) { + const unsigned int certs_npages = snp_dev->input.data_npages; + + exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + /* + * If this call to the firmware succeeds, the sequence number can + * be incremented allowing for continued use of the VMPCK. If + * there is an error reflected in the return value, this value + * is checked further down and the result will be the deletion + * of the VMPCK and the error code being propagated back to the + * user as an ioctl() return code. 
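+	 * Retrying with the exact same encrypted request is safe here: the ciphertext and IV are unchanged, so the same key/IV pair is never used to encrypt new data.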
+ */ + rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + err = SNP_GUEST_REQ_INVALID_LEN; + snp_dev->input.data_npages = certs_npages; + } + if (fw_err) *fw_err = err; - if (rc) - return rc; + if (rc) { + dev_alert(snp_dev->dev, + "Detected error from ASP request. rc: %d, fw_err: %llu\n", + rc, *fw_err); + goto disable_vmpck; + } - /* - * The verify_and_dec_payload() will fail only if the hypervisor is - * actively modifying the message header or corrupting the encrypted payload. - * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that - * the key cannot be used for any communication. The key is disabled to ensure - * that AES-GCM does not use the same IV while encrypting the request payload. - */ rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); if (rc) { dev_alert(snp_dev->dev, - "Detected unexpected decode failure, disabling the vmpck_id %d\n", - vmpck_id); - snp_disable_vmpck(snp_dev); - return rc; + "Detected unexpected decode failure from ASP. rc: %d\n", + rc); + goto disable_vmpck; } /* Increment to new message sequence after payload decryption was successful. */ snp_inc_msg_seqno(snp_dev); return 0; + +disable_vmpck: + snp_disable_vmpck(snp_dev); + return rc; } static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ responded: clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a9543f01184c..dcb510f38dda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int i; - ASSERT(!path->nowait); + /* + * The nowait semantics are used only for write paths, where we don't + * use the tree mod log and sequence numbers. 
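+ * A non-zero time_seq means the caller is doing a read-only tree mod
+ * log traversal, and those callers never set nowait; hence the
+ * assertion below is limited to the time_seq != 0 case.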
+ */ + if (time_seq) + ASSERT(!path->nowait); nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) @@ -4683,7 +4688,14 @@ again: if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; - down_read(&fs_info->commit_root_sem); + if (path->nowait) { + if (!down_read_trylock(&fs_info->commit_root_sem)) { + ret = -EAGAIN; + goto done; + } + } else { + down_read(&fs_info->commit_root_sem); + } } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } @@ -4759,7 +4771,7 @@ again: next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4769,6 +4781,10 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && path->nowait) { + ret = -EAGAIN; + goto done; + } if (!ret && time_seq) { /* * If we don't get the lock, we may be racing @@ -4799,7 +4815,7 @@ again: ret = read_block_for_search(root, path, &next, level, 0, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4807,8 +4823,16 @@ again: goto done; } - if (!path->skip_locking) - btrfs_tree_read_lock(next); + if (!path->skip_locking) { + if (path->nowait) { + if (!btrfs_try_tree_read_lock(next)) { + ret = -EAGAIN; + goto done; + } + } else { + btrfs_tree_read_lock(next); + } + } } ret = 0; done: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5dd8bed1488..5ba2e810dc6e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; @@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -3205,7 +3209,6 @@ out: } kfree(rootrefs); - btrfs_free_path(path); return ret; } @@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { @@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); - inodes = NULL; - goto out; + goto out_loi; } + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9334c3157c22..b74105a10f16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, - "unable to update quota limit for 
%llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 145c84b44fd0..1c4b693ee4a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, if (key.offset >= clone_src_i_size) break; - if (key.offset + ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * can not clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. That will make the + * receiver fallback to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due decompression and + * possible re-compression at the receiver. + */ + break; } else { ret = send_extent_data(sctx, dst_path, offset, clone_len); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 699b54b3acaa..74fef1f49c35 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 813986e38258..c3cf3dabe0b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, u64 *last_old_dentry_offset) { struct btrfs_root *log = inode->root->log_root; - struct extent_buffer *src = path->nodes[0]; - const int nritems = btrfs_header_nritems(src); + struct extent_buffer *src; + const int nritems = btrfs_header_nritems(path->nodes[0]); const u64 ino = btrfs_ino(inode); bool last_found = false; int batch_start = 0; int batch_size = 0; int i; - for (i = path->slots[0]; i < nritems; i++) { + /* + * We need to clone the leaf, release the read lock on it, and use the + * clone before modifying the log tree. See the comment at copy_items() + * about why we need to do this. 
+ */ + src = btrfs_clone_extent_buffer(path->nodes[0]); + if (!src) + return -ENOMEM; + + i = path->slots[0]; + btrfs_release_path(path); + path->nodes[0] = src; + path->slots[0] = i; + + for (; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; - struct extent_buffer *src = src_path->nodes[0]; + struct extent_buffer *src; int ret = 0; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); + /* + * To keep lockdep happy and avoid deadlocks, clone the source leaf and + * use the clone. This is because otherwise we would be changing the log + * tree, to insert items from the subvolume tree or insert csum items, + * while holding a read lock on a leaf from the subvolume tree, which + * creates a nasty lock dependency when COWing log tree nodes/leaves: + * + * 1) Modifying the log tree triggers an extent buffer allocation while + * holding a write lock on a parent extent buffer from the log tree. + * Allocating the pages for an extent buffer, or the extent buffer + * struct, can trigger inode eviction and finally the inode eviction + * will trigger a release/remove of a delayed node, which requires + * taking the delayed node's mutex; + * + * 2) Allocating a metadata extent for a log tree can trigger the async + * reclaim thread and make us wait for it to release enough space and + * unblock our reservation ticket. The reclaim thread can start + * flushing delayed items, and that in turn results in the need to + * lock delayed node mutexes and in the need to write lock extent + * buffers of a subvolume tree - all this while holding a write lock + * on the parent extent buffer in the log tree. + * + * So one task in scenario 1) running in parallel with another task in + * scenario 2) could lead to a deadlock, one wanting to lock a delayed + * node mutex while having a read lock on a leaf from the subvolume, + * while the other is holding the delayed node's mutex and wants to + * write lock the same subvolume leaf for flushing delayed items. 
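+ *
+ * Condensed to the classic AB-BA pattern (a simplification of the two
+ * scenarios above):
+ *
+ *	task A: holds subvolume leaf (read), wants delayed node mutex
+ *	task B: holds delayed node mutex, wants subvolume leaf (write)
+ *
+ * Cloning the subvolume leaf and dropping its lock before touching the
+ * log tree breaks task A's side of the cycle.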
+ */ + src = btrfs_clone_extent_buffer(src_path->nodes[0]); + if (!src) + return -ENOMEM; + + i = src_path->slots[0]; + btrfs_release_path(src_path); + src_path->nodes[0] = src; + src_path->slots[0] = i; + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1912abf6d020..c9e2b0c85309 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, super[i] = page_address(page[i]); } - if (super[0]->generation > super[1]->generation) + if (btrfs_super_generation(super[0]) > + btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; @@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; } - zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; @@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } - kfree(zones); + kvfree(zones); switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: @@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) return 0; out: - kfree(zones); + kvfree(zones); out_free_zone_info: btrfs_destroy_dev_zone_info(device); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index fb023f9fafcb..e54814d0c2f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2248,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; - unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2267,27 +2266,23 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) spin_unlock(&ci->i_unsafe_lock); /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * for the journal logs to be flushed by the MDSes periodically.
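* (The sessions array below is now allocated and filled while holding
* mdsc->mutex, so max_sessions cannot change underneath us and the old
* retry-and-reallocate loop became unnecessary.)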
*/ - if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + if (req1 || req2) { struct ceph_mds_request *req; + struct ceph_mds_session **sessions; + struct ceph_mds_session *s; + unsigned int max_sessions; int i; + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { + mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } @@ -2299,16 +2294,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2321,16 +2306,6 @@ retry: s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2342,11 +2317,12 @@ retry: /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds]) + sessions[s->s_mds] = ceph_get_mds_session(s); } spin_unlock(&ci->i_ceph_lock); + mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ for (i = 0; i < max_sessions; i++) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4af5e55abc15..bad9eeb6a1a5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2492,7 +2492,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, struct inode *parent; parent = ceph_lookup_inode(sb, ceph_ino(inode)); - if (!parent) + if (IS_ERR(parent)) return PTR_ERR(parent); pci = ceph_inode(parent); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 864cdaa0d2bd..e4151852184e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ - struct ceph_snap_realm *realm = NULL; + struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; int rebuild_snapcs; @@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, dout("%s deletion=%d\n", __func__, deletion); more: + realm = NULL; rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe220686bba4..712a43161448 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = filemap_write_and_wait_range(src_inode->i_mapping, off, off + len - 1); if (rc) - goto out; + goto unlock; /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); @@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * that target is updated on the server */ CIFS_I(target_inode)->time = 0; + +unlock: /* although unlocking in the reverse order from locking is not * strictly necessary here it is a little cleaner to be consistent */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 
1cc47dd3b4d6..9db9527c61cf 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3855,9 +3855,13 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: - free_xid(mnt_ctx.xid); cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + + free_xid(mnt_ctx.xid); + return rc; error: dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); @@ -3884,8 +3888,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; } + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + free_xid(mnt_ctx.xid); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + return rc; error: mount_put_conns(&mnt_ctx); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 89d5fa887364..6419ec47c2a8 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -343,7 +343,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); - if (rc != EOPNOTSUPP) + if (rc != -EOPNOTSUPP) break; } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -373,7 +373,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * pSMBFile->fid.netfid, * extAttrBits, * &ExtAttrMask); - * if (rc != EOPNOTSUPP) + * if (rc != -EOPNOTSUPP) * break; */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 92e4278ec35d..9e7d9f0baa18 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* now drop the ref to the current iface */ if (old_iface && iface) { - kref_put(&old_iface->refcount, release_iface); cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); - } else if (old_iface) { kref_put(&old_iface->refcount, release_iface); + } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 880cd494afea..bfaafd02fb1f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1116,6 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto sea_exit; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); @@ -1126,6 +1128,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rqst[2].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); + if (rc) + goto sea_exit; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1956288307f..6c399a8b22b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. 
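* When the leftmost extent in the leaf begins below start, the new
* else branch below steps back from the last extent to the first one
* whose ee_block is >= start and makes that the final round of the
* shift (iterator == NULL marks the last pass, as in the branch
* above), instead of stepping past start.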
*/ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5227,14 +5228,21 @@ again: ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..9958d4020771 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If the inode is now fully clean, then it can be safely removed from - * its writeback list (if any). Otherwise the flusher threads are - * responsible for the writeback lists. + * If the inode is freeing, its i_io_list shouldn't be updated + * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); - else if (!(inode->i_state & I_SYNC_QUEUED)) { - if ((inode->i_state & I_DIRTY)) - redirty_tail_locked(inode, wb); - else if (inode->i_state & I_DIRTY_TIME) { - inode->dirtied_when = jiffies; - inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + if (!(inode->i_state & I_FREEING)) { + /* + * If the inode is now fully clean, then it can be safely + * removed from its writeback list (if any). Otherwise the + * flusher threads are responsible for the writeback lists.
+ */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED)) { + if ((inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; + inode_io_list_move_locked(inode, + wb, + &wb->b_dirty_time); + } } } diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a058e0136bfe..ab8ceddf9efa 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, struct fscache_volume *volume; struct fscache_cache *cache; size_t klen, hlen; - char *key; + u8 *key; + + klen = strlen(volume_key); + if (klen > NAME_MAX) + return NULL; if (!coherency_data) coherency_len = 0; @@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, /* Stick the length on the front of the key and pad it out to make * hashing easier. */ - klen = strlen(volume_key); hlen = round_up(1 + klen + 1, sizeof(__le32)); key = kzalloc(hlen, GFP_KERNEL); if (!key) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71bfb663aac5..89f4741728ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3039,8 +3035,7 @@ out: if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3990f3e270cb..f33b3baad07c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -31,10 +31,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool __kernfs_active(struct kernfs_node *kn) +{ + return atomic_read(&kn->active) >= 0; +} + static bool kernfs_active(struct kernfs_node *kn) { lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); - return atomic_read(&kn->active) >= 0; + return __kernfs_active(kn); } static bool kernfs_lockdep(struct kernfs_node *kn) @@ -705,7 +710,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, goto err_unlock; } - if 
(unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) + /* + * We should fail if @kn has never been activated and guarantee success + * if the caller knows that @kn is active. Both can be achieved by + * __kernfs_active() which tests @kn->active without kernfs_rwsem. + */ + if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; spin_unlock(&kernfs_idr_lock); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 8de970d6146f..94b8ed4ef870 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, - len, 0); + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, + COPY_FILE_SPLICE); if (ret < 0) return ret; diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..9155ecb547ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 06a96e955bd0..d4b6839bb459 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err, rqstp->rq_xprt->xpt_remotelen); __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); - __entry->inode = d_inode(fhp->fh_dentry); + if (fhp->fh_dentry) + __entry->inode = d_inode(fhp->fh_dentry); + else + __entry->inode = NULL; __entry->type = type; __entry->access = access; __entry->error = be32_to_cpu(error); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f650afedd67f..849a720ab43f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } @@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page *page = buf->page; // may be a compound one unsigned offset = buf->offset; + struct page *last_page; - page += offset / PAGE_SIZE; - for (int i = sd->len; i > 0; i -= PAGE_SIZE) - svc_rqst_replace_page(rqstp, page++); + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) + svc_rqst_replace_page(rqstp, page); if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 77ff8e95421f..dc359b56fdfa 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, 
__u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5101131e6047..440960110a42 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif show_val_kb(m, "PageTables: ", global_node_page_state(NR_PAGETABLE)); - show_val_kb(m, "SecPageTables: ", + show_val_kb(m, "SecPageTables: ", global_node_page_state(NR_SECONDARY_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..24b9668d6377 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent decisions w.r.t. using copy_file_range(). + * + * We also get here if the caller (e.g. nfsd) requested COPY_FILE_SPLICE.
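+ *
+ * An in-kernel caller that wants this cross-filesystem splice fallback
+ * (as the nfsd and ksmbd hunks above do) would call, e.g.:
+ *
+ *	ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ *				  COPY_FILE_SPLICE);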
*/ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 860f0b1032c6..2c53fbb8d918 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode) return; /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ + if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + + /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ @@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode) return; } +out: /* The zone is not active. If it was, update the active count */ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; @@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* @@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->cond = BLK_ZONE_COND_OFFLINE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; } + zi->i_flags |= ZONEFS_ZONE_READONLY; inode->i_mode &= ~0222; return i_size_read(inode); case BLK_ZONE_COND_FULL: @@ -478,8 +489,7 @@ static void __zonefs_io_error(struct inode *inode, bool write) struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = - zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + unsigned int nr_zones = 1; struct zonefs_ioerr_data err = { .inode = inode, .write = write, @@ -487,6 +497,15 @@ static void __zonefs_io_error(struct inode *inode, bool write) int ret; /* + * The only files that have more than one zone are conventional zone + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. + */ + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) + nr_zones = zi->i_zone_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + + /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as * struct request allocations for the same device. 
The former case may @@ -1407,6 +1426,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_ztype = type; zi->i_zsector = zone->start; zi->i_zone_size = zone->len << SECTOR_SHIFT; + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && + !(sbi->s_features & ZONEFS_F_AGGRCNV)) { + zonefs_err(sb, + "zone size %llu doesn't match device's zone sectors %llu\n", + zi->i_zone_size, + bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); + return -EINVAL; + } zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); @@ -1456,11 +1483,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; - int ret; + int ret = -ENOMEM; dentry = d_alloc_name(parent, name); if (!dentry) - return NULL; + return ERR_PTR(ret); inode = new_inode(parent->d_sb); if (!inode) @@ -1485,7 +1512,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, dput: dput(dentry); - return NULL; + return ERR_PTR(ret); } struct zonefs_zone_data { @@ -1505,7 +1532,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, struct blk_zone *zone, *next, *end; const char *zgroup_name; char *file_name; - struct dentry *dir; + struct dentry *dir, *dent; unsigned int n = 0; int ret; @@ -1523,8 +1550,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, zgroup_name = "seq"; dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); - if (!dir) { - ret = -ENOMEM; + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); goto free; } @@ -1570,8 +1597,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, * Use the file number within its group as file name. */ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); - if (!zonefs_create_inode(dir, file_name, zone, type)) { - ret = -ENOMEM; + dent = zonefs_create_inode(dir, file_name, zone, type); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); goto free; } @@ -1905,18 +1933,18 @@ static int __init zonefs_init(void) if (ret) return ret; - ret = register_filesystem(&zonefs_type); + ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; - ret = zonefs_sysfs_init(); + ret = register_filesystem(&zonefs_type); if (ret) - goto unregister_fs; + goto sysfs_exit; return 0; -unregister_fs: - unregister_filesystem(&zonefs_type); +sysfs_exit: + zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); @@ -1925,9 +1953,9 @@ destroy_inodecache: static void __exit zonefs_exit(void) { + unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - unregister_filesystem(&zonefs_type); } MODULE_AUTHOR("Damien Le Moal"); diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index 9cb6755ce39a..9920689dc098 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -15,11 +15,6 @@ struct zonefs_sysfs_attr { ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf); }; -static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr) -{ - return container_of(attr, struct zonefs_sysfs_attr, attr); -} - #define ZONEFS_SYSFS_ATTR_RO(name) \ static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name) diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 4b3de66c3233..1dbe78119ff1 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) return ZONEFS_ZTYPE_SEQ; } -#define ZONEFS_ZONE_OPEN (1 << 0) -#define ZONEFS_ZONE_ACTIVE (1 << 1) +#define ZONEFS_ZONE_OPEN (1U << 0) +#define 
ZONEFS_ZONE_ACTIVE (1U << 1) +#define ZONEFS_ZONE_OFFLINE (1U << 2) +#define ZONEFS_ZONE_READONLY (1U << 3) /* * In-memory inode data. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..891f8cbcd043 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,13 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; + + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ + unsigned int dma_alignment; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -456,12 +463,6 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; - /* - * Drivers that set dma_alignment to less than 511 must be prepared to - * handle individual bvec's that are not a multiple of a SECTOR_SIZE - * due to possible offsets. - */ - unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -944,7 +945,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); @@ -1324,7 +1324,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->dma_alignment : 511; + return q ? q->limits.dma_alignment : 511; } static inline unsigned int bdev_dma_alignment(struct block_device *bdev) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c577e4c7c84b..67452103bb86 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -54,6 +54,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; +extern struct bpf_mem_alloc bpf_global_ma; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -85,7 +87,8 @@ struct bpf_map_ops { int (*map_lookup_and_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); - int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, + int (*map_update_batch)(struct bpf_map *map, struct file *map_file, + const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -135,7 +138,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. 
It is a runtime check to ensure @@ -165,9 +168,8 @@ struct bpf_map_ops { }; enum { - /* Support at most 8 pointers in a BTF type */ - BTF_FIELDS_MAX = 10, - BPF_MAP_OFF_ARR_MAX = BTF_FIELDS_MAX, + /* Support at most 10 fields in a BTF type */ + BTF_FIELDS_MAX = 10, }; enum btf_field_type { @@ -176,6 +178,8 @@ enum btf_field_type { BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, + BPF_LIST_HEAD = (1 << 4), + BPF_LIST_NODE = (1 << 5), }; struct btf_field_kptr { @@ -185,11 +189,19 @@ struct btf_field_kptr { u32 btf_id; }; +struct btf_field_list_head { + struct btf *btf; + u32 value_btf_id; + u32 node_offset; + struct btf_record *value_rec; +}; + struct btf_field { u32 offset; enum btf_field_type type; union { struct btf_field_kptr kptr; + struct btf_field_list_head list_head; }; }; @@ -203,8 +215,8 @@ struct btf_record { struct btf_field_offs { u32 cnt; - u32 field_off[BPF_MAP_OFF_ARR_MAX]; - u8 field_sz[BPF_MAP_OFF_ARR_MAX]; + u32 field_off[BTF_FIELDS_MAX]; + u8 field_sz[BTF_FIELDS_MAX]; }; struct bpf_map { @@ -267,6 +279,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_LIST_HEAD: + return "bpf_list_head"; + case BPF_LIST_NODE: + return "bpf_list_node"; default: WARN_ON_ONCE(1); return "unknown"; @@ -283,6 +299,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return sizeof(u64); + case BPF_LIST_HEAD: + return sizeof(struct bpf_list_head); + case BPF_LIST_NODE: + return sizeof(struct bpf_list_node); default: WARN_ON_ONCE(1); return 0; @@ -299,6 +319,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return __alignof__(u64); + case BPF_LIST_HEAD: + return __alignof__(struct bpf_list_head); + case BPF_LIST_NODE: + return __alignof__(struct bpf_list_node); default: WARN_ON_ONCE(1); return 0; @@ -312,16 +336,19 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) { - if (!IS_ERR_OR_NULL(map->record)) { - struct btf_field *fields = map->record->fields; - u32 cnt = map->record->cnt; - int i; + int i; - for (i = 0; i < cnt; i++) - memset(dst + fields[i].offset, 0, btf_field_type_size(fields[i].type)); - } + if (!foffs) + return; + for (i = 0; i < foffs->cnt; i++) + memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); +} + +static inline void check_and_init_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_init(map->field_offs, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -361,7 +388,7 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off = next_off + foffs->field_sz[i]; + curr_off += foffs->field_sz[i] + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } @@ -391,7 +418,7 @@ static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off = next_off + foffs->field_sz[i]; + curr_off += foffs->field_sz[i] + sz; } memset(dst + curr_off, 0, size - curr_off); } @@ -404,6 +431,9 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map 
*map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock); + int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -472,10 +502,8 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - /* MEM was "allocated" from a different helper, and cannot be mixed - * with regular non-MEM_ALLOC'ed MEM types. - */ - MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + /* MEM points to BPF ring buffer reservation. */ + MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS), /* MEM is in user address space. */ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), @@ -510,6 +538,43 @@ enum bpf_type_flag { /* Size is known at compile time. */ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + /* MEM is of an allocated object of type in program BTF. This is used to + * tag PTR_TO_BTF_ID allocated using bpf_obj_new. + */ + MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), + + /* PTR was passed from the kernel in a trusted context, and may be + * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. + * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. + * PTR_UNTRUSTED refers to a kptr that was read directly from a map + * without invoking bpf_kptr_xchg(). What we really need to know is + * whether a pointer is safe to pass to a kfunc or BPF helper function. + * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF + * helpers, they do not cover all possible instances of unsafe + * pointers. For example, a pointer that was obtained from walking a + * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the + * fact that it may be NULL, invalid, etc. This is due to backwards + * compatibility requirements, as this was the behavior that was first + * introduced when kptrs were added. The behavior is now considered + * deprecated, and PTR_UNTRUSTED will eventually be removed. + * + * PTR_TRUSTED, on the other hand, is a pointer that the kernel + * guarantees to be valid and safe to pass to kfuncs and BPF helpers. + * For example, pointers passed to tracepoint arguments are considered + * PTR_TRUSTED, as are pointers that are passed to struct_ops + * callbacks. As alluded to above, pointers that are obtained from + * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a + * struct task_struct *task is PTR_TRUSTED, then accessing + * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored + * in a BPF register. Similarly, pointers passed to certain program + * types such as kretprobes are not guaranteed to be valid, as they may + * for example contain an object that was recently freed. + */ + PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), + + /* MEM is tagged with rcu and memory access needs rcu_read_lock protection.
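+ * Presumably such a pointer is only valid while the program holds the
+ * RCU read lock, e.g. via the bpf_rcu_read_lock()/bpf_rcu_read_unlock()
+ * kfuncs (cf. the active_rcu_lock verifier state further below).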
*/ + MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -549,7 +614,7 @@ enum bpf_arg_type { ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ - ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ + ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ @@ -566,7 +631,6 @@ enum bpf_arg_type { ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, - ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* pointer to memory does not need to be initialized, helper function must fill @@ -591,7 +655,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ - RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ + RET_PTR_TO_MEM, /* returns a pointer to memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ __BPF_RET_TYPE_MAX, @@ -601,9 +665,10 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, - RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. 
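* (For example, PTR_TO_BTF_ID | MEM_ALLOC would denote an object
* allocated with bpf_obj_new(), while PTR_TO_BTF_ID | PTR_TRUSTED
* denotes a kernel-provided pointer that may be passed to
* KF_TRUSTED_ARGS kfuncs.)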
@@ -620,6 +685,7 @@ struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; bool pkt_access; + bool might_sleep; enum bpf_return_type ret_type; union { struct { @@ -758,6 +824,7 @@ struct bpf_prog_ops { union bpf_attr __user *uattr); }; +struct bpf_reg_state; struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto * @@ -779,9 +846,8 @@ struct bpf_verifier_ops { struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); }; @@ -1794,7 +1860,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); -int generic_map_update_batch(struct bpf_map *map, +int generic_map_update_batch(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr); int generic_map_delete_batch(struct bpf_map *map, @@ -2085,9 +2151,9 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, return btf_ctx_access(off, size, type, prog, info); } -int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, +int btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, @@ -2100,22 +2166,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); -struct bpf_kfunc_arg_meta { - u64 r0_size; - bool r0_rdonly; - int ref_obj_id; - u32 flags; -}; - struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -2338,9 +2393,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) } static inline int btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag) { return -EACCES; @@ -2797,4 +2851,10 @@ struct bpf_key { bool has_ref; }; #endif /* CONFIG_KEYS */ + +static inline bool type_is_alloc(u32 type) +{ + return type & MEM_ALLOC; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1a32baa78ce2..c05aa6e1f6f5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -19,7 +19,7 @@ */ #define BPF_MAX_VAR_SIZ (1 << 29) /* size of type_str_buf in bpf_verifier. 
*/ -#define TYPE_STR_BUF_LEN 64 +#define TYPE_STR_BUF_LEN 128 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -223,6 +223,11 @@ struct bpf_reference_state { * exiting a callback function. */ int callback_ref; + /* Mark the reference state to release the registers sharing the same id + * on bpf_spin_unlock (for nodes that we will lose ownership to but are + * safe to access inside the critical section). + */ + bool release_on_unlock; }; /* state of the program: @@ -323,8 +328,23 @@ struct bpf_verifier_state { u32 branches; u32 insn_idx; u32 curframe; - u32 active_spin_lock; + /* For every reg representing a map value or allocated object pointer, + * we consider the tuple of (ptr, id) for them to be unique in verifier + * context and consider them to not alias each other for the purposes of + * tracking lock state. + */ + struct { + /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, + * there's no active lock held, and other fields have no + * meaning. If non-NULL, it indicates that a lock is held and + * id member has the reg->id of the register which can be >= 0. + */ + void *ptr; + /* This will be reg->id */ + u32 id; + } active_lock; bool speculative; + bool active_rcu_lock; /* first and last insn idx of this verifier state */ u32 first_insn_idx; @@ -419,11 +439,14 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; + u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -513,6 +536,7 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool rcu_tag_supported; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; @@ -589,8 +613,6 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, enum bpf_arg_type arg_type); -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, @@ -661,4 +683,11 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) } } +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED) + +static inline bool bpf_type_has_unsafe_modifiers(u32 type) +{ + return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index d80345fa566b..9ed00077db6e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,8 @@ #include <linux/types.h> #include <linux/bpfptr.h> +#include <linux/bsearch.h> +#include <linux/btf_ids.h> #include <uapi/linux/btf.h> #include <uapi/linux/bpf.h> @@ -17,36 +19,53 @@ #define
KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ -/* Trusted arguments are those which are meant to be referenced arguments with - * unchanged offset. It is used to enforce that pointers obtained from acquire - * kfuncs remain unmodified when being passed to helpers taking trusted args. +/* Trusted arguments are those which are guaranteed to be valid when passed to + * the kfunc. It is used to enforce that pointers obtained from either acquire + * kfuncs, or from the main kernel on a tracepoint or struct_ops callback + * invocation, remain unmodified when being passed to helpers taking trusted + * args. * - * Consider - * struct foo { - * int data; - * struct foo *next; - * }; + * Consider, for example, the following new task tracepoint: * - * struct bar { - * int data; - * struct foo f; - * }; + * SEC("tp_btf/task_newtask") + * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags) + * { + * ... + * } * - * struct foo *f = alloc_foo(); // Acquire kfunc - * struct bar *b = alloc_bar(); // Acquire kfunc + * And the following kfunc: * - * If a kfunc set_foo_data() wants to operate only on the allocated object, it - * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) * - * set_foo_data(f, 42); // Allowed - * set_foo_data(f->next, 42); // Rejected, non-referenced pointer - * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type - * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * All invocations to the kfunc must pass the unmodified, unwalked task: * - * In the final case, usually for the purposes of type matching, it is deduced - * by looking at the type of the member at the offset, but due to the - * requirement of trusted argument, this deduction will be strict and not done - * for this case. + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(task->last_wakee); // Rejected, walked task + * + * Programs may also pass referenced tasks directly to the kfunc: + * + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(task); // Allowed, same as above + * bpf_task_acquire(acquired); // Allowed + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task + * + * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or + * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these + * pointers are guaranteed to be safe. 
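As a sketch of how such a kfunc is wired up in practice: the flags are attached where the kfunc's BTF ID set is defined, and the set is then registered for a program type. The bpf_task_acquire()/bpf_task_release() pair is the one quoted above; the set and variable names below are hypothetical:

	BTF_SET8_START(task_kfunc_btf_ids)
	BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
	BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
	BTF_SET8_END(task_kfunc_btf_ids)

	static const struct btf_kfunc_id_set task_kfunc_set = {
		.owner = THIS_MODULE,
		.set   = &task_kfunc_btf_ids,
	};

	/* typically called once from an initcall */
	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &task_kfunc_set);

The doc comment continues below with a program the verifier rejects on exactly these grounds.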
For example, the following BPF program + * would be rejected: + * + * SEC("kretprobe/free_task") + * int BPF_PROG(free_task_probe, struct task_struct *tsk) + * { + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(tsk); // Rejected, not a trusted pointer + * bpf_task_release(acquired); + * + * return 0; + * } */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ @@ -78,6 +97,17 @@ struct btf_id_dtor_kfunc { u32 kfunc_btf_id; }; +struct btf_struct_meta { + u32 btf_id; + struct btf_record *record; + struct btf_field_offs *field_offs; +}; + +struct btf_struct_metas { + u32 cnt; + struct btf_struct_meta types[]; +}; + typedef void (*btf_dtor_kfunc_t)(void *); extern const struct file_operations btf_fops; @@ -165,6 +195,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); @@ -324,6 +355,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static inline bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + +static inline bool btf_type_is_array(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -408,9 +449,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t) return (struct btf_param *)(t + 1); } -#ifdef CONFIG_BPF_SYSCALL +static inline int btf_id_cmp_func(const void *a, const void *b) +{ + const int *pa = a, *pb = b; + + return *pa - *pb; +} + +static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id) +{ + return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; +} + +static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + struct bpf_prog; +struct bpf_verifier_log; +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); @@ -423,6 +482,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -454,6 +521,26 @@ static inline int
register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt { return 0; } +static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + return NULL; +} +static inline const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + return NULL; +} +static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, + enum bpf_prog_type prog_type) { + return -EINVAL; +} +static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) +{ + return false; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index c9744efd202f..93397711a68c 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -204,7 +204,7 @@ extern struct btf_id_set8 name; #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661..7c1afe0f4129 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # endif # define __iomem # define __percpu BTF_TYPE_TAG(percpu) -# define __rcu +# define __rcu BTF_TYPE_TAG(rcu) + # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 /* context/locking */ diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 3ed117e299ec..f3664ee12170 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -5,28 +5,8 @@ #ifndef _NET_DSA_8021Q_H #define _NET_DSA_8021Q_H -#include <linux/refcount.h> -#include <linux/types.h> #include <net/dsa.h> - -struct dsa_switch; -struct dsa_port; -struct sk_buff; -struct net_device; - -struct dsa_tag_8021q_vlan { - struct list_head list; - int port; - u16 vid; - refcount_t refcount; -}; - -struct dsa_8021q_context { - struct dsa_switch *ds; - struct list_head vlans; - /* EtherType of RX VID, used for filtering on master interface */ - __be16 proto; -}; +#include <linux/types.h> int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); @@ -38,15 +18,6 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); -struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, - u16 tpid, u16 tci); - -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid); - -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, - int vbid); - u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 
9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed..bf701976056e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -568,10 +568,10 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; -extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, @@ -643,13 +643,13 @@ struct bpf_nh_params { }; struct bpf_redirect_info { - u32 flags; - u32 tgt_index; + u64 tgt_index; void *tgt_value; struct bpf_map *map; + u32 flags; + u32 kern_flags; u32 map_id; enum bpf_map_type map_type; - u32 kern_flags; struct bpf_nh_params nh; }; @@ -1504,7 +1504,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { @@ -1515,7 +1515,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind if (unlikely(flags & ~(action_mask | flag_mask))) return XDP_ABORTED; - ri->tgt_value = lookup_elem(map, ifindex); + ri->tgt_value = lookup_elem(map, index); if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) { /* If the lookup fails we want to clear out the state in the * redirect_info struct completely, so that if an eBPF program @@ -1527,7 +1527,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind return flags & action_mask; } - ri->tgt_index = ifindex; + ri->tgt_index = index; ri->map_id = map->id; ri->map_type = map->map_type; diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..59ae95ddb679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2089,6 +2089,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. 
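The fault-injection hunk above turns warning suppression from per-attribute state (no_warn) into a per-call flag, so a single call site can stay quiet without silencing every other user of the same attr. A hedged sketch of a caller, using the in-tree failslab attribute (the exact call site is assumed, not shown in this diff):

	if (should_fail_ex(&failslab.attr, size, FAULT_NOWARN))
		return NULL;	/* inject the failure, but skip the verbose report */

The unflagged should_fail(attr, size) remains and presumably behaves like should_fail_ex(attr, size, 0).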
+ * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "[email protected]@1234" */ + u8 *key; /* Volume ID, eg. "[email protected]@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f7c49bbdb8a1..d84e0e99f084 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -189,6 +189,7 @@ s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); +const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client); #endif /* I2C */ /** diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 43bc8a2edccf..0ded9e271523 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -16,6 +16,9 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = 4, IO_URING_F_CQE32 = 8, IO_URING_F_IOPOLL = 16, + + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 32, }; struct io_uring_cmd { diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 570831ca9951..4e968ebadce6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -224,9 +224,10 @@ extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); -extern void static_key_slow_inc(struct static_key *key); +extern bool static_key_slow_inc(struct static_key *key); +extern bool static_key_fast_inc_not_disabled(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); -extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern bool static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -278,11 +279,23 @@ static __always_inline bool static_key_true(struct static_key *key) return false; } -static inline void static_key_slow_inc(struct static_key *key) +static inline bool static_key_fast_inc_not_disabled(struct static_key *key) { + int v; + STATIC_KEY_CHECK_USE(key); - atomic_inc(&key->enabled); + /* + * Prevent key->enabled getting negative to follow the same semantics + * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment. 
+ */ + v = atomic_read(&key->enabled); + do { + if (v < 0 || (v + 1) < 0) + return false; + } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); + return true; } +#define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key) static inline void static_key_slow_dec(struct static_key *key) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..637a60607c7d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -776,6 +776,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; diff --git a/include/linux/license.h b/include/linux/license.h index 7cce390f120b..ad937f57f2cb 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,6 +2,8 @@ #ifndef __LICENSE_H #define __LICENSE_H +#include <linux/string.h> + static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index eb3fac30488b..5fe5d198b57a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,10 +290,9 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; -#define MLX5_UMR_KLM_ALIGNMENT 4 -#define MLX5_UMR_MTT_ALIGNMENT 0x40 -#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) -#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_UMR_FLEX_ALIGNMENT 0x40 +#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) +#define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af2ceb4160bc..d476255c9a3f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -606,8 +606,6 @@ struct mlx5_priv { struct list_head pgdir_list; /* end: alloc staff */ - struct list_head ctx_list; - spinlock_t ctx_lock; struct mlx5_adev **adev; int adev_idx; int sw_vhca_id; @@ -981,6 +979,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 23b3903b0678..5aa35c58c342 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3135,7 +3135,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif @@ -3168,6 +3167,7 @@ struct softnet_data { unsigned int cpu; unsigned int input_queue_tail; #endif + unsigned int received_rps; unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 0484b613ca64..d88715a0b3a5 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -14,6 +14,7 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); extern int of_get_mac_address(struct device_node *np, u8 *mac); +extern int of_get_mac_address_nvmem(struct device_node *np, u8 *mac); int of_get_ethdev_address(struct device_node *np, struct net_device *dev); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else @@ -28,6 +29,11 @@ static inline int 
of_get_mac_address(struct device_node *np, u8 *mac) return -ENODEV; } +static inline int of_get_mac_address_nvmem(struct device_node *np, u8 *mac) +{ + return -ENODEV; +} + static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev) { return -ENODEV; diff --git a/include/linux/phy.h b/include/linux/phy.h index 9a3752c0c444..71eeb4e3b1fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -529,6 +529,8 @@ struct macsec_ops; * * @mdio: MDIO bus this PHY is on * @drv: Pointer to the driver for this PHY instance + * @devlink: Create a link between phy dev and mac dev, if the external phy + * used by current mac interface is managed by another mac interface. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -618,6 +620,8 @@ struct phy_device { /* And management functions */ struct phy_driver *drv; + struct device_link *devlink; + u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2504df9a0453..3c7d295746f6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); + struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 8294978f4bca..beb190449704 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -85,6 +85,9 @@ struct mtk_wed_device { int irq; u8 version; + /* used by wlan driver */ + u32 rev_id; + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES]; struct mtk_wed_ring txfree_ring; @@ -155,7 +158,7 @@ struct mtk_wed_device { struct mtk_wed_ops { int (*attach)(struct mtk_wed_device *dev); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, - void __iomem *regs); + void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs); int (*txfree_ring_setup)(struct mtk_wed_device *dev, @@ -213,8 +216,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) #define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ - (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs, _reset) #define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ (_dev)->ops->txfree_ring_setup(_dev, _regs) #define mtk_wed_device_reg_read(_dev, _reg) \ @@ -231,6 +234,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ (_dev)->ops->msg_update(_dev, _id, _msg, _len) +#define mtk_wed_device_stop(_dev) (_dev)->ops->stop(_dev) +#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -238,7 +243,7 
@@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) } #define mtk_wed_device_detach(_dev) do {} while (0) #define mtk_wed_device_start(_dev, _mask) do {} while (0) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV #define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV #define mtk_wed_device_reg_read(_dev, _reg) 0 #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) @@ -247,6 +252,8 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV +#define mtk_wed_device_stop(_dev) do {} while (0) +#define mtk_wed_device_dma_reset(_dev) do {} while (0) #endif #endif diff --git a/include/linux/trace.h b/include/linux/trace.h index b5e16e438448..80ffda871749 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -26,13 +26,13 @@ struct trace_export { int flags; }; +struct trace_array; + #ifdef CONFIG_TRACING int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); -struct trace_array; - void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, diff --git a/include/net/devlink.h b/include/net/devlink.h index 074a79b8933f..5f6eca5e4a40 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -650,6 +650,10 @@ struct devlink_info_req; * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_region_ops { @@ -659,6 +663,10 @@ struct devlink_region_ops { const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -670,6 +678,10 @@ struct devlink_region_ops { * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. 
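A driver-side sketch of the @read callback being added to both region ops structs; only the signature and the locking rule come from the kernel-doc above, while the driver names are hypothetical:

	static int my_region_read(struct devlink *devlink,
				  const struct devlink_region_ops *ops,
				  struct netlink_ext_ack *extack,
				  u64 offset, u32 size, u8 *data)
	{
		struct my_priv *priv = ops->priv;

		/* devlink instance lock is held; serve the requested window */
		memcpy(data, priv->shadow_regs + offset, size);
		return 0;
	}

	static const struct devlink_region_ops my_region_ops = {
		.name = "shadow-regs",
		.read = my_region_read,
		.priv = &my_priv_data,
	};

This lets userspace read a portion of a region directly instead of forcing a snapshot first.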
* @priv: Pointer to driver private data for the region operation */ struct devlink_port_region_ops { @@ -679,6 +691,10 @@ struct devlink_port_region_ops { const struct devlink_port_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -1746,8 +1762,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn); -int devlink_info_driver_name_put(struct devlink_info_req *req, - const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); diff --git a/include/net/dsa.h b/include/net/dsa.h index 82da44561f4c..96086289aa9b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -22,6 +22,7 @@ #include <net/devlink.h> #include <net/switchdev.h> +struct dsa_8021q_context; struct tc_action; struct phy_device; struct fixed_phy_status; @@ -1285,8 +1286,6 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; -struct net_device *dsa_dev_to_net_device(struct device *dev); - bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index d21210709f84..ed4622dd4828 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -125,6 +125,9 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) +#define GENL_SET_ERR_MSG_FMT(info, msg, args...) \ + NL_SET_ERR_MSG_FMT((info)->extack, msg, ##args) + /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ struct genl_info *__info = (info); \ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3af1e927247d..69174093078f 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -281,7 +281,8 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's * rcv_saddr field should already have been updated when this is called. 
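The GENL_SET_ERR_MSG_FMT addition above gives generic netlink handlers a printf-style extack message to pair with the plain GENL_SET_ERR_MSG; a minimal hypothetical use inside a doit() handler:

	if (!info->attrs[MY_ATTR_PORT]) {
		GENL_SET_ERR_MSG_FMT(info, "missing port attribute %d", MY_ATTR_PORT);
		return -EINVAL;
	}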
*/ -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); +void inet_bhash2_reset_saddr(struct sock *sk); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); diff --git a/include/net/mrp.h b/include/net/mrp.h index 92cd3fb6cf9d..b28915ffea28 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -124,6 +124,7 @@ struct mrp_applicant { struct sk_buff *pdu; struct rb_root mad; struct rcu_head rcu; + bool active; }; struct mrp_port { diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 20745cf7ae1a..2f2a6023fb0e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,7 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; - int qlen; + u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..fa00dc20a0d7 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,8 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); - /* Frees the entire thing */ - void (*free)(struct sctp_stream *stream); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Enqueue a chunk */ void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); diff --git a/include/net/tcp.h b/include/net/tcp.h index 6b814e788f00..f925377066fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1675,7 +1675,11 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags, - const u8 *newkey, u8 newkeylen, gfp_t gfp); + const u8 *newkey, u8 newkeylen); +int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, + int family, u8 prefixlen, int l3index, + struct tcp_md5sig_key *key); + int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, @@ -1683,7 +1687,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, #ifdef CONFIG_TCP_MD5SIG #include <linux/jump_label.h> -extern struct static_key_false tcp_md5_needed; +extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family); @@ -1691,7 +1695,7 @@ static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { - if (!static_branch_unlikely(&tcp_md5_needed)) + if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family); } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dbc81f5eb553..e0cc6791c001 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1681,8 +1681,9 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); -int verify_spi_info(u8 proto, u32 min, u32 max); -int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 
maxspi); +int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); +int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, + struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, u32 if_id, u8 proto, const xfrm_address_t *daddr, @@ -1703,7 +1704,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap, u32 if_id); + struct xfrm_encap_tmpl *encap, u32 if_id, + struct netlink_ext_ack *extack); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index 6ce3bd22f6c6..5ad7ac2e3a7c 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -26,7 +26,10 @@ #define DDR3PHY_PGSR (0x0C) /* DDR3PHY PHY General Status Register */ #define DDR3PHY_PGSR_IDONE (1 << 0) /* Initialization Done */ -#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ +#define DDR3PHY_ACDLLCR (0x14) /* DDR3PHY AC DLL Control Register */ +#define DDR3PHY_ACDLLCR_DLLSRST (1 << 30) /* DLL Soft Reset */ + +#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ #define DDR3PHY_ACIOCR_CSPDD_CS0 (1 << 18) /* CS#[0] Power Down Driver */ #define DDR3PHY_ACIOCR_CKPDD_CK0 (1 << 8) /* CK[0] Power Down Driver */ #define DDR3PHY_ACIORC_ACPDD (1 << 3) /* AC Power Down Driver */ diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 967ba30ea636..df62be80a193 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -596,221 +596,6 @@ enum ocelot_ptp_pins { TOD_ACC_PIN }; -enum ocelot_stat { - OCELOT_STAT_RX_OCTETS, - OCELOT_STAT_RX_UNICAST, - OCELOT_STAT_RX_MULTICAST, - OCELOT_STAT_RX_BROADCAST, - OCELOT_STAT_RX_SHORTS, - OCELOT_STAT_RX_FRAGMENTS, - OCELOT_STAT_RX_JABBERS, - OCELOT_STAT_RX_CRC_ALIGN_ERRS, - OCELOT_STAT_RX_SYM_ERRS, - OCELOT_STAT_RX_64, - OCELOT_STAT_RX_65_127, - OCELOT_STAT_RX_128_255, - OCELOT_STAT_RX_256_511, - OCELOT_STAT_RX_512_1023, - OCELOT_STAT_RX_1024_1526, - OCELOT_STAT_RX_1527_MAX, - OCELOT_STAT_RX_PAUSE, - OCELOT_STAT_RX_CONTROL, - OCELOT_STAT_RX_LONGS, - OCELOT_STAT_RX_CLASSIFIED_DROPS, - OCELOT_STAT_RX_RED_PRIO_0, - OCELOT_STAT_RX_RED_PRIO_1, - OCELOT_STAT_RX_RED_PRIO_2, - OCELOT_STAT_RX_RED_PRIO_3, - OCELOT_STAT_RX_RED_PRIO_4, - OCELOT_STAT_RX_RED_PRIO_5, - OCELOT_STAT_RX_RED_PRIO_6, - OCELOT_STAT_RX_RED_PRIO_7, - OCELOT_STAT_RX_YELLOW_PRIO_0, - OCELOT_STAT_RX_YELLOW_PRIO_1, - OCELOT_STAT_RX_YELLOW_PRIO_2, - OCELOT_STAT_RX_YELLOW_PRIO_3, - OCELOT_STAT_RX_YELLOW_PRIO_4, - OCELOT_STAT_RX_YELLOW_PRIO_5, - OCELOT_STAT_RX_YELLOW_PRIO_6, - OCELOT_STAT_RX_YELLOW_PRIO_7, - OCELOT_STAT_RX_GREEN_PRIO_0, - OCELOT_STAT_RX_GREEN_PRIO_1, - OCELOT_STAT_RX_GREEN_PRIO_2, - OCELOT_STAT_RX_GREEN_PRIO_3, - OCELOT_STAT_RX_GREEN_PRIO_4, - OCELOT_STAT_RX_GREEN_PRIO_5, - OCELOT_STAT_RX_GREEN_PRIO_6, - OCELOT_STAT_RX_GREEN_PRIO_7, - OCELOT_STAT_TX_OCTETS, - OCELOT_STAT_TX_UNICAST, - OCELOT_STAT_TX_MULTICAST, - OCELOT_STAT_TX_BROADCAST, - OCELOT_STAT_TX_COLLISION, - OCELOT_STAT_TX_DROPS, - OCELOT_STAT_TX_PAUSE, - OCELOT_STAT_TX_64, - OCELOT_STAT_TX_65_127, - OCELOT_STAT_TX_128_255, - OCELOT_STAT_TX_256_511, - OCELOT_STAT_TX_512_1023, - OCELOT_STAT_TX_1024_1526, - OCELOT_STAT_TX_1527_MAX, - OCELOT_STAT_TX_YELLOW_PRIO_0, - 
OCELOT_STAT_TX_YELLOW_PRIO_1, - OCELOT_STAT_TX_YELLOW_PRIO_2, - OCELOT_STAT_TX_YELLOW_PRIO_3, - OCELOT_STAT_TX_YELLOW_PRIO_4, - OCELOT_STAT_TX_YELLOW_PRIO_5, - OCELOT_STAT_TX_YELLOW_PRIO_6, - OCELOT_STAT_TX_YELLOW_PRIO_7, - OCELOT_STAT_TX_GREEN_PRIO_0, - OCELOT_STAT_TX_GREEN_PRIO_1, - OCELOT_STAT_TX_GREEN_PRIO_2, - OCELOT_STAT_TX_GREEN_PRIO_3, - OCELOT_STAT_TX_GREEN_PRIO_4, - OCELOT_STAT_TX_GREEN_PRIO_5, - OCELOT_STAT_TX_GREEN_PRIO_6, - OCELOT_STAT_TX_GREEN_PRIO_7, - OCELOT_STAT_TX_AGED, - OCELOT_STAT_DROP_LOCAL, - OCELOT_STAT_DROP_TAIL, - OCELOT_STAT_DROP_YELLOW_PRIO_0, - OCELOT_STAT_DROP_YELLOW_PRIO_1, - OCELOT_STAT_DROP_YELLOW_PRIO_2, - OCELOT_STAT_DROP_YELLOW_PRIO_3, - OCELOT_STAT_DROP_YELLOW_PRIO_4, - OCELOT_STAT_DROP_YELLOW_PRIO_5, - OCELOT_STAT_DROP_YELLOW_PRIO_6, - OCELOT_STAT_DROP_YELLOW_PRIO_7, - OCELOT_STAT_DROP_GREEN_PRIO_0, - OCELOT_STAT_DROP_GREEN_PRIO_1, - OCELOT_STAT_DROP_GREEN_PRIO_2, - OCELOT_STAT_DROP_GREEN_PRIO_3, - OCELOT_STAT_DROP_GREEN_PRIO_4, - OCELOT_STAT_DROP_GREEN_PRIO_5, - OCELOT_STAT_DROP_GREEN_PRIO_6, - OCELOT_STAT_DROP_GREEN_PRIO_7, - OCELOT_NUM_STATS, -}; - -struct ocelot_stat_layout { - u32 reg; - char name[ETH_GSTRING_LEN]; -}; - -/* 32-bit counter checked for wraparound by ocelot_port_update_stats() - * and copied to ocelot->stats. - */ -#define OCELOT_STAT(kind) \ - [OCELOT_STAT_ ## kind] = { .reg = SYS_COUNT_ ## kind } -/* Same as above, except also exported to ethtool -S. Standard counters should - * only be exposed to more specific interfaces rather than by their string name. - */ -#define OCELOT_STAT_ETHTOOL(kind, ethtool_name) \ - [OCELOT_STAT_ ## kind] = { .reg = SYS_COUNT_ ## kind, .name = ethtool_name } - -#define OCELOT_COMMON_STATS \ - OCELOT_STAT_ETHTOOL(RX_OCTETS, "rx_octets"), \ - OCELOT_STAT_ETHTOOL(RX_UNICAST, "rx_unicast"), \ - OCELOT_STAT_ETHTOOL(RX_MULTICAST, "rx_multicast"), \ - OCELOT_STAT_ETHTOOL(RX_BROADCAST, "rx_broadcast"), \ - OCELOT_STAT_ETHTOOL(RX_SHORTS, "rx_shorts"), \ - OCELOT_STAT_ETHTOOL(RX_FRAGMENTS, "rx_fragments"), \ - OCELOT_STAT_ETHTOOL(RX_JABBERS, "rx_jabbers"), \ - OCELOT_STAT_ETHTOOL(RX_CRC_ALIGN_ERRS, "rx_crc_align_errs"), \ - OCELOT_STAT_ETHTOOL(RX_SYM_ERRS, "rx_sym_errs"), \ - OCELOT_STAT_ETHTOOL(RX_64, "rx_frames_below_65_octets"), \ - OCELOT_STAT_ETHTOOL(RX_65_127, "rx_frames_65_to_127_octets"), \ - OCELOT_STAT_ETHTOOL(RX_128_255, "rx_frames_128_to_255_octets"), \ - OCELOT_STAT_ETHTOOL(RX_256_511, "rx_frames_256_to_511_octets"), \ - OCELOT_STAT_ETHTOOL(RX_512_1023, "rx_frames_512_to_1023_octets"), \ - OCELOT_STAT_ETHTOOL(RX_1024_1526, "rx_frames_1024_to_1526_octets"), \ - OCELOT_STAT_ETHTOOL(RX_1527_MAX, "rx_frames_over_1526_octets"), \ - OCELOT_STAT_ETHTOOL(RX_PAUSE, "rx_pause"), \ - OCELOT_STAT_ETHTOOL(RX_CONTROL, "rx_control"), \ - OCELOT_STAT_ETHTOOL(RX_LONGS, "rx_longs"), \ - OCELOT_STAT_ETHTOOL(RX_CLASSIFIED_DROPS, "rx_classified_drops"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_0, "rx_red_prio_0"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_1, "rx_red_prio_1"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_2, "rx_red_prio_2"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_3, "rx_red_prio_3"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_4, "rx_red_prio_4"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_5, "rx_red_prio_5"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_6, "rx_red_prio_6"), \ - OCELOT_STAT_ETHTOOL(RX_RED_PRIO_7, "rx_red_prio_7"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_0, "rx_yellow_prio_0"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_1, "rx_yellow_prio_1"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_2, "rx_yellow_prio_2"), \ - 
OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_3, "rx_yellow_prio_3"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_4, "rx_yellow_prio_4"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_5, "rx_yellow_prio_5"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_6, "rx_yellow_prio_6"), \ - OCELOT_STAT_ETHTOOL(RX_YELLOW_PRIO_7, "rx_yellow_prio_7"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_0, "rx_green_prio_0"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_1, "rx_green_prio_1"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_2, "rx_green_prio_2"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_3, "rx_green_prio_3"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_4, "rx_green_prio_4"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_5, "rx_green_prio_5"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_6, "rx_green_prio_6"), \ - OCELOT_STAT_ETHTOOL(RX_GREEN_PRIO_7, "rx_green_prio_7"), \ - OCELOT_STAT_ETHTOOL(TX_OCTETS, "tx_octets"), \ - OCELOT_STAT_ETHTOOL(TX_UNICAST, "tx_unicast"), \ - OCELOT_STAT_ETHTOOL(TX_MULTICAST, "tx_multicast"), \ - OCELOT_STAT_ETHTOOL(TX_BROADCAST, "tx_broadcast"), \ - OCELOT_STAT_ETHTOOL(TX_COLLISION, "tx_collision"), \ - OCELOT_STAT_ETHTOOL(TX_DROPS, "tx_drops"), \ - OCELOT_STAT_ETHTOOL(TX_PAUSE, "tx_pause"), \ - OCELOT_STAT_ETHTOOL(TX_64, "tx_frames_below_65_octets"), \ - OCELOT_STAT_ETHTOOL(TX_65_127, "tx_frames_65_to_127_octets"), \ - OCELOT_STAT_ETHTOOL(TX_128_255, "tx_frames_128_255_octets"), \ - OCELOT_STAT_ETHTOOL(TX_256_511, "tx_frames_256_511_octets"), \ - OCELOT_STAT_ETHTOOL(TX_512_1023, "tx_frames_512_1023_octets"), \ - OCELOT_STAT_ETHTOOL(TX_1024_1526, "tx_frames_1024_1526_octets"), \ - OCELOT_STAT_ETHTOOL(TX_1527_MAX, "tx_frames_over_1526_octets"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_0, "tx_yellow_prio_0"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_1, "tx_yellow_prio_1"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_2, "tx_yellow_prio_2"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_3, "tx_yellow_prio_3"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_4, "tx_yellow_prio_4"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_5, "tx_yellow_prio_5"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_6, "tx_yellow_prio_6"), \ - OCELOT_STAT_ETHTOOL(TX_YELLOW_PRIO_7, "tx_yellow_prio_7"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_0, "tx_green_prio_0"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_1, "tx_green_prio_1"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_2, "tx_green_prio_2"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_3, "tx_green_prio_3"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_4, "tx_green_prio_4"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_5, "tx_green_prio_5"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_6, "tx_green_prio_6"), \ - OCELOT_STAT_ETHTOOL(TX_GREEN_PRIO_7, "tx_green_prio_7"), \ - OCELOT_STAT_ETHTOOL(TX_AGED, "tx_aged"), \ - OCELOT_STAT_ETHTOOL(DROP_LOCAL, "drop_local"), \ - OCELOT_STAT_ETHTOOL(DROP_TAIL, "drop_tail"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_0, "drop_yellow_prio_0"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_1, "drop_yellow_prio_1"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_2, "drop_yellow_prio_2"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_3, "drop_yellow_prio_3"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_4, "drop_yellow_prio_4"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_5, "drop_yellow_prio_5"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_6, "drop_yellow_prio_6"), \ - OCELOT_STAT_ETHTOOL(DROP_YELLOW_PRIO_7, "drop_yellow_prio_7"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_0, "drop_green_prio_0"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_1, "drop_green_prio_1"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_2, "drop_green_prio_2"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_3, "drop_green_prio_3"), \ 
- OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_4, "drop_green_prio_4"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_5, "drop_green_prio_5"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_6, "drop_green_prio_6"), \ - OCELOT_STAT_ETHTOOL(DROP_GREEN_PRIO_7, "drop_green_prio_7") - -struct ocelot_stats_region { - struct list_head node; - u32 base; - int count; - u32 *buf; -}; - enum ocelot_tag_prefix { OCELOT_TAG_PREFIX_DISABLED = 0, OCELOT_TAG_PREFIX_NONE, @@ -967,7 +752,6 @@ struct ocelot { struct regmap *targets[TARGET_MAX]; struct regmap_field *regfields[REGFIELD_MAX]; const u32 *const *map; - const struct ocelot_stat_layout *stats_layout; struct list_head stats_regions; u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 83fd81c82e4c..9fbd3832bcdc 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -84,8 +84,8 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_BT, /**< AMD ACP BT*/ SOF_DAI_AMD_SP, /**< AMD ACP SP */ SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ - SOF_DAI_AMD_HS, /**< Amd HS */ SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ + SOF_DAI_AMD_HS, /**< Amd HS */ }; /* general purpose DAI configuration */ diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h index 65e86e4e9fd8..75193850ead0 100644 --- a/include/sound/sof/info.h +++ b/include/sound/sof/info.h @@ -36,6 +36,10 @@ enum sof_ipc_ext_data { SOF_IPC_EXT_USER_ABI_INFO = 4, }; +/* Build u32 number in format MMmmmppp */ +#define SOF_FW_VER(MAJOR, MINOR, PATCH) ((uint32_t)( \ + ((MAJOR) << 24) | ((MINOR) << 12) | (PATCH))) + /* FW version - SOF_IPC_GLB_VERSION */ struct sof_ipc_fw_version { struct sof_ipc_hdr hdr; diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 935af4947917..760455dfa860 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin, TRACE_EVENT(mm_khugepaged_scan_file, - TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename, + TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, int present, int swap, int result), - TP_ARGS(mm, page, filename, present, swap, result), + TP_ARGS(mm, page, file, present, swap, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __string(filename, filename) + __string(filename, file->f_path.dentry->d_iname) __field(int, present) __field(int, swap) __field(int, result) @@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TP_fast_assign( __entry->mm = mm; __entry->pfn = page ? 
page_to_pfn(page) : -1; - __assign_str(filename, filename); + __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; __entry->result = result; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 50a974f7dfb4..25ab1ff9423d 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -42,7 +42,7 @@ TRACE_EVENT(kfree_skb, __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", + TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s", __entry->skbaddr, __entry->protocol, __entry->location, __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe))) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fb4c911d2a03..f89de51a45db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2584,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2647,7 +2652,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -2808,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * @@ -6888,6 +6891,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
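The two opaque structs added to the BPF UAPI just above are the program-facing placeholders for BPF linked lists; their real layout stays kernel-internal. A program declares which node type a list holds via a BTF declaration tag and manipulates the list under the bpf_spin_lock that guards it. A sketch modeled on the selftests that accompany this series (the __contains() wrapper and the bpf_obj_new()/bpf_list_push_front() kfunc declarations live in the selftests' bpf_experimental.h, assumed here):

	#define __contains(name, node) \
		__attribute__((btf_decl_tag("contains:" #name ":" #node)))

	struct node_data {
		long data;
		struct bpf_list_node node;
	};

	struct map_value {
		struct bpf_spin_lock lock;
		struct bpf_list_head head __contains(node_data, node);
	};

	/* in program context, with v pointing at a struct map_value: */
	struct node_data *n = bpf_obj_new(typeof(*n));
	if (!n)
		return 0;
	bpf_spin_lock(&v->lock);
	bpf_list_push_front(&v->head, &n->node);	/* list takes ownership */
	bpf_spin_unlock(&v->lock);

Pushing the node hands ownership to the list, which is what the release_on_unlock reference state added earlier in this series tracks.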
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 498d0d5d0957..70191d96af89 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -610,6 +610,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + DEVLINK_ATTR_REGION_DIRECT, /* flag */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index dfe19bf13f4c..32af2d278cb4 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -160,6 +160,12 @@ struct mptcp_info { * daddr4 | daddr6, sport, dport, backup, if_idx * [, error] * The priority of a subflow has changed. 'error' should not be set. + * + * MPTCP_EVENT_LISTENER_CREATED: family, sport, saddr4 | saddr6 + * A new PM listener is created. + * + * MPTCP_EVENT_LISTENER_CLOSED: family, sport, saddr4 | saddr6 + * A PM listener is closed. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, @@ -174,6 +180,9 @@ enum mptcp_event_type { MPTCP_EVENT_SUB_CLOSED = 11, MPTCP_EVENT_SUB_PRIORITY = 13, + + MPTCP_EVENT_LISTENER_CREATED = 15, + MPTCP_EVENT_LISTENER_CLOSED = 16, }; enum mptcp_event_attr { diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 5da0da59bf01..e2ae82e3f9f7 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -48,7 +48,6 @@ struct sockaddr_nl { * @nlmsg_flags: Additional flags * @nlmsg_seq: Sequence number * @nlmsg_pid: Sending process port ID - * @nlmsg_data: Message payload */ struct nlmsghdr { __u32 nlmsg_len; @@ -56,7 +55,6 @@ struct nlmsghdr { __u16 nlmsg_flags; __u32 nlmsg_seq; __u32 nlmsg_pid; - __u8 nlmsg_data[]; }; /* Flags values */ diff --git a/init/Kconfig b/init/Kconfig index abf65098f1b6..94125d3b6893 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,7 +87,7 @@ config CC_HAS_ASM_GOTO_OUTPUT config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. 
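The nlmsghdr change above removes the flexible-array member; payload access is supposed to go through the accessors, which compute the payload position from NLMSG_HDRLEN rather than naming a struct member, so callers are unaffected:

	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	void *payload = nlmsg_data(nlh);	/* NLMSG_DATA(nlh) in userspace */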
- def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 7b473259f3f4..68dfc6936aa7 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); return ret; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4a1e482747cc..8840cf3e20f2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1768,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked) io_tw_lock(req->ctx, locked); if (unlikely(req->task->flags & PF_EXITING)) return -EFAULT; - return io_issue_sqe(req, IO_URING_F_NONBLOCK); + return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT); } struct io_wq_work *io_wq_free_work(struct io_wq_work *work) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e99a79f2df9b..50bc3af44953 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -17,8 +17,8 @@ enum { IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, /* - * Intended only when both REQ_F_POLLED and REQ_F_APOLL_MULTISHOT - * are set to indicate to the poll runner that multishot should be + * Intended only when both IO_URING_F_MULTISHOT is passed + * to indicate to the poll runner that multishot should be * removed and the result is set on req->cqe.res. */ IOU_STOP_MULTISHOT = -ECANCELED, @@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline int io_run_task_work(void) { + /* + * Always check-and-clear the task_work notification signal. With how + * signaling works for task_work, we can find it set with nothing to + * run. We need to clear it for that case, like get_signal() does. + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); if (task_work_pending(current)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - clear_notify_signal(); __set_current_state(TASK_RUNNING); task_work_run(); return 1; diff --git a/io_uring/net.c b/io_uring/net.c index 15dea91625e2..ab83da7e80f0 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -67,8 +67,6 @@ struct io_sr_msg { struct io_kiocb *notif; }; -#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) - int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -591,7 +589,8 @@ static inline void io_recv_prep_retry(struct io_kiocb *req) * again (for multishot). 
*/ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, - unsigned int cflags, bool mshot_finished) + unsigned int cflags, bool mshot_finished, + unsigned issue_flags) { if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { io_req_set_res(req, *ret, cflags); @@ -614,7 +613,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, io_req_set_res(req, *ret, cflags); - if (req->flags & REQ_F_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) *ret = IOU_STOP_MULTISHOT; else *ret = IOU_OK; @@ -773,8 +772,7 @@ retry_multishot: if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { ret = io_setup_async_msg(req, kmsg, issue_flags); - if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -803,7 +801,7 @@ retry_multishot: if (kmsg->msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, mshot_finished)) + if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags)) goto retry_multishot; if (mshot_finished) { @@ -869,7 +867,7 @@ retry_multishot: ret = sock_recvmsg(sock, &msg, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { - if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) { + if (issue_flags & IO_URING_F_MULTISHOT) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -902,7 +900,7 @@ out_free: if (msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, ret <= 0)) + if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags)) goto retry_multishot; return ret; @@ -1289,8 +1287,7 @@ retry: * return EAGAIN to arm the poll infra since it * has already been done */ - if ((req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) ret = IOU_ISSUE_SKIP_COMPLETE; return ret; } @@ -1315,9 +1312,7 @@ retry: goto retry; io_req_set_res(req, ret, 0); - if (req->flags & REQ_F_POLLED) - return IOU_STOP_MULTISHOT; - return IOU_OK; + return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) diff --git a/io_uring/poll.c b/io_uring/poll.c index f500506984ec..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. 
It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -228,6 +252,23 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_DONE; if (v & IO_POLL_CANCEL_FLAG) return -ECANCELED; + /* + * cqe.res contains only events of the first wake up + * and all others are lost. Redo vfs_poll() to get + * up to date state. + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { @@ -239,6 +280,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; + if (io_is_uring_fops(req->file)) + return IOU_POLL_DONE; /* multishot, just fill a CQE and proceed */ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { @@ -258,11 +301,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return ret; } + /* force the next iteration to vfs_poll() */ + req->cqe.res = 0; + /* * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } @@ -506,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -574,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was woken up, queue up a tw; it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 7d86f058fb86..bd2fcc4d454e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s) } -static int __shm_open(struct vm_area_struct *vma) +static int __shm_open(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); @@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma) /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) { - int err = __shm_open(vma); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err; + + /* Always call underlying open if present */ + if (sfd->vm_ops->open) + sfd->vm_ops->open(vma); + + err = __shm_open(sfd); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted.
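The ipc/shm.c refactor that begins here splits IPC bookkeeping (__shm_open()) from forwarding to the backing file's vm_ops; the shm_close() and shm_mmap() hunks just below complete the pattern. A condensed sketch of the pairing the error path depends on; shm_mmap_sketch() is a hypothetical condensation of the real shm_mmap() hunk:

static int shm_mmap_sketch(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	ret = __shm_open(sfd);		/* IPC bookkeeping only */
	if (ret)
		return ret;
	ret = call_mmap(sfd->file, vma);
	if (ret) {
		/*
		 * The backing file's ->mmap failed, so its vm_ops were
		 * never installed. Undo only the IPC side: the full
		 * shm_close() would forward to a ->close whose ->open
		 * never ran.
		 */
		__shm_close(sfd);
		return ret;
	}
	sfd->vm_ops = vma->vm_ops;	/* shm_open()/shm_close() forward here */
	return 0;
}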
@@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp) * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close(struct vm_area_struct *vma) +static void __shm_close(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; @@ -388,6 +392,18 @@ done: up_write(&shm_ids(ns).rwsem); } +static void shm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + /* Always call underlying close if present */ + if (sfd->vm_ops->close) + sfd->vm_ops->close(vma); + + __shm_close(sfd); +} + /* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { @@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller. */ - ret = __shm_open(vma); + ret = __shm_open(sfd); if (ret) return ret; ret = call_mmap(sfd->file, vma); if (ret) { - shm_close(vma); + __shm_close(sfd); return ret; } sfd->vm_ops = vma->vm_ops; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 672eb17ac421..484706959556 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -430,7 +430,6 @@ static void array_map_free(struct bpf_map *map) for (i = 0; i < array->map.max_entries; i++) bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i)); } - bpf_map_free_record(map); } if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 37020078d1c1..b39a46e8fb08 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, gfp_flags | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index d6c9b3705f24..ae0267f150b5 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode) static const struct bpf_func_proto bpf_ima_inode_hash_proto = { .func = bpf_ima_inode_hash, .gpl_only = false, + .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0], @@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file) static const struct bpf_func_proto bpf_ima_file_hash_proto = { .func = bpf_ima_file_hash, .gpl_only = false, + .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_ima_file_hash_btf_ids[0], @@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_bprm_opts_set: return &bpf_bprm_opts_set_proto; case BPF_FUNC_ima_inode_hash: - return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; + return &bpf_ima_inode_hash_proto; case BPF_FUNC_ima_file_hash: - return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; + return &bpf_ima_file_hash_proto; case BPF_FUNC_get_attach_cookie: return bpf_prog_has_trampoline(prog) ? 
&bpf_get_attach_cookie_proto : NULL; #ifdef CONFIG_NET diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5579ff3a5b54..d11cbf8cece7 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -199,6 +199,7 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); enum btf_kfunc_hook { + BTF_KFUNC_HOOK_COMMON, BTF_KFUNC_HOOK_XDP, BTF_KFUNC_HOOK_TC, BTF_KFUNC_HOOK_STRUCT_OPS, @@ -237,6 +238,7 @@ struct btf { struct rcu_head rcu; struct btf_kfunc_set_tab *kfunc_set_tab; struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; + struct btf_struct_metas *struct_meta_tab; /* split BTF support */ struct btf *base_btf; @@ -477,16 +479,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t) return !t || btf_type_nosize(t); } -static bool __btf_type_is_struct(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; -} - -static bool btf_type_is_array(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; -} - static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; @@ -1642,8 +1634,30 @@ static void btf_free_dtor_kfunc_tab(struct btf *btf) btf->dtor_kfunc_tab = NULL; } +static void btf_struct_metas_free(struct btf_struct_metas *tab) +{ + int i; + + if (!tab) + return; + for (i = 0; i < tab->cnt; i++) { + btf_record_free(tab->types[i].record); + kfree(tab->types[i].field_offs); + } + kfree(tab); +} + +static void btf_free_struct_meta_tab(struct btf *btf) +{ + struct btf_struct_metas *tab = btf->struct_meta_tab; + + btf_struct_metas_free(tab); + btf->struct_meta_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_struct_meta_tab(btf); btf_free_dtor_kfunc_tab(btf); btf_free_kfunc_set_tab(btf); kvfree(btf->types); @@ -3205,9 +3219,15 @@ enum { struct btf_field_info { enum btf_field_type type; u32 off; - struct { - u32 type_id; - } kptr; + union { + struct { + u32 type_id; + } kptr; + struct { + const char *node_name; + u32 value_btf_id; + } list_head; + }; }; static int btf_find_struct(const struct btf *btf, const struct btf_type *t, @@ -3261,6 +3281,63 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, return BTF_FIELD_FOUND; } +static const char *btf_find_decl_tag_value(const struct btf *btf, + const struct btf_type *pt, + int comp_idx, const char *tag_key) +{ + int i; + + for (i = 1; i < btf_nr_types(btf); i++) { + const struct btf_type *t = btf_type_by_id(btf, i); + int len = strlen(tag_key); + + if (!btf_type_is_decl_tag(t)) + continue; + if (pt != btf_type_by_id(btf, t->type) || + btf_type_decl_tag(t)->component_idx != comp_idx) + continue; + if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) + continue; + return __btf_name_by_offset(btf, t->name_off) + len; + } + return NULL; +} + +static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt, + const struct btf_type *t, int comp_idx, + u32 off, int sz, struct btf_field_info *info) +{ + const char *value_type; + const char *list_node; + s32 id; + + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); + if (!value_type) + return -EINVAL; + list_node = strstr(value_type, ":"); + if (!list_node) + return -EINVAL; + value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN); + if (!value_type) + return -ENOMEM; + id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); + kfree(value_type); + if (id < 0) + return id; + 
list_node++; + if (str_is_empty(list_node)) + return -EINVAL; + info->type = BPF_LIST_HEAD; + info->off = off; + info->list_head.value_btf_id = id; + info->list_head.node_name = list_node; + return BTF_FIELD_FOUND; +} + static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, int *align, int *sz) { @@ -3284,6 +3361,18 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, goto end; } } + if (field_mask & BPF_LIST_HEAD) { + if (!strcmp(name, "bpf_list_head")) { + type = BPF_LIST_HEAD; + goto end; + } + } + if (field_mask & BPF_LIST_NODE) { + if (!strcmp(name, "bpf_list_node")) { + type = BPF_LIST_NODE; + goto end; + } + } /* Only return BPF_KPTR when all other types with matchable names fail */ if (field_mask & BPF_KPTR) { type = BPF_KPTR_REF; @@ -3327,6 +3416,7 @@ static int btf_find_struct_field(const struct btf *btf, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_LIST_NODE: ret = btf_find_struct(btf, member_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3339,6 +3429,12 @@ static int btf_find_struct_field(const struct btf *btf, if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, t, member_type, i, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3381,6 +3477,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_LIST_NODE: ret = btf_find_struct(btf, var_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3393,6 +3490,12 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, var, var_type, -1, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3491,11 +3594,52 @@ end_btf: return ret; } +static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) +{ + const struct btf_type *t, *n = NULL; + const struct btf_member *member; + u32 offset; + int i; + + t = btf_type_by_id(btf, info->list_head.value_btf_id); + /* We've already checked that value_btf_id is a struct type. We + * just need to figure out the offset of the list_node, and + * verify its type. 
+ */ + for_each_member(i, t, member) { + if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off))) + continue; + /* Invalid BTF, two members with same name */ + if (n) + return -EINVAL; + n = btf_type_by_id(btf, member->type); + if (!__btf_type_is_struct(n)) + return -EINVAL; + if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off))) + return -EINVAL; + offset = __btf_member_bit_offset(n, member); + if (offset % 8) + return -EINVAL; + offset /= 8; + if (offset % __alignof__(struct bpf_list_node)) + return -EINVAL; + + field->list_head.btf = (struct btf *)btf; + field->list_head.value_btf_id = info->list_head.value_btf_id; + field->list_head.node_offset = offset; + } + if (!n) + return -ENOENT; + return 0; +} + struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size) { struct btf_field_info info_arr[BTF_FIELDS_MAX]; struct btf_record *rec; + u32 next_off = 0; int ret, i, cnt; ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); @@ -3505,6 +3649,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type return NULL; cnt = ret; + /* This needs to be kzalloc to zero out padding and unused fields, see + * comment in btf_record_equal. + */ rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM); @@ -3517,6 +3664,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type ret = -EFAULT; goto end; } + if (info_arr[i].off < next_off) { + ret = -EEXIST; + goto end; + } + next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); rec->field_mask |= info_arr[i].type; rec->fields[i].offset = info_arr[i].off; @@ -3539,18 +3691,93 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type if (ret < 0) goto end; break; + case BPF_LIST_HEAD: + ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_LIST_NODE: + break; default: ret = -EFAULT; goto end; } rec->cnt++; } + + /* bpf_list_head requires bpf_spin_lock */ + if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) { + ret = -EINVAL; + goto end; + } + return rec; end: btf_record_free(rec); return ERR_PTR(ret); } +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) +{ + int i; + + /* There are two owning types, kptr_ref and bpf_list_head. The former + * only supports storing kernel types, which can never store references + * to program allocated local types, at least not yet. Hence we only need + * to ensure that bpf_list_head ownership does not form cycles. + */ + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD)) + return 0; + for (i = 0; i < rec->cnt; i++) { + struct btf_struct_meta *meta; + u32 btf_id; + + if (!(rec->fields[i].type & BPF_LIST_HEAD)) + continue; + btf_id = rec->fields[i].list_head.value_btf_id; + meta = btf_find_struct_meta(btf, btf_id); + if (!meta) + return -EFAULT; + rec->fields[i].list_head.value_rec = meta->record; + + if (!(rec->field_mask & BPF_LIST_NODE)) + continue; + + /* We need to ensure ownership acyclicity among all types. The + * proper way to do it would be to topologically sort all BTF + * IDs based on the ownership edges, since there can be multiple + * bpf_list_head in a type. Instead, we use the following + * reasoning: + * + * - A type can only be owned by another type in user BTF if it + * has a bpf_list_node.
+ * - A type can only _own_ another type in user BTF if it has a + * bpf_list_head. + * + * We ensure that if a type has both bpf_list_head and + * bpf_list_node, its element types cannot be owning types. + * + * To ensure acyclicity: + * + * When A only has bpf_list_head, ownership chain can be: + * A -> B -> C + * Where: + * - B has both bpf_list_head and bpf_list_node. + * - C only has bpf_list_node. + * + * When A has both bpf_list_head and bpf_list_node, some other + * type already owns it in the BTF domain, hence it can not own + * another owning type through any of the bpf_list_head edges. + * A -> B + * Where: + * - B only has bpf_list_node. + */ + if (meta->record->field_mask & BPF_LIST_HEAD) + return -ELOOP; + } + return 0; +} + static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv) { const u32 a = *(const u32 *)_a; @@ -3584,7 +3811,7 @@ struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec) u8 *sz; BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz)); - if (IS_ERR_OR_NULL(rec) || WARN_ON_ONCE(rec->cnt > sizeof(foffs->field_off))) + if (IS_ERR_OR_NULL(rec)) return NULL; foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN); @@ -4552,7 +4779,6 @@ static int btf_func_proto_check(struct btf_verifier_env *env, nr_args--; } - err = 0; for (i = 0; i < nr_args; i++) { const struct btf_type *arg_type; u32 arg_type_id; @@ -4561,8 +4787,12 @@ static int btf_func_proto_check(struct btf_verifier_env *env, arg_type = btf_type_by_id(btf, arg_type_id); if (!arg_type) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; + } + + if (btf_type_is_resolve_source_only(arg_type)) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + return -EINVAL; } if (args[i].name_off && @@ -4570,25 +4800,23 @@ static int btf_func_proto_check(struct btf_verifier_env *env, !btf_name_valid_identifier(btf, args[i].name_off))) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } if (btf_type_needs_resolve(arg_type) && !env_type_is_resolved(env, arg_type_id)) { err = btf_resolve(env, arg_type, arg_type_id); if (err) - break; + return err; } if (!btf_type_id_size(btf, &arg_type_id, NULL)) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } } - return err; + return 0; } static int btf_func_check(struct btf_verifier_env *env, @@ -5002,6 +5230,119 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return btf_check_sec_info(env, btf_data_size); } +static const char *alloc_obj_fields[] = { + "bpf_spin_lock", + "bpf_list_head", + "bpf_list_node", +}; + +static struct btf_struct_metas * +btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) +{ + union { + struct btf_id_set set; + struct { + u32 _cnt; + u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; + } _arr; + } aof; + struct btf_struct_metas *tab = NULL; + int i, n, id, ret; + + BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); + BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32)); + + memset(&aof, 0, sizeof(aof)); + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { + /* Try to find whether this special type exists in user BTF, and + * if so remember its ID so we can easily find it among members + * of structs that we iterate in the next loop. 
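The "contains:<value-type>:<node-member>" decl tag that btf_find_decl_tag_value() and btf_find_list_head() parse above is what the BPF selftests' __contains() macro emits. A hedged map-value example on the program side (assumes a bpf_experimental.h-style macro expanding to __attribute__((btf_decl_tag("contains:foo:node"))); not part of this patch):

struct foo {
	int data;
	struct bpf_list_node node;
};

struct map_value {
	/* mandatory: see the "bpf_list_head requires bpf_spin_lock"
	 * check added to btf_parse_fields() above */
	struct bpf_spin_lock lock;
	struct bpf_list_head head __contains(foo, node);
};

Because struct foo here only has a bpf_list_node, it sits at the bottom of an ownership chain; it could carry a bpf_list_head of its own, but then its element type must not itself be an owning type, or btf_check_and_fixup_fields() fails with -ELOOP as described above.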
+ */ + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); + if (id < 0) + continue; + aof.set.ids[aof.set.cnt++] = id; + } + + if (!aof.set.cnt) + return NULL; + sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + struct btf_struct_metas *new_tab; + const struct btf_member *member; + struct btf_field_offs *foffs; + struct btf_struct_meta *type; + struct btf_record *record; + const struct btf_type *t; + int j, tab_cnt; + + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free; + } + if (!__btf_type_is_struct(t)) + continue; + + cond_resched(); + + for_each_member(j, t, member) { + if (btf_id_set_contains(&aof.set, member->type)) + goto parse; + } + continue; + parse: + tab_cnt = tab ? tab->cnt : 0; + new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_tab) { + ret = -ENOMEM; + goto free; + } + if (!tab) + new_tab->cnt = 0; + tab = new_tab; + + type = &tab->types[tab->cnt]; + type->btf_id = i; + record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size); + /* The record cannot be unset, treat it as an error if so */ + if (IS_ERR_OR_NULL(record)) { + ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; + goto free; + } + foffs = btf_parse_field_offs(record); + /* We need the field_offs to be valid for a valid record, + * either both should be set or both should be unset. + */ + if (IS_ERR_OR_NULL(foffs)) { + btf_record_free(record); + ret = -EFAULT; + goto free; + } + type->record = record; + type->field_offs = foffs; + tab->cnt++; + } + return tab; +free: + btf_struct_metas_free(tab); + return ERR_PTR(ret); +} + +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + struct btf_struct_metas *tab; + + BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0); + tab = btf->struct_meta_tab; + if (!tab) + return NULL; + return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func); +} + static int btf_check_type_tags(struct btf_verifier_env *env, struct btf *btf, int start_id) { @@ -5052,6 +5393,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env, static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, u32 log_level, char __user *log_ubuf, u32 log_size) { + struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; @@ -5120,15 +5462,34 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, if (err) goto errout; + struct_meta_tab = btf_parse_struct_metas(log, btf); + if (IS_ERR(struct_meta_tab)) { + err = PTR_ERR(struct_meta_tab); + goto errout; + } + btf->struct_meta_tab = struct_meta_tab; + + if (struct_meta_tab) { + int i; + + for (i = 0; i < struct_meta_tab->cnt; i++) { + err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record); + if (err < 0) + goto errout_meta; + } + } + if (log->level && bpf_verifier_log_full(log)) { err = -ENOSPC; - goto errout; + goto errout_meta; } btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; +errout_meta: + btf_free_struct_meta_tab(btf); errout: btf_verifier_env_free(env); if (btf) @@ -5170,7 +5531,7 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE -static const struct btf_member * +const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type 
prog_type, int arg) @@ -5243,6 +5604,26 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, return kern_ctx_type->type; } +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) +{ + const struct btf_member *kctx_member; + const struct btf_type *conv_struct; + const struct btf_type *kctx_type; + u32 kctx_type_id; + + conv_struct = bpf_ctx_convert.t; + /* get member for kernel ctx type */ + kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + kctx_type_id = kctx_member->type; + kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); + if (!btf_type_is_struct(kctx_type)) { + bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); + return -EINVAL; + } + + return kctx_type_id; +} + BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) @@ -5440,6 +5821,21 @@ static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, return nr_args + 1; } +static bool prog_args_trusted(const struct bpf_prog *prog) +{ + enum bpf_attach_type atype = prog->expected_attach_type; + + switch (prog->type) { + case BPF_PROG_TYPE_TRACING: + return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + return true; + default: + return false; + } +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -5583,6 +5979,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } info->reg_type = PTR_TO_BTF_ID; + if (prog_args_trusted(prog)) + info->reg_type |= PTR_TRUSTED; + if (tgt_prog) { enum bpf_prog_type tgt_type; @@ -5849,6 +6248,9 @@ error: /* check __percpu tag */ if (strcmp(tag_value, "percpu") == 0) tmp_flag = MEM_PERCPU; + /* check __rcu tag */ + if (strcmp(tag_value, "rcu") == 0) + tmp_flag = MEM_RCU; } stype = btf_type_skip_modifiers(btf, mtype->type, &id); @@ -5878,20 +6280,50 @@ error: return -EINVAL; } -int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype __maybe_unused, +int btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype __maybe_unused, u32 *next_btf_id, enum bpf_type_flag *flag) { + const struct btf *btf = reg->btf; enum bpf_type_flag tmp_flag = 0; + const struct btf_type *t; + u32 id = reg->btf_id; int err; - u32 id; + while (type_is_alloc(reg->type)) { + struct btf_struct_meta *meta; + struct btf_record *rec; + int i; + + meta = btf_find_struct_meta(btf, id); + if (!meta) + break; + rec = meta->record; + for (i = 0; i < rec->cnt; i++) { + struct btf_field *field = &rec->fields[i]; + u32 offset = field->offset; + if (off < offset + btf_field_type_size(field->type) && offset < off + size) { + bpf_log(log, + "direct access to %s is disallowed\n", + btf_field_type_name(field->type)); + return -EACCES; + } + } + break; + } + + t = btf_type_by_id(btf, id); do { err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag); switch (err) { case WALK_PTR: + /* For local types, the destination register cannot + * become a pointer again. + */ + if (type_is_alloc(reg->type)) + return SCALAR_VALUE; /* If we found the pointer or scalar on t+off, * we're done. */ @@ -5926,8 +6358,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, * end up with two different module BTFs, but IDs point to the common type in * vmlinux BTF. 
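The btf_struct_access() rework above means that, for allocated objects, the verifier rejects loads or stores overlapping a special field and demotes any pointer loaded out of the object to a scalar. A hedged program-side illustration (assumes libbpf's bpf_helpers.h plus the selftests' bpf_obj_new()/bpf_obj_drop() wrappers over the kfuncs added in helpers.c further down):

struct foo {
	int data;
	struct bpf_list_node node;
};

SEC("tc")
int access_example(struct __sk_buff *skb)
{
	struct foo *f = bpf_obj_new(typeof(*f));

	if (!f)
		return 0;
	f->data = 42;	/* fine: plain scalar member */
	/*
	 * *(long *)&f->node = 0; would be rejected at load time with
	 * "direct access to bpf_list_node is disallowed".
	 */
	bpf_obj_drop(f);
	return 0;
}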
*/ -static bool btf_types_are_same(const struct btf *btf1, u32 id1, - const struct btf *btf2, u32 id2) +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) { if (id1 != id2) return false; @@ -6209,122 +6641,19 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr return btf_check_func_type_match(log, btf1, t1, btf2, t2); } -static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { -#ifdef CONFIG_NET - [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], - [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], - [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], -#endif -}; - -/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ -static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int rec) -{ - const struct btf_type *member_type; - const struct btf_member *member; - u32 i; - - if (!btf_type_is_struct(t)) - return false; - - for_each_member(i, t, member) { - const struct btf_array *array; - - member_type = btf_type_skip_modifiers(btf, member->type, NULL); - if (btf_type_is_struct(member_type)) { - if (rec >= 3) { - bpf_log(log, "max struct nesting depth exceeded\n"); - return false; - } - if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1)) - return false; - continue; - } - if (btf_type_is_array(member_type)) { - array = btf_type_array(member_type); - if (!array->nelems) - return false; - member_type = btf_type_skip_modifiers(btf, array->type, NULL); - if (!btf_type_is_scalar(member_type)) - return false; - continue; - } - if (!btf_type_is_scalar(member_type)) - return false; - } - return true; -} - -static bool is_kfunc_arg_mem_size(const struct btf *btf, - const struct btf_param *arg, - const struct bpf_reg_state *reg) -{ - int len, sfx_len = sizeof("__sz") - 1; - const struct btf_type *t; - const char *param_name; - - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) - return false; - - /* In the future, this can be ported to use BTF tagging */ - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len < sfx_len) - return false; - param_name += len - sfx_len; - if (strncmp(param_name, "__sz", sfx_len)) - return false; - - return true; -} - -static bool btf_is_kfunc_arg_mem_size(const struct btf *btf, - const struct btf_param *arg, - const struct bpf_reg_state *reg, - const char *name) -{ - int len, target_len = strlen(name); - const struct btf_type *t; - const char *param_name; - - t = btf_type_skip_modifiers(btf, arg->type, NULL); - if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) - return false; - - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len != target_len) - return false; - if (strcmp(param_name, name)) - return false; - - return true; -} - static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, bool ptr_to_mem_ok, - struct bpf_kfunc_arg_meta *kfunc_meta, bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - bool rel = false, kptr_get = false, trusted_args = false; - bool sleepable = false; struct bpf_verifier_log *log = &env->log; - u32 i, nargs, ref_id, ref_obj_id = 0; - bool is_kfunc = btf_is_kernel(btf); const char *func_name, *ref_tname; const struct 
btf_type *t, *ref_t; const struct btf_param *args; - int ref_regno = 0, ret; + u32 i, nargs, ref_id; + int ret; t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { @@ -6350,14 +6679,6 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - if (is_kfunc && kfunc_meta) { - /* Only kfunc can be release func */ - rel = kfunc_meta->flags & KF_RELEASE; - kptr_get = kfunc_meta->flags & KF_KPTR_GET; - trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS; - sleepable = kfunc_meta->flags & KF_SLEEPABLE; - } - /* check that BTF function arguments match actual types that the * verifier sees. */ @@ -6365,42 +6686,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; - bool obj_ptr = false; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { - if (is_kfunc && kfunc_meta) { - bool is_buf_size = false; - - /* check for any const scalar parameter of name "rdonly_buf_size" - * or "rdwr_buf_size" - */ - if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, - "rdonly_buf_size")) { - kfunc_meta->r0_rdonly = true; - is_buf_size = true; - } else if (btf_is_kfunc_arg_mem_size(btf, &args[i], reg, - "rdwr_buf_size")) - is_buf_size = true; - - if (is_buf_size) { - if (kfunc_meta->r0_size) { - bpf_log(log, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); - return -EINVAL; - } - - if (!tnum_is_const(reg->var_off)) { - bpf_log(log, "R%d is not a const\n", regno); - return -EINVAL; - } - - kfunc_meta->r0_size = reg->var_off.value; - ret = mark_chain_precision(env, regno); - if (ret) - return ret; - } - } - if (reg->type == SCALAR_VALUE) continue; bpf_log(log, "R%d is not a scalar\n", regno); @@ -6413,88 +6701,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } - /* These register types have special constraints wrt ref_obj_id - * and offset checks. The rest of trusted args don't. - */ - obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID || - reg2btf_ids[base_type(reg->type)]; - - /* Check if argument must be a referenced pointer, args + i has - * been verified to be a pointer (after skipping modifiers). - * PTR_TO_CTX is ok without having non-zero ref_obj_id. - */ - if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) { - bpf_log(log, "R%d must be referenced\n", regno); - return -EINVAL; - } - ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); - /* Trusted args have the same offset checks as release arguments */ - if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id)) - arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) return ret; - if (is_kfunc && reg->ref_obj_id) { - /* Ensure only one argument is referenced PTR_TO_BTF_ID */ - if (ref_obj_id) { - bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", - regno, reg->ref_obj_id, ref_obj_id); - return -EFAULT; - } - ref_regno = regno; - ref_obj_id = reg->ref_obj_id; - } - - /* kptr_get is only true for kfunc */ - if (i == 0 && kptr_get) { - struct btf_field *kptr_field; - - if (reg->type != PTR_TO_MAP_VALUE) { - bpf_log(log, "arg#0 expected pointer to map value\n"); - return -EINVAL; - } - - /* check_func_arg_reg_off allows var_off for - * PTR_TO_MAP_VALUE, but we need fixed offset to find - * off_desc. 
- */ - if (!tnum_is_const(reg->var_off)) { - bpf_log(log, "arg#0 must have constant offset\n"); - return -EINVAL; - } - - kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR); - if (!kptr_field || kptr_field->type != BPF_KPTR_REF) { - bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n", - reg->off + reg->var_off.value); - return -EINVAL; - } - - if (!btf_type_is_ptr(ref_t)) { - bpf_log(log, "arg#0 BTF type must be a double pointer\n"); - return -EINVAL; - } - - ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id); - ref_tname = btf_name_by_offset(btf, ref_t->name_off); - - if (!btf_type_is_struct(ref_t)) { - bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", - func_name, i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - if (!btf_struct_ids_match(log, btf, ref_id, 0, kptr_field->kptr.btf, - kptr_field->kptr.btf_id, true)) { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n", - func_name, i, btf_type_str(ref_t), ref_tname); - return -EINVAL; - } - /* rest of the arguments can be anything, like normal kfunc */ - } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. */ @@ -6504,109 +6718,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, i, btf_type_str(t)); return -EINVAL; } - } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || - (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { - const struct btf_type *reg_ref_t; - const struct btf *reg_btf; - const char *reg_ref_tname; - u32 reg_ref_id; - - if (!btf_type_is_struct(ref_t)) { - bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", - func_name, i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (reg->type == PTR_TO_BTF_ID) { - reg_btf = reg->btf; - reg_ref_id = reg->btf_id; - } else { - reg_btf = btf_vmlinux; - reg_ref_id = *reg2btf_ids[base_type(reg->type)]; - } - - reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, - ®_ref_id); - reg_ref_tname = btf_name_by_offset(reg_btf, - reg_ref_t->name_off); - if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, - reg->off, btf, ref_id, - trusted_args || (rel && reg->ref_obj_id))) { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", - func_name, i, - btf_type_str(ref_t), ref_tname, - regno, btf_type_str(reg_ref_t), - reg_ref_tname); - return -EINVAL; - } } else if (ptr_to_mem_ok && processing_call) { const struct btf_type *resolve_ret; u32 type_size; - if (is_kfunc) { - bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], ®s[regno + 1]); - bool arg_dynptr = btf_type_is_struct(ref_t) && - !strcmp(ref_tname, - stringify_struct(bpf_dynptr_kern)); - - /* Permit pointer to mem, but only when argument - * type is pointer to scalar, or struct composed - * (recursively) of scalars. - * When arg_mem_size is true, the pointer can be - * void *. - * Also permit initialized local dynamic pointers. - */ - if (!btf_type_is_scalar(ref_t) && - !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && - !arg_dynptr && - (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { - bpf_log(log, - "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", - i, btf_type_str(ref_t), ref_tname, arg_mem_size ? 
"void, " : ""); - return -EINVAL; - } - - if (arg_dynptr) { - if (reg->type != PTR_TO_STACK) { - bpf_log(log, "arg#%d pointer type %s %s not to stack\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (!is_dynptr_reg_valid_init(env, reg)) { - bpf_log(log, - "arg#%d pointer type %s %s must be valid and initialized\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - if (!is_dynptr_type_expected(env, reg, - ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { - bpf_log(log, - "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", - i, btf_type_str(ref_t), - ref_tname); - return -EINVAL; - } - - continue; - } - - /* Check for mem, len pair */ - if (arg_mem_size) { - if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) { - bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", - i, i + 1); - return -EINVAL; - } - i++; - continue; - } - } - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); if (IS_ERR(resolve_ret)) { bpf_log(log, @@ -6619,36 +6734,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (check_mem_reg(env, reg, regno, type_size)) return -EINVAL; } else { - bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i, - is_kfunc ? "kernel " : "", func_name, func_id); + bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, + func_name, func_id); return -EINVAL; } } - /* Either both are set, or neither */ - WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); - /* We already made sure ref_obj_id is set only for one argument. We do - * allow (!rel && ref_obj_id), so that passing such referenced - * PTR_TO_BTF_ID to other kfuncs works. Note that rel is only true when - * is_kfunc is true. - */ - if (rel && !ref_obj_id) { - bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", - func_name); - return -EINVAL; - } - - if (sleepable && !env->prog->aux->sleepable) { - bpf_log(log, "kernel function %s is sleepable but the program is not\n", - func_name); - return -EINVAL; - } - - if (kfunc_meta && ref_obj_id) - kfunc_meta->ref_obj_id = ref_obj_id; - - /* returns argument register number > 0 in case of reference release kfunc */ - return rel ? ref_regno : 0; + return 0; } /* Compare BTF of a function declaration with given bpf_reg_state. @@ -6678,7 +6770,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, false); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. @@ -6721,7 +6813,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, NULL, true); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. 
@@ -6732,14 +6824,6 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return err; } -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta) -{ - return btf_check_func_arg_match(env, btf, func_id, regs, true, meta, true); -} - /* Convert BTF of a function into bpf_reg_state if possible * Returns: * EFAULT - there is a verifier bug. Abort verification. @@ -7122,23 +7206,6 @@ bool btf_is_module(const struct btf *btf) return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0; } -static int btf_id_cmp_func(const void *a, const void *b) -{ - const int *pa = a, *pb = b; - - return *pa - *pb; -} - -bool btf_id_set_contains(const struct btf_id_set *set, u32 id) -{ - return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; -} - -static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) -{ - return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); -} - enum { BTF_MODULE_F_LIVE = (1 << 0), }; @@ -7499,6 +7566,8 @@ static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) { switch (prog_type) { + case BPF_PROG_TYPE_UNSPEC: + return BTF_KFUNC_HOOK_COMMON; case BPF_PROG_TYPE_XDP: return BTF_KFUNC_HOOK_XDP; case BPF_PROG_TYPE_SCHED_CLS: @@ -7527,6 +7596,11 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id) { enum btf_kfunc_hook hook; + u32 *kfunc_flags; + + kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id); + if (kfunc_flags) + return kfunc_flags; hook = bpf_prog_type_to_kfunc_hook(prog_type); return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index fbc6167c3599..06989d278846 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -164,16 +164,30 @@ static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; struct cgroup *cgrp = aux->cgroup.start; + /* bpf_iter_attach_cgroup() has already acquired an extra reference + * for the start cgroup, but the reference may be released after + * cgroup_iter_seq_init(), so acquire another reference for the + * start cgroup. + */ p->start_css = &cgrp->self; + css_get(p->start_css); p->terminate = false; p->visited_all = false; p->order = aux->cgroup.order; return 0; } +static void cgroup_iter_seq_fini(void *priv) +{ + struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv; + + css_put(p->start_css); +} + static const struct bpf_iter_seq_info cgroup_iter_seq_info = { .seq_ops = &cgroup_iter_seq_ops, .init_seq_private = cgroup_iter_seq_init, + .fini_seq_private = cgroup_iter_seq_fini, .seq_priv_size = sizeof(struct cgroup_iter_priv), }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9c16338bcbe8..2e57fc839a5c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -34,6 +34,7 @@ #include <linux/log2.h> #include <linux/bpf_verifier.h> #include <linux/nodemask.h> +#include <linux/bpf_mem_alloc.h> #include <asm/barrier.h> #include <asm/unaligned.h> @@ -60,6 +61,9 @@ #define CTX regs[BPF_REG_CTX] #define IMM insn->imm +struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; + /* No hurry in this branch * * Exported for the bpf jit load helper. 
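With BTF_KFUNC_HOOK_COMMON above, a kfunc set registered under BPF_PROG_TYPE_UNSPEC is consulted before the per-program-type hook, so its kfuncs become callable from every program type. A minimal registration sketch (all my_* identifiers are hypothetical):

BTF_SET8_START(my_common_ids)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)	/* example entry */
BTF_SET8_END(my_common_ids)

static const struct btf_kfunc_id_set my_common_set = {
	.owner = THIS_MODULE,
	.set = &my_common_ids,
};

static int __init my_kfunc_init(void)
{
	/* BPF_PROG_TYPE_UNSPEC maps to BTF_KFUNC_HOOK_COMMON */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &my_common_set);
}
late_initcall(my_kfunc_init);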
@@ -2746,6 +2750,18 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len) return -ENOTSUPP; } +#ifdef CONFIG_BPF_SYSCALL +static int __init bpf_global_ma_init(void) +{ + int ret; + + ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); + bpf_global_ma_set = !ret; + return ret; +} +late_initcall(bpf_global_ma_init); +#endif + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 6b6a78c04b90..e0b2d016f0bf 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -667,9 +667,9 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } -static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) +static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) { - return __bpf_xdp_redirect_map(map, ifindex, flags, 0, + return __bpf_xdp_redirect_map(map, index, flags, 0, __cpu_map_lookup_elem); } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f9a87dcc5535..d01e4c55b376 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -992,14 +992,14 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, map, key, value, map_flags); } -static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) +static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, __dev_map_lookup_elem); } -static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) +static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 50d254cd0709..5aa2b5525f79 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1511,7 +1511,6 @@ static void htab_map_free(struct bpf_map *map) prealloc_destroy(htab); } - bpf_map_free_record(map); free_percpu(htab->extra_elems); bpf_map_area_free(htab->buckets); bpf_mem_alloc_destroy(&htab->pcpu_ma); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 283f55bbeb70..a5a511430f2a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/bpf-cgroup.h> +#include <linux/cgroup.h> #include <linux/rcupdate.h> #include <linux/random.h> #include <linux/smp.h> @@ -19,6 +20,7 @@ #include <linux/proc_ns.h> #include <linux/security.h> #include <linux/btf_ids.h> +#include <linux/bpf_mem_alloc.h> #include "../../lib/kstrtox.h" @@ -336,6 +338,7 @@ const struct bpf_func_proto bpf_spin_lock_proto = { .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_SPIN_LOCK, + .arg1_btf_id = BPF_PTR_POISON, }; static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) @@ -358,6 +361,7 @@ const struct bpf_func_proto bpf_spin_unlock_proto = { .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_SPIN_LOCK, + .arg1_btf_id = BPF_PTR_POISON, }; void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, @@ -657,6 +661,7 @@ BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, const struct bpf_func_proto bpf_copy_from_user_proto = { .func = bpf_copy_from_user, .gpl_only = false, + .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, @@ -687,6 +692,7 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, 
const struct bpf_func_proto bpf_copy_from_user_task_proto = { .func = bpf_copy_from_user_task, .gpl_only = true, + .might_sleep = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, @@ -1706,20 +1712,367 @@ bpf_base_func_proto(enum bpf_func_id func_id) } } -BTF_SET8_START(tracing_btf_ids) +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock) +{ + struct list_head *head = list_head, *orig_head = list_head; + + BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head)); + BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head)); + + /* Do the actual list draining outside the lock to not hold the lock for + * too long, and also prevent deadlocks if tracing programs end up + * executing on entry/exit of functions called inside the critical + * section, and end up doing map ops that call bpf_list_head_free for + * the same map value again. + */ + __bpf_spin_lock_irqsave(spin_lock); + if (!head->next || list_empty(head)) + goto unlock; + head = head->next; +unlock: + INIT_LIST_HEAD(orig_head); + __bpf_spin_unlock_irqrestore(spin_lock); + + while (head != orig_head) { + void *obj = head; + + obj -= field->list_head.node_offset; + head = head->next; + /* The contained type can also have resources, including a + * bpf_list_head which needs to be freed. + */ + bpf_obj_free_fields(field->list_head.value_rec, obj); + /* bpf_mem_free requires migrate_disable(), since we can be + * called from map free path as well apart from BPF program (as + * part of map ops doing bpf_obj_free_fields). + */ + migrate_disable(); + bpf_mem_free(&bpf_global_ma, obj); + migrate_enable(); + } +} + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + u64 size = local_type_id__k; + void *p; + + p = bpf_mem_alloc(&bpf_global_ma, size); + if (!p) + return NULL; + if (meta) + bpf_obj_init(meta->field_offs, p); + return p; +} + +void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) +{ + struct btf_struct_meta *meta = meta__ign; + void *p = p__alloc; + + if (meta) + bpf_obj_free_fields(meta->record, p); + bpf_mem_free(&bpf_global_ma, p); +} + +static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) +{ + struct list_head *n = (void *)node, *h = (void *)head; + + if (unlikely(!h->next)) + INIT_LIST_HEAD(h); + if (unlikely(!n->next)) + INIT_LIST_HEAD(n); + tail ? list_add_tail(n, h) : list_add(n, h); +} + +void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) +{ + return __bpf_list_add(node, head, false); +} + +void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) +{ + return __bpf_list_add(node, head, true); +} + +static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) +{ + struct list_head *n, *h = (void *)head; + + if (unlikely(!h->next)) + INIT_LIST_HEAD(h); + if (list_empty(h)) + return NULL; + n = tail ? h->prev : h->next; + list_del_init(n); + return (struct bpf_list_node *)n; +} + +struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) +{ + return __bpf_list_del(head, false); +} + +struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) +{ + return __bpf_list_del(head, true); +} + +/** + * bpf_task_acquire - Acquire a reference to a task. 
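The list kfuncs above are the building blocks for linked lists owned by BPF programs, backed by the bpf_global_ma allocator initialized in core.c earlier. A hedged end-to-end sketch on the program side (assumes the selftests-style bpf_experimental.h wrappers bpf_obj_new(), bpf_obj_drop(), __contains() and a private() section macro; error handling abbreviated):

struct elem {
	int val;
	struct bpf_list_node node;
};

private(A) struct bpf_spin_lock glock;
private(A) struct bpf_list_head ghead __contains(elem, node);

SEC("tc")
int list_push_pop(struct __sk_buff *skb)
{
	struct bpf_list_node *n;
	struct elem *e;

	e = bpf_obj_new(typeof(*e));	/* wraps bpf_obj_new_impl() */
	if (!e)
		return 0;
	e->val = 42;

	bpf_spin_lock(&glock);
	bpf_list_push_back(&ghead, &e->node);	/* list takes ownership */
	n = bpf_list_pop_front(&ghead);
	bpf_spin_unlock(&glock);

	if (n) {
		e = container_of(n, struct elem, node);
		bpf_obj_drop(e);	/* wraps bpf_obj_drop_impl() */
	}
	return 0;
}

Anything still on the list when the enclosing map value or object is destroyed is drained by bpf_list_head_free() above.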
A task acquired by this + * kfunc which is not stored in a map as a kptr, must be released by calling + * bpf_task_release(). + * @p: The task on which a reference is being acquired. + */ +struct task_struct *bpf_task_acquire(struct task_struct *p) +{ + refcount_inc(&p->rcu_users); + return p; +} + +/** + * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task + * kptr acquired by this kfunc which is not subsequently stored in a map, must + * be released by calling bpf_task_release(). + * @pp: A pointer to a task kptr on which a reference is being acquired. + */ +struct task_struct *bpf_task_kptr_get(struct task_struct **pp) +{ + struct task_struct *p; + + rcu_read_lock(); + p = READ_ONCE(*pp); + + /* Another context could remove the task from the map and release it at + * any time, including after we've done the lookup above. This is safe + * because we're in an RCU read region, so the task is guaranteed to + * remain valid until at least the rcu_read_unlock() below. + */ + if (p && !refcount_inc_not_zero(&p->rcu_users)) + /* If the task had been removed from the map and freed as + * described above, refcount_inc_not_zero() will return false. + * The task will be freed at some point after the current RCU + * gp has ended, so just return NULL to the user. + */ + p = NULL; + rcu_read_unlock(); + + return p; +} + +/** + * bpf_task_release - Release the reference acquired on a struct task_struct *. + * If this kfunc is invoked in an RCU read region, the task_struct is + * guaranteed to not be freed until the current grace period has ended, even if + * its refcount drops to 0. + * @p: The task on which a reference is being released. + */ +void bpf_task_release(struct task_struct *p) +{ + if (!p) + return; + + put_task_struct_rcu_user(p); +} + +#ifdef CONFIG_CGROUPS +/** + * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by + * this kfunc which is not stored in a map as a kptr, must be released by + * calling bpf_cgroup_release(). + * @cgrp: The cgroup on which a reference is being acquired. + */ +struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp) +{ + cgroup_get(cgrp); + return cgrp; +} + +/** + * bpf_cgroup_kptr_get - Acquire a reference on a struct cgroup kptr. A cgroup + * kptr acquired by this kfunc which is not subsequently stored in a map, must + * be released by calling bpf_cgroup_release(). + * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired. + */ +struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp) +{ + struct cgroup *cgrp; + + rcu_read_lock(); + /* Another context could remove the cgroup from the map and release it + * at any time, including after we've done the lookup above. This is + * safe because we're in an RCU read region, so the cgroup is + * guaranteed to remain valid until at least the rcu_read_unlock() + * below. + */ + cgrp = READ_ONCE(*cgrpp); + + if (cgrp && !cgroup_tryget(cgrp)) + /* If the cgroup had been removed from the map and freed as + * described above, cgroup_tryget() will return false. The + * cgroup will be freed at some point after the current RCU gp + * has ended, so just return NULL to the user. + */ + cgrp = NULL; + rcu_read_unlock(); + + return cgrp; +} + +/** + * bpf_cgroup_release - Release the reference acquired on a struct cgroup *. + * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to + * not be freed until the current grace period has ended, even if its refcount + * drops to 0. + * @cgrp: The cgroup on which a reference is being released. 
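bpf_task_kptr_get() above is the read side of the reference-counted kptr pattern; storing goes through the bpf_kptr_xchg() helper. A hedged sketch of both sides (assumes this era's __kptr_ref type tag from libbpf's bpf_helpers.h; stash_task() and peek_task() are hypothetical):

struct task_value {
	struct task_struct __kptr_ref *task;
};

/* Store: move an acquired reference into a map value. */
static void stash_task(struct task_value *v, struct task_struct *p)
{
	struct task_struct *acquired, *old;

	acquired = bpf_task_acquire(p);
	old = bpf_kptr_xchg(&v->task, acquired);
	if (old)
		bpf_task_release(old);
}

/* Peek: take a temporary reference without emptying the map slot. */
static void peek_task(struct task_value *v)
{
	struct task_struct *t = bpf_task_kptr_get(&v->task);

	if (!t)
		return;
	/* ... use t; it cannot be freed while we hold the reference ... */
	bpf_task_release(t);
}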
+ */ +void bpf_cgroup_release(struct cgroup *cgrp) +{ + if (!cgrp) + return; + + cgroup_put(cgrp); +} + +/** + * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor + * array. A cgroup returned by this kfunc which is not subsequently stored in a + * map, must be released by calling bpf_cgroup_release(). + * @cgrp: The cgroup for which we're performing a lookup. + * @level: The level of ancestor to look up. + */ +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) +{ + struct cgroup *ancestor; + + if (level > cgrp->level || level < 0) + return NULL; + + ancestor = cgrp->ancestors[level]; + cgroup_get(ancestor); + return ancestor; +} +#endif /* CONFIG_CGROUPS */ + +/** + * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up + * in the root pid namespace idr. If a task is returned, it must either be + * stored in a map, or released with bpf_task_release(). + * @pid: The pid of the task being looked up. + */ +struct task_struct *bpf_task_from_pid(s32 pid) +{ + struct task_struct *p; + + rcu_read_lock(); + p = find_task_by_pid_ns(pid, &init_pid_ns); + if (p) + bpf_task_acquire(p); + rcu_read_unlock(); + + return p; +} + +void *bpf_cast_to_kern_ctx(void *obj) +{ + return obj; +} + +void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) +{ + return obj__ign; +} + +void bpf_rcu_read_lock(void) +{ + rcu_read_lock(); +} + +void bpf_rcu_read_unlock(void) +{ + rcu_read_unlock(); +} + +__diag_pop(); + +BTF_SET8_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif -BTF_SET8_END(tracing_btf_ids) +BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_list_push_front) +BTF_ID_FLAGS(func, bpf_list_push_back) +BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) +#ifdef CONFIG_CGROUPS +BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) +#endif +BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) +BTF_SET8_END(generic_btf_ids) + +static const struct btf_kfunc_id_set generic_kfunc_set = { + .owner = THIS_MODULE, + .set = &generic_btf_ids, +}; + + +BTF_ID_LIST(generic_dtor_ids) +BTF_ID(struct, task_struct) +BTF_ID(func, bpf_task_release) +#ifdef CONFIG_CGROUPS +BTF_ID(struct, cgroup) +BTF_ID(func, bpf_cgroup_release) +#endif -static const struct btf_kfunc_id_set tracing_kfunc_set = { +BTF_SET8_START(common_btf_ids) +BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) +BTF_ID_FLAGS(func, bpf_rdonly_cast) +BTF_ID_FLAGS(func, bpf_rcu_read_lock) +BTF_ID_FLAGS(func, bpf_rcu_read_unlock) +BTF_SET8_END(common_btf_ids) + +static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, - .set = &tracing_btf_ids, + .set = &common_btf_ids, }; static int __init kfunc_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set); + int ret; + const struct btf_id_dtor_kfunc generic_dtors[] = { + { + .btf_id = generic_dtor_ids[0], + .kfunc_btf_id = generic_dtor_ids[1] + }, +#ifdef 
CONFIG_CGROUPS + { + .btf_id = generic_dtor_ids[2], + .kfunc_btf_id = generic_dtor_ids[3] + }, +#endif + }; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); + ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, + ARRAY_SIZE(generic_dtors), + THIS_MODULE); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); } late_initcall(kfunc_init); diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 8ca0cca39d49..38136ec4e095 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) struct bpf_map *inner_map, *inner_map_meta; u32 inner_map_meta_size; struct fd f; + int ret; f = fdget(inner_map_ufd); inner_map = __bpf_map_get(f); @@ -20,18 +21,13 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) /* Does not support >1 level map-in-map */ if (inner_map->inner_map_meta) { - fdput(f); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto put; } if (!inner_map->ops->map_meta_equal) { - fdput(f); - return ERR_PTR(-ENOTSUPP); - } - - if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) { - fdput(f); - return ERR_PTR(-ENOTSUPP); + ret = -ENOTSUPP; + goto put; } inner_map_meta_size = sizeof(*inner_map_meta); @@ -41,8 +37,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { - fdput(f); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto put; } inner_map_meta->map_type = inner_map->map_type; @@ -50,15 +46,33 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; + inner_map_meta->record = btf_record_dup(inner_map->record); if (IS_ERR(inner_map_meta->record)) { /* btf_record_dup returns NULL or valid pointer in case of * invalid/empty/valid, but ERR_PTR in case of errors. During * equality NULL or IS_ERR is equivalent. */ - fdput(f); - return ERR_CAST(inner_map_meta->record); + ret = PTR_ERR(inner_map_meta->record); + goto free; } + if (inner_map_meta->record) { + struct btf_field_offs *field_offs; + /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always + * valid. + */ + field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN); + if (!field_offs) { + ret = -ENOMEM; + goto free_rec; + } + inner_map_meta->field_offs = field_offs; + } + /* Note: We must use the same BTF, as we also used btf_record_dup above + * which relies on BTF being same for both maps, as some members like + * record->fields.list_head have pointers like value_rec pointing into + * inner_map->btf. 
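The kfunc_init() shown above is the registration pattern every kfunc provider uses: collect BTF IDs into a flagged set, wrap it in a btf_kfunc_id_set, and register it per program type. A hedged sketch for a hypothetical built-in kfunc (my_kfunc and my_kfunc_set are invented names)::

    __diag_push();
    __diag_ignore_all("-Wmissing-prototypes",
                      "kfuncs are not expected to have prototypes");

    /* hypothetical kfunc, callable from tracing programs once registered */
    u64 my_kfunc(u64 a, u64 b)
    {
        return a + b;
    }

    __diag_pop();

    BTF_SET8_START(my_kfunc_ids)
    BTF_ID_FLAGS(func, my_kfunc)
    BTF_SET8_END(my_kfunc_ids)

    static const struct btf_kfunc_id_set my_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &my_kfunc_ids,
    };

    static int __init my_kfunc_init(void)
    {
        return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &my_kfunc_set);
    }
    late_initcall(my_kfunc_init);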
+ */ if (inner_map->btf) { btf_get(inner_map->btf); inner_map_meta->btf = inner_map->btf; @@ -74,10 +88,18 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) fdput(f); return inner_map_meta; +free_rec: + btf_record_free(inner_map_meta->record); +free: + kfree(inner_map_meta); +put: + fdput(f); + return ERR_PTR(ret); } void bpf_map_meta_free(struct bpf_map *map_meta) { + kfree(map_meta->field_offs); bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 9e832acf4692..80f4b4d88aaf 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -447,7 +447,7 @@ BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags) const struct bpf_func_proto bpf_ringbuf_reserve_proto = { .func = bpf_ringbuf_reserve, - .ret_type = RET_PTR_TO_ALLOC_MEM_OR_NULL, + .ret_type = RET_PTR_TO_RINGBUF_MEM_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, @@ -490,7 +490,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_submit_proto = { .func = bpf_ringbuf_submit, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, + .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; @@ -503,7 +503,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags) const struct bpf_func_proto bpf_ringbuf_discard_proto = { .func = bpf_ringbuf_discard, .ret_type = RET_VOID, - .arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE, + .arg1_type = ARG_PTR_TO_RINGBUF_MEM | OBJ_RELEASE, .arg2_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85532d301124..35972afb6850 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -175,8 +175,8 @@ static void maybe_wait_bpf_programs(struct bpf_map *map) synchronize_rcu(); } -static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, - void *value, __u64 flags) +static int bpf_map_update_value(struct bpf_map *map, struct file *map_file, + void *key, void *value, __u64 flags) { int err; @@ -190,7 +190,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, map->map_type == BPF_MAP_TYPE_SOCKMAP) { return sock_map_update_elem_sys(map, key, value, flags); } else if (IS_FD_PROG_ARRAY(map)) { - return bpf_fd_array_map_update_elem(map, f.file, key, value, + return bpf_fd_array_map_update_elem(map, map_file, key, value, flags); } @@ -205,12 +205,12 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, flags); } else if (IS_FD_ARRAY(map)) { rcu_read_lock(); - err = bpf_fd_array_map_update_elem(map, f.file, key, value, + err = bpf_fd_array_map_update_elem(map, map_file, key, value, flags); rcu_read_unlock(); } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { rcu_read_lock(); - err = bpf_fd_htab_map_update_elem(map, f.file, key, value, + err = bpf_fd_htab_map_update_elem(map, map_file, key, value, flags); rcu_read_unlock(); } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { @@ -536,6 +536,10 @@ void btf_record_free(struct btf_record *rec) module_put(rec->fields[i].kptr.module); btf_put(rec->fields[i].kptr.btf); break; + case BPF_LIST_HEAD: + case BPF_LIST_NODE: + /* Nothing to release for bpf_list_head */ + break; default: WARN_ON_ONCE(1); continue; @@ -578,6 +582,10 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) goto free; } break; + case BPF_LIST_HEAD: + case BPF_LIST_NODE: + /* Nothing to acquire 
for bpf_list_head */ + break; default: ret = -EFAULT; WARN_ON_ONCE(1); @@ -603,6 +611,20 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r if (rec_a->cnt != rec_b->cnt) return false; size = offsetof(struct btf_record, fields[rec_a->cnt]); + /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused + * members are zeroed out. So memcmp is safe to do without worrying + * about padding/unused fields. + * + * While spin_lock, timer, and kptr have no relation to map BTF, + * list_head metadata is specific to map BTF, the btf and value_rec + * members in particular. btf is the map BTF, while value_rec points to + * btf_record in that map BTF. + * + * So while by default, we don't rely on the map BTF (which the records + * were parsed from) matching for both records, which is not backwards + * compatible, in case list_head is part of it, we implicitly rely on + * that by way of depending on memcmp succeeding for it. + */ return !memcmp(rec_a, rec_b, size); } @@ -637,6 +659,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) case BPF_KPTR_REF: field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0)); break; + case BPF_LIST_HEAD: + if (WARN_ON_ONCE(rec->spin_lock_off < 0)) + continue; + bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off); + break; + case BPF_LIST_NODE: + break; default: WARN_ON_ONCE(1); continue; @@ -648,14 +677,24 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); + struct btf_field_offs *foffs = map->field_offs; + struct btf_record *rec = map->record; security_bpf_map_free(map); - kfree(map->field_offs); bpf_map_release_memcg(map); - /* implementation dependent freeing, map_free callback also does - * bpf_map_free_record, if needed. - */ + /* implementation dependent freeing */ map->ops->map_free(map); + /* Delay freeing of field_offs and btf_record for maps, as map_free + * callback usually needs access to them. It is better to do it here + * than require each callback to do the free itself manually. + * + * Note that the btf_record stashed in map->inner_map_meta->record was + * already freed using the map_free callback for map in map case which + * eventually calls bpf_map_free_meta, since inner_map_meta is only a + * template bpf_map struct used during verification. 
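The BPF_LIST_HEAD/BPF_LIST_NODE cases being added to btf_record_free(), btf_record_dup() and bpf_obj_free_fields() correspond to fields that btf_parse_fields() discovers in map value BTF. On the program side, those fields take roughly the following shape (type and map names are illustrative; the __contains() decl-tag macro mirrors the one in the selftests' bpf_experimental.h)::

    #define __contains(name, node) \
        __attribute__((btf_decl_tag("contains:" #name ":" #node)))

    struct foo {
        struct bpf_list_node node;
        int data;
    };

    struct map_value {
        struct bpf_spin_lock lock;
        struct bpf_list_head head __contains(foo, node);
    };

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct map_value);
    } array_map SEC(".maps");

When such a map is freed, the bpf_obj_free_fields() hunk above is what drains any elements still on the list, using the co-located spin lock at rec->spin_lock_off.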
+ */ + kfree(foffs); + btf_record_free(rec); } static void bpf_map_put_uref(struct bpf_map *map) @@ -965,7 +1004,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (!value_type || value_size != map->value_size) return -EINVAL; - map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR, + map->record = btf_parse_fields(btf, value_type, + BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -998,7 +1038,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) { - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; goto free_map_tab; } break; @@ -1012,6 +1052,14 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, goto free_map_tab; } break; + case BPF_LIST_HEAD: + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) { + ret = -EOPNOTSUPP; + goto free_map_tab; + } + break; default: /* Fail if map_type checks are missing for a field type */ ret = -EOPNOTSUPP; @@ -1020,6 +1068,10 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, } } + ret = btf_check_and_fixup_fields(btf, map->record); + if (ret < 0) + goto free_map_tab; + if (map->ops->map_check_btf) { ret = map->ops->map_check_btf(map, btf, key_type, value_type); if (ret < 0) @@ -1390,7 +1442,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) goto free_key; } - err = bpf_map_update_value(map, f, key, value, attr->flags); + err = bpf_map_update_value(map, f.file, key, value, attr->flags); kvfree(value); free_key: @@ -1576,16 +1628,14 @@ int generic_map_delete_batch(struct bpf_map *map, return err; } -int generic_map_update_batch(struct bpf_map *map, +int generic_map_update_batch(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr) { void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); u32 value_size, cp, max_count; - int ufd = attr->batch.map_fd; void *key, *value; - struct fd f; int err = 0; if (attr->batch.elem_flags & ~BPF_F_LOCK) @@ -1612,7 +1662,6 @@ int generic_map_update_batch(struct bpf_map *map, return -ENOMEM; } - f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ for (cp = 0; cp < max_count; cp++) { err = -EFAULT; if (copy_from_user(key, keys + cp * map->key_size, @@ -1620,7 +1669,7 @@ int generic_map_update_batch(struct bpf_map *map, copy_from_user(value, values + cp * value_size, value_size)) break; - err = bpf_map_update_value(map, f, key, value, + err = bpf_map_update_value(map, map_file, key, value, attr->batch.elem_flags); if (err) @@ -1633,7 +1682,6 @@ int generic_map_update_batch(struct bpf_map *map, kvfree(value); kvfree(key); - fdput(f); return err; } @@ -4426,13 +4474,13 @@ put_file: #define BPF_MAP_BATCH_LAST_FIELD batch.flags -#define BPF_DO_BATCH(fn) \ +#define BPF_DO_BATCH(fn, ...) 
\ do { \ if (!fn) { \ err = -ENOTSUPP; \ goto err_put; \ } \ - err = fn(map, attr, uattr); \ + err = fn(__VA_ARGS__); \ } while (0) static int bpf_map_do_batch(const union bpf_attr *attr, @@ -4466,13 +4514,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, } if (cmd == BPF_MAP_LOOKUP_BATCH) - BPF_DO_BATCH(map->ops->map_lookup_batch); + BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr); else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) - BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); + BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr); else if (cmd == BPF_MAP_UPDATE_BATCH) - BPF_DO_BATCH(map->ops->map_update_batch); + BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr); else - BPF_DO_BATCH(map->ops->map_delete_batch); + BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); err_put: if (has_write) bpf_map_write_active_dec(map); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 648a549c41ae..4e7f1d085e53 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -451,10 +451,24 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON; } +static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) +{ + struct btf_record *rec = NULL; + struct btf_struct_meta *meta; + + if (reg->type == PTR_TO_MAP_VALUE) { + rec = reg->map_ptr->record; + } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { + meta = btf_find_struct_meta(reg->btf, reg->btf_id); + if (meta) + rec = meta->record; + } + return rec; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { - return reg->type == PTR_TO_MAP_VALUE && - btf_record_has_field(reg->map_ptr->record, BPF_SPIN_LOCK); + return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); } static bool type_is_rdonly_mem(u32 type) @@ -513,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id) func_id == BPF_FUNC_user_ringbuf_drain; } +static bool is_storage_get_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_sk_storage_get || + func_id == BPF_FUNC_inode_storage_get || + func_id == BPF_FUNC_task_storage_get || + func_id == BPF_FUNC_cgrp_storage_get; +} + static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, const struct bpf_map *map) { @@ -543,7 +565,7 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) { - char postfix[16] = {0}, prefix[32] = {0}; + char postfix[16] = {0}, prefix[64] = {0}; static const char * const str[] = { [NOT_INIT] = "?", [SCALAR_VALUE] = "scalar", @@ -575,16 +597,15 @@ static const char *reg_type_str(struct bpf_verifier_env *env, strncpy(postfix, "_or_null", 16); } - if (type & MEM_RDONLY) - strncpy(prefix, "rdonly_", 32); - if (type & MEM_ALLOC) - strncpy(prefix, "alloc_", 32); - if (type & MEM_USER) - strncpy(prefix, "user_", 32); - if (type & MEM_PERCPU) - strncpy(prefix, "percpu_", 32); - if (type & PTR_UNTRUSTED) - strncpy(prefix, "untrusted_", 32); + snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", + type & MEM_RDONLY ? "rdonly_" : "", + type & MEM_RINGBUF ? "ringbuf_" : "", + type & MEM_USER ? "user_" : "", + type & MEM_PERCPU ? "percpu_" : "", + type & MEM_RCU ? "rcu_" : "", + type & PTR_UNTRUSTED ? "untrusted_" : "", + type & PTR_TRUSTED ? 
"trusted_" : "" + ); snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); @@ -1010,9 +1031,9 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < bytes) { + if (ksize(dst) < ksize(src)) { kfree(dst); - dst = kmalloc_track_caller(bytes, flags); + dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); if (!dst) return NULL; } @@ -1029,12 +1050,14 @@ out: */ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { + size_t alloc_size; void *new_arr; if (!new_n || old_n == new_n) goto out; - new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + alloc_size = kmalloc_size_roundup(size_mul(new_n, size)); + new_arr = krealloc(arr, alloc_size, GFP_KERNEL); if (!new_arr) { kfree(arr); return NULL; @@ -1206,8 +1229,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->frame[i] = NULL; } dst_state->speculative = src->speculative; + dst_state->active_rcu_lock = src->active_rcu_lock; dst_state->curframe = src->curframe; - dst_state->active_spin_lock = src->active_spin_lock; + dst_state->active_lock.ptr = src->active_lock.ptr; + dst_state->active_lock.id = src->active_lock.id; dst_state->branches = src->branches; dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; @@ -2506,9 +2531,11 @@ static int push_jmp_history(struct bpf_verifier_env *env, { u32 cnt = cur->jmp_history_cnt; struct bpf_idx_pair *p; + size_t alloc_size; cnt++; - p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); + alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); + p = krealloc(cur->jmp_history, alloc_size, GFP_USER); if (!p) return -ENOMEM; p[cnt - 1].idx = env->insn_idx; @@ -3844,7 +3871,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno) { const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); - int perm_flags = PTR_MAYBE_NULL; + int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED; const char *reg_name = ""; /* Only unreferenced case accepts untrusted pointers */ @@ -4241,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_trusted_reg(const struct bpf_reg_state *reg) +{ + /* A referenced register is always trusted. */ + if (reg->ref_obj_id) + return true; + + /* If a register is not referenced, it is trusted if it has the + * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the + * other type modifiers may be safe, but we elect to take an opt-in + * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are + * not. + * + * Eventually, we should make PTR_TRUSTED the single source of truth + * for whether a register is trusted. 
+ */ + return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS && + !bpf_type_has_unsafe_modifiers(reg->type); +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -4687,17 +4733,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EACCES; } - if (env->ops->btf_struct_access) { - ret = env->ops->btf_struct_access(&env->log, reg->btf, t, - off, size, atype, &btf_id, &flag); + if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) { + if (!btf_is_kernel(reg->btf)) { + verbose(env, "verifier internal error: reg->btf must be kernel btf\n"); + return -EFAULT; + } + ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); } else { - if (atype != BPF_READ) { + /* Writes are permitted with default btf_struct_access for + * program allocated objects (which always have ref_obj_id > 0), + * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. + */ + if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { verbose(env, "only read is supported\n"); return -EACCES; } - ret = btf_struct_access(&env->log, reg->btf, t, off, size, - atype, &btf_id, &flag); + if (type_is_alloc(reg->type) && !reg->ref_obj_id) { + verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n"); + return -EFAULT; + } + + ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag); } if (ret < 0) @@ -4709,6 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (type_flag(reg->type) & PTR_UNTRUSTED) flag |= PTR_UNTRUSTED; + /* By default any pointer obtained from walking a trusted pointer is + * no longer trusted except the rcu case below. + */ + flag &= ~PTR_TRUSTED; + + if (flag & MEM_RCU) { + /* Mark value register as MEM_RCU only if it is protected by + * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU + * itself can already indicate trustedness inside the rcu + * read lock region. Also mark it as PTR_TRUSTED. + */ + if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg)) + flag &= ~MEM_RCU; + else + flag |= PTR_TRUSTED; + } else if (reg->type & MEM_RCU) { + /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged + * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. 
+ */ + flag |= PTR_UNTRUSTED; + } + if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -4723,6 +4802,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, { struct bpf_reg_state *reg = regs + regno; struct bpf_map *map = reg->map_ptr; + struct bpf_reg_state map_reg; enum bpf_type_flag flag = 0; const struct btf_type *t; const char *tname; @@ -4761,7 +4841,10 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return -EACCES; } - ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag); + /* Simulate access to a PTR_TO_BTF_ID */ + memset(&map_reg, 0, sizeof(map_reg)); + mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0); + ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag); if (ret < 0) return ret; @@ -5520,8 +5603,8 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return err; } -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno) +static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno) { struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; bool may_be_null = type_may_be_null(mem_reg->type); @@ -5549,23 +5632,26 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state } /* Implementation details: - * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL + * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL. + * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL. * Two bpf_map_lookups (even with the same key) will have different reg->id. - * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after - * value_or_null->value transition, since the verifier only cares about - * the range of access to valid map value pointer and doesn't care about actual - * address of the map element. + * Two separate bpf_obj_new will also have different reg->id. + * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier + * clears reg->id after value_or_null->value transition, since the verifier only + * cares about the range of access to valid map value pointer and doesn't care + * about actual address of the map element. * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps * reg->id > 0 after value_or_null->value transition. By doing so * two bpf_map_lookups will be considered two different pointers that - * point to different bpf_spin_locks. + * point to different bpf_spin_locks. Likewise for pointers to allocated objects + * returned from bpf_obj_new. * The verifier allows taking only one bpf_spin_lock at a time to avoid * dead-locks. * Since only one bpf_spin_lock is allowed the checks are simpler than * reg_is_refcounted() logic. The verifier needs to remember only * one spin_lock instead of array of acquired_refs. - * cur_state->active_spin_lock remembers which map value element got locked - * and clears it after bpf_spin_unlock. + * cur_state->active_lock remembers which map value element or allocated + * object got locked and clears it after bpf_spin_unlock. 
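Before the linked-list additions, the common case for this logic is a lock embedded in a map value. A minimal sketch of what the verifier is tracking (illustrative names; assumes the usual vmlinux.h/bpf_helpers.h includes)::

    struct val {
        int cnt;
        struct bpf_spin_lock lock;
    };

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, struct val);
    } counter SEC(".maps");

    SEC("tc")
    int bump(struct __sk_buff *skb)
    {
        int key = 0;
        struct val *v = bpf_map_lookup_elem(&counter, &key);

        if (!v)
            return 0;
        bpf_spin_lock(&v->lock);    /* active_lock.ptr = map, .id = v's reg->id */
        v->cnt++;
        bpf_spin_unlock(&v->lock);  /* must match both ptr and id */
        return 0;
    }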
*/ static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock) @@ -5573,8 +5659,10 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; struct bpf_verifier_state *cur = env->cur_state; bool is_const = tnum_is_const(reg->var_off); - struct bpf_map *map = reg->map_ptr; u64 val = reg->var_off.value; + struct bpf_map *map = NULL; + struct btf *btf = NULL; + struct btf_record *rec; if (!is_const) { verbose(env, @@ -5582,38 +5670,78 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, regno); return -EINVAL; } - if (!map->btf) { - verbose(env, - "map '%s' has to have BTF in order to use bpf_spin_lock\n", - map->name); - return -EINVAL; + if (reg->type == PTR_TO_MAP_VALUE) { + map = reg->map_ptr; + if (!map->btf) { + verbose(env, + "map '%s' has to have BTF in order to use bpf_spin_lock\n", + map->name); + return -EINVAL; + } + } else { + btf = reg->btf; } - if (!btf_record_has_field(map->record, BPF_SPIN_LOCK)) { - verbose(env, "map '%s' has no valid bpf_spin_lock\n", map->name); + + rec = reg_btf_record(reg); + if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) { + verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local", + map ? map->name : "kptr"); return -EINVAL; } - if (map->record->spin_lock_off != val + reg->off) { + if (rec->spin_lock_off != val + reg->off) { verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n", - val + reg->off, map->record->spin_lock_off); + val + reg->off, rec->spin_lock_off); return -EINVAL; } if (is_lock) { - if (cur->active_spin_lock) { + if (cur->active_lock.ptr) { verbose(env, "Locking two bpf_spin_locks are not allowed\n"); return -EINVAL; } - cur->active_spin_lock = reg->id; + if (map) + cur->active_lock.ptr = map; + else + cur->active_lock.ptr = btf; + cur->active_lock.id = reg->id; } else { - if (!cur->active_spin_lock) { + struct bpf_func_state *fstate = cur_func(env); + void *ptr; + int i; + + if (map) + ptr = map; + else + ptr = btf; + + if (!cur->active_lock.ptr) { verbose(env, "bpf_spin_unlock without taking a lock\n"); return -EINVAL; } - if (cur->active_spin_lock != reg->id) { + if (cur->active_lock.ptr != ptr || + cur->active_lock.id != reg->id) { verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } - cur->active_spin_lock = 0; + cur->active_lock.ptr = NULL; + cur->active_lock.id = 0; + + for (i = 0; i < fstate->acquired_refs; i++) { + int err; + + /* Complain on error because this reference state cannot + * be freed before this point, as bpf_spin_lock critical + * section does not allow functions that release the + * allocated object immediately. 
+ */ + if (!fstate->refs[i].release_on_unlock) + continue; + err = release_reference(env, fstate->refs[i].id); + if (err) { + verbose(env, "failed to release release_on_unlock reference"); + return err; + } + } } return 0; } @@ -5772,6 +5900,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = { PTR_TO_TCP_SOCK, PTR_TO_XDP_SOCK, PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, }, .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], }; @@ -5785,7 +5914,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, - PTR_TO_MEM | MEM_ALLOC, + PTR_TO_MEM | MEM_RINGBUF, PTR_TO_BUF, }, }; @@ -5800,14 +5929,31 @@ static const struct bpf_reg_types int_ptr_types = { }, }; +static const struct bpf_reg_types spin_lock_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC, + } +}; + static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; -static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } }; +static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; -static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; -static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } }; +static const struct bpf_reg_types btf_ptr_types = { + .types = { + PTR_TO_BTF_ID, + PTR_TO_BTF_ID | PTR_TRUSTED, + PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED, + }, +}; +static const struct bpf_reg_types percpu_btf_ptr_types = { + .types = { + PTR_TO_BTF_ID | MEM_PERCPU, + PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, + } +}; static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; @@ -5836,7 +5982,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, + [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, @@ -5895,7 +6041,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, return -EACCES; found: - if (reg->type == PTR_TO_BTF_ID) { + if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) { /* For bpf_sk_release, it needs to match against first member * 'struct sock_common', hence make an exception for it. This * allows bpf_sk_release to work for multiple socket types. 
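The ARG_PTR_TO_ALLOC_MEM -> ARG_PTR_TO_RINGBUF_MEM and MEM_ALLOC -> MEM_RINGBUF renames in these hunks free the MEM_ALLOC flag for bpf_obj_new() objects; the pointer returned by bpf_ringbuf_reserve() is what now carries MEM_RINGBUF. A minimal sketch of the helper pattern being renamed (event layout and attach point are illustrative)::

    struct event {
        int pid;
    };

    struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 4096);
    } rb SEC(".maps");

    SEC("tp/sched/sched_process_exec")
    int log_exec(void *ctx)
    {
        struct event *e;

        /* e is PTR_TO_MEM | MEM_RINGBUF (or NULL) in verifier terms */
        e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
        if (!e)
            return 0;
        e->pid = bpf_get_current_pid_tgid() >> 32;
        /* submit/discard are the OBJ_RELEASE consumers of that pointer */
        bpf_ringbuf_submit(e, 0);
        return 0;
    }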
@@ -5931,6 +6077,11 @@ found: return -EACCES; } } + } else if (type_is_alloc(reg->type)) { + if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) { + verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); + return -EFAULT; + } } return 0; @@ -5957,20 +6108,24 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MAP_VALUE: case PTR_TO_MEM: case PTR_TO_MEM | MEM_RDONLY: - case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_MEM | MEM_RINGBUF: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ - if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM) + if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM) return 0; break; /* All the rest must be rejected, except PTR_TO_BTF_ID which allows * fixed offset. */ case PTR_TO_BTF_ID: + case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: /* When referenced PTR_TO_BTF_ID is passed to release function, * it's fixed offset must be 0. In the other cases, fixed offset * can be non-zero. @@ -6046,7 +6201,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, goto skip_type_check; /* arg_btf_id and arg_size are in a union. */ - if (base_type(arg_type) == ARG_PTR_TO_BTF_ID) + if (base_type(arg_type) == ARG_PTR_TO_BTF_ID || + base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) arg_btf_id = fn->arg_btf_id[arg]; err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); @@ -6664,9 +6820,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) int i; for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) - return false; - + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID) + return !!fn->arg_btf_id[i]; + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK) + return fn->arg_btf_id[i] == BPF_PTR_POISON; if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && /* arg_btf_id and arg_size are in a union. */ (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || @@ -7413,6 +7570,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (!env->prog->aux->sleepable && fn->might_sleep) { + verbose(env, "helper call might sleep in a non-sleepable prog\n"); + return -EINVAL; + } + /* With LD_ABS/IND some JITs save/restore skb from r1. 
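The suffix matchers defined a little further below (__sz, __k, __ign, __alloc) turn parameter names in a kfunc's own declaration into verifier annotations. Roughly (the first prototype is a hypothetical illustration of the convention; the second is the real bpf_obj_new_impl() added by this series)::

    /* dst/dst__sz and src/src__sz form (pointer, size) pairs that the
     * verifier checks together as memory + length arguments
     */
    int bpf_my_copy(void *dst, u32 dst__sz, void *src, u32 src__sz);

    /* __k: must be a known scalar constant; __ign: skipped entirely */
    void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign);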
*/ changes_data = bpf_helper_changes_pkt_data(fn->func); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { @@ -7431,6 +7593,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return err; } + if (env->cur_state->active_rcu_lock) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (env->prog->aux->sleepable && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + meta.func_id = func_id; /* check args */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { @@ -7634,7 +7807,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; break; - case RET_PTR_TO_ALLOC_MEM: + case RET_PTR_TO_MEM: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = meta.mem_size; @@ -7797,19 +7970,921 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, } } +struct bpf_kfunc_call_arg_meta { + /* In parameters */ + struct btf *btf; + u32 func_id; + u32 kfunc_flags; + const struct btf_type *func_proto; + const char *func_name; + /* Out parameters */ + u32 ref_obj_id; + u8 release_regno; + bool r0_rdonly; + u32 ret_btf_id; + u64 r0_size; + struct { + u64 value; + bool found; + } arg_constant; + struct { + struct btf *btf; + u32 btf_id; + } arg_obj_drop; + struct { + struct btf_field *field; + } arg_list_head; +}; + +static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_ACQUIRE; +} + +static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RET_NULL; +} + +static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_RELEASE; +} + +static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_TRUSTED_ARGS; +} + +static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_SLEEPABLE; +} + +static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_DESTRUCTIVE; +} + +static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg) +{ + return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET); +} + +static bool __kfunc_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix) +{ + int suffix_len = strlen(suffix), len; + const char *param_name; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len < suffix_len) + return false; + param_name += len - suffix_len; + return !strncmp(param_name, suffix, suffix_len); +} + +static bool is_kfunc_arg_mem_size(const struct btf *btf, + const struct btf_param *arg, + const struct bpf_reg_state *reg) +{ + const struct btf_type *t; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) + return false; + + return __kfunc_param_match_suffix(btf, arg, "__sz"); +} + +static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__k"); +} + +static bool is_kfunc_arg_ignore(const struct btf *btf, const struct 
btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__ign"); +} + +static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__alloc"); +} + +static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, + const struct btf_param *arg, + const char *name) +{ + int len, target_len = strlen(name); + const char *param_name; + + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len != target_len) + return false; + if (strcmp(param_name, name)) + return false; + + return true; +} + +enum { + KF_ARG_DYNPTR_ID, + KF_ARG_LIST_HEAD_ID, + KF_ARG_LIST_NODE_ID, +}; + +BTF_ID_LIST(kf_arg_btf_ids) +BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_list_head) +BTF_ID(struct, bpf_list_node) + +static bool __is_kfunc_ptr_arg_type(const struct btf *btf, + const struct btf_param *arg, int type) +{ + const struct btf_type *t; + u32 res_id; + + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t) + return false; + if (!btf_type_is_ptr(t)) + return false; + t = btf_type_skip_modifiers(btf, t->type, &res_id); + if (!t) + return false; + return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]); +} + +static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID); +} + +static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID); +} + +static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); +} + +/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ +static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, + const struct btf *btf, + const struct btf_type *t, int rec) +{ + const struct btf_type *member_type; + const struct btf_member *member; + u32 i; + + if (!btf_type_is_struct(t)) + return false; + + for_each_member(i, t, member) { + const struct btf_array *array; + + member_type = btf_type_skip_modifiers(btf, member->type, NULL); + if (btf_type_is_struct(member_type)) { + if (rec >= 3) { + verbose(env, "max struct nesting depth exceeded\n"); + return false; + } + if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1)) + return false; + continue; + } + if (btf_type_is_array(member_type)) { + array = btf_array(member_type); + if (!array->nelems) + return false; + member_type = btf_type_skip_modifiers(btf, array->type, NULL); + if (!btf_type_is_scalar(member_type)) + return false; + continue; + } + if (!btf_type_is_scalar(member_type)) + return false; + } + return true; +} + + +static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { +#ifdef CONFIG_NET + [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +#endif +}; + +enum kfunc_ptr_arg_type { + KF_ARG_PTR_TO_CTX, + KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ + KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */ + KF_ARG_PTR_TO_DYNPTR, + KF_ARG_PTR_TO_LIST_HEAD, + KF_ARG_PTR_TO_LIST_NODE, + KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ + KF_ARG_PTR_TO_MEM, + KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */ +}; + +enum special_kfunc_type { + KF_bpf_obj_new_impl, + 
KF_bpf_obj_drop_impl,
+	KF_bpf_list_push_front,
+	KF_bpf_list_push_back,
+	KF_bpf_list_pop_front,
+	KF_bpf_list_pop_back,
+	KF_bpf_cast_to_kern_ctx,
+	KF_bpf_rdonly_cast,
+	KF_bpf_rcu_read_lock,
+	KF_bpf_rcu_read_unlock,
+};
+
+BTF_SET_START(special_kfunc_set)
+BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_cast_to_kern_ctx)
+BTF_ID(func, bpf_rdonly_cast)
+BTF_SET_END(special_kfunc_set)
+
+BTF_ID_LIST(special_kfunc_list)
+BTF_ID(func, bpf_obj_new_impl)
+BTF_ID(func, bpf_obj_drop_impl)
+BTF_ID(func, bpf_list_push_front)
+BTF_ID(func, bpf_list_push_back)
+BTF_ID(func, bpf_list_pop_front)
+BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_cast_to_kern_ctx)
+BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rcu_read_lock)
+BTF_ID(func, bpf_rcu_read_unlock)
+
+static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
+}
+
+static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
+}
+
+static enum kfunc_ptr_arg_type
+get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
+		       struct bpf_kfunc_call_arg_meta *meta,
+		       const struct btf_type *t, const struct btf_type *ref_t,
+		       const char *ref_tname, const struct btf_param *args,
+		       int argno, int nargs)
+{
+	u32 regno = argno + 1;
+	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *reg = &regs[regno];
+	bool arg_mem_size = false;
+
+	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
+		return KF_ARG_PTR_TO_CTX;
+
+	/* In this function, we verify the kfunc's BTF as per the argument type,
+	 * leaving the rest of the verification with respect to the register
+	 * type to our caller. When a set of conditions hold in the BTF type of
+	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
+	 */
+	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
+		return KF_ARG_PTR_TO_CTX;
+
+	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
+
+	if (is_kfunc_arg_kptr_get(meta, argno)) {
+		if (!btf_type_is_ptr(ref_t)) {
+			verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
+			return -EINVAL;
+		}
+		ref_t = btf_type_by_id(meta->btf, ref_t->type);
+		ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
+		if (!btf_type_is_struct(ref_t)) {
+			verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
+				meta->func_name, btf_type_str(ref_t), ref_tname);
+			return -EINVAL;
+		}
+		return KF_ARG_PTR_TO_KPTR;
+	}
+
+	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_DYNPTR;
+
+	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_LIST_HEAD;
+
+	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_LIST_NODE;
+
+	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
+		if (!btf_type_is_struct(ref_t)) {
+			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
+				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
+			return -EINVAL;
+		}
+		return KF_ARG_PTR_TO_BTF_ID;
+	}
+
+	if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
+		arg_mem_size = true;
+
+	/* This is the catch all argument type of register types supported by
+	 * check_helper_mem_access. However, we only allow when argument type is
+	 * pointer to scalar, or struct composed (recursively) of scalars. When
+	 * arg_mem_size is true, the pointer can be void *.
+	 */
+	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
+	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
+		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
+			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
+		return -EINVAL;
+	}
+	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
+}
+
+static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
+					struct bpf_reg_state *reg,
+					const struct btf_type *ref_t,
+					const char *ref_tname, u32 ref_id,
+					struct bpf_kfunc_call_arg_meta *meta,
+					int argno)
+{
+	const struct btf_type *reg_ref_t;
+	bool strict_type_match = false;
+	const struct btf *reg_btf;
+	const char *reg_ref_tname;
+	u32 reg_ref_id;
+
+	if (base_type(reg->type) == PTR_TO_BTF_ID) {
+		reg_btf = reg->btf;
+		reg_ref_id = reg->btf_id;
+	} else {
+		reg_btf = btf_vmlinux;
+		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
+	}
+
+	if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
+		strict_type_match = true;
+
+	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
+	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
+	if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
+		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
+			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
+			btf_type_str(reg_ref_t), reg_ref_tname);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
+				      struct bpf_reg_state *reg,
+				      const struct btf_type *ref_t,
+				      const char *ref_tname,
+				      struct bpf_kfunc_call_arg_meta *meta,
+				      int argno)
+{
+	struct btf_field *kptr_field;
+
+	/* check_func_arg_reg_off allows var_off for
+	 * PTR_TO_MAP_VALUE, but we need fixed offset to find
+	 * off_desc.
+	 */
+	if (!tnum_is_const(reg->var_off)) {
+		verbose(env, "arg#0 must have constant offset\n");
+		return -EINVAL;
+	}
+
+	kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
+	if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
+		verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
+			reg->off + reg->var_off.value);
+		return -EINVAL;
+	}
+
+	if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
+				  kptr_field->kptr.btf_id, true)) {
+		verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
+			meta->func_name, argno, btf_type_str(ref_t), ref_tname);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
+{
+	struct bpf_func_state *state = cur_func(env);
+	struct bpf_reg_state *reg;
+	int i;
+
+	/* bpf_spin_lock only allows calling list_push and list_pop, no BPF
+	 * subprogs, no global functions. This means that the references would
+	 * not be released inside the critical section but they may be added to
+	 * the reference state, and the acquired_refs are never copied out for a
+	 * different frame as BPF to BPF calls don't work in bpf_spin_lock
+	 * critical sections.
+ */ + if (!ref_obj_id) { + verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n"); + return -EFAULT; + } + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].id == ref_obj_id) { + if (state->refs[i].release_on_unlock) { + verbose(env, "verifier internal error: expected false release_on_unlock"); + return -EFAULT; + } + state->refs[i].release_on_unlock = true; + /* Now mark everyone sharing same ref_obj_id as untrusted */ + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) + reg->type |= PTR_UNTRUSTED; + })); + return 0; + } + } + verbose(env, "verifier internal error: ref state missing for ref_obj_id\n"); + return -EFAULT; +} + +/* Implementation details: + * + * Each register points to some region of memory, which we define as an + * allocation. Each allocation may embed a bpf_spin_lock which protects any + * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same + * allocation. The lock and the data it protects are colocated in the same + * memory region. + * + * Hence, everytime a register holds a pointer value pointing to such + * allocation, the verifier preserves a unique reg->id for it. + * + * The verifier remembers the lock 'ptr' and the lock 'id' whenever + * bpf_spin_lock is called. + * + * To enable this, lock state in the verifier captures two values: + * active_lock.ptr = Register's type specific pointer + * active_lock.id = A unique ID for each register pointer value + * + * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two + * supported register types. + * + * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of + * allocated objects is the reg->btf pointer. + * + * The active_lock.id is non-unique for maps supporting direct_value_addr, as we + * can establish the provenance of the map value statically for each distinct + * lookup into such maps. They always contain a single map value hence unique + * IDs for each pseudo load pessimizes the algorithm and rejects valid programs. + * + * So, in case of global variables, they use array maps with max_entries = 1, + * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point + * into the same map value as max_entries is 1, as described above). + * + * In case of inner map lookups, the inner map pointer has same map_ptr as the + * outer map pointer (in verifier context), but each lookup into an inner map + * assigns a fresh reg->id to the lookup, so while lookups into distinct inner + * maps from the same outer map share the same map_ptr as active_lock.ptr, they + * will get different reg->id assigned to each lookup, hence different + * active_lock.id. + * + * In case of allocated objects, active_lock.ptr is the reg->btf, and the + * reg->id is a unique ID preserved after the NULL pointer check on the pointer + * returned from bpf_obj_new. Each allocation receives a new reg->id. 
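Taken together with the list kfuncs, the program flow this comment describes looks roughly like the sketch below. It reuses the map_value/struct foo/array_map declarations from the earlier list-head sketch; the bpf_obj_new()/bpf_obj_drop() wrappers mirror the selftests' bpf_experimental.h, with bpf_core_type_id_local() coming from bpf/bpf_core_read.h::

    void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
    void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
    void bpf_list_push_front(struct bpf_list_head *head,
                             struct bpf_list_node *node) __ksym;
    struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;

    #define bpf_obj_new(type) \
        ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
    #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)

    SEC("tc")
    int list_push_pop(void *ctx)
    {
        struct bpf_list_node *n;
        struct map_value *v;
        struct foo *f;
        int key = 0;

        v = bpf_map_lookup_elem(&array_map, &key);
        if (!v)
            return 0;

        f = bpf_obj_new(struct foo);  /* PTR_TO_BTF_ID | MEM_ALLOC, referenced */
        if (!f)
            return 0;
        f->data = 42;

        bpf_spin_lock(&v->lock);      /* lock in the same allocation as head */
        bpf_list_push_front(&v->head, &f->node); /* ref expires at unlock */
        n = bpf_list_pop_front(&v->head);        /* pop acquires a new ref */
        bpf_spin_unlock(&v->lock);

        if (!n)
            return 0;
        f = (struct foo *)((void *)n - __builtin_offsetof(struct foo, node));
        bpf_obj_drop(f);              /* release the popped object */
        return 0;
    }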
+ */ +static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + void *ptr; + u32 id; + + switch ((int)reg->type) { + case PTR_TO_MAP_VALUE: + ptr = reg->map_ptr; + break; + case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED: + ptr = reg->btf; + break; + default: + verbose(env, "verifier internal error: unknown reg type for lock check\n"); + return -EFAULT; + } + id = reg->id; + + if (!env->cur_state->active_lock.ptr) + return -EINVAL; + if (env->cur_state->active_lock.ptr != ptr || + env->cur_state->active_lock.id != id) { + verbose(env, "held lock and object are not in the same allocation\n"); + return -EINVAL; + } + return 0; +} + +static bool is_bpf_list_api_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_list_push_front] || + btf_id == special_kfunc_list[KF_bpf_list_push_back] || + btf_id == special_kfunc_list[KF_bpf_list_pop_front] || + btf_id == special_kfunc_list[KF_bpf_list_pop_back]; +} + +static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct btf_field *field; + struct btf_record *rec; + u32 list_head_off; + + if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) { + verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_head_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD); + if (!field) { + verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off); + return -EINVAL; + } + + /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ + if (check_reg_allocation_locked(env, reg)) { + verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n", + rec->spin_lock_off); + return -EINVAL; + } + + if (meta->arg_list_head.field) { + verbose(env, "verifier internal error: repeating bpf_list_head arg\n"); + return -EFAULT; + } + meta->arg_list_head.field = field; + return 0; +} + +static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + const struct btf_type *et, *t; + struct btf_field *field; + struct btf_record *rec; + u32 list_node_off; + + if (meta->btf != btf_vmlinux || + (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] && + meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) { + verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n"); + return -EFAULT; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. 
bpf_list_node has to be at the constant offset\n", + regno); + return -EINVAL; + } + + rec = reg_btf_record(reg); + list_node_off = reg->off + reg->var_off.value; + field = btf_record_find(rec, list_node_off, BPF_LIST_NODE); + if (!field || field->offset != list_node_off) { + verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off); + return -EINVAL; + } + + field = meta->arg_list_head.field; + + et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id); + t = btf_type_by_id(reg->btf, reg->btf_id); + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf, + field->list_head.value_btf_id, true)) { + verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d " + "in struct %s, but arg is at offset=%d in struct %s\n", + field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off), + list_node_off, btf_name_by_offset(reg->btf, t->name_off)); + return -EINVAL; + } + + if (list_node_off != field->list_head.node_offset) { + verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n", + list_node_off, field->list_head.node_offset, + btf_name_by_offset(field->list_head.btf, et->name_off)); + return -EINVAL; + } + /* Set arg#1 for expiration after unlock */ + return ref_set_release_on_unlock(env, reg->ref_obj_id); +} + +static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta) +{ + const char *func_name = meta->func_name, *ref_tname; + const struct btf *btf = meta->btf; + const struct btf_param *args; + u32 i, nargs; + int ret; + + args = (const struct btf_param *)(meta->func_proto + 1); + nargs = btf_type_vlen(meta->func_proto); + if (nargs > MAX_BPF_FUNC_REG_ARGS) { + verbose(env, "Function %s has %d > %d args\n", func_name, nargs, + MAX_BPF_FUNC_REG_ARGS); + return -EINVAL; + } + + /* Check that BTF function arguments match actual types that the + * verifier sees. 
+ */ + for (i = 0; i < nargs; i++) { + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1]; + const struct btf_type *t, *ref_t, *resolve_ret; + enum bpf_arg_type arg_type = ARG_DONTCARE; + u32 regno = i + 1, ref_id, type_size; + bool is_ret_buf_sz = false; + int kf_arg_type; + + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + + if (is_kfunc_arg_ignore(btf, &args[i])) + continue; + + if (btf_type_is_scalar(t)) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "R%d is not a scalar\n", regno); + return -EINVAL; + } + + if (is_kfunc_arg_constant(meta->btf, &args[i])) { + if (meta->arg_constant.found) { + verbose(env, "verifier internal error: only one constant argument permitted\n"); + return -EFAULT; + } + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d must be a known constant\n", regno); + return -EINVAL; + } + ret = mark_chain_precision(env, regno); + if (ret < 0) + return ret; + meta->arg_constant.found = true; + meta->arg_constant.value = reg->var_off.value; + } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { + meta->r0_rdonly = true; + is_ret_buf_sz = true; + } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) { + is_ret_buf_sz = true; + } + + if (is_ret_buf_sz) { + if (meta->r0_size) { + verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); + return -EINVAL; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a const\n", regno); + return -EINVAL; + } + + meta->r0_size = reg->var_off.value; + ret = mark_chain_precision(env, regno); + if (ret) + return ret; + } + continue; + } + + if (!btf_type_is_ptr(t)) { + verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); + return -EINVAL; + } + + if (reg->ref_obj_id) { + if (is_kfunc_release(meta) && meta->ref_obj_id) { + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, + meta->ref_obj_id); + return -EFAULT; + } + meta->ref_obj_id = reg->ref_obj_id; + if (is_kfunc_release(meta)) + meta->release_regno = regno; + } + + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + + kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs); + if (kf_arg_type < 0) + return kf_arg_type; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + case KF_ARG_PTR_TO_BTF_ID: + if (!is_kfunc_trusted_args(meta)) + break; + + if (!is_trusted_reg(reg)) { + verbose(env, "R%d must be referenced or trusted\n", regno); + return -EINVAL; + } + fallthrough; + case KF_ARG_PTR_TO_CTX: + /* Trusted arguments have the same offset checks as release arguments */ + arg_type |= OBJ_RELEASE; + break; + case KF_ARG_PTR_TO_KPTR: + case KF_ARG_PTR_TO_DYNPTR: + case KF_ARG_PTR_TO_LIST_HEAD: + case KF_ARG_PTR_TO_LIST_NODE: + case KF_ARG_PTR_TO_MEM: + case KF_ARG_PTR_TO_MEM_SIZE: + /* Trusted by default */ + break; + default: + WARN_ON_ONCE(1); + return -EFAULT; + } + + if (is_kfunc_release(meta) && reg->ref_obj_id) + arg_type |= OBJ_RELEASE; + ret = check_func_arg_reg_off(env, reg, regno, arg_type); + if (ret < 0) + return ret; + + switch (kf_arg_type) { + case KF_ARG_PTR_TO_CTX: + if (reg->type != PTR_TO_CTX) { + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + return -EINVAL; + } + + if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog)); + if (ret < 0) + return -EINVAL; + meta->ret_btf_id = 
ret; + } + break; + case KF_ARG_PTR_TO_ALLOC_BTF_ID: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + if (meta->btf == btf_vmlinux && + meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + meta->arg_obj_drop.btf = reg->btf; + meta->arg_obj_drop.btf_id = reg->btf_id; + } + break; + case KF_ARG_PTR_TO_KPTR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#0 expected pointer to map value\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_DYNPTR: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d expected pointer to stack\n", i); + return -EINVAL; + } + + if (!is_dynptr_reg_valid_init(env, reg)) { + verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + + if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) { + verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n", + i, btf_type_str(ref_t), ref_tname); + return -EINVAL; + } + break; + case KF_ARG_PTR_TO_LIST_HEAD: + if (reg->type != PTR_TO_MAP_VALUE && + reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); + return -EINVAL; + } + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_LIST_NODE: + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + verbose(env, "arg#%d expected pointer to allocated object\n", i); + return -EINVAL; + } + if (!reg->ref_obj_id) { + verbose(env, "allocated object must be referenced\n"); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_BTF_ID: + /* Only base_type is checked, further checks are done here */ + if ((base_type(reg->type) != PTR_TO_BTF_ID || + bpf_type_has_unsafe_modifiers(reg->type)) && + !reg2btf_ids[base_type(reg->type)]) { + verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); + verbose(env, "expected %s or socket\n", + reg_type_str(env, base_type(reg->type) | + (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); + return -EINVAL; + } + ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM: + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); + if (IS_ERR(resolve_ret)) { + verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", + i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); + return -EINVAL; + } + ret = check_mem_reg(env, reg, regno, type_size); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_MEM_SIZE: + ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1); + if (ret < 0) { + verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + return ret; + } + /* Skip next '__sz' argument */ + i++; + break; + } + } + + if (is_kfunc_release(meta) && !meta->release_regno) { + verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", + func_name); + return
-EINVAL; + } + + return 0; +} + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { const struct btf_type *t, *func, *func_proto, *ptr_type; struct bpf_reg_state *regs = cur_regs(env); - struct bpf_kfunc_arg_meta meta = { 0 }; const char *func_name, *ptr_type_name; + bool sleepable, rcu_lock, rcu_unlock; + struct bpf_kfunc_call_arg_meta meta; u32 i, nargs, func_id, ptr_type_id; int err, insn_idx = *insn_idx_p; const struct btf_param *args; + const struct btf_type *ret_t; struct btf *desc_btf; u32 *kfunc_flags; - bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ if (!insn->imm) @@ -7830,24 +8905,68 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name); return -EACCES; } - if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) { - verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n"); + + /* Prepare kfunc call metadata */ + memset(&meta, 0, sizeof(meta)); + meta.btf = desc_btf; + meta.func_id = func_id; + meta.kfunc_flags = *kfunc_flags; + meta.func_proto = func_proto; + meta.func_name = func_name; + + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { + verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); + return -EACCES; + } + + sleepable = is_kfunc_sleepable(&meta); + if (sleepable && !env->prog->aux->sleepable) { + verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); + return -EACCES; + } + + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); + rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); + if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) { + verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name); return -EACCES; } - acq = *kfunc_flags & KF_ACQUIRE; + if (env->cur_state->active_rcu_lock) { + struct bpf_func_state *state; + struct bpf_reg_state *reg; - meta.flags = *kfunc_flags; + if (rcu_lock) { + verbose(env, "nested rcu read lock (kernel function %s)\n", func_name); + return -EINVAL; + } else if (rcu_unlock) { + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->type & MEM_RCU) { + reg->type &= ~(MEM_RCU | PTR_TRUSTED); + reg->type |= PTR_UNTRUSTED; + } + })); + env->cur_state->active_rcu_lock = false; + } else if (sleepable) { + verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name); + return -EACCES; + } + } else if (rcu_lock) { + env->cur_state->active_rcu_lock = true; + } else if (rcu_unlock) { + verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name); + return -EINVAL; + } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, &meta); + err = check_kfunc_args(env, &meta); if (err < 0) return err; /* In case of release function, we get register number of refcounted - * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now + * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. 
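+	 *
+	 * From the program side the pattern is (a sketch, assuming task
+	 * kfuncs such as bpf_task_acquire()/bpf_task_release() are
+	 * allowed for the program type):
+	 *
+	 *	struct task_struct *t = bpf_task_acquire(p);
+	 *	if (t)
+	 *		bpf_task_release(t);   (here R1 is meta.release_regno)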
*/ - if (err) { - err = release_reference(env, regs[err].ref_obj_id); + if (meta.release_regno) { + err = release_reference(env, regs[meta.release_regno].ref_obj_id); if (err) { verbose(env, "kfunc %s#%d reference has not been acquired before\n", func_name, func_id); @@ -7861,18 +8980,92 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Check return type */ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); - if (acq && !btf_type_is_struct_ptr(desc_btf, t)) { - verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); - return -EINVAL; + if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { + /* Only exception is bpf_obj_new_impl */ + if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) { + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); + return -EINVAL; + } } if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(desc_btf, t->type, - &ptr_type_id); - if (!btf_type_is_struct(ptr_type)) { + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); + + if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + struct btf *ret_btf; + u32 ret_btf_id; + + if (unlikely(!bpf_global_ma_set)) + return -ENOMEM; + + if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { + verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); + return -EINVAL; + } + + ret_btf = env->prog->aux->btf; + ret_btf_id = meta.arg_constant.value; + + /* This may be NULL due to user not supplying a BTF */ + if (!ret_btf) { + verbose(env, "bpf_obj_new requires prog BTF\n"); + return -EINVAL; + } + + ret_t = btf_type_by_id(ret_btf, ret_btf_id); + if (!ret_t || !__btf_type_is_struct(ret_t)) { + verbose(env, "bpf_obj_new type ID argument must be of a struct\n"); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = ret_btf; + regs[BPF_REG_0].btf_id = ret_btf_id; + + env->insn_aux_data[insn_idx].obj_new_size = ret_t->size; + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(ret_btf, ret_btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + env->insn_aux_data[insn_idx].kptr_struct_meta = + btf_find_struct_meta(meta.arg_obj_drop.btf, + meta.arg_obj_drop.btf_id); + } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || + meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { + struct btf_field *field = meta.arg_list_head.field; + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; + regs[BPF_REG_0].btf = field->list_head.btf; + regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; + regs[BPF_REG_0].off = field->list_head.node_offset; + } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { + ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value); + if (!ret_t || !btf_type_is_struct(ret_t)) { + verbose(env, + "kfunc bpf_rdonly_cast type ID argument must be 
of a struct\n"); + return -EINVAL; + } + + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + regs[BPF_REG_0].btf = desc_btf; + regs[BPF_REG_0].btf_id = meta.arg_constant.value; + } else { + verbose(env, "kernel function %s unhandled dynamic return type\n", + meta.func_name); + return -EFAULT; + } + } else if (!__btf_type_is_struct(ptr_type)) { if (!meta.r0_size) { ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); @@ -7900,20 +9093,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; } - if (*kfunc_flags & KF_RET_NULL) { + + if (is_kfunc_ret_null(&meta)) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); - if (acq) { + if (is_kfunc_acquire(&meta)) { int id = acquire_reference_state(env, insn_idx); if (id < 0) return id; - regs[BPF_REG_0].id = id; + if (is_kfunc_ret_null(&meta)) + regs[BPF_REG_0].id = id; regs[BPF_REG_0].ref_obj_id = id; } + if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id) + regs[BPF_REG_0].id = ++env->id_gen; } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ nargs = btf_type_vlen(func_proto); @@ -10086,16 +11283,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, { if (type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { - if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || - !tnum_equals_const(reg->var_off, 0) || - reg->off)) { - /* Old offset (both fixed and variable parts) should - * have been known-zero, because we don't allow pointer - * arithmetic on pointers that might be NULL. If we - * see this happening, don't convert the register. - */ + /* Old offset (both fixed and variable parts) should have been + * known-zero, because we don't allow pointer arithmetic on + * pointers that might be NULL. If we see this happening, don't + * convert the register. + * + * But in some cases, some helpers that return local kptrs + * advance offset for the returned pointer. In those cases, it + * is fine to expect to see reg->off. + */ + if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0))) + return; + if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off)) return; - } if (is_null) { reg->type = SCALAR_VALUE; /* We don't need id and ref_obj_id from this point @@ -10269,6 +11469,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_verifier_state *other_branch; struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; + struct bpf_reg_state *eq_branch_regs; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -10378,8 +11579,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, /* detect if we are comparing against a constant value so we can adjust * our min/max values for our dst register. * this is only legit if both are scalars (or pointers to the same - * object, I suppose, but we don't support that right now), because - * otherwise the different base pointers mean the offsets aren't + * object, I suppose, see the PTR_MAYBE_NULL related if block below), + * because otherwise the different base pointers mean the offsets aren't * comparable.
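+	 *
+	 * As a sketch of the pointer-vs-pointer case handled below
+	 * (illustrative only):
+	 *
+	 *	p = bpf_map_lookup_elem(&m, &k);   p is PTR_MAYBE_NULL
+	 *	if (p == q)                        q is known non-NULL
+	 *		p->f = 1;                  p proven non-NULL here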
*/ if (BPF_SRC(insn->code) == BPF_X) { @@ -10428,6 +11629,36 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); } + /* if one pointer register is compared to another pointer + * register, check if PTR_MAYBE_NULL could be lifted. + * E.g. register A - maybe null + * register B - not null + * for JNE A, B, ... - A is not null in the false branch; + * for JEQ A, B, ... - A is not null in the true branch. + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && + __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && + type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) { + eq_branch_regs = NULL; + switch (opcode) { + case BPF_JEQ: + eq_branch_regs = other_branch_regs; + break; + case BPF_JNE: + eq_branch_regs = regs; + break; + default: + /* do nothing */ + break; + } + if (eq_branch_regs) { + if (type_may_be_null(src_reg->type)) + mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]); + else + mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]); + } + } + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). * NOTE: these optimizations below are related with pointer comparison * which will never be JMP32. @@ -10534,8 +11765,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; - if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) - dst_reg->id = ++env->id_gen; + WARN_ON_ONCE(map->max_entries != 1); + /* We want reg->id to be the same (0) as map_value is not distinct */ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || insn->src_reg == BPF_PSEUDO_MAP_IDX) { dst_reg->type = CONST_PTR_TO_MAP; @@ -10613,11 +11844,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } - if (env->cur_state->active_spin_lock) { + if (env->cur_state->active_lock.ptr) { verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); return -EINVAL; } + if (env->cur_state->active_rcu_lock) { + verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n"); + return -EINVAL; + } + if (regs[ctx_reg].type != PTR_TO_CTX) { verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); @@ -11879,7 +13115,11 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; - if (old->active_spin_lock != cur->active_spin_lock) + if (old->active_lock.ptr != cur->active_lock.ptr || + old->active_lock.id != cur->active_lock.id) + return false; + + if (old->active_rcu_lock != cur->active_rcu_lock) return false; /* for states to be equal callsites have to be the same @@ -12524,11 +13764,14 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_spin_lock && - (insn->src_reg == BPF_PSEUDO_CALL || - insn->imm != BPF_FUNC_spin_unlock)) { - verbose(env, "function calls are not allowed while holding a lock\n"); - return -EINVAL; + if (env->cur_state->active_lock.ptr) { + if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || + (insn->src_reg == BPF_PSEUDO_CALL) || + (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && + (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) { + verbose(env, "function calls are not allowed while holding a lock\n"); + return -EINVAL; + } } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); @@ -12561,11 +13804,16 @@
static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_spin_lock) { + if (env->cur_state->active_lock.ptr) { verbose(env, "bpf_spin_unlock is missing\n"); return -EINVAL; } + if (env->cur_state->active_rcu_lock) { + verbose(env, "bpf_rcu_read_unlock is missing\n"); + return -EINVAL; + } + /* We must do check_reference_leak here before * prepare_func_exit to handle the case when * state->curframe > 0, it may be a callback @@ -12818,6 +14066,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(prog); + if (btf_record_has_field(map->record, BPF_LIST_HEAD)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_list_head yet\n"); + return -EINVAL; + } + } + if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) { if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); @@ -13654,6 +14909,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) break; case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_UNTRUSTED: + /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike + * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot + * be said once it is marked PTR_UNTRUSTED, hence we must handle + * any faults for loads into such types. BPF_WRITE is disallowed + * for this case. + */ + case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED: if (type == BPF_READ) { insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); @@ -14019,8 +15281,8 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } -static int fixup_kfunc_call(struct bpf_verifier_env *env, - struct bpf_insn *insn) +static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + struct bpf_insn *insn_buf, int insn_idx, int *cnt) { const struct bpf_kfunc_desc *desc; @@ -14039,8 +15301,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, return -EFAULT; } + *cnt = 0; insn->imm = desc->imm; - + if (insn->off) + return 0; + if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size; + + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size); + insn_buf[1] = addr[0]; + insn_buf[2] = addr[1]; + insn_buf[3] = *insn; + *cnt = 4; + } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; + struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + + insn_buf[0] = addr[0]; + insn_buf[1] = addr[1]; + insn_buf[2] = *insn; + *cnt = 3; + } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || + desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { + insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); + *cnt = 1; + } return 0; } @@ -14182,9 +15469,19 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) continue; if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { - ret = fixup_kfunc_call(env, insn); + ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); if (ret) return ret; + if (cnt == 0) + continue; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; 
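+			/* bpf_patch_insn_data() reallocated the program;
+			 * re-point insn at the patched location in the new
+			 * image before continuing the scan.
+			 */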
+ insn = new_prog->insnsi + i + delta; continue; } @@ -14302,14 +15599,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_call_imm; } - if (insn->imm == BPF_FUNC_task_storage_get || - insn->imm == BPF_FUNC_sk_storage_get || - insn->imm == BPF_FUNC_inode_storage_get || - insn->imm == BPF_FUNC_cgrp_storage_get) { - if (env->prog->aux->sleepable) - insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); - else + if (is_storage_get_function(insn->imm)) { + if (!env->prog->aux->sleepable || + env->insn_aux_data[i + delta].storage_get_func_atomic) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); + else + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); insn_buf[1] = *insn; cnt = 2; @@ -14379,7 +15674,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, - (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, (int (*)(struct bpf_map *map, bpf_callback_t callback_fn, @@ -15388,6 +16683,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); + env->rcu_tag_supported = btf_vmlinux && + btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0; if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; diff --git a/kernel/events/core.c b/kernel/events/core.c index 4ec3717003d5..7091bbf88ee7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9030,7 +9030,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } @@ -9273,6 +9273,19 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -9307,15 +9320,38 @@ static int __perf_event_overflow(struct perf_event *event, if (event->attr.sigtrap) { /* - * Should not be able to return to user space without processing - * pending_sigtrap (kernel events can overflow multiple times). + * The desired behaviour of sigtrap vs invalid samples is a bit + * tricky; on the one hand, one should not lose the SIGTRAP if + * it is the first event, on the other hand, we should also not + * trigger the WARN or override the data address.
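+		 *
+		 * Example: with attr.exclude_kernel set, skid can still make
+		 * the event overflow in kernel space; such a sample must keep
+		 * the SIGTRAP pending but must not report a kernel address in
+		 * pending_addr.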
*/ - WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel); + bool valid_sample = sample_is_allowed(event, regs); + unsigned int pending_id = 1; + + if (regs) + pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; if (!event->pending_sigtrap) { - event->pending_sigtrap = 1; + event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); + } else if (event->attr.exclude_kernel && valid_sample) { + /* + * Should not be able to return to user space without + * consuming pending_sigtrap; with exceptions: + * + * 1. Where !exclude_kernel, events can overflow again + * in the kernel without returning to user space. + * + * 2. Events that can overflow again before the IRQ- + * work without user space progress (e.g. hrtimer). + * To approximate progress (with false negatives), + * check 32-bit hash of the current IP. + */ + WARN_ON_ONCE(event->pending_sigtrap != pending_id); } - event->pending_addr = data->addr; + + event->pending_addr = 0; + if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) + event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index cbb0bed958ab..7670a811a565 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) for (i = 0; i < sfn_ptr->num_counters; i++) dfn_ptr->counters[i] += sfn_ptr->counters[i]; + + sfn_ptr = list_next_entry(sfn_ptr, head); } } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 714ac4c3b556..d9c822bbffb8 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -113,11 +113,40 @@ int static_key_count(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_count); -void static_key_slow_inc_cpuslocked(struct static_key *key) +/* + * static_key_fast_inc_not_disabled - adds a user for a static key + * @key: static key that must be already enabled + * + * The caller must make sure that the static key can't get disabled while + * in this function. It doesn't patch jump labels, only adds a user to + * an already enabled static key. + * + * Returns true if the increment was done. Unlike refcount_t the ref counter + * is not saturated, but will fail to increment on overflow. + */ +bool static_key_fast_inc_not_disabled(struct static_key *key) { - int v, v1; + int v; STATIC_KEY_CHECK_USE(key); + /* + * Negative key->enabled has a special meaning: it sends + * static_key_slow_inc() down the slow path, and it is non-zero + * so it counts as "enabled" in jump_label_update(). Note that + * atomic_inc_unless_negative() checks >= 0, so roll our own. + */ + v = atomic_read(&key->enabled); + do { + if (v <= 0 || (v + 1) < 0) + return false; + } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); + + return true; +} +EXPORT_SYMBOL_GPL(static_key_fast_inc_not_disabled); + +bool static_key_slow_inc_cpuslocked(struct static_key *key) +{ lockdep_assert_cpus_held(); /* @@ -126,17 +155,9 @@ void static_key_slow_inc_cpuslocked(struct static_key *key) * jump_label_update() process. At the same time, however, * the jump_label_update() call below wants to see * static_key_enabled(&key) for jumps to be updated properly. - * - * So give a special meaning to negative key->enabled: it sends - * static_key_slow_inc() down the slow path, and it is non-zero - * so it counts as "enabled" in jump_label_update(). Note that - * atomic_inc_unless_negative() checks >= 0, so roll our own. 
*/ - for (v = atomic_read(&key->enabled); v > 0; v = v1) { - v1 = atomic_cmpxchg(&key->enabled, v, v + 1); - if (likely(v1 == v)) - return; - } + if (static_key_fast_inc_not_disabled(key)) + return true; jump_label_lock(); if (atomic_read(&key->enabled) == 0) { @@ -148,16 +169,23 @@ void static_key_slow_inc_cpuslocked(struct static_key *key) */ atomic_set_release(&key->enabled, 1); } else { - atomic_inc(&key->enabled); + if (WARN_ON_ONCE(!static_key_fast_inc_not_disabled(key))) { + jump_label_unlock(); + return false; + } } jump_label_unlock(); + return true; } -void static_key_slow_inc(struct static_key *key) +bool static_key_slow_inc(struct static_key *key) { + bool ret; + cpus_read_lock(); - static_key_slow_inc_cpuslocked(key); + ret = static_key_slow_inc_cpuslocked(key); cpus_read_unlock(); + return ret; } EXPORT_SYMBOL_GPL(static_key_slow_inc); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index cd9f5a66a690..3050631e528d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1766,7 +1766,13 @@ static int __unregister_kprobe_top(struct kprobe *p) if ((list_p != p) && (list_p->post_handler)) goto noclean; } - ap->post_handler = NULL; + /* + * For the kprobe-on-ftrace case, we keep the + * post_handler setting to identify this aggrprobe + * armed with kprobe_ipmodify_ops. + */ + if (!kprobe_ftrace(ap)) + ap->post_handler = NULL; } noclean: /* diff --git a/kernel/rseq.c b/kernel/rseq.c index bda8175f8f99..d38ab944105d 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -171,12 +171,27 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) return 0; } +static bool rseq_warn_flags(const char *str, u32 flags) +{ + u32 test_flags; + + if (!flags) + return false; + test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS; + if (test_flags) + pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str); + test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS; + if (test_flags) + pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str); + return true; +} + static int rseq_need_restart(struct task_struct *t, u32 cs_flags) { u32 flags, event_mask; int ret; - if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags) + if (rseq_warn_flags("rseq_cs", cs_flags)) return -EINVAL; /* Get thread flags. */ @@ -184,7 +199,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) if (ret) return ret; - if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags) + if (rseq_warn_flags("rseq", flags)) return -EINVAL; /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cb2aa2b54c7a..daff72f00385 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4200,6 +4200,40 @@ out: return success; } +static bool __task_needs_rq_lock(struct task_struct *p) +{ + unsigned int state = READ_ONCE(p->__state); + + /* + * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when + * the task is blocked. Make sure to check @state since ttwu() can drop + * locks at the end, see ttwu_queue_wakelist(). + */ + if (state == TASK_RUNNING || state == TASK_WAKING) + return true; + + /* + * Ensure we load p->on_rq after p->__state, otherwise it would be + * possible to, falsely, observe p->on_rq == 0. + * + * See try_to_wake_up() for a longer comment. + */ + smp_rmb(); + if (p->on_rq) + return true; + +#ifdef CONFIG_SMP + /* + * Ensure the task has finished __schedule() and will not be referenced + * anymore. Again, see try_to_wake_up() for a longer comment. 
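+	 *
+	 * The smp_cond_load_acquire() below pairs with the
+	 * smp_store_release() of p->on_cpu in finish_task(): once on_cpu
+	 * is observed as 0, the previous __schedule() is fully complete.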
+ */ + smp_rmb(); + smp_cond_load_acquire(&p->on_cpu, !VAL); +#endif + + return false; +} + /** * task_call_func - Invoke a function on task in fixed state * @p: Process for which the function is to be invoked, can be @current. @@ -4217,28 +4251,12 @@ out: int task_call_func(struct task_struct *p, task_call_f func, void *arg) { struct rq *rq = NULL; - unsigned int state; struct rq_flags rf; int ret; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - state = READ_ONCE(p->__state); - - /* - * Ensure we load p->on_rq after p->__state, otherwise it would be - * possible to, falsely, observe p->on_rq == 0. - * - * See try_to_wake_up() for a longer comment. - */ - smp_rmb(); - - /* - * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when - * the task is blocked. Make sure to check @state since ttwu() can drop - * locks at the end, see ttwu_queue_wakelist(). - */ - if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) + if (__task_needs_rq_lock(p)) rq = __task_rq_lock(p, &rf); /* diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9161d1136d01..1207c78f85c1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,9 +25,6 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; - /* max CPU capacity, which is equal for all CPUs in freq. domain */ - unsigned long max; - /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -51,6 +48,7 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; + unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = sg_cpu->iowait_boost * sg_policy->max; - boost >>= SCHED_CAPACITY_SHIFT; + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. 
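Each CPU now tracks its own capacity in sg_cpu->max, and get_next_freq() maps utilization to a frequency against that capacity. A simplified sketch of the mapping under those assumptions (not the exact kernel code; util, max and max_freq are the inputs, and the +25% headroom mirrors map_util_perf()):

	static unsigned int next_freq_sketch(unsigned long util, unsigned long max,
					     unsigned int max_freq)
	{
		util += util >> 2;		/* +25% headroom */
		if (util > max)
			util = max;
		return max_freq * util / max;	/* scale into the frequency range */
	}

A policy-wide sg_policy->max is no longer needed because each sg_cpu feeds its own capacity into this mapping.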
@@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), - sg_policy->max); + map_util_perf(sg_cpu->util), sg_cpu->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0; + unsigned long util = 0, max = 1; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); + unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; - util = max(j_sg_cpu->util, util); + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } } - return get_next_freq(sg_policy, util, sg_policy->max); + return get_next_freq(sg_policy, util, max); } static void @@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu = cpumask_first(policy->cpus); + unsigned int cpu; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; - sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f2d8d070d024..3bbd3f0c810c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -774,7 +774,7 @@ BPF_CALL_0(bpf_get_current_task_btf) const struct bpf_func_proto bpf_get_current_task_btf_proto = { .func = bpf_get_current_task_btf, .gpl_only = true, - .ret_type = RET_PTR_TO_BTF_ID, + .ret_type = RET_PTR_TO_BTF_ID_TRUSTED, .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; @@ -1485,9 +1485,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_task_stack: return &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: - return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; + return &bpf_copy_from_user_proto; case BPF_FUNC_copy_from_user_task: - return prog->aux->sleepable ? 
&bpf_copy_from_user_task_proto : NULL; + return &bpf_copy_from_user_task_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; case BPF_FUNC_per_cpu_ptr: diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 705b990d264d..f2260bc65226 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1289,6 +1289,7 @@ static int ftrace_add_mod(struct trace_array *tr, if (!ftrace_mod) return -ENOMEM; + INIT_LIST_HEAD(&ftrace_mod->list); ftrace_mod->func = kstrdup(func, GFP_KERNEL); ftrace_mod->module = kstrdup(module, GFP_KERNEL); ftrace_mod->enable = enable; @@ -3190,7 +3191,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) /* if we can't allocate this size, try something smaller */ if (!order) return -ENOMEM; - order >>= 1; + order--; goto again; } @@ -7391,7 +7392,7 @@ void __init ftrace_init(void) } pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, count / ENTRIES_PER_PAGE + 1); + count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); ret = ftrace_process_locs(NULL, __start_mcount_loc, diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index d81f7c51025c..c736487fc0e4 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -73,6 +73,10 @@ static struct trace_event_file *gen_kretprobe_test; #define KPROBE_GEN_TEST_ARG3 NULL #endif +static bool trace_event_file_is_valid(struct trace_event_file *input) +{ + return input && !IS_ERR(input); +} /* * Test to make sure we can create a kprobe event, then add more @@ -139,6 +143,8 @@ static int __init test_gen_kprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kprobe_test)) + gen_kprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); goto out; @@ -202,6 +208,8 @@ static int __init test_gen_kretprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kretprobe_test)) + gen_kretprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); goto out; @@ -217,10 +225,12 @@ static int __init kprobe_event_gen_test_init(void) ret = test_gen_kretprobe_cmd(); if (ret) { - WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); - trace_put_event_file(gen_kretprobe_test); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + } WARN_ON(kprobe_event_delete("gen_kretprobe_test")); } @@ -229,24 +239,30 @@ static int __init kprobe_event_gen_test_init(void) static void __exit kprobe_event_gen_test_exit(void) { - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, - "kprobes", - "gen_kprobe_test", false)); + if (trace_event_file_is_valid(gen_kprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kprobe_test")); - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - 
"gen_kretprobe_test", false)); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kretprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kretprobe_test")); diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index c69d82273ce7..32c3dfdb4d6a 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -83,8 +83,10 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler) { struct rethook *rh = kzalloc(sizeof(struct rethook), GFP_KERNEL); - if (!rh || !handler) + if (!rh || !handler) { + kfree(rh); return NULL; + } rh->data = data; rh->handler = handler; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9712083832f4..b21bf14bae9b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -519,6 +519,7 @@ struct ring_buffer_per_cpu { local_t committing; local_t commits; local_t pages_touched; + local_t pages_lost; local_t pages_read; long last_pages_touch; size_t shortest_full; @@ -894,10 +895,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; + size_t lost; size_t cnt; read = local_read(&buffer->buffers[cpu]->pages_read); + lost = local_read(&buffer->buffers[cpu]->pages_lost); cnt = local_read(&buffer->buffers[cpu]->pages_touched); + + if (WARN_ON_ONCE(cnt < lost)) + return 0; + + cnt -= lost; + /* The reader can read an empty page, but not more than that */ if (cnt < read) { WARN_ON_ONCE(read > cnt + 1); @@ -907,6 +916,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) return cnt - read; } +static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + size_t nr_pages; + size_t dirty; + + nr_pages = cpu_buffer->nr_pages; + if (!nr_pages || !full) + return true; + + dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + + return (dirty * 100) > (full * nr_pages); +} + /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * @@ -1046,22 +1070,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) !ring_buffer_empty_cpu(buffer, cpu)) { unsigned long flags; bool pagebusy; - size_t nr_pages; - size_t dirty; + bool done; if (!full) break; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + done = !pagebusy && full_hit(buffer, cpu, full); + if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (!pagebusy && - (!nr_pages || (dirty * 100) > full * nr_pages)) + if (done) break; } @@ -1087,6 +1109,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor + * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the 
@buffer's cpu buffers. Otherwise @@ -1096,14 +1119,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * zero otherwise. */ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table) + struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if (cpu == RING_BUFFER_ALL_CPUS) + if (cpu == RING_BUFFER_ALL_CPUS) { work = &buffer->irq_work; - else { + full = 0; + } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -EINVAL; @@ -1111,8 +1135,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, work = &cpu_buffer->irq_work; } - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + if (full) { + poll_wait(filp, &work->full_waiters, poll_table); + work->full_waiters_pending = true; + } else { + poll_wait(filp, &work->waiters, poll_table); + work->waiters_pending = true; + } + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1128,6 +1158,9 @@ */ smp_mb(); + if (full) + return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; @@ -1769,9 +1802,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(cpu_buffer->reader_page); - rb_head_page_deactivate(cpu_buffer); - if (head) { + rb_head_page_deactivate(cpu_buffer); + list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); @@ -2007,6 +2040,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) */ local_add(page_entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); /* @@ -2491,6 +2525,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, */ local_add(entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); /* * The entries will be zeroed out when we move the @@ -3155,10 +3190,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { - size_t nr_pages; - size_t dirty; - size_t full; - if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies its own memory barriers */ @@ -3182,10 +3213,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); - full = cpu_buffer->shortest_full; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); - if (full && nr_pages && (dirty * 100) <= full * nr_pages) + if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) return; cpu_buffer->irq_work.wakeup_full = true; @@ -5248,6 +5276,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); local_set(&cpu_buffer->pages_touched, 0); + local_set(&cpu_buffer->pages_lost, 0); local_set(&cpu_buffer->pages_read, 0); cpu_buffer->last_pages_touch = 0; cpu_buffer->shortest_full = 0; diff --git
a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 0b15e975d2c2..8d77526892f4 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void) /* Now generate a gen_synth_test event */ ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("gen_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } /* @@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void) /* Now trace an empty_synth_test event */ ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("empty_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } static struct synth_field_desc create_synth_test_fields[] = { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 47a44b055a1d..5cfc95a52bc3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf) } /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void) } } +void tracing_reset_all_online_cpus(void) +{ + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); +} + /* * The tgid_map array maps from pid to tgid; i.e. the value stored at index i * is the tgid last observed corresponding to pid=i. 
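The reset helpers above follow the usual locked/unlocked split: the _unlocked worker documents its precondition with lockdep, and the public wrapper supplies the lock. A generic sketch of the pattern (illustrative, not new kernel code):

	void do_reset_unlocked(void)
	{
		lockdep_assert_held(&trace_types_lock);	/* caller holds it */
		/* ... the actual reset work ... */
	}

	void do_reset(void)
	{
		mutex_lock(&trace_types_lock);
		do_reset_unlocked();
		mutex_unlock(&trace_types_lock);
	}

Callers that already hold trace_types_lock, such as trace_module_remove_events() further down, must use the _unlocked variant to avoid deadlocking on the mutex.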
@@ -6657,6 +6666,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); + kfree(iter->fmt); mutex_destroy(&iter->mutex); kfree(iter); @@ -6681,7 +6691,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl return EPOLLIN | EPOLLRDNORM; else return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, - filp, poll_table); + filp, poll_table, iter->tr->buffer_percent); } static __poll_t @@ -7802,6 +7812,7 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, int len) { struct tracing_log_err *err; + char *cmd; if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { err = alloc_tracing_log_err(len); @@ -7810,12 +7821,12 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, return err; } - + cmd = kzalloc(len, GFP_KERNEL); + if (!cmd) + return ERR_PTR(-ENOMEM); err = list_first_entry(&tr->err_log, struct tracing_log_err, list); kfree(err->cmd); - err->cmd = kzalloc(len, GFP_KERNEL); - if (!err->cmd) - return ERR_PTR(-ENOMEM); + err->cmd = cmd; list_del(&err->list); return err; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54ee5711c729..d42e24507152 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -580,6 +580,7 @@ int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); +void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 154996684fb5..4376887e0d8a 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); out: argv_free(argv); @@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 5dd0617e5df6..352b65e2b910 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -52,6 +52,7 @@ static void trace_event_probe_cleanup(struct trace_eprobe *ep) kfree(ep->event_system); if (ep->event) trace_event_put_ref(ep->event); + kfree(ep->filter_str); kfree(ep); } @@ -563,6 +564,9 @@ static void eprobe_trigger_func(struct event_trigger_data *data, { struct eprobe_data *edata = data->private_data; + if (unlikely(!rec)) + return; + __eprobe_trace_func(edata, rec); } @@ -642,7 +646,7 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file) INIT_LIST_HEAD(&trigger->list); if (ep->filter_str) { - ret = create_event_filter(file->tr, file->event_call, + ret = create_event_filter(file->tr, ep->event, ep->filter_str, false, &filter); if (ret) goto error; @@ -900,7 +904,7 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[]) { - struct event_filter *dummy; + struct event_filter *dummy = NULL; int i, ret, len = 0; char *p; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 
0356cae0cf74..f71ea6e79b3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ @@ -2972,7 +2981,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. */ - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48465f7e97b4..1c82478e8dff 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, * A trigger can define one or more variables. If any one of them is * currently referenced by any other trigger, this function will * determine that. - + * * Typically used to determine whether or not a trigger can be removed * - if there are any references to a trigger's variables, it cannot. * @@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, * events. However, for convenience, users are allowed to directly * specify an event field in an action, which will be automatically * converted into a variable on their behalf. - + * * This function creates a field variable with the name var_name on * the hist trigger currently being defined on the target event. If * subsys_name and event_name are specified, this function simply @@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *key = NULL; unsigned int i; + if (unlikely(!rbe)) + return; + memset(compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e310052dc83c..c3b582d19b62 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -828,10 +828,9 @@ static int register_synth_event(struct synth_event *event) } ret = set_synth_event_print_fmt(call); - if (ret < 0) { + /* unregister_trace_event() will be called inside */ + if (ret < 0) trace_remove_event_call(call); - goto err; - } out: return ret; err: @@ -1426,7 +1425,6 @@ int synth_event_delete(const char *event_name) mutex_unlock(&event_mutex); if (mod) { - mutex_lock(&trace_types_lock); /* * It is safest to reset the ring buffer if the module * being unloaded registered any events that were @@ -1438,7 +1436,6 @@ int synth_event_delete(const char *event_name) * occur. 
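+	 *
+	 * Note: tracing_reset_all_online_cpus() now takes
+	 * trace_types_lock itself, which is why the explicit
+	 * lock/unlock pair around it was dropped here.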
*/ tracing_reset_all_online_cpus(); - mutex_unlock(&trace_types_lock); } return ret; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index ae78c2d53c8a..539b08ae7020 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command) group = current_user_event_group(); - if (!group) + if (!group) { + kfree(name); return -ENOENT; + } mutex_lock(&group->reg_mutex); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 78d536d3ff3d..4300c5dc4e5d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id) void osnoise_trace_irq_exit(int id, const char *desc) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) static void thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) { - int duration; + s64 duration; if (!osn_var->sampling) return; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b69e207012c9..942ddbdace4a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -201,8 +201,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags, return trace_handle_return(s); } -extern char *__bad_type_size(void); - #define SYSCALL_FIELD(_type, _name) { \ .type = #_type, .name = #_name, \ .size = sizeof(_type), .align = __alignof__(_type), \ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade3..a1005415f0f4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2107,6 +2107,7 @@ config KPROBES_SANITY_TEST depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 96e092de5b72..adb2f9355ee6 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/test_rhashtable.c 
b/lib/test_rhashtable.c index f2ba5787055a..3ae3399f3651 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -434,7 +434,7 @@ out_free: static int __init test_rhashtable_max(struct test_obj *array, unsigned int entries) { - unsigned int i, insert_retries = 0; + unsigned int i; int err; test_rht_params.max_size = roundup_pow_of_two(entries / 8); @@ -447,9 +447,7 @@ static int __init test_rhashtable_max(struct test_obj *array, obj->value.id = i * 2; err = insert_retry(&ht, obj, test_rht_params); - if (err > 0) - insert_retries += err; - else if (err) + if (err < 0) return err; } diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 9f1219a67e3f..5ce403378c20 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2339,6 +2339,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_stats *sysfs_stats; + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; sysfs_stats->nr_tried = scheme->stat.nr_tried; sysfs_stats->sz_tried = scheme->stat.sz_tried; diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e48f8ef45b17..f1385c3b6c96 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, /* we rely on prep_new_huge_page to set the destructor */ set_compound_order(page, order); + __ClearPageReserved(page); __SetPageHead(page); for (i = 0; i < nr_pages; i++) { p = nth_page(page, i); @@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * on the head page when they need know if put_page() is needed * after get_user_pages(). 
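
The lib/fault-inject.c and mm/failslab.c hunks above replace the shared attr->no_warn field with a per-call FAULT_NOWARN flag, so one __GFP_NOWARN allocation can no longer silence the failure notice for every later caller sharing the same attr. Simplified model (trailing underscores mark local stand-ins for the kernel names):

    #include <stdio.h>
    #include <stdbool.h>

    #define FAULT_NOWARN_ 0x1           /* stand-in for FAULT_NOWARN */

    struct fault_attr_ { int times; };

    static bool should_fail_ex_(struct fault_attr_ *attr, int flags)
    {
        (void)attr;
        if (!(flags & FAULT_NOWARN_))   /* per-call, not shared state */
            printf("FAULT_INJECTION: forcing a failure.\n");
        return true;
    }

    static bool should_fail_(struct fault_attr_ *attr)
    {
        return should_fail_ex_(attr, 0);    /* legacy entry point, always warns */
    }

    int main(void)
    {
        struct fault_attr_ attr = { 0 };
        should_fail_ex_(&attr, FAULT_NOWARN_);  /* silent */
        return should_fail_(&attr) ? 0 : 1;     /* prints the notice */
    }
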
*/ - __ClearPageReserved(p); + if (i != 0) /* head page cleared above */ + __ClearPageReserved(p); /* * Subtle and very unlikely * diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 7e496856c2eb..46ecea18c4ca 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* - * In case of tail calls from any of the below - * to any of the above. + * In case of tail calls from any of the below to any of + * the above, optimized by the compiler such that the + * stack trace would omit the initial entry point below. */ fallback = skipnr + 1; } - /* Also the *_bulk() variants by only checking prefixes. */ + /* + * The below list should only include the initial entry points + * into the slab allocators. Includes the *_bulk() variants by + * checking prefixes. + */ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || - str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4734315f7940..a8d5ef2a77d2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -97,8 +97,8 @@ struct collapse_control { /* Num pages scanned per node */ u32 node_load[MAX_NUMNODES]; - /* Last target selected in hpage_collapse_find_target_node() */ - int last_target_node; + /* nodemask for allocation fallback */ + nodemask_t alloc_nmask; }; /** @@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void) struct collapse_control khugepaged_collapse_control = { .is_khugepaged = true, - .last_target_node = NUMA_NO_NODE, }; static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc) @@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) target_node = nid; } - /* do some balance if several nodes have the same hit record */ - if (target_node <= cc->last_target_node) - for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES; - nid++) - if (max_value == cc->node_load[nid]) { - target_node = nid; - break; - } + for_each_online_node(nid) { + if (max_value == cc->node_load[nid]) + node_set(nid, cc->alloc_nmask); + } - cc->last_target_node = target_node; return target_node; } #else @@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node) +static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, + nodemask_t *nmask) { - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; @@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, struct collapse_control *cc) { - /* Only allocate from the target node */ gfp_t gfp = (cc->is_khugepaged ? 
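
The kfence get_stack_skipnr() hunk above moves __kmem_cache_free into the "allocator-internal" set and documents that the second list should hold only true entry points. Roughly, the routine scans the trace, remembering a fallback index past internal frames in case the compiler tail-called the entry point away. A simplified userspace model (frame names are examples only, and the real function derives the report start from the returned index):

    #include <string.h>
    #include <stdio.h>

    static int has_prefix(const char *s, const char *p)
    {
        return strncmp(s, p, strlen(p)) == 0;
    }

    static int get_skipnr(const char *frames[], int n)
    {
        int i, fallback = 0;

        for (i = 0; i < n; i++) {
            if (has_prefix(frames[i], "kfence_") ||
                has_prefix(frames[i], "__slab_free"))
                fallback = i + 1;   /* internal frame: entry point is below */
            if (has_prefix(frames[i], "kfree") ||
                has_prefix(frames[i], "kmem_cache_alloc"))
                return i;           /* initial entry point into the allocator */
        }
        return fallback;
    }

    int main(void)
    {
        const char *trace[] = { "kfence_guarded_free", "__slab_free",
                                "kfree", "my_driver_fn" };
        printf("skipnr=%d\n", get_skipnr(trace, 4));
        return 0;
    }
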
alloc_hugepage_khugepaged_gfpmask() : - GFP_TRANSHUGE) | __GFP_THISNODE; + GFP_TRANSHUGE); int node = hpage_collapse_find_target_node(cc); - if (!hpage_collapse_alloc_page(hpage, gfp, node)) + if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) return SCAN_ALLOC_HUGE_PAGE_FAIL; if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp))) return SCAN_CGROUP_CHARGE_FAIL; @@ -1144,6 +1138,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, goto out; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -2077,6 +2072,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, present = 0; swap = 0; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); rcu_read_lock(); xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { if (xas_retry(&xas, page)) @@ -2157,8 +2153,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } } - trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname, - present, swap, result); + trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); return result; } #else @@ -2576,7 +2571,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, if (!cc) return -ENOMEM; cc->is_khugepaged = false; - cc->last_target_node = NUMA_NO_NODE; mmgrab(mm); lru_add_drain_all(); @@ -2602,6 +2596,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d8549ae1b30..a1a35c12635e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) { struct obj_cgroup *objcg; - if (!memcg_kmem_enabled() || memcg_kmem_bypass()) + if (!memcg_kmem_enabled()) return NULL; if (PageMemcgKmem(page)) { diff --git a/mm/memory.c b/mm/memory.c index f88c351aecd4..8a6d5c823f91 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3763,7 +3763,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - vmf->page->pgmap->ops->migrate_to_ram(vmf); + ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 6fa682eef7a0..721b2365dbca 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) } /* - * Unmaps pages for migration. Returns number of unmapped pages. + * Unmaps pages for migration. Returns number of source pfns marked as + * migrating. 
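
The khugepaged hunks above drop __GFP_THISNODE and the last_target_node round-robin; instead, every node tied at the maximum scan count is collected into alloc_nmask, which __alloc_pages() uses as a fallback set. A userspace model of that selection (a plain bitmask stands in for nodemask_t):

    #include <stdio.h>

    #define MAX_NODES 8

    static int find_target_node(const unsigned int load[], unsigned long *nmask)
    {
        unsigned int max = 0;
        int nid, target = 0;

        for (nid = 0; nid < MAX_NODES; nid++)
            if (load[nid] > max) {
                max = load[nid];
                target = nid;       /* preferred node: most pages scanned */
            }

        for (nid = 0; nid < MAX_NODES; nid++)
            if (load[nid] == max)
                *nmask |= 1UL << nid;   /* allow fallback to tied nodes */

        return target;
    }

    int main(void)
    {
        unsigned int load[MAX_NODES] = { 3, 7, 7, 0 };
        unsigned long nmask = 0;
        int target = find_target_node(load, &nmask);

        printf("target=%d fallback mask=0x%lx\n", target, nmask);
        return 0;
    }
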
*/ static unsigned long migrate_device_unmap(unsigned long *src_pfns, unsigned long npages, @@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, struct page *page = migrate_pfn_to_page(src_pfns[i]); struct folio *folio; - if (!page) + if (!page) { + if (src_pfns[i] & MIGRATE_PFN_MIGRATE) + unmapped++; continue; + } /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { diff --git a/mm/mmap.c b/mm/mmap.c index c3c5c1d6103d..74a84eb33b90 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -456,7 +456,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) * vma_mas_szero() - Set a given range to zero. Used when modifying a * vm_area_struct start or end. * - * @mm: The struct_mm + * @mas: The maple tree ma_state * @start: The start address to zero * @end: The end address to zero. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 218b28ee49ed..6e60657875d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/page_ext.c b/mm/page_ext.c index affe80243b6d..ddf1968560f0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page) /** * page_ext_put() - Working with page extended information is done. - * @page_ext - Page extended information received from page_ext_get(). + * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. diff --git a/mm/swapfile.c b/mm/swapfile.c index 5fc1237a9f21..72e481aacd5d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -973,23 +973,23 @@ done: scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; } offset = si->lowest_bit; while (offset < scan_base) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; offset++; } spin_lock(&si->lock); diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..026199c047e0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, * the flushers simply cannot keep up with the allocation * rate. Nudge the flusher threads in case they are asleep. 
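
The mm/mmap.c and mm/page_ext.c hunks above are kernel-doc fixes: a parameter line must name the actual parameter and use "@name:" with a colon, or the docs build warns. Assumed minimal example of the accepted form:

    struct example_info;

    /**
     * example_put() - Release extended information.
     * @info: Pointer returned by example_get().
     *
     * The information may not be valid after this function returns.
     */
    void example_put(struct example_info *info);
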
*/ - if (stat.nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { wakeup_flusher_threads(WB_REASON_VMSCAN); + /* + * For cgroupv1 dirty throttling is achieved by waking up + * the kernel flusher here and later waiting on folios + * which are in writeback to finish (see shrink_folio_list()). + * + * Flusher may not be able to issue writeback quickly + * enough for cgroupv1 writeback throttling to work + * on a large system. + */ + if (!writeback_throttling_sane(sc)) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + } sc->nr.dirty += stat.nr_dirty; sc->nr.congested += stat.nr_congested; @@ -4971,10 +4983,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap int scanned; int reclaimed; LIST_HEAD(list); + LIST_HEAD(clean); struct folio *folio; + struct folio *next; enum vm_event_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; + bool skip_retry = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -4991,20 +5006,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap if (list_empty(&list)) return scanned; - +retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); + sc->nr_reclaimed += reclaimed; - list_for_each_entry(folio, &list, lru) { - /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ - if (folio_test_workingset(folio)) - folio_set_referenced(folio); + list_for_each_entry_safe_reverse(folio, next, &list, lru) { + if (!folio_evictable(folio)) { + list_del(&folio->lru); + folio_putback_lru(folio); + continue; + } - /* don't add rejected pages to the oldest generation */ if (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio))) - folio_clear_active(folio); - else - folio_set_active(folio); + (folio_test_dirty(folio) || folio_test_writeback(folio))) { + /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ + if (folio_test_workingset(folio)) + folio_set_referenced(folio); + continue; + } + + if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) || + folio_mapped(folio) || folio_test_locked(folio) || + folio_test_dirty(folio) || folio_test_writeback(folio)) { + /* don't add rejected folios to the oldest generation */ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, + BIT(PG_active)); + continue; + } + + /* retry folios that may have missed folio_rotate_reclaimable() */ + list_move(&folio->lru, &clean); + sc->nr_scanned -= folio_nr_pages(folio); } spin_lock_irq(&lruvec->lru_lock); @@ -5026,7 +5058,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap mem_cgroup_uncharge_list(&list); free_unref_page_list(&list); - sc->nr_reclaimed += reclaimed; + INIT_LIST_HEAD(&list); + list_splice_init(&clean, &list); + + if (!list_empty(&list)) { + skip_retry = true; + goto retry; + } if (need_swapping && type == LRU_GEN_ANON) *need_swapping = true; @@ -5844,8 +5882,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + bool proportional_reclaim; struct blk_plug plug; - bool scan_adjusted; if (lru_gen_enabled()) { lru_gen_shrink_lruvec(lruvec, sc); @@ -5868,8 +5906,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. 
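
The evict_folios() hunks above split rejected folios: clearly busy ones go back active, while folios that may only have missed folio_rotate_reclaimable() move to a clean list and are shrunk once more, with skip_retry bounding this to a single extra pass. A heavily simplified single-retry model:

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        int folios[] = { 0, 2, 0, 1 };  /* 0 = reclaimable, >0 = busy for N passes */
        int n = 4, reclaimed = 0;
        bool skip_retry = false;

    retry:
        {
            int clean = 0, i;

            for (i = 0; i < n; i++) {
                if (folios[i] == 0)
                    reclaimed++;                    /* shrink succeeded */
                else if (!skip_retry)
                    folios[clean++] = folios[i] - 1; /* may become clean shortly */
            }
            n = clean;
        }
        if (n) {
            skip_retry = true;  /* retry the clean list once, then stop */
            goto retry;
        }
        printf("reclaimed=%d\n", reclaimed);
        return 0;
    }
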
*/ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -5889,7 +5927,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* @@ -5940,8 +5978,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); - - scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; diff --git a/net/802/mrp.c b/net/802/mrp.c index 155f74d8b14f..6c927d4b35f0 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -606,7 +606,10 @@ static void mrp_join_timer(struct timer_list *t) spin_unlock(&app->lock); mrp_queue_xmit(app); - mrp_join_timer_arm(app); + spin_lock(&app->lock); + if (likely(app->active)) + mrp_join_timer_arm(app); + spin_unlock(&app->lock); } static void mrp_periodic_timer_arm(struct mrp_applicant *app) @@ -620,11 +623,12 @@ static void mrp_periodic_timer(struct timer_list *t) struct mrp_applicant *app = from_timer(app, t, periodic_timer); spin_lock(&app->lock); - mrp_mad_event(app, MRP_EVENT_PERIODIC); - mrp_pdu_queue(app); + if (likely(app->active)) { + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + mrp_periodic_timer_arm(app); + } spin_unlock(&app->lock); - - mrp_periodic_timer_arm(app); } static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) @@ -872,6 +876,7 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) app->dev = dev; app->app = appl; app->mad = RB_ROOT; + app->active = true; spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); @@ -900,6 +905,9 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) RCU_INIT_POINTER(port->applicants[appl->type], NULL); + spin_lock_bh(&app->lock); + app->active = false; + spin_unlock_bh(&app->lock); /* Delete timer and generate a final TX event to flush out * all pending messages before the applicant is gone. 
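
The mrp hunks above close a rearm-after-teardown race: both timers now re-arm only while app->active is true, checked under app->lock, and mrp_uninit_applicant() clears the flag under the same lock before deleting the timer. Sketch with a plain mutex and a stubbed timer (no real timer wheel; compile with -pthread):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct applicant {
        pthread_mutex_t lock;
        bool active;
    };

    static void timer_arm(struct applicant *app) { (void)app; /* queue next tick */ }

    static void timer_fire(struct applicant *app)
    {
        pthread_mutex_lock(&app->lock);
        if (app->active)
            timer_arm(app);     /* never re-arms once uninit ran */
        pthread_mutex_unlock(&app->lock);
    }

    static void uninit(struct applicant *app)
    {
        pthread_mutex_lock(&app->lock);
        app->active = false;
        pthread_mutex_unlock(&app->lock);
        /* now safe to delete the timer: it cannot re-arm itself anymore */
    }

    int main(void)
    {
        struct applicant app = { PTHREAD_MUTEX_INITIALIZER, true };
        timer_fire(&app);
        uninit(&app);
        timer_fire(&app);   /* no-op: active already cleared */
        printf("ok\n");
        return 0;
    }
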
*/ diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 56a186768750..07db2f436d44 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -120,7 +120,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -202,9 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } spin_unlock(&m->req_lock); @@ -291,7 +293,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -314,7 +316,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, @@ -322,14 +324,6 @@ static void p9_read_work(struct work_struct *work) goto error; } - if (m->rc.size >= m->client->msize) { - p9_debug(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", - m->rc.size); - err = -EIO; - goto error; - } - p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); @@ -342,6 +336,14 @@ static void p9_read_work(struct work_struct *work) goto error; } + if (m->rc.size > m->rreq->rc.capacity) { + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d for tag %d with capacity %zd\n", + m->rc.size, m->rc.tag, m->rreq->rc.capacity); + err = -EIO; + goto error; + } + if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", @@ -860,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index b15c64128c3e..aaa5fd364691 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -208,6 +208,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -217,6 +225,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE(ring)); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index e78dadfc5829..2d434c1f4617 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -156,29 +156,29 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size, } static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int 
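
The two 9p hunks above stop trusting the peer's header: the announced packet size is now bounded by the capacity of the request's receive fcall before anything is copied, rather than only against the client msize. Minimal model of that check:

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>

    static int recv_packet(char *dst, size_t dst_cap,
                           const char *src, size_t announced)
    {
        if (announced > dst_cap)
            return -EIO;    /* "requested packet size too big" */
        memcpy(dst, src, announced);
        return 0;
    }

    int main(void)
    {
        char buf[16];

        printf("%d\n", recv_packet(buf, sizeof(buf), "hello", 5));     /* 0 */
        printf("%d\n", recv_packet(buf, sizeof(buf), "x", 1 << 20));   /* -EIO */
        return 0;
    }
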
off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag) { const struct btf_type *state; + const struct btf_type *t; s32 type_id; int err; - type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state", + type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state", BTF_KIND_STRUCT); if (type_id < 0) return -EINVAL; - state = btf_type_by_id(btf, type_id); + t = btf_type_by_id(reg->btf, reg->btf_id); + state = btf_type_by_id(reg->btf, type_id); if (t != state) { bpf_log(log, "only access to bpf_dummy_ops_state is supported\n"); return -EACCES; } - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id, - flag); + err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); if (err < 0) return err; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fcb3e6c5e03c..6094ef7cffcd 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -980,9 +980,6 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) { struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; - if (!skb->len) - return -EINVAL; - if (!__skb) return 0; diff --git a/net/core/devlink.c b/net/core/devlink.c index d93bc95cd7cb..907df7124157 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -41,7 +41,7 @@ struct devlink_dev_stats { struct devlink { u32 index; - struct list_head port_list; + struct xarray ports; struct list_head rate_list; struct list_head sb_list; struct list_head dpipe_table_list; @@ -382,19 +382,7 @@ static struct devlink *devlink_get_from_attrs(struct net *net, static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, unsigned int port_index) { - struct devlink_port *devlink_port; - - list_for_each_entry(devlink_port, &devlink->port_list, list) { - if (devlink_port->index == port_index) - return devlink_port; - } - return NULL; -} - -static bool devlink_port_index_exists(struct devlink *devlink, - unsigned int port_index) -{ - return devlink_port_get_by_index(devlink, port_index); + return xa_load(&devlink->ports, port_index); } static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, @@ -1565,14 +1553,14 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; struct devlink_port *devlink_port; + unsigned long index, port_index; int start = cb->args[0]; - unsigned long index; int idx = 0; int err; devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); - list_for_each_entry(devlink_port, &devlink->port_list, list) { + xa_for_each(&devlink->ports, port_index, devlink_port) { if (idx < start) { idx++; continue; @@ -2886,10 +2874,11 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, { struct devlink_port *devlink_port; u16 pool_count = devlink_sb_pool_count(devlink_sb); + unsigned long port_index; u16 pool_index; int err; - list_for_each_entry(devlink_port, &devlink->port_list, list) { + xa_for_each(&devlink->ports, port_index, devlink_port) { for (pool_index = 0; pool_index < pool_count; pool_index++) { if (*p_idx < start) { (*p_idx)++; @@ -3107,10 +3096,11 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, u32 portid, u32 seq) { struct devlink_port *devlink_port; + unsigned long port_index; u16 tc_index; int err; - list_for_each_entry(devlink_port, &devlink->port_list, list) { + xa_for_each(&devlink->ports, port_index, devlink_port) { for (tc_index = 0; tc_index < devlink_sb->ingress_tc_count; tc_index++) { if (*p_idx < start) { @@ -6207,6 +6197,7 @@ 
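
The devlink hunks above and below replace the port_list walk with an xarray keyed by port index: lookup becomes xa_load(), iteration xa_for_each(), and xa_insert() rejects a duplicate index without the old devlink_port_index_exists() scan. Userspace model with a fixed array standing in for the xarray:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_PORTS 64

    static void *ports[MAX_PORTS];  /* stand-in for struct xarray */

    static int port_insert(unsigned int index, void *port)
    {
        if (index >= MAX_PORTS || ports[index])
            return -EBUSY;  /* xa_insert() also fails on an occupied slot */
        ports[index] = port;
        return 0;
    }

    static void *port_get(unsigned int index)
    {
        return index < MAX_PORTS ? ports[index] : NULL; /* cf. xa_load() */
    }

    int main(void)
    {
        int dummy;

        printf("%d\n", port_insert(3, &dummy));     /* 0 */
        printf("%d\n", port_insert(3, &dummy));     /* -EBUSY: already present */
        printf("%p\n", port_get(3));
        return 0;
    }
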
static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, { struct devlink_region *region; struct devlink_port *port; + unsigned long port_index; int err = 0; devl_lock(devlink); @@ -6225,7 +6216,7 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, (*idx)++; } - list_for_each_entry(port, &devlink->port_list, list) { + xa_for_each(&devlink->ports, port_index, port) { err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx, start); if (err) @@ -6431,7 +6422,6 @@ unlock: } static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, - struct devlink *devlink, u8 *chunk, u32 chunk_size, u64 addr) { @@ -6461,39 +6451,37 @@ nla_put_failure: #define DEVLINK_REGION_READ_CHUNK_SIZE 256 -static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, - struct devlink *devlink, - struct devlink_region *region, - struct nlattr **attrs, - u64 start_offset, - u64 end_offset, - u64 *new_offset) +typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, + struct netlink_ext_ack *extack); + +static int +devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb, + void *cb_priv, u64 start_offset, u64 end_offset, + u64 *new_offset, struct netlink_ext_ack *extack) { - struct devlink_snapshot *snapshot; u64 curr_offset = start_offset; - u32 snapshot_id; int err = 0; + u8 *data; - *new_offset = start_offset; + /* Allocate and re-use a single buffer */ + data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; - snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]); - snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) - return -EINVAL; + *new_offset = start_offset; while (curr_offset < end_offset) { u32 data_size; - u8 *data; - if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE) - data_size = end_offset - curr_offset; - else - data_size = DEVLINK_REGION_READ_CHUNK_SIZE; + data_size = min_t(u32, end_offset - curr_offset, + DEVLINK_REGION_READ_CHUNK_SIZE); - data = &snapshot->data[curr_offset]; - err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink, - data, data_size, - curr_offset); + err = cb(cb_priv, data, data_size, curr_offset, extack); + if (err) + break; + + err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset); if (err) break; @@ -6501,21 +6489,57 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, } *new_offset = curr_offset; + kfree(data); + return err; } +static int +devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, + struct netlink_ext_ack __always_unused *extack) +{ + struct devlink_snapshot *snapshot = cb_priv; + + memcpy(chunk, &snapshot->data[curr_offset], chunk_size); + + return 0; +} + +static int +devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->port_ops->read(region->port, region->port_ops, extack, + curr_offset, chunk_size, chunk); +} + +static int +devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->ops->read(region->devlink, region->ops, extack, + curr_offset, chunk_size, chunk); +} + static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const struct genl_dumpit_info *info = 
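
The region-read refactor above funnels both snapshot and direct device reads through one loop parameterized by a devlink_chunk_fill_t callback, reusing a single DEVLINK_REGION_READ_CHUNK_SIZE buffer instead of pointing into snapshot data. A compact sketch of that callback-driven loop:

    #include <stdio.h>
    #include <string.h>

    #define CHUNK 256

    typedef int (*chunk_fill_t)(void *priv, char *chunk,
                                unsigned int size, unsigned long off);

    static int read_fill(chunk_fill_t cb, void *priv,
                         unsigned long start, unsigned long end)
    {
        char buf[CHUNK];    /* one buffer, reused for every chunk */
        unsigned long off;

        for (off = start; off < end; off += CHUNK) {
            unsigned int size = (end - off < CHUNK) ? (unsigned int)(end - off)
                                                    : CHUNK;
            int err = cb(priv, buf, size, off);

            if (err)
                return err;
            /* emit 'buf' to the netlink message here */
        }
        return 0;
    }

    static int snapshot_fill(void *priv, char *chunk,
                             unsigned int size, unsigned long off)
    {
        memcpy(chunk, (char *)priv + off, size);    /* snapshot source */
        return 0;
    }

    int main(void)
    {
        static char data[1024];
        return read_fill(snapshot_fill, data, 0, sizeof(data));
    }
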
genl_dumpit_info(cb); + struct nlattr *chunks_attr, *region_attr, *snapshot_attr; u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = info->attrs; struct devlink_port *port = NULL; + devlink_chunk_fill_t *region_cb; struct devlink_region *region; - struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; unsigned int index; + void *region_cb_priv; void *hdr; int err; @@ -6527,8 +6551,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, devl_lock(devlink); - if (!attrs[DEVLINK_ATTR_REGION_NAME] || - !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { + if (!attrs[DEVLINK_ATTR_REGION_NAME]) { + NL_SET_ERR_MSG(cb->extack, "No region name provided"); err = -EINVAL; goto out_unlock; } @@ -6543,7 +6567,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, } } - region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]); + region_attr = attrs[DEVLINK_ATTR_REGION_NAME]; + region_name = nla_data(region_attr); if (port) region = devlink_port_region_get_by_name(port, region_name); @@ -6551,10 +6576,51 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, region = devlink_region_get_by_name(devlink, region_name); if (!region) { + NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist"); err = -EINVAL; goto out_unlock; } + snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; + if (!snapshot_attr) { + if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG(cb->extack, "No snapshot id provided"); + err = -EINVAL; + goto out_unlock; + } + + if (!region->ops->read) { + NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read"); + err = -EOPNOTSUPP; + goto out_unlock; + } + + if (port) + region_cb = &devlink_region_port_direct_fill; + else + region_cb = &devlink_region_direct_fill; + region_cb_priv = region; + } else { + struct devlink_snapshot *snapshot; + u32 snapshot_id; + + if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot"); + err = -EINVAL; + goto out_unlock; + } + + snapshot_id = nla_get_u32(snapshot_attr); + snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); + if (!snapshot) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist"); + err = -EINVAL; + goto out_unlock; + } + region_cb = &devlink_region_snapshot_fill; + region_cb_priv = snapshot; + } + if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { if (!start_offset) @@ -6603,10 +6669,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; } - err = devlink_nl_region_read_snapshot_fill(skb, devlink, - region, attrs, - start_offset, - end_offset, &ret_offset); + err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv, + start_offset, end_offset, &ret_offset, + cb->extack); if (err && err != -EMSGSIZE) goto nla_put_failure; @@ -6633,14 +6698,6 @@ out_unlock: return err; } -int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - if (!req->msg) - return 0; - return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); -} -EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); - int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) @@ -6749,11 +6806,25 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); +static int 
devlink_nl_driver_info_get(struct device_driver *drv, + struct devlink_info_req *req) +{ + if (!drv) + return 0; + + if (drv->name[0]) + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, + drv->name); + + return 0; +} + static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { + struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; @@ -6767,7 +6838,13 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, goto err_cancel_msg; req.msg = msg; - err = devlink->ops->info_get(devlink, &req, extack); + if (devlink->ops->info_get) { + err = devlink->ops->info_get(devlink, &req, extack); + if (err) + goto err_cancel_msg; + } + + err = devlink_nl_driver_info_get(dev->driver, &req); if (err) goto err_cancel_msg; @@ -6786,9 +6863,6 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - if (!devlink->ops->info_get) - return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6814,7 +6888,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int err = 0; devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (idx < start || !devlink->ops->info_get) + if (idx < start) goto inc; devl_lock(devlink); @@ -7846,8 +7920,6 @@ int devlink_health_report(struct devlink_health_reporter *reporter, return -ECANCELED; } - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - if (reporter->auto_dump) { mutex_lock(&reporter->dump_lock); /* store current dump of current error, for later analysis */ @@ -7976,10 +8048,10 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct devlink_health_reporter *reporter; + unsigned long index, port_index; struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; - unsigned long index; int idx = 0; int err; @@ -8008,7 +8080,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); - list_for_each_entry(port, &devlink->port_list, list) { + xa_for_each(&devlink->ports, port_index, port) { mutex_lock(&port->reporters_lock); list_for_each_entry(reporter, &port->reporter_list, list) { if (idx < start) { @@ -9253,6 +9325,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -9726,9 +9799,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, devlink->dev = dev; devlink->ops = ops; + xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); - INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->linecard_list); INIT_LIST_HEAD(&devlink->sb_list); @@ -9780,12 +9853,13 @@ static void devlink_notify_register(struct devlink *devlink) struct devlink_linecard *linecard; struct devlink_rate *rate_node; struct devlink_region *region; + unsigned long port_index; devlink_notify(devlink, DEVLINK_CMD_NEW); list_for_each_entry(linecard, &devlink->linecard_list, list) 
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - list_for_each_entry(devlink_port, &devlink->port_list, list) + xa_for_each(&devlink->ports, port_index, devlink_port) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) @@ -9819,6 +9893,7 @@ static void devlink_notify_unregister(struct devlink *devlink) struct devlink_port *devlink_port; struct devlink_rate *rate_node; struct devlink_region *region; + unsigned long port_index; list_for_each_entry_reverse(param_item, &devlink->param_list, list) devlink_param_notify(devlink, 0, param_item, @@ -9841,7 +9916,7 @@ static void devlink_notify_unregister(struct devlink *devlink) devlink_trap_policer_notify(devlink, policer_item, DEVLINK_CMD_TRAP_POLICER_DEL); - list_for_each_entry_reverse(devlink_port, &devlink->port_list, list) + xa_for_each(&devlink->ports, port_index, devlink_port) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); devlink_notify(devlink, DEVLINK_CMD_DEL); } @@ -9905,12 +9980,13 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->sb_list)); WARN_ON(!list_empty(&devlink->rate_list)); WARN_ON(!list_empty(&devlink->linecard_list)); - WARN_ON(!list_empty(&devlink->port_list)); + WARN_ON(!xa_empty(&devlink->ports)); xa_destroy(&devlink->snapshot_ids); + xa_destroy(&devlink->ports); - unregister_netdevice_notifier_net(devlink_net(devlink), - &devlink->netdevice_nb); + WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink), + &devlink->netdevice_nb)); xa_erase(&devlinks, devlink->index); @@ -10006,10 +10082,9 @@ int devl_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) { - devl_assert_locked(devlink); + int err; - if (devlink_port_index_exists(devlink, port_index)) - return -EEXIST; + devl_assert_locked(devlink); ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); @@ -10019,7 +10094,11 @@ int devl_port_register(struct devlink *devlink, spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); - list_add_tail(&devlink_port->list, &devlink->port_list); + err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL); + if (err) { + mutex_destroy(&devlink_port->reporters_lock); + return err; + } INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); @@ -10068,7 +10147,7 @@ void devl_port_unregister(struct devlink_port *devlink_port) devlink_port_type_warn_cancel(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); - list_del(&devlink_port->list); + xa_erase(&devlink_port->devlink->ports, devlink_port->index); WARN_ON(!list_empty(&devlink_port->reporter_list)); mutex_destroy(&devlink_port->reporters_lock); devlink_port->registered = false; diff --git a/net/core/filter.c b/net/core/filter.c index da4f697e4fa4..37baaa6b8fc3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2124,12 +2124,13 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, { unsigned int mlen = skb_network_offset(skb); + if (unlikely(skb->len <= mlen)) { + kfree_skb(skb); + return -ERANGE; + } + if (mlen) { __skb_pull(skb, mlen); - if (unlikely(!skb->len)) { - kfree_skb(skb); - return -ERANGE; - } /* At ingress, the mac header has already been pulled once. 
* At egress, skb_pospull_rcsum has to be done in case that @@ -2149,7 +2150,7 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, u32 flags) { /* Verify that a link layer header is carried */ - if (unlikely(skb->mac_header >= skb->network_header)) { + if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) { kfree_skb(skb); return -ERANGE; } @@ -4108,7 +4109,10 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { .arg2_type = ARG_ANYTHING, }; -/* XDP_REDIRECT works by a three-step process, implemented in the functions +/** + * DOC: xdp redirect + * + * XDP_REDIRECT works by a three-step process, implemented in the functions * below: * * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target @@ -4123,7 +4127,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(), * which will flush all the different bulk queues, thus completing the * redirect. - * + */ +/* * Pointers to the map entries will be kept around for this whole sequence of * steps, protected by RCU. However, there is no top-level rcu_read_lock() in * the core code; instead, the RCU protection relies on everything happening @@ -4414,10 +4419,10 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, +BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key, u64, flags) { - return map->ops->map_redirect(map, ifindex, flags); + return map->ops->map_redirect(map, key, flags); } static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { @@ -8651,28 +8656,25 @@ static bool tc_cls_act_is_valid_access(int off, int size, DEFINE_MUTEX(nf_conn_btf_access_lock); EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); -int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); +int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag); EXPORT_SYMBOL_GPL(nfct_btf_struct_access); static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) { int ret = -EACCES; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, - flag); + return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); mutex_unlock(&nf_conn_btf_access_lock); return ret; @@ -8738,21 +8740,18 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); static int xdp_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + const struct bpf_reg_state *reg, + int 
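
The __bpf_redirect_no_mac() hunk above checks skb->len <= mlen before pulling, so a packet that would be emptied (or over-consumed) by the pull is dropped with -ERANGE up front, and __bpf_redirect_common() gains the matching skb->len == 0 check. Simplified model:

    #include <stdio.h>
    #include <errno.h>

    struct pkt { unsigned int len; unsigned int off; };

    static int redirect_no_mac(struct pkt *p, unsigned int mlen)
    {
        if (p->len <= mlen)
            return -ERANGE;     /* nothing left after the pull: drop */
        if (mlen) {
            p->off += mlen;     /* models __skb_pull() */
            p->len -= mlen;
        }
        return 0;
    }

    int main(void)
    {
        struct pkt p = { .len = 14, .off = 0 };

        printf("%d\n", redirect_no_mac(&p, 14));    /* -ERANGE */
        printf("%d\n", redirect_no_mac(&p, 4));     /* 0: 10 bytes remain */
        return 0;
    }
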
off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) { int ret = -EACCES; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, - flag); + return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, btf, t, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); mutex_unlock(&nf_conn_btf_access_lock); return ret; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e81798ed3e0..25fb0bbc310f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -296,7 +296,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - key->ct_mark = ct->mark; + key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6fac2f0ef074..711cd3b4347a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -48,9 +48,11 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "RPL"; case LWTUNNEL_ENCAP_IOAM6: return "IOAM6"; + case LWTUNNEL_ENCAP_XFRM: + /* module autoload not supported for encap type */ + return NULL; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: - case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a77a85e357e0..952a54763358 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,7 +307,31 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_parms_qlen_dec(struct net_device *dev, int family) +{ + struct neigh_parms *p; + + rcu_read_lock(); + p = neigh_get_dev_parms_rcu(dev, family); + if (p) + p->qlen--; + rcu_read_unlock(); +} + +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, + int family) { struct sk_buff_head tmp; unsigned long flags; @@ -321,13 +345,7 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) struct net_device *dev = skb->dev; if (net == NULL || net_eq(dev_net(dev), net)) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } @@ -409,7 +427,8 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); + pneigh_queue_purge(&tbl->proxy_queue, dev ? 
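
The flow_dissector hunk above wraps ct->mark in READ_ONCE() because the mark is updated concurrently without the dissector holding a lock; the annotation forces one untorn load instead of letting the compiler re-read or split it. Userspace rendering of the same idea (the macro below approximates the kernel's READ_ONCE):

    #include <stdio.h>

    #define READ_ONCE(x)  (*(volatile __typeof__(x) *)&(x))

    static unsigned int ct_mark;    /* concurrently updated elsewhere */

    int main(void)
    {
        unsigned int key_mark = READ_ONCE(ct_mark); /* single, untorn load */

        printf("%u\n", key_mark);
        return 0;
    }
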
dev_net(dev) : NULL, + tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; @@ -1621,13 +1640,8 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; - struct in_device *in_dev; - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { @@ -1821,7 +1835,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue, NULL); + pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@ -3539,18 +3553,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, - int family) -{ - switch (family) { - case AF_INET: - return __in_dev_arp_parms_get_rcu(dev); - case AF_INET6: - return __in6_dev_nd_parms_get_rcu(dev); - } - return NULL; -} - static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { diff --git a/net/core/of_net.c b/net/core/of_net.c index f1a9bf7578e7..55d3fe229269 100644 --- a/net/core/of_net.c +++ b/net/core/of_net.c @@ -57,7 +57,7 @@ static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) return -ENODEV; } -static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) +int of_get_mac_address_nvmem(struct device_node *np, u8 *addr) { struct platform_device *pdev = of_find_device_by_node(np); struct nvmem_cell *cell; @@ -94,6 +94,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) return 0; } +EXPORT_SYMBOL(of_get_mac_address_nvmem); /** * of_get_mac_address() @@ -140,7 +141,7 @@ int of_get_mac_address(struct device_node *np, u8 *addr) if (!ret) return 0; - return of_get_mac_addr_nvmem(np, addr); + return of_get_mac_address_nvmem(np, addr); } EXPORT_SYMBOL(of_get_mac_address); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index b11593cae5a0..b1e29e18d1d6 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -30,7 +30,10 @@ u64 __sock_gen_cookie(struct sock *sk) if (!res) { u64 new = gen_cookie_next(&sock_cookie); - atomic64_try_cmpxchg(&sk->sk_cookie, &res, new); + atomic64_cmpxchg(&sk->sk_cookie, res, new); + + /* Another thread might have changed sk_cookie before us. 
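
The neighbour hunks above and below hoist the family dispatch into neigh_get_dev_parms_rcu()/neigh_parms_qlen_dec(), so purging the proxy queue decrements the IPv6 ND qlen too, not just the IPv4 ARP one. Userspace model of the dispatch (the AF_* values and parms structs are local stand-ins):

    #include <stdio.h>

    enum { AF_INET_ = 2, AF_INET6_ = 10 };

    struct parms { int qlen; };

    static struct parms arp_parms, nd_parms;

    static struct parms *get_dev_parms(int family)
    {
        switch (family) {
        case AF_INET_:  return &arp_parms;  /* ARP parms */
        case AF_INET6_: return &nd_parms;   /* ND parms */
        }
        return NULL;
    }

    static void parms_qlen_dec(int family)
    {
        struct parms *p = get_dev_parms(family);

        if (p)
            p->qlen--;
    }

    int main(void)
    {
        nd_parms.qlen = 1;
        parms_qlen_dec(AF_INET6_);  /* previously only the ARP qlen was touched */
        printf("%d\n", nd_parms.qlen);
        return 0;
    }
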
*/ + res = atomic64_read(&sk->sk_cookie); } return res; } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 713b7b8dad7e..b780827f5e0a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly; int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) daddr = fl4->daddr; if (inet->inet_saddr == 0) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = - inet_bhashfn_portaddr(&dccp_hashinfo, sk, - sock_net(sk), - inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } inet->inet_dport = usin->sin_port; @@ -157,6 +143,7 @@ failure: * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ae62b1591dea..4260fe466993 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (saddr == NULL) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo, - sk, sock_net(sk), - inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } - saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -985,6 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); + inet_bhash2_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9494b0d224f9..a06b5641287a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -285,8 +285,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 3eef72ce99a4..8e698bea99a3 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -18,6 +18,12 @@ if NET_DSA # Drivers must select the appropriate tagging format(s) +config NET_DSA_TAG_NONE + tristate "No-op tag driver" + help + Say Y or M if you want to enable support for switches which don't tag + frames over the CPU port. 
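
The sock_diag hunk above switches to a plain cmpxchg followed by re-reading sk_cookie, so a thread that loses the race returns the winner's cookie instead of 0. C11 model (atomic_compare_exchange_strong already writes back the observed value on failure, but the explicit re-read mirrors the patch):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned long long next_cookie = 1;

    static unsigned long long gen_cookie(_Atomic unsigned long long *slot)
    {
        unsigned long long res = atomic_load(slot);

        if (!res) {
            unsigned long long new = atomic_fetch_add(&next_cookie, 1);

            atomic_compare_exchange_strong(slot, &res, new);
            res = atomic_load(slot);    /* pick up a concurrent winner's value */
        }
        return res;
    }

    int main(void)
    {
        _Atomic unsigned long long cookie = 0;

        printf("%llu\n", gen_cookie(&cookie));  /* never returns 0 */
        return 0;
    }
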
+ config NET_DSA_TAG_AR9331 tristate "Tag driver for Atheros AR9331 SoC with built-in switch" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index bf57ef3bce2a..cc7e93a562fe 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -2,13 +2,14 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += \ + devlink.o \ dsa.o \ - dsa2.o \ master.o \ netlink.o \ port.o \ slave.o \ switch.o \ + tag.o \ tag_8021q.o # tagging formats @@ -20,6 +21,7 @@ obj-$(CONFIG_NET_DSA_TAG_HELLCREEK) += tag_hellcreek.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +obj-$(CONFIG_NET_DSA_TAG_NONE) += tag_none.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/devlink.c b/net/dsa/devlink.c new file mode 100644 index 000000000000..431bf52290a1 --- /dev/null +++ b/net/dsa/devlink.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DSA devlink handling + */ + +#include <net/dsa.h> +#include <net/devlink.h> + +#include "devlink.h" + +static int dsa_devlink_info_get(struct devlink *dl, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (ds->ops->devlink_info_get) + return ds->ops->devlink_info_get(ds, req, extack); + + return -EOPNOTSUPP; +} + +static int dsa_devlink_sb_pool_get(struct devlink *dl, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index, + pool_info); +} + +static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size, + threshold_type, extack); +} + +static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index, + pool_index, p_threshold); +} + +static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index, + pool_index, threshold, extack); +} + +static int +dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int 
+dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index, + tc_index, pool_type, + pool_index, threshold, + extack); +} + +static int dsa_devlink_sb_occ_snapshot(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_snapshot) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_snapshot(ds, sb_index); +} + +static int dsa_devlink_sb_occ_max_clear(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_max_clear) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_max_clear(ds, sb_index); +} + +static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, + u16 pool_index, u32 *p_cur, + u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index, + pool_index, p_cur, p_max); +} + +static int +dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port, + sb_index, tc_index, + pool_type, p_cur, + p_max); +} + +static const struct devlink_ops dsa_devlink_ops = { + .info_get = dsa_devlink_info_get, + .sb_pool_get = dsa_devlink_sb_pool_get, + .sb_pool_set = dsa_devlink_sb_pool_set, + .sb_port_pool_get = dsa_devlink_sb_port_pool_get, + .sb_port_pool_set = dsa_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot, + .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get, +}; + +int dsa_devlink_param_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_param_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_param_get(ds, id, ctx); +} +EXPORT_SYMBOL_GPL(dsa_devlink_param_get); + +int dsa_devlink_param_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_param_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_param_set(ds, id, ctx); +} +EXPORT_SYMBOL_GPL(dsa_devlink_param_set); + +int dsa_devlink_params_register(struct dsa_switch *ds, + const struct devlink_param *params, + size_t params_count) +{ + return devlink_params_register(ds->devlink, params, params_count); +} +EXPORT_SYMBOL_GPL(dsa_devlink_params_register); + +void dsa_devlink_params_unregister(struct dsa_switch *ds, + const struct devlink_param *params, + size_t params_count) +{ + 
devlink_params_unregister(ds->devlink, params, params_count); +} +EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister); + +int dsa_devlink_resource_register(struct dsa_switch *ds, + const char *resource_name, + u64 resource_size, + u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *size_params) +{ + return devlink_resource_register(ds->devlink, resource_name, + resource_size, resource_id, + parent_resource_id, + size_params); +} +EXPORT_SYMBOL_GPL(dsa_devlink_resource_register); + +void dsa_devlink_resources_unregister(struct dsa_switch *ds) +{ + devlink_resources_unregister(ds->devlink); +} +EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister); + +void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, + u64 resource_id, + devlink_resource_occ_get_t *occ_get, + void *occ_get_priv) +{ + return devlink_resource_occ_get_register(ds->devlink, resource_id, + occ_get, occ_get_priv); +} +EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register); + +void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, + u64 resource_id) +{ + devlink_resource_occ_get_unregister(ds->devlink, resource_id); +} +EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister); + +struct devlink_region * +dsa_devlink_region_create(struct dsa_switch *ds, + const struct devlink_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + return devlink_region_create(ds->devlink, ops, region_max_snapshots, + region_size); +} +EXPORT_SYMBOL_GPL(dsa_devlink_region_create); + +struct devlink_region * +dsa_devlink_port_region_create(struct dsa_switch *ds, + int port, + const struct devlink_port_region_ops *ops, + u32 region_max_snapshots, u64 region_size) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + + return devlink_port_region_create(&dp->devlink_port, ops, + region_max_snapshots, + region_size); +} +EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create); + +void dsa_devlink_region_destroy(struct devlink_region *region) +{ + devlink_region_destroy(region); +} +EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy); + +int dsa_port_devlink_setup(struct dsa_port *dp) +{ + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch_tree *dst = dp->ds->dst; + struct devlink_port_attrs attrs = {}; + struct devlink *dl = dp->ds->devlink; + struct dsa_switch *ds = dp->ds; + const unsigned char *id; + unsigned char len; + int err; + + memset(dlp, 0, sizeof(*dlp)); + devlink_port_init(dl, dlp); + + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + id = (const unsigned char *)&dst->index; + len = sizeof(dst->index); + + attrs.phys.port_number = dp->index; + memcpy(attrs.switch_id.id, id, len); + attrs.switch_id.id_len = len; + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; + break; + case DSA_PORT_TYPE_CPU: + attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; + break; + case DSA_PORT_TYPE_DSA: + attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; + break; + case DSA_PORT_TYPE_USER: + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + break; + } + + devlink_port_attrs_set(dlp, &attrs); + err = devlink_port_register(dl, dlp, dp->index); + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + return err; + } + + return 0; +} + +void dsa_port_devlink_teardown(struct dsa_port *dp) +{ + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; + + devlink_port_unregister(dlp); + + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + + 
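/* Pairs with the devlink_port_init() from dsa_port_devlink_setup(). */ + 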
devlink_port_fini(dlp); +} + +void dsa_switch_devlink_register(struct dsa_switch *ds) +{ + devlink_register(ds->devlink); +} + +void dsa_switch_devlink_unregister(struct dsa_switch *ds) +{ + devlink_unregister(ds->devlink); +} + +int dsa_switch_devlink_alloc(struct dsa_switch *ds) +{ + struct dsa_devlink_priv *dl_priv; + struct devlink *dl; + + /* Add the switch to devlink before calling setup, so that setup can + * add dpipe tables + */ + dl = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); + if (!dl) + return -ENOMEM; + + ds->devlink = dl; + + dl_priv = devlink_priv(ds->devlink); + dl_priv->ds = ds; + + return 0; +} + +void dsa_switch_devlink_free(struct dsa_switch *ds) +{ + devlink_free(ds->devlink); + ds->devlink = NULL; +} diff --git a/net/dsa/devlink.h b/net/dsa/devlink.h new file mode 100644 index 000000000000..4d9f4f23705b --- /dev/null +++ b/net/dsa/devlink.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_DEVLINK_H +#define __DSA_DEVLINK_H + +struct dsa_port; +struct dsa_switch; + +int dsa_port_devlink_setup(struct dsa_port *dp); +void dsa_port_devlink_teardown(struct dsa_port *dp); +void dsa_switch_devlink_register(struct dsa_switch *ds); +void dsa_switch_devlink_unregister(struct dsa_switch *ds); +int dsa_switch_devlink_alloc(struct dsa_switch *ds); +void dsa_switch_devlink_free(struct dsa_switch *ds); + +#endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 4afd3edbd64d..e5f156940c67 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,472 +1,1637 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * net/dsa/dsa.c - Hardware switch handling + * DSA topology and switch handling + * * Copyright (c) 2008-2009 Marvell Semiconductor * Copyright (c) 2013 Florian Fainelli <[email protected]> + * Copyright (c) 2016 Andrew Lunn <[email protected]> */ #include <linux/device.h> +#include <linux/err.h> #include <linux/list.h> #include <linux/module.h> #include <linux/netdevice.h> -#include <linux/sysfs.h> -#include <linux/ptp_classify.h> -#include <net/dst_metadata.h> +#include <linux/slab.h> +#include <linux/rtnetlink.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <net/sch_generic.h> + +#include "devlink.h" +#include "dsa.h" +#include "master.h" +#include "netlink.h" +#include "port.h" +#include "slave.h" +#include "switch.h" +#include "tag.h" + +#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG + +static DEFINE_MUTEX(dsa2_mutex); +LIST_HEAD(dsa_tree_list); -#include "dsa_priv.h" +static struct workqueue_struct *dsa_owq; -static LIST_HEAD(dsa_tag_drivers_list); -static DEFINE_MUTEX(dsa_tag_drivers_lock); +/* Track the bridges with forwarding offload enabled */ +static unsigned long dsa_fwd_offloading_bridges; -static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, - struct net_device *dev) +bool dsa_schedule_work(struct work_struct *work) { - /* Just return the original SKB */ - return skb; + return queue_work(dsa_owq, work); } -static const struct dsa_device_ops none_ops = { - .name = "none", - .proto = DSA_TAG_PROTO_NONE, - .xmit = dsa_slave_notag_xmit, - .rcv = NULL, -}; +void dsa_flush_workqueue(void) +{ + flush_workqueue(dsa_owq); +} +EXPORT_SYMBOL_GPL(dsa_flush_workqueue); -DSA_TAG_DRIVER(none_ops); +/** + * dsa_lag_map() - Map LAG structure to a linear LAG array + * @dst: Tree in which to record the mapping. + * @lag: LAG structure that is to be mapped to the tree's array. + * + * dsa_lag_id/dsa_lag_by_id can then be used to translate between the + * two spaces. 
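Mapped ids start at 1; an id of 0 + * means the LAG is unmapped.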
The size of the mapping space is determined by the + * driver by setting ds->num_lag_ids. It is perfectly legal to leave + * it unset if it is not needed, in which case these functions become + * no-ops. + */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag) +{ + unsigned int id; + + for (id = 1; id <= dst->lags_len; id++) { + if (!dsa_lag_by_id(dst, id)) { + dst->lags[id - 1] = lag; + lag->id = id; + return; + } + } -static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver, - struct module *owner) + /* No IDs left, which is OK. Some drivers do not need it. The + * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id + * returns an error for this device when joining the LAG. The + * driver can then return -EOPNOTSUPP back to DSA, which will + * fall back to a software LAG. + */ +} + +/** + * dsa_lag_unmap() - Remove a LAG ID mapping + * @dst: Tree in which the mapping is recorded. + * @lag: LAG structure that was mapped. + * + * As there may be multiple users of the mapping, it is only removed + * if there are no other references to it. + */ +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag) { - dsa_tag_driver->owner = owner; + unsigned int id; - mutex_lock(&dsa_tag_drivers_lock); - list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list); - mutex_unlock(&dsa_tag_drivers_lock); + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_by_id(dst, id) == lag) { + dst->lags[id - 1] = NULL; + lag->id = 0; + break; + } + } } -void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count, struct module *owner) +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev) { - unsigned int i; + struct dsa_port *dp; - for (i = 0; i < count; i++) - dsa_tag_driver_register(dsa_tag_driver_array[i], owner); + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_lag_dev_get(dp) == lag_dev) + return dp->lag; + + return NULL; } -static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver) +struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, + const struct net_device *br) { - mutex_lock(&dsa_tag_drivers_lock); - list_del(&dsa_tag_driver->list); - mutex_unlock(&dsa_tag_drivers_lock); + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_bridge_dev_get(dp) == br) + return dp->bridge; + + return NULL; } -EXPORT_SYMBOL_GPL(dsa_tag_drivers_register); -void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count) +static int dsa_bridge_num_find(const struct net_device *bridge_dev) { - unsigned int i; + struct dsa_switch_tree *dst; - for (i = 0; i < count; i++) - dsa_tag_driver_unregister(dsa_tag_driver_array[i]); + list_for_each_entry(dst, &dsa_tree_list, list) { + struct dsa_bridge *bridge; + + bridge = dsa_tree_bridge_find(dst, bridge_dev); + if (bridge) + return bridge->num; + } + + return 0; } -EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister); -const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) +unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) { - return ops->name; -}; + unsigned int bridge_num = dsa_bridge_num_find(bridge_dev); + + /* Switches without FDB isolation support don't get unique + * bridge numbering + */ + if (!max) + return 0; + + if (!bridge_num) { + /* First port that requests FDB isolation or TX forwarding + * offload for this bridge + */ + bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges, + 
DSA_MAX_NUM_OFFLOADING_BRIDGES, + 1); + if (bridge_num >= max) + return 0; + + set_bit(bridge_num, &dsa_fwd_offloading_bridges); + } + + return bridge_num; +} + +void dsa_bridge_num_put(const struct net_device *bridge_dev, + unsigned int bridge_num) +{ + /* Since we refcount bridges, we know that when we call this function + * it is no longer in use, so we can just go ahead and remove it from + * the bit mask. + */ + clear_bit(bridge_num, &dsa_fwd_offloading_bridges); +} + +struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + list_for_each_entry(dst, &dsa_tree_list, list) { + if (dst->index != tree_index) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->index != sw_index) + continue; + + return dp->ds; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_switch_find); + +static struct dsa_switch_tree *dsa_tree_find(int index) +{ + struct dsa_switch_tree *dst; + + list_for_each_entry(dst, &dsa_tree_list, list) + if (dst->index == index) + return dst; + + return NULL; +} + +static struct dsa_switch_tree *dsa_tree_alloc(int index) +{ + struct dsa_switch_tree *dst; + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return NULL; + + dst->index = index; + + INIT_LIST_HEAD(&dst->rtable); + + INIT_LIST_HEAD(&dst->ports); + + INIT_LIST_HEAD(&dst->list); + list_add_tail(&dst->list, &dsa_tree_list); + + kref_init(&dst->refcount); -/* Function takes a reference on the module owning the tagger, - * so dsa_tag_driver_put must be called afterwards. + return dst; +} + +static void dsa_tree_free(struct dsa_switch_tree *dst) +{ + if (dst->tag_ops) + dsa_tag_driver_put(dst->tag_ops); + list_del(&dst->list); + kfree(dst); +} + +static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) +{ + if (dst) + kref_get(&dst->refcount); + + return dst; +} + +static struct dsa_switch_tree *dsa_tree_touch(int index) +{ + struct dsa_switch_tree *dst; + + dst = dsa_tree_find(index); + if (dst) + return dsa_tree_get(dst); + else + return dsa_tree_alloc(index); +} + +static void dsa_tree_release(struct kref *ref) +{ + struct dsa_switch_tree *dst; + + dst = container_of(ref, struct dsa_switch_tree, refcount); + + dsa_tree_free(dst); +} + +static void dsa_tree_put(struct dsa_switch_tree *dst) +{ + if (dst) + kref_put(&dst->refcount, dsa_tree_release); +} + +static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, + struct device_node *dn) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dp->dn == dn) + return dp; + + return NULL; +} + +static struct dsa_link *dsa_link_touch(struct dsa_port *dp, + struct dsa_port *link_dp) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst; + struct dsa_link *dl; + + dst = ds->dst; + + list_for_each_entry(dl, &dst->rtable, list) + if (dl->dp == dp && dl->link_dp == link_dp) + return dl; + + dl = kzalloc(sizeof(*dl), GFP_KERNEL); + if (!dl) + return NULL; + + dl->dp = dp; + dl->link_dp = link_dp; + + INIT_LIST_HEAD(&dl->list); + list_add_tail(&dl->list, &dst->rtable); + + return dl; +} + +static bool dsa_port_setup_routing_table(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + struct device_node *dn = dp->dn; + struct of_phandle_iterator it; + struct dsa_port *link_dp; + struct dsa_link *dl; + int err; + + of_for_each_phandle(&it, err, dn, "link", NULL, 0) { + link_dp = dsa_tree_find_port_by_node(dst, it.node); + if (!link_dp) { + of_node_put(it.node); + 
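/* The linked port has not been probed yet, so the + * routing table is incomplete for now. + */ + 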
return false; + } + + dl = dsa_link_touch(dp, link_dp); + if (!dl) { + of_node_put(it.node); + return false; + } + } + + return true; +} + +static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst) +{ + bool complete = true; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_is_dsa(dp)) { + complete = dsa_port_setup_routing_table(dp); + if (!complete) + break; + } + } + + return complete; +} + +static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_cpu(dp)) + return dp; + + return NULL; +} + +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +{ + struct device_node *ethernet; + struct net_device *master; + struct dsa_port *cpu_dp; + + cpu_dp = dsa_tree_find_first_cpu(dst); + ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); + master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); + + return master; +} + +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. */ -const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name) +static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { - const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT); - struct dsa_tag_driver *dsa_tag_driver; + struct dsa_port *cpu_dp, *dp; - request_module("%s%s", DSA_TAG_DRIVER_ALIAS, name); + cpu_dp = dsa_tree_find_first_cpu(dst); + if (!cpu_dp) { + pr_err("DSA: tree %d has no CPU port\n", dst->index); + return -EINVAL; + } + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; - mutex_lock(&dsa_tag_drivers_lock); - list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { - const struct dsa_device_ops *tmp = dsa_tag_driver->ops; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + + return 0; +} - if (strcmp(name, tmp->name)) +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. 
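Ports that already have a CPU port + * assigned are skipped, so the first local CPU port found wins.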
+ */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) continue; - if (!try_module_get(dsa_tag_driver->owner)) - break; + /* Prefer a local CPU port */ + dsa_switch_for_each_port(dp, cpu_dp->ds) { + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; - ops = tmp; - break; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } } - mutex_unlock(&dsa_tag_drivers_lock); - return ops; + return dsa_tree_setup_default_cpu(dst); +} + +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = NULL; } -const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol) +static int dsa_port_setup(struct dsa_port *dp) { - struct dsa_tag_driver *dsa_tag_driver; - const struct dsa_device_ops *ops; - bool found = false; + bool dsa_port_link_registered = false; + struct dsa_switch *ds = dp->ds; + bool dsa_port_enabled = false; + int err = 0; - request_module("%sid-%d", DSA_TAG_DRIVER_ALIAS, tag_protocol); + if (dp->setup) + return 0; - mutex_lock(&dsa_tag_drivers_lock); - list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { - ops = dsa_tag_driver->ops; - if (ops->proto == tag_protocol) { - found = true; + err = dsa_port_devlink_setup(dp); + if (err) + return err; + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + dsa_port_disable(dp); + break; + case DSA_PORT_TYPE_CPU: + if (dp->dn) { + err = dsa_shared_port_link_register_of(dp); + if (err) + break; + dsa_port_link_registered = true; + } else { + dev_warn(ds->dev, + "skipping link registration for CPU port %d\n", + dp->index); + } + + err = dsa_port_enable(dp, NULL); + if (err) break; + dsa_port_enabled = true; + + break; + case DSA_PORT_TYPE_DSA: + if (dp->dn) { + err = dsa_shared_port_link_register_of(dp); + if (err) + break; + dsa_port_link_registered = true; + } else { + dev_warn(ds->dev, + "skipping link registration for DSA port %d\n", + dp->index); } + + err = dsa_port_enable(dp, NULL); + if (err) + break; + dsa_port_enabled = true; + + break; + case DSA_PORT_TYPE_USER: + of_get_mac_address(dp->dn, dp->mac); + err = dsa_slave_create(dp); + break; } - if (found) { - if (!try_module_get(dsa_tag_driver->owner)) - ops = ERR_PTR(-ENOPROTOOPT); - } else { - ops = ERR_PTR(-ENOPROTOOPT); + if (err && dsa_port_enabled) + dsa_port_disable(dp); + if (err && dsa_port_link_registered) + dsa_shared_port_link_unregister_of(dp); + if (err) { + dsa_port_devlink_teardown(dp); + return err; } - mutex_unlock(&dsa_tag_drivers_lock); + dp->setup = true; - return ops; + return 0; } -void dsa_tag_driver_put(const struct dsa_device_ops *ops) +static void dsa_port_teardown(struct dsa_port *dp) { - struct dsa_tag_driver *dsa_tag_driver; + if (!dp->setup) + return; - mutex_lock(&dsa_tag_drivers_lock); - list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { - if (dsa_tag_driver->ops == ops) { - module_put(dsa_tag_driver->owner); - break; + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + dsa_port_disable(dp); + if (dp->dn) + dsa_shared_port_link_unregister_of(dp); + break; + case DSA_PORT_TYPE_DSA: + dsa_port_disable(dp); + if (dp->dn) + dsa_shared_port_link_unregister_of(dp); + break; + case DSA_PORT_TYPE_USER: + if (dp->slave) { + dsa_slave_destroy(dp->slave); + dp->slave = NULL; 
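/* dsa_port_is_initialized() checks dp->slave, so clear it here. */ 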
} + break; } - mutex_unlock(&dsa_tag_drivers_lock); + + dsa_port_devlink_teardown(dp); + + dp->setup = false; } -static int dev_is_class(struct device *dev, void *class) +static int dsa_port_setup_as_unused(struct dsa_port *dp) { - if (dev->class != NULL && !strcmp(dev->class->name, class)) - return 1; + dp->type = DSA_PORT_TYPE_UNUSED; + return dsa_port_setup(dp); +} + +static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) +{ + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; + struct dsa_switch_tree *dst = ds->dst; + int err; + + if (tag_ops->proto == dst->default_proto) + goto connect; + + rtnl_lock(); + err = ds->ops->change_tag_protocol(ds, tag_ops->proto); + rtnl_unlock(); + if (err) { + dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + return err; + } + +connect: + if (tag_ops->connect) { + err = tag_ops->connect(ds); + if (err) + return err; + } + + if (ds->ops->connect_tag_protocol) { + err = ds->ops->connect_tag_protocol(ds, tag_ops->proto); + if (err) { + dev_err(ds->dev, + "Unable to connect to tag protocol \"%s\": %pe\n", + tag_ops->name, ERR_PTR(err)); + goto disconnect; + } + } return 0; + +disconnect: + if (tag_ops->disconnect) + tag_ops->disconnect(ds); + + return err; } -static struct device *dev_find_class(struct device *parent, char *class) +static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds) { - if (dev_is_class(parent, class)) { - get_device(parent); - return parent; - } + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; - return device_find_child(parent, class, dev_is_class); + if (tag_ops->disconnect) + tag_ops->disconnect(ds); } -struct net_device *dsa_dev_to_net_device(struct device *dev) +static int dsa_switch_setup(struct dsa_switch *ds) { - struct device *d; + struct device_node *dn; + int err; - d = dev_find_class(dev, "net"); - if (d != NULL) { - struct net_device *nd; + if (ds->setup) + return 0; - nd = to_net_dev(d); - dev_hold(nd); - put_device(d); + /* Initialize ds->phys_mii_mask before registering the slave MDIO bus + * driver and before ops->setup() has run, since the switch drivers and + * the slave MDIO bus driver rely on these values for probing PHY + * devices or not + */ + ds->phys_mii_mask |= dsa_user_ports(ds); - return nd; + err = dsa_switch_devlink_alloc(ds); + if (err) + return err; + + err = dsa_switch_register_notifier(ds); + if (err) + goto devlink_free; + + ds->configure_vlan_while_not_filtering = true; + + err = ds->ops->setup(ds); + if (err < 0) + goto unregister_notifier; + + err = dsa_switch_setup_tag_protocol(ds); + if (err) + goto teardown; + + if (!ds->slave_mii_bus && ds->ops->phy_read) { + ds->slave_mii_bus = mdiobus_alloc(); + if (!ds->slave_mii_bus) { + err = -ENOMEM; + goto teardown; + } + + dsa_slave_mii_bus_init(ds); + + dn = of_get_child_by_name(ds->dev->of_node, "mdio"); + + err = of_mdiobus_register(ds->slave_mii_bus, dn); + of_node_put(dn); + if (err < 0) + goto free_slave_mii_bus; } - return NULL; + dsa_switch_devlink_register(ds); + + ds->setup = true; + return 0; + +free_slave_mii_bus: + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->slave_mii_bus); +teardown: + if (ds->ops->teardown) + ds->ops->teardown(ds); +unregister_notifier: + dsa_switch_unregister_notifier(ds); +devlink_free: + dsa_switch_devlink_free(ds); + return err; } -EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); -/* Determine if we should defer delivery of skb until we have a rx timestamp. - * - * Called from dsa_switch_rcv. 
For now, this will only work if tagging is - * enabled on the switch. Normally the MAC driver would retrieve the hardware - * timestamp when it reads the packet out of the hardware. However in a DSA - * switch, the DSA driver owning the interface to which the packet is - * delivered is never notified unless we do so here. - */ -static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, - struct sk_buff *skb) +static void dsa_switch_teardown(struct dsa_switch *ds) { - struct dsa_switch *ds = p->dp->ds; - unsigned int type; + if (!ds->setup) + return; - if (skb_headroom(skb) < ETH_HLEN) - return false; + dsa_switch_devlink_unregister(ds); - __skb_push(skb, ETH_HLEN); + if (ds->slave_mii_bus && ds->ops->phy_read) { + mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); + ds->slave_mii_bus = NULL; + } - type = ptp_classify_raw(skb); + dsa_switch_teardown_tag_protocol(ds); - __skb_pull(skb, ETH_HLEN); + if (ds->ops->teardown) + ds->ops->teardown(ds); - if (type == PTP_CLASS_NONE) - return false; + dsa_switch_unregister_notifier(ds); - if (likely(ds->ops->port_rxtstamp)) - return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type); + dsa_switch_devlink_free(ds); - return false; + ds->setup = false; +} + +/* First tear down the non-shared, then the shared ports. This ensures that + * all work items scheduled by our switchdev handlers for user ports have + * completed before we destroy the refcounting kept on the shared ports. + */ +static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) + dsa_port_teardown(dp); + + dsa_flush_workqueue(); + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + dsa_port_teardown(dp); } -static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *unused) +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) { - struct metadata_dst *md_dst = skb_metadata_dst(skb); - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct sk_buff *nskb = NULL; - struct dsa_slave_priv *p; + struct dsa_port *dp; - if (unlikely(!cpu_dp)) { - kfree_skb(skb); - return 0; + list_for_each_entry(dp, &dst->ports, list) + dsa_switch_teardown(dp->ds); +} + +/* Bring shared ports up first, then non-shared ports */ +static int dsa_tree_setup_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + int err = 0; + + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) { + err = dsa_port_setup(dp); + if (err) + goto teardown; + } } - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return 0; + list_for_each_entry(dp, &dst->ports, list) { + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) { + err = dsa_port_setup(dp); + if (err) { + err = dsa_port_setup_as_unused(dp); + if (err) + goto teardown; + } + } + } - if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { - unsigned int port = md_dst->u.port_info.port_id; + return 0; + +teardown: + dsa_tree_teardown_ports(dst); - skb_dst_drop(skb); - if (!skb_has_extensions(skb)) - skb->slow_gro = 0; + return err; +} - skb->dev = dsa_master_find_slave(dev, 0, port); - if (likely(skb->dev)) { - dsa_default_offload_fwd_mark(skb); - nskb = skb; +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + int err = 0; + + list_for_each_entry(dp, &dst->ports, list) { + err = dsa_switch_setup(dp->ds); + if (err) { + 
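/* Unwind every switch that was set up so far in + * this tree. + */ + 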
dsa_tree_teardown_switches(dst); + break; } - } else { - nskb = cpu_dp->rcv(skb, dev); } - if (!nskb) { - kfree_skb(skb); - return 0; + return err; +} + +static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp; + int err = 0; + + rtnl_lock(); + + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; + bool admin_up = (master->flags & IFF_UP) && + !qdisc_tx_is_noop(master); + + err = dsa_master_setup(master, cpu_dp); + if (err) + break; + + /* Replay master state event */ + dsa_tree_master_admin_state_change(dst, master, admin_up); + dsa_tree_master_oper_state_change(dst, master, + netif_oper_up(master)); } - skb = nskb; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); + rtnl_unlock(); + + return err; +} + +static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp; + + rtnl_lock(); + + dsa_tree_for_each_cpu_port(cpu_dp, dst) { + struct net_device *master = cpu_dp->master; - if (unlikely(!dsa_slave_dev_check(skb->dev))) { - /* Packet is to be injected directly on an upper - * device, e.g. a team/bond, so skip all DSA-port - * specific actions. + /* Synthesizing an "admin down" state is sufficient for + * the switches to get a notification if the master is + * currently up and running. */ - netif_rx(skb); - return 0; + dsa_tree_master_admin_state_change(dst, master, false); + + dsa_master_teardown(master); } - p = netdev_priv(skb->dev); + rtnl_unlock(); +} - if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { - nskb = dsa_untag_bridge_pvid(skb); - if (!nskb) { - kfree_skb(skb); - return 0; - } - skb = nskb; +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) +{ + unsigned int len = 0; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->num_lag_ids > len) + len = dp->ds->num_lag_ids; } - dev_sw_netstats_rx_add(skb->dev, skb->len); + if (!len) + return 0; + + dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); + if (!dst->lags) + return -ENOMEM; + + dst->lags_len = len; + return 0; +} + +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) +{ + kfree(dst->lags); +} - if (dsa_skb_defer_rx_timestamp(p, skb)) +static int dsa_tree_setup(struct dsa_switch_tree *dst) +{ + bool complete; + int err; + + if (dst->setup) { + pr_err("DSA: tree %d already setup! 
Disjoint trees?\n", + dst->index); + return -EEXIST; + } + + complete = dsa_tree_setup_routing_table(dst); + if (!complete) return 0; - gro_cells_receive(&p->gcells, skb); + err = dsa_tree_setup_cpu_ports(dst); + if (err) + return err; + + err = dsa_tree_setup_switches(dst); + if (err) + goto teardown_cpu_ports; + + err = dsa_tree_setup_ports(dst); + if (err) + goto teardown_switches; + + err = dsa_tree_setup_master(dst); + if (err) + goto teardown_ports; + + err = dsa_tree_setup_lags(dst); + if (err) + goto teardown_master; + + dst->setup = true; + + pr_info("DSA: tree %d setup\n", dst->index); return 0; + +teardown_master: + dsa_tree_teardown_master(dst); +teardown_ports: + dsa_tree_teardown_ports(dst); +teardown_switches: + dsa_tree_teardown_switches(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); + + return err; } -#ifdef CONFIG_PM_SLEEP -static bool dsa_port_is_initialized(const struct dsa_port *dp) +static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - return dp->type == DSA_PORT_TYPE_USER && dp->slave; + struct dsa_link *dl, *next; + + if (!dst->setup) + return; + + dsa_tree_teardown_lags(dst); + + dsa_tree_teardown_master(dst); + + dsa_tree_teardown_ports(dst); + + dsa_tree_teardown_switches(dst); + + dsa_tree_teardown_cpu_ports(dst); + + list_for_each_entry_safe(dl, next, &dst->rtable, list) { + list_del(&dl->list); + kfree(dl); + } + + pr_info("DSA: tree %d torn down\n", dst->index); + + dst->setup = false; } -int dsa_switch_suspend(struct dsa_switch *ds) +static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops) +{ + const struct dsa_device_ops *old_tag_ops = dst->tag_ops; + struct dsa_notifier_tag_proto_info info; + int err; + + dst->tag_ops = tag_ops; + + /* Notify the switches from this tree about the connection + * to the new tagger + */ + info.tag_ops = tag_ops; + err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info); + if (err && err != -EOPNOTSUPP) + goto out_disconnect; + + /* Notify the old tagger about the disconnection from this tree */ + info.tag_ops = old_tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); + + return 0; + +out_disconnect: + info.tag_ops = tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); + dst->tag_ops = old_tag_ops; + + return err; +} + +/* Since the dsa/tagging sysfs device attribute is per master, the assumption + * is that all DSA switches within a tree share the same tagger, otherwise + * they would have formed disjoint trees (different "dsa,member" values). + */ +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops) { + struct dsa_notifier_tag_proto_info info; struct dsa_port *dp; - int ret = 0; + int err = -EBUSY; + + if (!rtnl_trylock()) + return restart_syscall(); + + /* At the moment we don't allow changing the tag protocol under + * traffic. The rtnl_mutex also happens to serialize concurrent + * attempts to change the tagging protocol. If we ever lift the IFF_UP + * restriction, there needs to be another mutex which serializes this. 
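The loop + * below therefore rejects the change while any master or user port + * is administratively up.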
+ */ + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp)->flags & IFF_UP) + goto out_unlock; + + if (dp->slave->flags & IFF_UP) + goto out_unlock; + } - /* Suspend slave network devices */ - dsa_switch_for_each_port(dp, ds) { - if (!dsa_port_is_initialized(dp)) - continue; + /* Notify the tag protocol change */ + info.tag_ops = tag_ops; + err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); + if (err) + goto out_unwind_tagger; - ret = dsa_slave_suspend(dp->slave); - if (ret) - return ret; + err = dsa_tree_bind_tag_proto(dst, tag_ops); + if (err) + goto out_unwind_tagger; + + rtnl_unlock(); + + return 0; + +out_unwind_tagger: + info.tag_ops = old_tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); +out_unlock: + rtnl_unlock(); + return err; +} + +static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, + struct net_device *master) +{ + struct dsa_notifier_master_state_info info; + struct dsa_port *cpu_dp = master->dsa_ptr; + + info.master = master; + info.operational = dsa_port_master_is_operational(cpu_dp); + + dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); +} + +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + /* Don't keep track of admin state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (up && cpu_dp->master_oper_up)) + notify = true; + + cpu_dp->master_admin_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + bool notify = false; + + /* Don't keep track of oper state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + + if ((dsa_port_master_is_operational(cpu_dp)) != + (cpu_dp->master_admin_up && up)) + notify = true; + + cpu_dp->master_oper_up = up; + + if (notify) + dsa_tree_master_state_change(dst, master); +} + +static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) +{ + struct dsa_switch_tree *dst = ds->dst; + struct dsa_port *dp; + + dsa_switch_for_each_port(dp, ds) + if (dp->index == index) + return dp; + + dp = kzalloc(sizeof(*dp), GFP_KERNEL); + if (!dp) + return NULL; + + dp->ds = ds; + dp->index = index; + + mutex_init(&dp->addr_lists_lock); + mutex_init(&dp->vlans_lock); + INIT_LIST_HEAD(&dp->fdbs); + INIT_LIST_HEAD(&dp->mdbs); + INIT_LIST_HEAD(&dp->vlans); + INIT_LIST_HEAD(&dp->list); + list_add_tail(&dp->list, &dst->ports); + + return dp; +} + +static int dsa_port_parse_user(struct dsa_port *dp, const char *name) +{ + dp->type = DSA_PORT_TYPE_USER; + dp->name = name; + + return 0; +} + +static int dsa_port_parse_dsa(struct dsa_port *dp) +{ + dp->type = DSA_PORT_TYPE_DSA; + + return 0; +} + +static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, + struct net_device *master) +{ + enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE; + struct dsa_switch *mds, *ds = dp->ds; + unsigned int mdp_upstream; + struct dsa_port *mdp; + + /* It is possible to stack DSA switches onto one another when that + * happens the switch driver may want to know if its tagging protocol + * is going to work in such a configuration. 
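In that case, the tagging protocol reported by the + * switch upstream of the master is passed down to this switch as a + * hint.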
+ */ + if (dsa_slave_dev_check(master)) { + mdp = dsa_slave_to_port(master); + mds = mdp->ds; + mdp_upstream = dsa_upstream_port(mds, mdp->index); + tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream, + DSA_TAG_PROTO_NONE); } - if (ds->ops->suspend) - ret = ds->ops->suspend(ds); + /* If the master device is not itself a DSA slave in a disjoint DSA + * tree, then return immediately. + */ + return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); +} - return ret; +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, + const char *user_protocol) +{ + const struct dsa_device_ops *tag_ops = NULL; + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + enum dsa_tag_protocol default_proto; + + /* Find out which protocol the switch would prefer. */ + default_proto = dsa_get_tag_protocol(dp, master); + if (dst->default_proto) { + if (dst->default_proto != default_proto) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + return -EINVAL; + } + } else { + dst->default_proto = default_proto; + } + + /* See if the user wants to override that preference. */ + if (user_protocol) { + if (!ds->ops->change_tag_protocol) { + dev_err(ds->dev, "Tag protocol cannot be modified\n"); + return -EINVAL; + } + + tag_ops = dsa_tag_driver_get_by_name(user_protocol); + if (IS_ERR(tag_ops)) { + dev_warn(ds->dev, + "Failed to find a tagging driver for protocol %s, using default\n", + user_protocol); + tag_ops = NULL; + } + } + + if (!tag_ops) + tag_ops = dsa_tag_driver_get_by_id(default_proto); + + if (IS_ERR(tag_ops)) { + if (PTR_ERR(tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; + + dev_warn(ds->dev, "No tagger for this switch\n"); + return PTR_ERR(tag_ops); + } + + if (dst->tag_ops) { + if (dst->tag_ops != tag_ops) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + + dsa_tag_driver_put(tag_ops); + return -EINVAL; + } + + /* In the case of multiple CPU ports per switch, the tagging + * protocol is still reference-counted only per switch tree. + */ + dsa_tag_driver_put(tag_ops); + } else { + dst->tag_ops = tag_ops; + } + + dp->master = master; + dp->type = DSA_PORT_TYPE_CPU; + dsa_port_set_tag_protocol(dp, dst->tag_ops); + dp->dst = dst; + + /* At this point, the tree may be configured to use a different + * tagger than the one chosen by the switch driver during + * .setup, in the case when a user selects a custom protocol + * through the DT. + * + * This is resolved by syncing the driver with the tree in + * dsa_switch_setup_tag_protocol once .setup has run and the + * driver is ready to accept calls to .change_tag_protocol. If + * the driver does not support the custom protocol at that + * point, the tree is wholly rejected, thereby ensuring that the + * tree and driver are always in agreement on the protocol to + * use. 
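Drivers that cannot change taggers at runtime simply leave + * ->change_tag_protocol unimplemented, and the user override is + * rejected above.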
+ */ + return 0; } -EXPORT_SYMBOL_GPL(dsa_switch_suspend); -int dsa_switch_resume(struct dsa_switch *ds) +static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) +{ + struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); + const char *name = of_get_property(dn, "label", NULL); + bool link = of_property_read_bool(dn, "link"); + + dp->dn = dn; + + if (ethernet) { + struct net_device *master; + const char *user_protocol; + + master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); + if (!master) + return -EPROBE_DEFER; + + user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); + return dsa_port_parse_cpu(dp, master, user_protocol); + } + + if (link) + return dsa_port_parse_dsa(dp); + + return dsa_port_parse_user(dp, name); +} + +static int dsa_switch_parse_ports_of(struct dsa_switch *ds, + struct device_node *dn) { + struct device_node *ports, *port; struct dsa_port *dp; - int ret = 0; + int err = 0; + u32 reg; + + ports = of_get_child_by_name(dn, "ports"); + if (!ports) { + /* The second possibility is "ethernet-ports" */ + ports = of_get_child_by_name(dn, "ethernet-ports"); + if (!ports) { + dev_err(ds->dev, "no ports child node found\n"); + return -EINVAL; + } + } - if (ds->ops->resume) - ret = ds->ops->resume(ds); + for_each_available_child_of_node(ports, port) { + err = of_property_read_u32(port, "reg", ®); + if (err) { + of_node_put(port); + goto out_put_node; + } - if (ret) - return ret; + if (reg >= ds->num_ports) { + dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n", + port, reg, ds->num_ports); + of_node_put(port); + err = -EINVAL; + goto out_put_node; + } - /* Resume slave network devices */ - dsa_switch_for_each_port(dp, ds) { - if (!dsa_port_is_initialized(dp)) - continue; + dp = dsa_to_port(ds, reg); - ret = dsa_slave_resume(dp->slave); - if (ret) - return ret; + err = dsa_port_parse_of(dp, port); + if (err) { + of_node_put(port); + goto out_put_node; + } + } + +out_put_node: + of_node_put(ports); + return err; +} + +static int dsa_switch_parse_member_of(struct dsa_switch *ds, + struct device_node *dn) +{ + u32 m[2] = { 0, 0 }; + int sz; + + /* Don't error out if this optional property isn't found */ + sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2); + if (sz < 0 && sz != -EINVAL) + return sz; + + ds->index = m[1]; + + ds->dst = dsa_tree_touch(m[0]); + if (!ds->dst) + return -ENOMEM; + + if (dsa_switch_find(ds->dst->index, ds->index)) { + dev_err(ds->dev, + "A DSA switch with index %d already exists in tree %d\n", + ds->index, ds->dst->index); + return -EEXIST; } + if (ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } -EXPORT_SYMBOL_GPL(dsa_switch_resume); -#endif -static struct packet_type dsa_pack_type __read_mostly = { - .type = cpu_to_be16(ETH_P_XDSA), - .func = dsa_switch_rcv, -}; +static int dsa_switch_touch_ports(struct dsa_switch *ds) +{ + struct dsa_port *dp; + int port; -static struct workqueue_struct *dsa_owq; + for (port = 0; port < ds->num_ports; port++) { + dp = dsa_port_touch(ds, port); + if (!dp) + return -ENOMEM; + } -bool dsa_schedule_work(struct work_struct *work) + return 0; +} + +static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) { - return queue_work(dsa_owq, work); + int err; + + err = dsa_switch_parse_member_of(ds, dn); + if (err) + return err; + + err = dsa_switch_touch_ports(ds); + if (err) + return err; + + return dsa_switch_parse_ports_of(ds, dn); } -void dsa_flush_workqueue(void) +static int 
dev_is_class(struct device *dev, void *class) { - flush_workqueue(dsa_owq); + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; } -EXPORT_SYMBOL_GPL(dsa_flush_workqueue); -int dsa_devlink_param_get(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +static struct net_device *dsa_dev_to_net_device(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "net"); + if (d != NULL) { + struct net_device *nd; + + nd = to_net_dev(d); + dev_hold(nd); + put_device(d); + + return nd; + } + + return NULL; +} + +static int dsa_port_parse(struct dsa_port *dp, const char *name, + struct device *dev) +{ + if (!strcmp(name, "cpu")) { + struct net_device *master; + + master = dsa_dev_to_net_device(dev); + if (!master) + return -EPROBE_DEFER; + + dev_put(master); + + return dsa_port_parse_cpu(dp, master, NULL); + } + + if (!strcmp(name, "dsa")) + return dsa_port_parse_dsa(dp); + + return dsa_port_parse_user(dp, name); +} + +static int dsa_switch_parse_ports(struct dsa_switch *ds, + struct dsa_chip_data *cd) { - struct dsa_switch *ds = dsa_devlink_to_ds(dl); + bool valid_name_found = false; + struct dsa_port *dp; + struct device *dev; + const char *name; + unsigned int i; + int err; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + name = cd->port_names[i]; + dev = cd->netdev[i]; + dp = dsa_to_port(ds, i); + + if (!name) + continue; + + err = dsa_port_parse(dp, name, dev); + if (err) + return err; - if (!ds->ops->devlink_param_get) - return -EOPNOTSUPP; + valid_name_found = true; + } + + if (!valid_name_found && i == DSA_MAX_PORTS) + return -EINVAL; - return ds->ops->devlink_param_get(ds, id, ctx); + return 0; } -EXPORT_SYMBOL_GPL(dsa_devlink_param_get); -int dsa_devlink_param_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) +static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) { - struct dsa_switch *ds = dsa_devlink_to_ds(dl); + int err; - if (!ds->ops->devlink_param_set) - return -EOPNOTSUPP; + ds->cd = cd; + + /* We don't support interconnected switches nor multiple trees via + * platform data, so this is the unique switch of the tree. 
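Tree 0 and switch + * index 0 are therefore used unconditionally.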
+ */ + ds->index = 0; + ds->dst = dsa_tree_touch(0); + if (!ds->dst) + return -ENOMEM; - return ds->ops->devlink_param_set(ds, id, ctx); + err = dsa_switch_touch_ports(ds); + if (err) + return err; + + return dsa_switch_parse_ports(ds, cd); } -EXPORT_SYMBOL_GPL(dsa_devlink_param_set); -int dsa_devlink_params_register(struct dsa_switch *ds, - const struct devlink_param *params, - size_t params_count) +static void dsa_switch_release_ports(struct dsa_switch *ds) { - return devlink_params_register(ds->devlink, params, params_count); + struct dsa_port *dp, *next; + + dsa_switch_for_each_port_safe(dp, next, ds) { + WARN_ON(!list_empty(&dp->fdbs)); + WARN_ON(!list_empty(&dp->mdbs)); + WARN_ON(!list_empty(&dp->vlans)); + list_del(&dp->list); + kfree(dp); + } } -EXPORT_SYMBOL_GPL(dsa_devlink_params_register); -void dsa_devlink_params_unregister(struct dsa_switch *ds, - const struct devlink_param *params, - size_t params_count) +static int dsa_switch_probe(struct dsa_switch *ds) { - devlink_params_unregister(ds->devlink, params, params_count); + struct dsa_switch_tree *dst; + struct dsa_chip_data *pdata; + struct device_node *np; + int err; + + if (!ds->dev) + return -ENODEV; + + pdata = ds->dev->platform_data; + np = ds->dev->of_node; + + if (!ds->num_ports) + return -EINVAL; + + if (np) { + err = dsa_switch_parse_of(ds, np); + if (err) + dsa_switch_release_ports(ds); + } else if (pdata) { + err = dsa_switch_parse(ds, pdata); + if (err) + dsa_switch_release_ports(ds); + } else { + err = -ENODEV; + } + + if (err) + return err; + + dst = ds->dst; + dsa_tree_get(dst); + err = dsa_tree_setup(dst); + if (err) { + dsa_switch_release_ports(ds); + dsa_tree_put(dst); + } + + return err; } -EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister); -int dsa_devlink_resource_register(struct dsa_switch *ds, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) +int dsa_register_switch(struct dsa_switch *ds) { - return devlink_resource_register(ds->devlink, resource_name, - resource_size, resource_id, - parent_resource_id, - size_params); + int err; + + mutex_lock(&dsa2_mutex); + err = dsa_switch_probe(ds); + dsa_tree_put(ds->dst); + mutex_unlock(&dsa2_mutex); + + return err; } -EXPORT_SYMBOL_GPL(dsa_devlink_resource_register); +EXPORT_SYMBOL_GPL(dsa_register_switch); -void dsa_devlink_resources_unregister(struct dsa_switch *ds) +static void dsa_switch_remove(struct dsa_switch *ds) { - devlink_resources_unregister(ds->devlink); + struct dsa_switch_tree *dst = ds->dst; + + dsa_tree_teardown(dst); + dsa_switch_release_ports(ds); + dsa_tree_put(dst); } -EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister); -void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) +void dsa_unregister_switch(struct dsa_switch *ds) { - return devlink_resource_occ_get_register(ds->devlink, resource_id, - occ_get, occ_get_priv); + mutex_lock(&dsa2_mutex); + dsa_switch_remove(ds); + mutex_unlock(&dsa2_mutex); } -EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register); +EXPORT_SYMBOL_GPL(dsa_unregister_switch); -void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, - u64 resource_id) +/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is + * blocking that operation from completion, due to the dev_hold taken inside + * netdev_upper_dev_link. 
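That reference is + only dropped when the upper device is unlinked.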
Unlink the DSA slave interfaces from being uppers of + * the DSA master, so that the system can reboot successfully. + */ +void dsa_switch_shutdown(struct dsa_switch *ds) { - devlink_resource_occ_get_unregister(ds->devlink, resource_id); + struct net_device *master, *slave_dev; + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); + + if (!ds->setup) + goto out; + + rtnl_lock(); + + dsa_switch_for_each_user_port(dp, ds) { + master = dsa_port_to_master(dp); + slave_dev = dp->slave; + + netdev_upper_dev_unlink(master, slave_dev); + } + + /* Disconnect from further netdevice notifiers on the master, + * since netdev_uses_dsa() will now return false. + */ + dsa_switch_for_each_cpu_port(dp, ds) + dp->master->dsa_ptr = NULL; + + rtnl_unlock(); +out: + mutex_unlock(&dsa2_mutex); } -EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister); +EXPORT_SYMBOL_GPL(dsa_switch_shutdown); -struct devlink_region * -dsa_devlink_region_create(struct dsa_switch *ds, - const struct devlink_region_ops *ops, - u32 region_max_snapshots, u64 region_size) +#ifdef CONFIG_PM_SLEEP +static bool dsa_port_is_initialized(const struct dsa_port *dp) { - return devlink_region_create(ds->devlink, ops, region_max_snapshots, - region_size); + return dp->type == DSA_PORT_TYPE_USER && dp->slave; } -EXPORT_SYMBOL_GPL(dsa_devlink_region_create); -struct devlink_region * -dsa_devlink_port_region_create(struct dsa_switch *ds, - int port, - const struct devlink_port_region_ops *ops, - u32 region_max_snapshots, u64 region_size) +int dsa_switch_suspend(struct dsa_switch *ds) { - struct dsa_port *dp = dsa_to_port(ds, port); + struct dsa_port *dp; + int ret = 0; + + /* Suspend slave network devices */ + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) + continue; + + ret = dsa_slave_suspend(dp->slave); + if (ret) + return ret; + } + + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); - return devlink_port_region_create(&dp->devlink_port, ops, - region_max_snapshots, - region_size); + return ret; } -EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create); +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -void dsa_devlink_region_destroy(struct devlink_region *region) +int dsa_switch_resume(struct dsa_switch *ds) { - devlink_region_destroy(region); + struct dsa_port *dp; + int ret = 0; + + if (ds->ops->resume) + ret = ds->ops->resume(ds); + + if (ret) + return ret; + + /* Resume slave network devices */ + dsa_switch_for_each_port(dp, ds) { + if (!dsa_port_is_initialized(dp)) + continue; + + ret = dsa_slave_resume(dp->slave); + if (ret) + return ret; + } + + return 0; } -EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy); +EXPORT_SYMBOL_GPL(dsa_switch_resume); +#endif struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) { @@ -552,9 +1717,6 @@ static int __init dsa_init_module(void) dev_add_pack(&dsa_pack_type); - dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), - THIS_MODULE); - rc = rtnl_link_register(&dsa_link_ops); if (rc) goto netlink_register_fail; @@ -562,7 +1724,6 @@ static int __init dsa_init_module(void) return 0; netlink_register_fail: - dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); register_notifier_fail: @@ -575,7 +1736,6 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { rtnl_link_unregister(&dsa_link_ops); - dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); diff --git a/net/dsa/dsa.h b/net/dsa/dsa.h new file mode 100644 index 
000000000000..b7e17ae1094d --- /dev/null +++ b/net/dsa/dsa.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_H +#define __DSA_H + +#include <linux/list.h> +#include <linux/types.h> + +struct dsa_db; +struct dsa_device_ops; +struct dsa_lag; +struct dsa_switch_tree; +struct net_device; +struct work_struct; + +extern struct list_head dsa_tree_list; + +bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b); +bool dsa_schedule_work(struct work_struct *work); +void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); +struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, + const struct net_device *lag_dev); +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops); +void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); +void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *master, + bool up); +unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); +void dsa_bridge_num_put(const struct net_device *bridge_dev, + unsigned int bridge_num); +struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, + const struct net_device *br); + +#endif diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c deleted file mode 100644 index f8df55e2e23a..000000000000 --- a/net/dsa/dsa2.c +++ /dev/null @@ -1,1817 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/dsa/dsa2.c - Hardware switch handling, binding version 2 - * Copyright (c) 2008-2009 Marvell Semiconductor - * Copyright (c) 2013 Florian Fainelli <[email protected]> - * Copyright (c) 2016 Andrew Lunn <[email protected]> - */ - -#include <linux/device.h> -#include <linux/err.h> -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/slab.h> -#include <linux/rtnetlink.h> -#include <linux/of.h> -#include <linux/of_mdio.h> -#include <linux/of_net.h> -#include <net/devlink.h> -#include <net/sch_generic.h> - -#include "dsa_priv.h" - -static DEFINE_MUTEX(dsa2_mutex); -LIST_HEAD(dsa_tree_list); - -/* Track the bridges with forwarding offload enabled */ -static unsigned long dsa_fwd_offloading_bridges; - -/** - * dsa_tree_notify - Execute code for all switches in a DSA switch tree. - * @dst: collection of struct dsa_switch devices to notify. - * @e: event, must be of type DSA_NOTIFIER_* - * @v: event-specific value. - * - * Given a struct dsa_switch_tree, this can be used to run a function once for - * each member DSA switch. The other alternative of traversing the tree is only - * through its ports list, which does not uniquely list the switches. - */ -int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) -{ - struct raw_notifier_head *nh = &dst->nh; - int err; - - err = raw_notifier_call_chain(nh, e, v); - - return notifier_to_errno(err); -} - -/** - * dsa_broadcast - Notify all DSA trees in the system. - * @e: event, must be of type DSA_NOTIFIER_* - * @v: event-specific value. - * - * Can be used to notify the switching fabric of events such as cross-chip - * bridging between disjoint trees (such as islands of tagger-compatible - * switches bridged by an incompatible middle switch). 
- * - * WARNING: this function is not reliable during probe time, because probing - * between trees is asynchronous and not all DSA trees might have probed. - */ -int dsa_broadcast(unsigned long e, void *v) -{ - struct dsa_switch_tree *dst; - int err = 0; - - list_for_each_entry(dst, &dsa_tree_list, list) { - err = dsa_tree_notify(dst, e, v); - if (err) - break; - } - - return err; -} - -/** - * dsa_lag_map() - Map LAG structure to a linear LAG array - * @dst: Tree in which to record the mapping. - * @lag: LAG structure that is to be mapped to the tree's array. - * - * dsa_lag_id/dsa_lag_by_id can then be used to translate between the - * two spaces. The size of the mapping space is determined by the - * driver by setting ds->num_lag_ids. It is perfectly legal to leave - * it unset if it is not needed, in which case these functions become - * no-ops. - */ -void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag) -{ - unsigned int id; - - for (id = 1; id <= dst->lags_len; id++) { - if (!dsa_lag_by_id(dst, id)) { - dst->lags[id - 1] = lag; - lag->id = id; - return; - } - } - - /* No IDs left, which is OK. Some drivers do not need it. The - * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id - * returns an error for this device when joining the LAG. The - * driver can then return -EOPNOTSUPP back to DSA, which will - * fall back to a software LAG. - */ -} - -/** - * dsa_lag_unmap() - Remove a LAG ID mapping - * @dst: Tree in which the mapping is recorded. - * @lag: LAG structure that was mapped. - * - * As there may be multiple users of the mapping, it is only removed - * if there are no other references to it. - */ -void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag) -{ - unsigned int id; - - dsa_lags_foreach_id(id, dst) { - if (dsa_lag_by_id(dst, id) == lag) { - dst->lags[id - 1] = NULL; - lag->id = 0; - break; - } - } -} - -struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, - const struct net_device *lag_dev) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_lag_dev_get(dp) == lag_dev) - return dp->lag; - - return NULL; -} - -struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, - const struct net_device *br) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_bridge_dev_get(dp) == br) - return dp->bridge; - - return NULL; -} - -static int dsa_bridge_num_find(const struct net_device *bridge_dev) -{ - struct dsa_switch_tree *dst; - - list_for_each_entry(dst, &dsa_tree_list, list) { - struct dsa_bridge *bridge; - - bridge = dsa_tree_bridge_find(dst, bridge_dev); - if (bridge) - return bridge->num; - } - - return 0; -} - -unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) -{ - unsigned int bridge_num = dsa_bridge_num_find(bridge_dev); - - /* Switches without FDB isolation support don't get unique - * bridge numbering - */ - if (!max) - return 0; - - if (!bridge_num) { - /* First port that requests FDB isolation or TX forwarding - * offload for this bridge - */ - bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges, - DSA_MAX_NUM_OFFLOADING_BRIDGES, - 1); - if (bridge_num >= max) - return 0; - - set_bit(bridge_num, &dsa_fwd_offloading_bridges); - } - - return bridge_num; -} - -void dsa_bridge_num_put(const struct net_device *bridge_dev, - unsigned int bridge_num) -{ - /* Since we refcount bridges, we know that when we call this function - * it is no longer in use, so we can just go ahead and remove it from - * the bit mask. 
- */ - clear_bit(bridge_num, &dsa_fwd_offloading_bridges); -} - -struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) -{ - struct dsa_switch_tree *dst; - struct dsa_port *dp; - - list_for_each_entry(dst, &dsa_tree_list, list) { - if (dst->index != tree_index) - continue; - - list_for_each_entry(dp, &dst->ports, list) { - if (dp->ds->index != sw_index) - continue; - - return dp->ds; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(dsa_switch_find); - -static struct dsa_switch_tree *dsa_tree_find(int index) -{ - struct dsa_switch_tree *dst; - - list_for_each_entry(dst, &dsa_tree_list, list) - if (dst->index == index) - return dst; - - return NULL; -} - -static struct dsa_switch_tree *dsa_tree_alloc(int index) -{ - struct dsa_switch_tree *dst; - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) - return NULL; - - dst->index = index; - - INIT_LIST_HEAD(&dst->rtable); - - INIT_LIST_HEAD(&dst->ports); - - INIT_LIST_HEAD(&dst->list); - list_add_tail(&dst->list, &dsa_tree_list); - - kref_init(&dst->refcount); - - return dst; -} - -static void dsa_tree_free(struct dsa_switch_tree *dst) -{ - if (dst->tag_ops) - dsa_tag_driver_put(dst->tag_ops); - list_del(&dst->list); - kfree(dst); -} - -static struct dsa_switch_tree *dsa_tree_get(struct dsa_switch_tree *dst) -{ - if (dst) - kref_get(&dst->refcount); - - return dst; -} - -static struct dsa_switch_tree *dsa_tree_touch(int index) -{ - struct dsa_switch_tree *dst; - - dst = dsa_tree_find(index); - if (dst) - return dsa_tree_get(dst); - else - return dsa_tree_alloc(index); -} - -static void dsa_tree_release(struct kref *ref) -{ - struct dsa_switch_tree *dst; - - dst = container_of(ref, struct dsa_switch_tree, refcount); - - dsa_tree_free(dst); -} - -static void dsa_tree_put(struct dsa_switch_tree *dst) -{ - if (dst) - kref_put(&dst->refcount, dsa_tree_release); -} - -static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, - struct device_node *dn) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dp->dn == dn) - return dp; - - return NULL; -} - -static struct dsa_link *dsa_link_touch(struct dsa_port *dp, - struct dsa_port *link_dp) -{ - struct dsa_switch *ds = dp->ds; - struct dsa_switch_tree *dst; - struct dsa_link *dl; - - dst = ds->dst; - - list_for_each_entry(dl, &dst->rtable, list) - if (dl->dp == dp && dl->link_dp == link_dp) - return dl; - - dl = kzalloc(sizeof(*dl), GFP_KERNEL); - if (!dl) - return NULL; - - dl->dp = dp; - dl->link_dp = link_dp; - - INIT_LIST_HEAD(&dl->list); - list_add_tail(&dl->list, &dst->rtable); - - return dl; -} - -static bool dsa_port_setup_routing_table(struct dsa_port *dp) -{ - struct dsa_switch *ds = dp->ds; - struct dsa_switch_tree *dst = ds->dst; - struct device_node *dn = dp->dn; - struct of_phandle_iterator it; - struct dsa_port *link_dp; - struct dsa_link *dl; - int err; - - of_for_each_phandle(&it, err, dn, "link", NULL, 0) { - link_dp = dsa_tree_find_port_by_node(dst, it.node); - if (!link_dp) { - of_node_put(it.node); - return false; - } - - dl = dsa_link_touch(dp, link_dp); - if (!dl) { - of_node_put(it.node); - return false; - } - } - - return true; -} - -static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst) -{ - bool complete = true; - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_dsa(dp)) { - complete = dsa_port_setup_routing_table(dp); - if (!complete) - break; - } - } - - return complete; -} - -static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) -{ - struct 
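
The tree lifetime in the deleted code above follows the kernel's find-or-create-with-refcount idiom: dsa_tree_touch() either takes a reference on an existing tree or allocates a fresh one with a refcount of 1, and dsa_tree_put() frees the tree on the last put. A minimal userspace model of that idiom, with illustrative names rather than the kernel's kref API::

  #include <stdlib.h>

  /* Stand-in for struct dsa_switch_tree; refcount models struct kref. */
  struct tree {
      int index;
      int refcount;
      struct tree *next;
  };

  static struct tree *tree_list;

  static struct tree *tree_find(int index)
  {
      for (struct tree *t = tree_list; t; t = t->next)
          if (t->index == index)
              return t;
      return NULL;
  }

  /* Find-or-create: take a reference on an existing tree, else allocate. */
  static struct tree *tree_touch(int index)
  {
      struct tree *t = tree_find(index);

      if (t) {
          t->refcount++;            /* kref_get() */
          return t;
      }
      t = calloc(1, sizeof(*t));
      if (!t)
          return NULL;
      t->index = index;
      t->refcount = 1;              /* kref_init() */
      t->next = tree_list;
      tree_list = t;
      return t;
  }

  static void tree_put(struct tree *t)
  {
      struct tree **p;

      if (!t || --t->refcount)
          return;
      /* Last reference dropped: unlink and free, like dsa_tree_release(). */
      for (p = &tree_list; *p; p = &(*p)->next)
          if (*p == t) {
              *p = t->next;
              break;
          }
      free(t);
  }
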
dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_cpu(dp)) - return dp; - - return NULL; -} - -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) -{ - struct device_node *ethernet; - struct net_device *master; - struct dsa_port *cpu_dp; - - cpu_dp = dsa_tree_find_first_cpu(dst); - ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); - master = of_find_net_device_by_node(ethernet); - of_node_put(ethernet); - - return master; -} - -/* Assign the default CPU port (the first one in the tree) to all ports of the - * fabric which don't already have one as part of their own switch. - */ -static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) -{ - struct dsa_port *cpu_dp, *dp; - - cpu_dp = dsa_tree_find_first_cpu(dst); - if (!cpu_dp) { - pr_err("DSA: tree %d has no CPU port\n", dst->index); - return -EINVAL; - } - - list_for_each_entry(dp, &dst->ports, list) { - if (dp->cpu_dp) - continue; - - if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) - dp->cpu_dp = cpu_dp; - } - - return 0; -} - -/* Perform initial assignment of CPU ports to user ports and DSA links in the - * fabric, giving preference to CPU ports local to each switch. Default to - * using the first CPU port in the switch tree if the port does not have a CPU - * port local to this switch. - */ -static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) -{ - struct dsa_port *cpu_dp, *dp; - - list_for_each_entry(cpu_dp, &dst->ports, list) { - if (!dsa_port_is_cpu(cpu_dp)) - continue; - - /* Prefer a local CPU port */ - dsa_switch_for_each_port(dp, cpu_dp->ds) { - /* Prefer the first local CPU port found */ - if (dp->cpu_dp) - continue; - - if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) - dp->cpu_dp = cpu_dp; - } - } - - return dsa_tree_setup_default_cpu(dst); -} - -static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) - dp->cpu_dp = NULL; -} - -static int dsa_port_devlink_setup(struct dsa_port *dp) -{ - struct devlink_port *dlp = &dp->devlink_port; - struct dsa_switch_tree *dst = dp->ds->dst; - struct devlink_port_attrs attrs = {}; - struct devlink *dl = dp->ds->devlink; - struct dsa_switch *ds = dp->ds; - const unsigned char *id; - unsigned char len; - int err; - - memset(dlp, 0, sizeof(*dlp)); - devlink_port_init(dl, dlp); - - if (ds->ops->port_setup) { - err = ds->ops->port_setup(ds, dp->index); - if (err) - return err; - } - - id = (const unsigned char *)&dst->index; - len = sizeof(dst->index); - - attrs.phys.port_number = dp->index; - memcpy(attrs.switch_id.id, id, len); - attrs.switch_id.id_len = len; - - switch (dp->type) { - case DSA_PORT_TYPE_UNUSED: - attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; - break; - case DSA_PORT_TYPE_CPU: - attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; - break; - case DSA_PORT_TYPE_DSA: - attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; - break; - case DSA_PORT_TYPE_USER: - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - break; - } - - devlink_port_attrs_set(dlp, &attrs); - err = devlink_port_register(dl, dlp, dp->index); - if (err) { - if (ds->ops->port_teardown) - ds->ops->port_teardown(ds, dp->index); - return err; - } - - return 0; -} - -static void dsa_port_devlink_teardown(struct dsa_port *dp) -{ - struct devlink_port *dlp = &dp->devlink_port; - struct dsa_switch *ds = dp->ds; - - devlink_port_unregister(dlp); - - if (ds->ops->port_teardown) - ds->ops->port_teardown(ds, dp->index); - - 
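
The CPU port assignment being removed here is two-pass: each user/DSA port first gets a CPU port local to its own switch if one exists, and whatever is left over falls back to the first CPU port found in the tree. A compact standalone model of that preference order, with illustrative types rather than the kernel structs::

  #include <stddef.h>

  enum port_type { PORT_UNUSED, PORT_CPU, PORT_DSA, PORT_USER };

  struct port {
      enum port_type type;
      int switch_index;     /* which switch in the tree owns this port */
      struct port *cpu_dp;  /* resolved CPU port, NULL until assigned */
  };

  /* Pass 1: prefer a CPU port on the same switch; pass 2: first CPU port. */
  static int assign_cpu_ports(struct port *ports, size_t n)
  {
      struct port *first_cpu = NULL;
      size_t i, j;

      for (i = 0; i < n; i++) {
          if (ports[i].type != PORT_CPU)
              continue;
          if (!first_cpu)
              first_cpu = &ports[i];
          for (j = 0; j < n; j++)
              if (!ports[j].cpu_dp &&
                  ports[j].switch_index == ports[i].switch_index &&
                  (ports[j].type == PORT_USER || ports[j].type == PORT_DSA))
                  ports[j].cpu_dp = &ports[i];
      }

      if (!first_cpu)
          return -1;    /* tree has no CPU port: -EINVAL upstream */

      for (j = 0; j < n; j++)
          if (!ports[j].cpu_dp &&
              (ports[j].type == PORT_USER || ports[j].type == PORT_DSA))
              ports[j].cpu_dp = first_cpu;
      return 0;
  }
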
devlink_port_fini(dlp); -} - -static int dsa_port_setup(struct dsa_port *dp) -{ - bool dsa_port_link_registered = false; - struct dsa_switch *ds = dp->ds; - bool dsa_port_enabled = false; - int err = 0; - - if (dp->setup) - return 0; - - err = dsa_port_devlink_setup(dp); - if (err) - return err; - - switch (dp->type) { - case DSA_PORT_TYPE_UNUSED: - dsa_port_disable(dp); - break; - case DSA_PORT_TYPE_CPU: - if (dp->dn) { - err = dsa_shared_port_link_register_of(dp); - if (err) - break; - dsa_port_link_registered = true; - } else { - dev_warn(ds->dev, - "skipping link registration for CPU port %d\n", - dp->index); - } - - err = dsa_port_enable(dp, NULL); - if (err) - break; - dsa_port_enabled = true; - - break; - case DSA_PORT_TYPE_DSA: - if (dp->dn) { - err = dsa_shared_port_link_register_of(dp); - if (err) - break; - dsa_port_link_registered = true; - } else { - dev_warn(ds->dev, - "skipping link registration for DSA port %d\n", - dp->index); - } - - err = dsa_port_enable(dp, NULL); - if (err) - break; - dsa_port_enabled = true; - - break; - case DSA_PORT_TYPE_USER: - of_get_mac_address(dp->dn, dp->mac); - err = dsa_slave_create(dp); - break; - } - - if (err && dsa_port_enabled) - dsa_port_disable(dp); - if (err && dsa_port_link_registered) - dsa_shared_port_link_unregister_of(dp); - if (err) { - dsa_port_devlink_teardown(dp); - return err; - } - - dp->setup = true; - - return 0; -} - -static void dsa_port_teardown(struct dsa_port *dp) -{ - if (!dp->setup) - return; - - switch (dp->type) { - case DSA_PORT_TYPE_UNUSED: - break; - case DSA_PORT_TYPE_CPU: - dsa_port_disable(dp); - if (dp->dn) - dsa_shared_port_link_unregister_of(dp); - break; - case DSA_PORT_TYPE_DSA: - dsa_port_disable(dp); - if (dp->dn) - dsa_shared_port_link_unregister_of(dp); - break; - case DSA_PORT_TYPE_USER: - if (dp->slave) { - dsa_slave_destroy(dp->slave); - dp->slave = NULL; - } - break; - } - - dsa_port_devlink_teardown(dp); - - dp->setup = false; -} - -static int dsa_port_setup_as_unused(struct dsa_port *dp) -{ - dp->type = DSA_PORT_TYPE_UNUSED; - return dsa_port_setup(dp); -} - -static int dsa_devlink_info_get(struct devlink *dl, - struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dsa_devlink_to_ds(dl); - - if (ds->ops->devlink_info_get) - return ds->ops->devlink_info_get(ds, req, extack); - - return -EOPNOTSUPP; -} - -static int dsa_devlink_sb_pool_get(struct devlink *dl, - unsigned int sb_index, u16 pool_index, - struct devlink_sb_pool_info *pool_info) -{ - struct dsa_switch *ds = dsa_devlink_to_ds(dl); - - if (!ds->ops->devlink_sb_pool_get) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index, - pool_info); -} - -static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index, - u16 pool_index, u32 size, - enum devlink_sb_threshold_type threshold_type, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dsa_devlink_to_ds(dl); - - if (!ds->ops->devlink_sb_pool_set) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size, - threshold_type, extack); -} - -static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp, - unsigned int sb_index, u16 pool_index, - u32 *p_threshold) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_port_pool_get) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index, - pool_index, p_threshold); -} - -static int 
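
dsa_port_setup() above records how far it got (link registered, port enabled) in local booleans, so the single error path at the bottom undoes only the steps that actually ran. The shape of that pattern, reduced to a standalone sketch where step_a/step_b stand in for dsa_shared_port_link_register_of() and dsa_port_enable()::

  #include <stdbool.h>

  static int step_a(void)  { return 0; }
  static void undo_a(void) { }
  static int step_b(void)  { return 0; }
  static void undo_b(void) { }
  static int step_c(void)  { return 0; }   /* a later step that may fail */

  static int setup_with_progress_flags(void)
  {
      bool a_done = false, b_done = false;
      int err;

      err = step_a();
      if (err)
          goto out;
      a_done = true;

      err = step_b();
      if (err)
          goto out;
      b_done = true;

      err = step_c();
      if (err)
          goto out;

      return 0;
  out:
      /* Unwind in reverse order, but only what actually completed. */
      if (b_done)
          undo_b();
      if (a_done)
          undo_a();
      return err;
  }
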
dsa_devlink_sb_port_pool_set(struct devlink_port *dlp, - unsigned int sb_index, u16 pool_index, - u32 threshold, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_port_pool_set) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index, - pool_index, threshold, extack); -} - -static int -dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp, - unsigned int sb_index, u16 tc_index, - enum devlink_sb_pool_type pool_type, - u16 *p_pool_index, u32 *p_threshold) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_tc_pool_bind_get) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index, - tc_index, pool_type, - p_pool_index, p_threshold); -} - -static int -dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp, - unsigned int sb_index, u16 tc_index, - enum devlink_sb_pool_type pool_type, - u16 pool_index, u32 threshold, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_tc_pool_bind_set) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index, - tc_index, pool_type, - pool_index, threshold, - extack); -} - -static int dsa_devlink_sb_occ_snapshot(struct devlink *dl, - unsigned int sb_index) -{ - struct dsa_switch *ds = dsa_devlink_to_ds(dl); - - if (!ds->ops->devlink_sb_occ_snapshot) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_occ_snapshot(ds, sb_index); -} - -static int dsa_devlink_sb_occ_max_clear(struct devlink *dl, - unsigned int sb_index) -{ - struct dsa_switch *ds = dsa_devlink_to_ds(dl); - - if (!ds->ops->devlink_sb_occ_max_clear) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_occ_max_clear(ds, sb_index); -} - -static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp, - unsigned int sb_index, - u16 pool_index, u32 *p_cur, - u32 *p_max) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_occ_port_pool_get) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index, - pool_index, p_cur, p_max); -} - -static int -dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp, - unsigned int sb_index, u16 tc_index, - enum devlink_sb_pool_type pool_type, - u32 *p_cur, u32 *p_max) -{ - struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); - int port = dsa_devlink_port_to_port(dlp); - - if (!ds->ops->devlink_sb_occ_tc_port_bind_get) - return -EOPNOTSUPP; - - return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port, - sb_index, tc_index, - pool_type, p_cur, - p_max); -} - -static const struct devlink_ops dsa_devlink_ops = { - .info_get = dsa_devlink_info_get, - .sb_pool_get = dsa_devlink_sb_pool_get, - .sb_pool_set = dsa_devlink_sb_pool_set, - .sb_port_pool_get = dsa_devlink_sb_port_pool_get, - .sb_port_pool_set = dsa_devlink_sb_port_pool_set, - .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get, - .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set, - .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot, - .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear, - .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get, - .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get, -}; - -static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) -{ - const 
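
Every devlink callback in this block is a thin trampoline: recover the switch behind the devlink instance, return -EOPNOTSUPP when the driver left the optional op unimplemented, and otherwise forward the call. The dispatch shape as a self-contained sketch, with an invented ops table standing in for struct dsa_switch_ops::

  #include <errno.h>
  #include <stddef.h>

  struct sw;

  /* Invented ops table; real drivers fill in only what they support. */
  struct sw_ops {
      int (*sb_pool_get)(struct sw *sw, unsigned int sb_index);
  };

  struct sw {
      const struct sw_ops *ops;
  };

  static int sw_sb_pool_get(struct sw *sw, unsigned int sb_index)
  {
      if (!sw->ops->sb_pool_get)
          return -EOPNOTSUPP;   /* optional op not implemented */

      return sw->ops->sb_pool_get(sw, sb_index);
  }
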
struct dsa_device_ops *tag_ops = ds->dst->tag_ops; - struct dsa_switch_tree *dst = ds->dst; - int err; - - if (tag_ops->proto == dst->default_proto) - goto connect; - - rtnl_lock(); - err = ds->ops->change_tag_protocol(ds, tag_ops->proto); - rtnl_unlock(); - if (err) { - dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n", - tag_ops->name, ERR_PTR(err)); - return err; - } - -connect: - if (tag_ops->connect) { - err = tag_ops->connect(ds); - if (err) - return err; - } - - if (ds->ops->connect_tag_protocol) { - err = ds->ops->connect_tag_protocol(ds, tag_ops->proto); - if (err) { - dev_err(ds->dev, - "Unable to connect to tag protocol \"%s\": %pe\n", - tag_ops->name, ERR_PTR(err)); - goto disconnect; - } - } - - return 0; - -disconnect: - if (tag_ops->disconnect) - tag_ops->disconnect(ds); - - return err; -} - -static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds) -{ - const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; - - if (tag_ops->disconnect) - tag_ops->disconnect(ds); -} - -static int dsa_switch_setup(struct dsa_switch *ds) -{ - struct dsa_devlink_priv *dl_priv; - struct device_node *dn; - int err; - - if (ds->setup) - return 0; - - /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before ops->setup() has run, since the switch drivers and - * the slave MDIO bus driver rely on these values for probing PHY - * devices or not - */ - ds->phys_mii_mask |= dsa_user_ports(ds); - - /* Add the switch to devlink before calling setup, so that setup can - * add dpipe tables - */ - ds->devlink = - devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); - if (!ds->devlink) - return -ENOMEM; - dl_priv = devlink_priv(ds->devlink); - dl_priv->ds = ds; - - err = dsa_switch_register_notifier(ds); - if (err) - goto devlink_free; - - ds->configure_vlan_while_not_filtering = true; - - err = ds->ops->setup(ds); - if (err < 0) - goto unregister_notifier; - - err = dsa_switch_setup_tag_protocol(ds); - if (err) - goto teardown; - - if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = mdiobus_alloc(); - if (!ds->slave_mii_bus) { - err = -ENOMEM; - goto teardown; - } - - dsa_slave_mii_bus_init(ds); - - dn = of_get_child_by_name(ds->dev->of_node, "mdio"); - - err = of_mdiobus_register(ds->slave_mii_bus, dn); - of_node_put(dn); - if (err < 0) - goto free_slave_mii_bus; - } - - ds->setup = true; - devlink_register(ds->devlink); - return 0; - -free_slave_mii_bus: - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_free(ds->slave_mii_bus); -teardown: - if (ds->ops->teardown) - ds->ops->teardown(ds); -unregister_notifier: - dsa_switch_unregister_notifier(ds); -devlink_free: - devlink_free(ds->devlink); - ds->devlink = NULL; - return err; -} - -static void dsa_switch_teardown(struct dsa_switch *ds) -{ - if (!ds->setup) - return; - - if (ds->devlink) - devlink_unregister(ds->devlink); - - if (ds->slave_mii_bus && ds->ops->phy_read) { - mdiobus_unregister(ds->slave_mii_bus); - mdiobus_free(ds->slave_mii_bus); - ds->slave_mii_bus = NULL; - } - - dsa_switch_teardown_tag_protocol(ds); - - if (ds->ops->teardown) - ds->ops->teardown(ds); - - dsa_switch_unregister_notifier(ds); - - if (ds->devlink) { - devlink_free(ds->devlink); - ds->devlink = NULL; - } - - ds->setup = false; -} - -/* First tear down the non-shared, then the shared ports. This ensures that - * all work items scheduled by our switchdev handlers for user ports have - * completed before we destroy the refcounting kept on the shared ports. 
- */ -static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) - dsa_port_teardown(dp); - - dsa_flush_workqueue(); - - list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) - dsa_port_teardown(dp); -} - -static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); -} - -/* Bring shared ports up first, then non-shared ports */ -static int dsa_tree_setup_ports(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - int err = 0; - - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) { - err = dsa_port_setup(dp); - if (err) - goto teardown; - } - } - - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) { - err = dsa_port_setup(dp); - if (err) { - err = dsa_port_setup_as_unused(dp); - if (err) - goto teardown; - } - } - } - - return 0; - -teardown: - dsa_tree_teardown_ports(dst); - - return err; -} - -static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - int err = 0; - - list_for_each_entry(dp, &dst->ports, list) { - err = dsa_switch_setup(dp->ds); - if (err) { - dsa_tree_teardown_switches(dst); - break; - } - } - - return err; -} - -static int dsa_tree_setup_master(struct dsa_switch_tree *dst) -{ - struct dsa_port *cpu_dp; - int err = 0; - - rtnl_lock(); - - dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; - bool admin_up = (master->flags & IFF_UP) && - !qdisc_tx_is_noop(master); - - err = dsa_master_setup(master, cpu_dp); - if (err) - break; - - /* Replay master state event */ - dsa_tree_master_admin_state_change(dst, master, admin_up); - dsa_tree_master_oper_state_change(dst, master, - netif_oper_up(master)); - } - - rtnl_unlock(); - - return err; -} - -static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) -{ - struct dsa_port *cpu_dp; - - rtnl_lock(); - - dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; - - /* Synthesizing an "admin down" state is sufficient for - * the switches to get a notification if the master is - * currently up and running. - */ - dsa_tree_master_admin_state_change(dst, master, false); - - dsa_master_teardown(master); - } - - rtnl_unlock(); -} - -static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) -{ - unsigned int len = 0; - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) { - if (dp->ds->num_lag_ids > len) - len = dp->ds->num_lag_ids; - } - - if (!len) - return 0; - - dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); - if (!dst->lags) - return -ENOMEM; - - dst->lags_len = len; - return 0; -} - -static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) -{ - kfree(dst->lags); -} - -static int dsa_tree_setup(struct dsa_switch_tree *dst) -{ - bool complete; - int err; - - if (dst->setup) { - pr_err("DSA: tree %d already setup! 
Disjoint trees?\n", - dst->index); - return -EEXIST; - } - - complete = dsa_tree_setup_routing_table(dst); - if (!complete) - return 0; - - err = dsa_tree_setup_cpu_ports(dst); - if (err) - return err; - - err = dsa_tree_setup_switches(dst); - if (err) - goto teardown_cpu_ports; - - err = dsa_tree_setup_ports(dst); - if (err) - goto teardown_switches; - - err = dsa_tree_setup_master(dst); - if (err) - goto teardown_ports; - - err = dsa_tree_setup_lags(dst); - if (err) - goto teardown_master; - - dst->setup = true; - - pr_info("DSA: tree %d setup\n", dst->index); - - return 0; - -teardown_master: - dsa_tree_teardown_master(dst); -teardown_ports: - dsa_tree_teardown_ports(dst); -teardown_switches: - dsa_tree_teardown_switches(dst); -teardown_cpu_ports: - dsa_tree_teardown_cpu_ports(dst); - - return err; -} - -static void dsa_tree_teardown(struct dsa_switch_tree *dst) -{ - struct dsa_link *dl, *next; - - if (!dst->setup) - return; - - dsa_tree_teardown_lags(dst); - - dsa_tree_teardown_master(dst); - - dsa_tree_teardown_ports(dst); - - dsa_tree_teardown_switches(dst); - - dsa_tree_teardown_cpu_ports(dst); - - list_for_each_entry_safe(dl, next, &dst->rtable, list) { - list_del(&dl->list); - kfree(dl); - } - - pr_info("DSA: tree %d torn down\n", dst->index); - - dst->setup = false; -} - -static int dsa_tree_bind_tag_proto(struct dsa_switch_tree *dst, - const struct dsa_device_ops *tag_ops) -{ - const struct dsa_device_ops *old_tag_ops = dst->tag_ops; - struct dsa_notifier_tag_proto_info info; - int err; - - dst->tag_ops = tag_ops; - - /* Notify the switches from this tree about the connection - * to the new tagger - */ - info.tag_ops = tag_ops; - err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_CONNECT, &info); - if (err && err != -EOPNOTSUPP) - goto out_disconnect; - - /* Notify the old tagger about the disconnection from this tree */ - info.tag_ops = old_tag_ops; - dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); - - return 0; - -out_disconnect: - info.tag_ops = tag_ops; - dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO_DISCONNECT, &info); - dst->tag_ops = old_tag_ops; - - return err; -} - -/* Since the dsa/tagging sysfs device attribute is per master, the assumption - * is that all DSA switches within a tree share the same tagger, otherwise - * they would have formed disjoint trees (different "dsa,member" values). - */ -int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, - const struct dsa_device_ops *tag_ops, - const struct dsa_device_ops *old_tag_ops) -{ - struct dsa_notifier_tag_proto_info info; - struct dsa_port *dp; - int err = -EBUSY; - - if (!rtnl_trylock()) - return restart_syscall(); - - /* At the moment we don't allow changing the tag protocol under - * traffic. The rtnl_mutex also happens to serialize concurrent - * attempts to change the tagging protocol. If we ever lift the IFF_UP - * restriction, there needs to be another mutex which serializes this. 
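
dsa_tree_setup() above is the classic goto-unwind ladder: each setup step that fails jumps to a label that tears down everything set up before it, in reverse order. Its skeleton, as a standalone sketch with placeholder steps::

  static int a_setup(void)    { return 0; }
  static void a_teardown(void) { }
  static int b_setup(void)    { return 0; }
  static void b_teardown(void) { }
  static int c_setup(void)    { return 0; }

  static int tree_setup_skeleton(void)
  {
      int err;

      err = a_setup();
      if (err)
          return err;

      err = b_setup();
      if (err)
          goto teardown_a;

      err = c_setup();
      if (err)
          goto teardown_b;

      return 0;

  /* Labels fall through so each failure point unwinds all earlier steps. */
  teardown_b:
      b_teardown();
  teardown_a:
      a_teardown();
      return err;
  }
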
- */ - dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp)->flags & IFF_UP) - goto out_unlock; - - if (dp->slave->flags & IFF_UP) - goto out_unlock; - } - - /* Notify the tag protocol change */ - info.tag_ops = tag_ops; - err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); - if (err) - goto out_unwind_tagger; - - err = dsa_tree_bind_tag_proto(dst, tag_ops); - if (err) - goto out_unwind_tagger; - - rtnl_unlock(); - - return 0; - -out_unwind_tagger: - info.tag_ops = old_tag_ops; - dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); -out_unlock: - rtnl_unlock(); - return err; -} - -static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, - struct net_device *master) -{ - struct dsa_notifier_master_state_info info; - struct dsa_port *cpu_dp = master->dsa_ptr; - - info.master = master; - info.operational = dsa_port_master_is_operational(cpu_dp); - - dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); -} - -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up) -{ - struct dsa_port *cpu_dp = master->dsa_ptr; - bool notify = false; - - /* Don't keep track of admin state on LAG DSA masters, - * but rather just of physical DSA masters - */ - if (netif_is_lag_master(master)) - return; - - if ((dsa_port_master_is_operational(cpu_dp)) != - (up && cpu_dp->master_oper_up)) - notify = true; - - cpu_dp->master_admin_up = up; - - if (notify) - dsa_tree_master_state_change(dst, master); -} - -void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up) -{ - struct dsa_port *cpu_dp = master->dsa_ptr; - bool notify = false; - - /* Don't keep track of oper state on LAG DSA masters, - * but rather just of physical DSA masters - */ - if (netif_is_lag_master(master)) - return; - - if ((dsa_port_master_is_operational(cpu_dp)) != - (cpu_dp->master_admin_up && up)) - notify = true; - - cpu_dp->master_oper_up = up; - - if (notify) - dsa_tree_master_state_change(dst, master); -} - -static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) -{ - struct dsa_switch_tree *dst = ds->dst; - struct dsa_port *dp; - - dsa_switch_for_each_port(dp, ds) - if (dp->index == index) - return dp; - - dp = kzalloc(sizeof(*dp), GFP_KERNEL); - if (!dp) - return NULL; - - dp->ds = ds; - dp->index = index; - - mutex_init(&dp->addr_lists_lock); - mutex_init(&dp->vlans_lock); - INIT_LIST_HEAD(&dp->fdbs); - INIT_LIST_HEAD(&dp->mdbs); - INIT_LIST_HEAD(&dp->vlans); - INIT_LIST_HEAD(&dp->list); - list_add_tail(&dp->list, &dst->ports); - - return dp; -} - -static int dsa_port_parse_user(struct dsa_port *dp, const char *name) -{ - dp->type = DSA_PORT_TYPE_USER; - dp->name = name; - - return 0; -} - -static int dsa_port_parse_dsa(struct dsa_port *dp) -{ - dp->type = DSA_PORT_TYPE_DSA; - - return 0; -} - -static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, - struct net_device *master) -{ - enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE; - struct dsa_switch *mds, *ds = dp->ds; - unsigned int mdp_upstream; - struct dsa_port *mdp; - - /* It is possible to stack DSA switches onto one another when that - * happens the switch driver may want to know if its tagging protocol - * is going to work in such a configuration. 
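
The admin/oper tracking above is edge-triggered: a notifier fires only when the combined "master is operational" predicate (admin up && oper up) actually changes value, so switches are not notified on every flag update. A standalone model of that debounce, with printf standing in for the DSA_NOTIFIER_MASTER_STATE_CHANGE notification::

  #include <stdbool.h>
  #include <stdio.h>

  struct master_state {
      bool admin_up;
      bool oper_up;
  };

  static bool is_operational(const struct master_state *s)
  {
      return s->admin_up && s->oper_up;
  }

  /* Notify only on a change of the combined predicate; the oper-side
   * setter is symmetric and omitted here.
   */
  static void set_admin_up(struct master_state *s, bool up)
  {
      bool was = is_operational(s);

      s->admin_up = up;
      if (was != is_operational(s))
          printf("operational -> %d\n", is_operational(s));
  }
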
- */ - if (dsa_slave_dev_check(master)) { - mdp = dsa_slave_to_port(master); - mds = mdp->ds; - mdp_upstream = dsa_upstream_port(mds, mdp->index); - tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream, - DSA_TAG_PROTO_NONE); - } - - /* If the master device is not itself a DSA slave in a disjoint DSA - * tree, then return immediately. - */ - return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); -} - -static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, - const char *user_protocol) -{ - const struct dsa_device_ops *tag_ops = NULL; - struct dsa_switch *ds = dp->ds; - struct dsa_switch_tree *dst = ds->dst; - enum dsa_tag_protocol default_proto; - - /* Find out which protocol the switch would prefer. */ - default_proto = dsa_get_tag_protocol(dp, master); - if (dst->default_proto) { - if (dst->default_proto != default_proto) { - dev_err(ds->dev, - "A DSA switch tree can have only one tagging protocol\n"); - return -EINVAL; - } - } else { - dst->default_proto = default_proto; - } - - /* See if the user wants to override that preference. */ - if (user_protocol) { - if (!ds->ops->change_tag_protocol) { - dev_err(ds->dev, "Tag protocol cannot be modified\n"); - return -EINVAL; - } - - tag_ops = dsa_tag_driver_get_by_name(user_protocol); - if (IS_ERR(tag_ops)) { - dev_warn(ds->dev, - "Failed to find a tagging driver for protocol %s, using default\n", - user_protocol); - tag_ops = NULL; - } - } - - if (!tag_ops) - tag_ops = dsa_tag_driver_get_by_id(default_proto); - - if (IS_ERR(tag_ops)) { - if (PTR_ERR(tag_ops) == -ENOPROTOOPT) - return -EPROBE_DEFER; - - dev_warn(ds->dev, "No tagger for this switch\n"); - return PTR_ERR(tag_ops); - } - - if (dst->tag_ops) { - if (dst->tag_ops != tag_ops) { - dev_err(ds->dev, - "A DSA switch tree can have only one tagging protocol\n"); - - dsa_tag_driver_put(tag_ops); - return -EINVAL; - } - - /* In the case of multiple CPU ports per switch, the tagging - * protocol is still reference-counted only per switch tree. - */ - dsa_tag_driver_put(tag_ops); - } else { - dst->tag_ops = tag_ops; - } - - dp->master = master; - dp->type = DSA_PORT_TYPE_CPU; - dsa_port_set_tag_protocol(dp, dst->tag_ops); - dp->dst = dst; - - /* At this point, the tree may be configured to use a different - * tagger than the one chosen by the switch driver during - * .setup, in the case when a user selects a custom protocol - * through the DT. - * - * This is resolved by syncing the driver with the tree in - * dsa_switch_setup_tag_protocol once .setup has run and the - * driver is ready to accept calls to .change_tag_protocol. If - * the driver does not support the custom protocol at that - * point, the tree is wholly rejected, thereby ensuring that the - * tree and driver are always in agreement on the protocol to - * use. 
- */ - return 0; -} - -static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) -{ - struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); - const char *name = of_get_property(dn, "label", NULL); - bool link = of_property_read_bool(dn, "link"); - - dp->dn = dn; - - if (ethernet) { - struct net_device *master; - const char *user_protocol; - - master = of_find_net_device_by_node(ethernet); - of_node_put(ethernet); - if (!master) - return -EPROBE_DEFER; - - user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); - return dsa_port_parse_cpu(dp, master, user_protocol); - } - - if (link) - return dsa_port_parse_dsa(dp); - - return dsa_port_parse_user(dp, name); -} - -static int dsa_switch_parse_ports_of(struct dsa_switch *ds, - struct device_node *dn) -{ - struct device_node *ports, *port; - struct dsa_port *dp; - int err = 0; - u32 reg; - - ports = of_get_child_by_name(dn, "ports"); - if (!ports) { - /* The second possibility is "ethernet-ports" */ - ports = of_get_child_by_name(dn, "ethernet-ports"); - if (!ports) { - dev_err(ds->dev, "no ports child node found\n"); - return -EINVAL; - } - } - - for_each_available_child_of_node(ports, port) { - err = of_property_read_u32(port, "reg", ®); - if (err) { - of_node_put(port); - goto out_put_node; - } - - if (reg >= ds->num_ports) { - dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%u)\n", - port, reg, ds->num_ports); - of_node_put(port); - err = -EINVAL; - goto out_put_node; - } - - dp = dsa_to_port(ds, reg); - - err = dsa_port_parse_of(dp, port); - if (err) { - of_node_put(port); - goto out_put_node; - } - } - -out_put_node: - of_node_put(ports); - return err; -} - -static int dsa_switch_parse_member_of(struct dsa_switch *ds, - struct device_node *dn) -{ - u32 m[2] = { 0, 0 }; - int sz; - - /* Don't error out if this optional property isn't found */ - sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2); - if (sz < 0 && sz != -EINVAL) - return sz; - - ds->index = m[1]; - - ds->dst = dsa_tree_touch(m[0]); - if (!ds->dst) - return -ENOMEM; - - if (dsa_switch_find(ds->dst->index, ds->index)) { - dev_err(ds->dev, - "A DSA switch with index %d already exists in tree %d\n", - ds->index, ds->dst->index); - return -EEXIST; - } - - if (ds->dst->last_switch < ds->index) - ds->dst->last_switch = ds->index; - - return 0; -} - -static int dsa_switch_touch_ports(struct dsa_switch *ds) -{ - struct dsa_port *dp; - int port; - - for (port = 0; port < ds->num_ports; port++) { - dp = dsa_port_touch(ds, port); - if (!dp) - return -ENOMEM; - } - - return 0; -} - -static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) -{ - int err; - - err = dsa_switch_parse_member_of(ds, dn); - if (err) - return err; - - err = dsa_switch_touch_ports(ds); - if (err) - return err; - - return dsa_switch_parse_ports_of(ds, dn); -} - -static int dsa_port_parse(struct dsa_port *dp, const char *name, - struct device *dev) -{ - if (!strcmp(name, "cpu")) { - struct net_device *master; - - master = dsa_dev_to_net_device(dev); - if (!master) - return -EPROBE_DEFER; - - dev_put(master); - - return dsa_port_parse_cpu(dp, master, NULL); - } - - if (!strcmp(name, "dsa")) - return dsa_port_parse_dsa(dp); - - return dsa_port_parse_user(dp, name); -} - -static int dsa_switch_parse_ports(struct dsa_switch *ds, - struct dsa_chip_data *cd) -{ - bool valid_name_found = false; - struct dsa_port *dp; - struct device *dev; - const char *name; - unsigned int i; - int err; - - for (i = 0; i < DSA_MAX_PORTS; i++) { - name = 
cd->port_names[i]; - dev = cd->netdev[i]; - dp = dsa_to_port(ds, i); - - if (!name) - continue; - - err = dsa_port_parse(dp, name, dev); - if (err) - return err; - - valid_name_found = true; - } - - if (!valid_name_found && i == DSA_MAX_PORTS) - return -EINVAL; - - return 0; -} - -static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) -{ - int err; - - ds->cd = cd; - - /* We don't support interconnected switches nor multiple trees via - * platform data, so this is the unique switch of the tree. - */ - ds->index = 0; - ds->dst = dsa_tree_touch(0); - if (!ds->dst) - return -ENOMEM; - - err = dsa_switch_touch_ports(ds); - if (err) - return err; - - return dsa_switch_parse_ports(ds, cd); -} - -static void dsa_switch_release_ports(struct dsa_switch *ds) -{ - struct dsa_port *dp, *next; - - dsa_switch_for_each_port_safe(dp, next, ds) { - WARN_ON(!list_empty(&dp->fdbs)); - WARN_ON(!list_empty(&dp->mdbs)); - WARN_ON(!list_empty(&dp->vlans)); - list_del(&dp->list); - kfree(dp); - } -} - -static int dsa_switch_probe(struct dsa_switch *ds) -{ - struct dsa_switch_tree *dst; - struct dsa_chip_data *pdata; - struct device_node *np; - int err; - - if (!ds->dev) - return -ENODEV; - - pdata = ds->dev->platform_data; - np = ds->dev->of_node; - - if (!ds->num_ports) - return -EINVAL; - - if (np) { - err = dsa_switch_parse_of(ds, np); - if (err) - dsa_switch_release_ports(ds); - } else if (pdata) { - err = dsa_switch_parse(ds, pdata); - if (err) - dsa_switch_release_ports(ds); - } else { - err = -ENODEV; - } - - if (err) - return err; - - dst = ds->dst; - dsa_tree_get(dst); - err = dsa_tree_setup(dst); - if (err) { - dsa_switch_release_ports(ds); - dsa_tree_put(dst); - } - - return err; -} - -int dsa_register_switch(struct dsa_switch *ds) -{ - int err; - - mutex_lock(&dsa2_mutex); - err = dsa_switch_probe(ds); - dsa_tree_put(ds->dst); - mutex_unlock(&dsa2_mutex); - - return err; -} -EXPORT_SYMBOL_GPL(dsa_register_switch); - -static void dsa_switch_remove(struct dsa_switch *ds) -{ - struct dsa_switch_tree *dst = ds->dst; - - dsa_tree_teardown(dst); - dsa_switch_release_ports(ds); - dsa_tree_put(dst); -} - -void dsa_unregister_switch(struct dsa_switch *ds) -{ - mutex_lock(&dsa2_mutex); - dsa_switch_remove(ds); - mutex_unlock(&dsa2_mutex); -} -EXPORT_SYMBOL_GPL(dsa_unregister_switch); - -/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is - * blocking that operation from completion, due to the dev_hold taken inside - * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of - * the DSA master, so that the system can reboot successfully. - */ -void dsa_switch_shutdown(struct dsa_switch *ds) -{ - struct net_device *master, *slave_dev; - struct dsa_port *dp; - - mutex_lock(&dsa2_mutex); - - if (!ds->setup) - goto out; - - rtnl_lock(); - - dsa_switch_for_each_user_port(dp, ds) { - master = dsa_port_to_master(dp); - slave_dev = dp->slave; - - netdev_upper_dev_unlink(master, slave_dev); - } - - /* Disconnect from further netdevice notifiers on the master, - * since netdev_uses_dsa() will now return false. 
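
From a switch driver's point of view, all of the probing above is reached through dsa_register_switch(): the driver allocates a struct dsa_switch, points it at its device and ops, and registers it from its own probe path. A hedged sketch of that call sequence; the probe signature, port count, and ops contents are illustrative, not taken from any particular driver::

  #include <linux/device.h>
  #include <net/dsa.h>

  static const struct dsa_switch_ops mydrv_switch_ops = {
      /* .get_tag_protocol, .setup, .port_enable, ... (driver-specific) */
  };

  static int mydrv_probe(struct device *dev)   /* hypothetical probe hook */
  {
      struct dsa_switch *ds;

      ds = devm_kzalloc(dev, sizeof(*ds), GFP_KERNEL);
      if (!ds)
          return -ENOMEM;

      ds->dev = dev;        /* must be set: dsa_switch_probe() checks it */
      ds->num_ports = 8;    /* illustrative port count */
      ds->ops = &mydrv_switch_ops;

      return dsa_register_switch(ds);
  }
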
- */ - dsa_switch_for_each_cpu_port(dp, ds) - dp->master->dsa_ptr = NULL; - - rtnl_unlock(); -out: - mutex_unlock(&dsa2_mutex); -} -EXPORT_SYMBOL_GPL(dsa_switch_shutdown); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h deleted file mode 100644 index 24e0ea218a35..000000000000 --- a/net/dsa/dsa_priv.h +++ /dev/null @@ -1,663 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * net/dsa/dsa_priv.h - Hardware switch handling - * Copyright (c) 2008-2009 Marvell Semiconductor - */ - -#ifndef __DSA_PRIV_H -#define __DSA_PRIV_H - -#include <linux/if_bridge.h> -#include <linux/if_vlan.h> -#include <linux/phy.h> -#include <linux/netdevice.h> -#include <linux/netpoll.h> -#include <net/dsa.h> -#include <net/gro_cells.h> - -#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG - -/* Create 2 modaliases per tagging protocol, one to auto-load the module - * given the ID reported by get_tag_protocol(), and the other by name. - */ -#define DSA_TAG_DRIVER_ALIAS "dsa_tag:" -#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto, __name) \ - MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __name); \ - MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS "id-" \ - __stringify(__proto##_VALUE)) - -struct dsa_tag_driver { - const struct dsa_device_ops *ops; - struct list_head list; - struct module *owner; -}; - -void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count, - struct module *owner); -void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count); - -#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \ -static int __init dsa_tag_driver_module_init(void) \ -{ \ - dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \ - THIS_MODULE); \ - return 0; \ -} \ -module_init(dsa_tag_driver_module_init); \ - \ -static void __exit dsa_tag_driver_module_exit(void) \ -{ \ - dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \ -} \ -module_exit(dsa_tag_driver_module_exit) - -/** - * module_dsa_tag_drivers() - Helper macro for registering DSA tag - * drivers - * @__ops_array: Array of tag driver structures - * - * Helper macro for DSA tag drivers which do not do anything special - * in module init/exit. Each module may only use this macro once, and - * calling it replaces module_init() and module_exit(). - */ -#define module_dsa_tag_drivers(__ops_array) \ -dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array)) - -#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops - -/* Create a static structure we can build a linked list of dsa_tag - * drivers - */ -#define DSA_TAG_DRIVER(__ops) \ -static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ - .ops = &__ops, \ -} - -/** - * module_dsa_tag_driver() - Helper macro for registering a single DSA tag - * driver - * @__ops: Single tag driver structures - * - * Helper macro for DSA tag drivers which do not do anything special - * in module init/exit. Each module may only use this macro once, and - * calling it replaces module_init() and module_exit(). 
- */ -#define module_dsa_tag_driver(__ops) \ -DSA_TAG_DRIVER(__ops); \ - \ -static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ - &DSA_TAG_DRIVER_NAME(__ops) \ -}; \ -module_dsa_tag_drivers(dsa_tag_driver_array) - -enum { - DSA_NOTIFIER_AGEING_TIME, - DSA_NOTIFIER_BRIDGE_JOIN, - DSA_NOTIFIER_BRIDGE_LEAVE, - DSA_NOTIFIER_FDB_ADD, - DSA_NOTIFIER_FDB_DEL, - DSA_NOTIFIER_HOST_FDB_ADD, - DSA_NOTIFIER_HOST_FDB_DEL, - DSA_NOTIFIER_LAG_FDB_ADD, - DSA_NOTIFIER_LAG_FDB_DEL, - DSA_NOTIFIER_LAG_CHANGE, - DSA_NOTIFIER_LAG_JOIN, - DSA_NOTIFIER_LAG_LEAVE, - DSA_NOTIFIER_MDB_ADD, - DSA_NOTIFIER_MDB_DEL, - DSA_NOTIFIER_HOST_MDB_ADD, - DSA_NOTIFIER_HOST_MDB_DEL, - DSA_NOTIFIER_VLAN_ADD, - DSA_NOTIFIER_VLAN_DEL, - DSA_NOTIFIER_HOST_VLAN_ADD, - DSA_NOTIFIER_HOST_VLAN_DEL, - DSA_NOTIFIER_MTU, - DSA_NOTIFIER_TAG_PROTO, - DSA_NOTIFIER_TAG_PROTO_CONNECT, - DSA_NOTIFIER_TAG_PROTO_DISCONNECT, - DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, - DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, - DSA_NOTIFIER_MASTER_STATE_CHANGE, -}; - -/* DSA_NOTIFIER_AGEING_TIME */ -struct dsa_notifier_ageing_time_info { - unsigned int ageing_time; -}; - -/* DSA_NOTIFIER_BRIDGE_* */ -struct dsa_notifier_bridge_info { - const struct dsa_port *dp; - struct dsa_bridge bridge; - bool tx_fwd_offload; - struct netlink_ext_ack *extack; -}; - -/* DSA_NOTIFIER_FDB_* */ -struct dsa_notifier_fdb_info { - const struct dsa_port *dp; - const unsigned char *addr; - u16 vid; - struct dsa_db db; -}; - -/* DSA_NOTIFIER_LAG_FDB_* */ -struct dsa_notifier_lag_fdb_info { - struct dsa_lag *lag; - const unsigned char *addr; - u16 vid; - struct dsa_db db; -}; - -/* DSA_NOTIFIER_MDB_* */ -struct dsa_notifier_mdb_info { - const struct dsa_port *dp; - const struct switchdev_obj_port_mdb *mdb; - struct dsa_db db; -}; - -/* DSA_NOTIFIER_LAG_* */ -struct dsa_notifier_lag_info { - const struct dsa_port *dp; - struct dsa_lag lag; - struct netdev_lag_upper_info *info; - struct netlink_ext_ack *extack; -}; - -/* DSA_NOTIFIER_VLAN_* */ -struct dsa_notifier_vlan_info { - const struct dsa_port *dp; - const struct switchdev_obj_port_vlan *vlan; - struct netlink_ext_ack *extack; -}; - -/* DSA_NOTIFIER_MTU */ -struct dsa_notifier_mtu_info { - const struct dsa_port *dp; - int mtu; -}; - -/* DSA_NOTIFIER_TAG_PROTO_* */ -struct dsa_notifier_tag_proto_info { - const struct dsa_device_ops *tag_ops; -}; - -/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ -struct dsa_notifier_tag_8021q_vlan_info { - const struct dsa_port *dp; - u16 vid; -}; - -/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ -struct dsa_notifier_master_state_info { - const struct net_device *master; - bool operational; -}; - -struct dsa_switchdev_event_work { - struct net_device *dev; - struct net_device *orig_dev; - struct work_struct work; - unsigned long event; - /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and - * SWITCHDEV_FDB_DEL_TO_DEVICE - */ - unsigned char addr[ETH_ALEN]; - u16 vid; - bool host_addr; -}; - -enum dsa_standalone_event { - DSA_UC_ADD, - DSA_UC_DEL, - DSA_MC_ADD, - DSA_MC_DEL, -}; - -struct dsa_standalone_event_work { - struct work_struct work; - struct net_device *dev; - enum dsa_standalone_event event; - unsigned char addr[ETH_ALEN]; - u16 vid; -}; - -struct dsa_slave_priv { - /* Copy of CPU port xmit for faster access in slave transmit hot path */ - struct sk_buff * (*xmit)(struct sk_buff *skb, - struct net_device *dev); - - struct gro_cells gcells; - - /* DSA port data, such as switch, port index, etc. 
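
A tagging driver consumes the macros above by defining its dsa_device_ops and letting module_dsa_tag_driver() generate the module init/exit boilerplate; the MODULE_ALIAS lines are what allow auto-loading by protocol ID or name. A sketch with an invented "mytag" protocol (these macros are private to net/dsa/, and DSA_TAG_PROTO_MYTAG plus its _VALUE define are assumed to exist for the modalias; the xmit/rcv bodies are elided)::

  #include <linux/module.h>
  #include <net/dsa.h>

  static struct sk_buff *mytag_xmit(struct sk_buff *skb, struct net_device *dev)
  {
      return skb;   /* would prepend/append the hardware tag here */
  }

  static struct sk_buff *mytag_rcv(struct sk_buff *skb, struct net_device *dev)
  {
      return skb;   /* would parse and strip the hardware tag here */
  }

  static const struct dsa_device_ops mytag_netdev_ops = {
      .name            = "mytag",
      .proto           = DSA_TAG_PROTO_MYTAG,   /* assumed enum value */
      .xmit            = mytag_xmit,
      .rcv             = mytag_rcv,
      .needed_headroom = 4,                     /* illustrative tag length */
  };

  MODULE_LICENSE("GPL");
  MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MYTAG, "mytag");

  /* Expands to module_init/module_exit registering mytag_netdev_ops. */
  module_dsa_tag_driver(mytag_netdev_ops);
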
*/ - struct dsa_port *dp; - -#ifdef CONFIG_NET_POLL_CONTROLLER - struct netpoll *netpoll; -#endif - - /* TC context */ - struct list_head mall_tc_list; -}; - -/* dsa.c */ -const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol); -const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name); -void dsa_tag_driver_put(const struct dsa_device_ops *ops); - -bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b); - -bool dsa_schedule_work(struct work_struct *work); -const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); - -static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) -{ - return ops->needed_headroom + ops->needed_tailroom; -} - -/* master.c */ -int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); -void dsa_master_teardown(struct net_device *dev); -int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack); -void dsa_master_lag_teardown(struct net_device *lag_dev, - struct dsa_port *cpu_dp); - -static inline struct net_device *dsa_master_find_slave(struct net_device *dev, - int device, int port) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch_tree *dst = cpu_dp->dst; - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - if (dp->ds->index == device && dp->index == port && - dp->type == DSA_PORT_TYPE_USER) - return dp->slave; - - return NULL; -} - -/* netlink.c */ -extern struct rtnl_link_ops dsa_link_ops __read_mostly; - -/* port.c */ -bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); -void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, - const struct dsa_device_ops *tag_ops); -int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); -int dsa_port_set_mst_state(struct dsa_port *dp, - const struct switchdev_mst_state *state, - struct netlink_ext_ack *extack); -int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); -int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); -void dsa_port_disable_rt(struct dsa_port *dp); -void dsa_port_disable(struct dsa_port *dp); -int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, - struct netlink_ext_ack *extack); -void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br); -void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); -int dsa_port_lag_change(struct dsa_port *dp, - struct netdev_lag_lower_state_info *linfo); -int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack); -void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); -void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); -int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct netlink_ext_ack *extack); -bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); -int dsa_port_mst_enable(struct dsa_port *dp, bool on, - struct netlink_ext_ack *extack); -int dsa_port_vlan_msti(struct dsa_port *dp, - const struct switchdev_vlan_msti *msti); -int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); -int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, - const unsigned char 
*addr, u16 vid); -int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, - const unsigned char *addr, u16 vid); -int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, - u16 vid); -int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); -int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_mdb_del(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb); -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, - struct switchdev_brport_flags flags, - struct netlink_ext_ack *extack); -int dsa_port_bridge_flags(struct dsa_port *dp, - struct switchdev_brport_flags flags, - struct netlink_ext_ack *extack); -int dsa_port_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack); -int dsa_port_vlan_del(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan); -int dsa_port_host_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct netlink_ext_ack *extack); -int dsa_port_host_vlan_del(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan); -int dsa_port_mrp_add(const struct dsa_port *dp, - const struct switchdev_obj_mrp *mrp); -int dsa_port_mrp_del(const struct dsa_port *dp, - const struct switchdev_obj_mrp *mrp); -int dsa_port_mrp_add_ring_role(const struct dsa_port *dp, - const struct switchdev_obj_ring_role_mrp *mrp); -int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, - const struct switchdev_obj_ring_role_mrp *mrp); -int dsa_port_phylink_create(struct dsa_port *dp); -void dsa_port_phylink_destroy(struct dsa_port *dp); -int dsa_shared_port_link_register_of(struct dsa_port *dp); -void dsa_shared_port_link_unregister_of(struct dsa_port *dp); -int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); -void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); -int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); -void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); -void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); -int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, - struct netlink_ext_ack *extack); - -/* slave.c */ -extern const struct dsa_device_ops notag_netdev_ops; -extern struct notifier_block dsa_slave_switchdev_notifier; -extern struct notifier_block dsa_slave_switchdev_blocking_notifier; - -void dsa_slave_mii_bus_init(struct dsa_switch *ds); -int dsa_slave_create(struct dsa_port *dp); -void dsa_slave_destroy(struct net_device *slave_dev); -int dsa_slave_suspend(struct net_device *slave_dev); -int dsa_slave_resume(struct net_device *slave_dev); -int dsa_slave_register_notifier(void); -void dsa_slave_unregister_notifier(void); -void dsa_slave_sync_ha(struct 
net_device *dev); -void dsa_slave_unsync_ha(struct net_device *dev); -void dsa_slave_setup_tagger(struct net_device *slave); -int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); -int dsa_slave_change_master(struct net_device *dev, struct net_device *master, - struct netlink_ext_ack *extack); -int dsa_slave_manage_vlan_filtering(struct net_device *dev, - bool vlan_filtering); - -static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - return p->dp; -} - -static inline struct net_device * -dsa_slave_to_master(const struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_to_master(dp); -} - -/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged - * frames as untagged, since the bridge will not untag them. - */ -static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) -{ - struct dsa_port *dp = dsa_slave_to_port(skb->dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *dev = skb->dev; - struct net_device *upper_dev; - u16 vid, pvid, proto; - int err; - - if (!br || br_vlan_enabled(br)) - return skb; - - err = br_vlan_get_proto(br, &proto); - if (err) - return skb; - - /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { - skb = skb_vlan_untag(skb); - if (!skb) - return NULL; - } - - if (!skb_vlan_tag_present(skb)) - return skb; - - vid = skb_vlan_tag_get_id(skb); - - /* We already run under an RCU read-side critical section since - * we are called from netif_receive_skb_list_internal(). - */ - err = br_vlan_get_pvid_rcu(dev, &pvid); - if (err) - return skb; - - if (vid != pvid) - return skb; - - /* The sad part about attempting to untag from DSA is that we - * don't know, unless we check, if the skb will end up in - * the bridge's data path - br_allowed_ingress() - or not. - * For example, there might be an 8021q upper for the - * default_pvid of the bridge, which will steal VLAN-tagged traffic - * from the bridge's data path. This is a configuration that DSA - * supports because vlan_filtering is 0. In that case, we should - * definitely keep the tag, to make sure it keeps working. - */ - upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); - if (upper_dev) - return skb; - - __vlan_hwaccel_clear_tag(skb); - - return skb; -} - -/* For switches without hardware support for DSA tagging to be able - * to support termination through the bridge. - */ -static inline struct net_device * -dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) -{ - struct dsa_port *cpu_dp = master->dsa_ptr; - struct dsa_switch_tree *dst = cpu_dp->dst; - struct bridge_vlan_info vinfo; - struct net_device *slave; - struct dsa_port *dp; - int err; - - list_for_each_entry(dp, &dst->ports, list) { - if (dp->type != DSA_PORT_TYPE_USER) - continue; - - if (!dp->bridge) - continue; - - if (dp->stp_state != BR_STATE_LEARNING && - dp->stp_state != BR_STATE_FORWARDING) - continue; - - /* Since the bridge might learn this packet, keep the CPU port - * affinity with the port that will be used for the reply on - * xmit. 
-		 */
-		if (dp->cpu_dp != cpu_dp)
-			continue;
-
-		slave = dp->slave;
-
-		err = br_vlan_get_info_rcu(slave, vid, &vinfo);
-		if (err)
-			continue;
-
-		return slave;
-	}
-
-	return NULL;
-}
-
-/* If the ingress port offloads the bridge, we mark the frame as autonomously
- * forwarded by hardware, so the software bridge doesn't forward in twice, back
- * to us, because we already did. However, if we're in fallback mode and we do
- * software bridging, we are not offloading it, therefore the dp->bridge
- * pointer is not populated, and flooding needs to be done by software (we are
- * effectively operating in standalone ports mode).
- */
-static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
-{
-	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
-
-	skb->offload_fwd_mark = !!(dp->bridge);
-}
-
-/* Helper for removing DSA header tags from packets in the RX path.
- * Must not be called before skb_pull(len).
- *
- *                                                             skb->data
- *                                                                     |
- *                                                                     v
- * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
- * +-----------------------+-----------------------+---------------+-------+
- * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
- * +-----------------------+-----------------------+---------------+-------+
- *                                                 |               |
- * <----- len ----->                               <----- len ----->
- *                 |
- *                 >>>>>>>   v
- *                 >>>>>>>   |   |   |   |   |   |   |   |   |   |   |   |
- *                 >>>>>>>   +-----------------------+-----------------------+-------+
- *                 >>>>>>>   |    Destination MAC    |      Source MAC       | EType |
- *                           +-----------------------+-----------------------+-------+
- *                                                                           ^
- *                                                                           |
- *                                                                           skb->data
- */
-static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
-{
-	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
-}
-
-/* Helper for creating space for DSA header tags in TX path packets.
- * Must not be called before skb_push(len).
- *
- * Before:
- *
- *       <<<<<<<   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
- * ^     <<<<<<<   +-----------------------+-----------------------+-------+
- * |     <<<<<<<   |    Destination MAC    |      Source MAC       | EType |
- * |               +-----------------------+-----------------------+-------+
- * <----- len ----->
- * |
- * |
- * skb->data
- *
- * After:
- *
- * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
- * +-----------------------+-----------------------+---------------+-------+
- * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
- * +-----------------------+-----------------------+---------------+-------+
- * ^                                               |               |
- * |                                               <----- len ----->
- * skb->data
- */
-static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
-{
-	memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
-}
-
-/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
- * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
- * what the DSA master perceives as the EtherType (the beginning of the L3
- * protocol). Since DSA EtherType header taggers treat the EtherType as part of
- * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
- * is located 2 bytes behind skb->data. Note that EtherType in this context
- * means the first 2 bytes of the DSA header, not the encapsulated EtherType
- * that will become visible after the DSA header is stripped.
- */
-static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
-{
-	return skb->data - 2;
-}
-
-/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
- * header taggers start exactly where the EtherType is (the EtherType is
- * treated as part of the DSA header).
- */ -static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb) -{ - return skb->data + 2 * ETH_ALEN; -} - -/* switch.c */ -int dsa_switch_register_notifier(struct dsa_switch *ds); -void dsa_switch_unregister_notifier(struct dsa_switch *ds); - -static inline bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) -{ - return ds->ops->port_fdb_add && ds->ops->port_fdb_del && - ds->fdb_isolation && !ds->vlan_filtering_is_global && - !ds->needs_standalone_vlan_filtering; -} - -static inline bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) -{ - return ds->ops->port_mdb_add && ds->ops->port_mdb_del && - ds->fdb_isolation && !ds->vlan_filtering_is_global && - !ds->needs_standalone_vlan_filtering; -} - -/* dsa2.c */ -void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); -void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); -struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, - const struct net_device *lag_dev); -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); -int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); -int dsa_broadcast(unsigned long e, void *v); -int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, - const struct dsa_device_ops *tag_ops, - const struct dsa_device_ops *old_tag_ops); -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up); -void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up); -unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); -void dsa_bridge_num_put(const struct net_device *bridge_dev, - unsigned int bridge_num); -struct dsa_bridge *dsa_tree_bridge_find(struct dsa_switch_tree *dst, - const struct net_device *br); - -/* tag_8021q.c */ -int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, - struct dsa_notifier_tag_8021q_vlan_info *info); -int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, - struct dsa_notifier_tag_8021q_vlan_info *info); - -extern struct list_head dsa_tree_list; - -#endif diff --git a/net/dsa/master.c b/net/dsa/master.c index e24f02743c21..26d90140d271 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -6,7 +6,15 @@ * Vivien Didelot <[email protected]> */ -#include "dsa_priv.h" +#include <linux/ethtool.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <net/dsa.h> + +#include "dsa.h" +#include "master.h" +#include "port.h" +#include "tag.h" static int dsa_master_get_regs_len(struct net_device *dev) { diff --git a/net/dsa/master.h b/net/dsa/master.h new file mode 100644 index 000000000000..3fc0e610b5b5 --- /dev/null +++ b/net/dsa/master.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_MASTER_H +#define __DSA_MASTER_H + +struct dsa_port; +struct net_device; +struct netdev_lag_upper_info; +struct netlink_ext_ack; + +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_master_teardown(struct net_device *dev); +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); + +#endif diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c index ecf9ed1de185..bd4bbaf851de 100644 --- a/net/dsa/netlink.c +++ b/net/dsa/netlink.c @@ -4,7 +4,8 @@ #include <linux/netdevice.h> #include <net/rtnetlink.h> -#include "dsa_priv.h" +#include "netlink.h" 
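
The two memmove() helpers deleted above shift only the MAC addresses (2 * ETH_ALEN bytes), leaving skb->data parked at the EtherType region. The same arithmetic in a standalone harness, with a fake 4-byte tag between the MACs and the EtherType; the raw buffer and offsets stand in for an skb after the tagger's skb_pull()::

  #include <stdio.h>
  #include <string.h>

  #define ETH_ALEN 6
  #define ETH_HLEN 14
  #define TAG_LEN  4    /* illustrative DSA header length */

  int main(void)
  {
      /* DMAC(6) | SMAC(6) | 4-byte tag | EtherType(2) */
      unsigned char frame[2 * ETH_ALEN + TAG_LEN + 2] = {
          1, 1, 1, 1, 1, 1,  2, 2, 2, 2, 2, 2,
          0xde, 0xad, 0xbe, 0xef,  0x08, 0x00,
      };
      /* Model of skb->data after skb_pull(TAG_LEN) in the RX path. */
      unsigned char *data = frame + ETH_HLEN + TAG_LEN;
      unsigned char *stripped;

      /* dsa_strip_etype_header(skb, TAG_LEN): slide both MACs right,
       * over the tag, so they become adjacent to the EtherType.
       */
      memmove(data - ETH_HLEN, data - ETH_HLEN - TAG_LEN, 2 * ETH_ALEN);

      /* The untagged frame now starts TAG_LEN bytes into the buffer. */
      stripped = data - ETH_HLEN;
      for (int i = 0; i < ETH_HLEN; i++)
          printf("%02x ", stripped[i]);
      printf("\n");     /* prints 01 x6, 02 x6, 08 00 */
      return 0;
  }
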
+#include "slave.h" static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { [IFLA_DSA_MASTER] = { .type = NLA_U32 }, diff --git a/net/dsa/netlink.h b/net/dsa/netlink.h new file mode 100644 index 000000000000..7eda2fa15722 --- /dev/null +++ b/net/dsa/netlink.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_NETLINK_H +#define __DSA_NETLINK_H + +extern struct rtnl_link_ops dsa_link_ops __read_mostly; + +#endif diff --git a/net/dsa/port.c b/net/dsa/port.c index 707bd854cea2..67ad1adec2a2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -12,7 +12,11 @@ #include <linux/of_mdio.h> #include <linux/of_net.h> -#include "dsa_priv.h" +#include "dsa.h" +#include "port.h" +#include "slave.h" +#include "switch.h" +#include "tag_8021q.h" /** * dsa_port_notify - Notify the switching fabric of changes to a port diff --git a/net/dsa/port.h b/net/dsa/port.h new file mode 100644 index 000000000000..9c218660d223 --- /dev/null +++ b/net/dsa/port.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_PORT_H +#define __DSA_PORT_H + +#include <linux/types.h> +#include <net/dsa.h> + +struct ifreq; +struct netdev_lag_lower_state_info; +struct netdev_lag_upper_info; +struct netlink_ext_ack; +struct switchdev_mst_state; +struct switchdev_obj_port_mdb; +struct switchdev_vlan_msti; +struct phy_device; + +bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); +void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops); +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); +int dsa_port_set_mst_state(struct dsa_port *dp, + const struct switchdev_mst_state *state, + struct netlink_ext_ack *extack); +int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable_rt(struct dsa_port *dp); +void dsa_port_disable(struct dsa_port *dp); +int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, + struct netlink_ext_ack *extack); +void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br); +void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, + struct netlink_ext_ack *extack); +bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); +int dsa_port_mst_enable(struct dsa_port *dp, bool on, + struct netlink_ext_ack *extack); +int dsa_port_vlan_msti(struct dsa_port *dp, + const struct switchdev_vlan_msti *msti); +int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu); +int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, + const unsigned char *addr, u16 vid); +int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int 
dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_lag_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); +int dsa_port_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, + const struct switchdev_obj_port_mdb *mdb); +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); +int dsa_port_bridge_flags(struct dsa_port *dp, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); +int dsa_port_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); +int dsa_port_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_port_host_vlan_add(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack); +int dsa_port_host_vlan_del(struct dsa_port *dp, + const struct switchdev_obj_port_vlan *vlan); +int dsa_port_mrp_add(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp); +int dsa_port_mrp_del(const struct dsa_port *dp, + const struct switchdev_obj_mrp *mrp); +int dsa_port_mrp_add_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp); +int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, + const struct switchdev_obj_ring_role_mrp *mrp); +int dsa_port_phylink_create(struct dsa_port *dp); +void dsa_port_phylink_destroy(struct dsa_port *dp); +int dsa_shared_port_link_register_of(struct dsa_port *dp); +void dsa_shared_port_link_unregister_of(struct dsa_port *dp); +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); +void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); +void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack); + +#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 24d8ad36fc8b..aab79c355224 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -22,7 +22,54 @@ #include <net/dcbnl.h> #include <linux/netpoll.h> -#include "dsa_priv.h" +#include "dsa.h" +#include "port.h" +#include "master.h" +#include "netlink.h" +#include "slave.h" +#include "tag.h" + +struct dsa_switchdev_event_work { + struct net_device *dev; + struct net_device *orig_dev; + struct work_struct work; + unsigned long event; + /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and + * SWITCHDEV_FDB_DEL_TO_DEVICE + */ + unsigned char addr[ETH_ALEN]; + u16 vid; + bool host_addr; +}; + +enum dsa_standalone_event { + DSA_UC_ADD, + DSA_UC_DEL, + DSA_MC_ADD, + DSA_MC_DEL, +}; + +struct dsa_standalone_event_work { + struct 
work_struct work; + struct net_device *dev; + enum dsa_standalone_event event; + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +static bool dsa_switch_supports_uc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_fdb_add && ds->ops->port_fdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} + +static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) +{ + return ds->ops->port_mdb_add && ds->ops->port_mdb_del && + ds->fdb_isolation && !ds->vlan_filtering_is_global && + !ds->needs_standalone_vlan_filtering; +} static void dsa_slave_standalone_event_work(struct work_struct *work) { diff --git a/net/dsa/slave.h b/net/dsa/slave.h new file mode 100644 index 000000000000..d0abe609e00d --- /dev/null +++ b/net/dsa/slave.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_SLAVE_H +#define __DSA_SLAVE_H + +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/list.h> +#include <linux/netpoll.h> +#include <linux/types.h> +#include <net/dsa.h> +#include <net/gro_cells.h> + +struct net_device; +struct netlink_ext_ack; + +extern struct notifier_block dsa_slave_switchdev_notifier; +extern struct notifier_block dsa_slave_switchdev_blocking_notifier; + +struct dsa_slave_priv { + /* Copy of CPU port xmit for faster access in slave transmit hot path */ + struct sk_buff * (*xmit)(struct sk_buff *skb, + struct net_device *dev); + + struct gro_cells gcells; + + /* DSA port data, such as switch, port index, etc. */ + struct dsa_port *dp; + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif + + /* TC context */ + struct list_head mall_tc_list; +}; + +void dsa_slave_mii_bus_init(struct dsa_switch *ds); +int dsa_slave_create(struct dsa_port *dp); +void dsa_slave_destroy(struct net_device *slave_dev); +int dsa_slave_suspend(struct net_device *slave_dev); +int dsa_slave_resume(struct net_device *slave_dev); +int dsa_slave_register_notifier(void); +void dsa_slave_unregister_notifier(void); +void dsa_slave_sync_ha(struct net_device *dev); +void dsa_slave_unsync_ha(struct net_device *dev); +void dsa_slave_setup_tagger(struct net_device *slave); +int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack); +int dsa_slave_manage_vlan_filtering(struct net_device *dev, + bool vlan_filtering); + +static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + return p->dp; +} + +static inline struct net_device * +dsa_slave_to_master(const struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dsa_port_to_master(dp); +} + +#endif diff --git a/net/dsa/switch.c b/net/dsa/switch.c index ce56acdba203..d5bc4bb7310d 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -12,7 +12,12 @@ #include <linux/if_vlan.h> #include <net/switchdev.h> -#include "dsa_priv.h" +#include "dsa.h" +#include "netlink.h" +#include "port.h" +#include "slave.h" +#include "switch.h" +#include "tag_8021q.h" static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) @@ -1013,6 +1018,52 @@ static int dsa_switch_event(struct notifier_block *nb, return notifier_from_errno(err); } +/** + * dsa_tree_notify - Execute code for all switches in a DSA switch tree. + * @dst: collection of struct dsa_switch devices to notify. 
+ * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Given a struct dsa_switch_tree, this can be used to run a function once for + * each member DSA switch. The other alternative of traversing the tree is only + * through its ports list, which does not uniquely list the switches. + */ +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) +{ + struct raw_notifier_head *nh = &dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + +/** + * dsa_broadcast - Notify all DSA trees in the system. + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Can be used to notify the switching fabric of events such as cross-chip + * bridging between disjoint trees (such as islands of tagger-compatible + * switches bridged by an incompatible middle switch). + * + * WARNING: this function is not reliable during probe time, because probing + * between trees is asynchronous and not all DSA trees might have probed. + */ +int dsa_broadcast(unsigned long e, void *v) +{ + struct dsa_switch_tree *dst; + int err = 0; + + list_for_each_entry(dst, &dsa_tree_list, list) { + err = dsa_tree_notify(dst, e, v); + if (err) + break; + } + + return err; +} + int dsa_switch_register_notifier(struct dsa_switch *ds) { ds->nb.notifier_call = dsa_switch_event; diff --git a/net/dsa/switch.h b/net/dsa/switch.h new file mode 100644 index 000000000000..15e67b95eb6e --- /dev/null +++ b/net/dsa/switch.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_SWITCH_H +#define __DSA_SWITCH_H + +#include <net/dsa.h> + +struct netlink_ext_ack; + +enum { + DSA_NOTIFIER_AGEING_TIME, + DSA_NOTIFIER_BRIDGE_JOIN, + DSA_NOTIFIER_BRIDGE_LEAVE, + DSA_NOTIFIER_FDB_ADD, + DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_HOST_FDB_ADD, + DSA_NOTIFIER_HOST_FDB_DEL, + DSA_NOTIFIER_LAG_FDB_ADD, + DSA_NOTIFIER_LAG_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, + DSA_NOTIFIER_MDB_ADD, + DSA_NOTIFIER_MDB_DEL, + DSA_NOTIFIER_HOST_MDB_ADD, + DSA_NOTIFIER_HOST_MDB_DEL, + DSA_NOTIFIER_VLAN_ADD, + DSA_NOTIFIER_VLAN_DEL, + DSA_NOTIFIER_HOST_VLAN_ADD, + DSA_NOTIFIER_HOST_VLAN_DEL, + DSA_NOTIFIER_MTU, + DSA_NOTIFIER_TAG_PROTO, + DSA_NOTIFIER_TAG_PROTO_CONNECT, + DSA_NOTIFIER_TAG_PROTO_DISCONNECT, + DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, + DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, + DSA_NOTIFIER_MASTER_STATE_CHANGE, +}; + +/* DSA_NOTIFIER_AGEING_TIME */ +struct dsa_notifier_ageing_time_info { + unsigned int ageing_time; +}; + +/* DSA_NOTIFIER_BRIDGE_* */ +struct dsa_notifier_bridge_info { + const struct dsa_port *dp; + struct dsa_bridge bridge; + bool tx_fwd_offload; + struct netlink_ext_ack *extack; +}; + +/* DSA_NOTIFIER_FDB_* */ +struct dsa_notifier_fdb_info { + const struct dsa_port *dp; + const unsigned char *addr; + u16 vid; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_LAG_FDB_* */ +struct dsa_notifier_lag_fdb_info { + struct dsa_lag *lag; + const unsigned char *addr; + u16 vid; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_MDB_* */ +struct dsa_notifier_mdb_info { + const struct dsa_port *dp; + const struct switchdev_obj_port_mdb *mdb; + struct dsa_db db; +}; + +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + const struct dsa_port *dp; + struct dsa_lag lag; + struct netdev_lag_upper_info *info; + struct netlink_ext_ack *extack; +}; + +/* DSA_NOTIFIER_VLAN_* */ +struct dsa_notifier_vlan_info { + const struct dsa_port *dp; + const struct switchdev_obj_port_vlan *vlan; + struct 
netlink_ext_ack *extack; +}; + +/* DSA_NOTIFIER_MTU */ +struct dsa_notifier_mtu_info { + const struct dsa_port *dp; + int mtu; +}; + +/* DSA_NOTIFIER_TAG_PROTO_* */ +struct dsa_notifier_tag_proto_info { + const struct dsa_device_ops *tag_ops; +}; + +/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */ +struct dsa_notifier_tag_8021q_vlan_info { + const struct dsa_port *dp; + u16 vid; +}; + +/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ +struct dsa_notifier_master_state_info { + const struct net_device *master; + bool operational; +}; + +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); +int dsa_broadcast(unsigned long e, void *v); + +int dsa_switch_register_notifier(struct dsa_switch *ds); +void dsa_switch_unregister_notifier(struct dsa_switch *ds); + +#endif diff --git a/net/dsa/tag.c b/net/dsa/tag.c new file mode 100644 index 000000000000..383721e167d6 --- /dev/null +++ b/net/dsa/tag.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DSA tagging protocol handling + * + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <[email protected]> + * Copyright (c) 2016 Andrew Lunn <[email protected]> + */ + +#include <linux/netdevice.h> +#include <linux/ptp_classify.h> +#include <linux/skbuff.h> +#include <net/dsa.h> +#include <net/dst_metadata.h> + +#include "slave.h" +#include "tag.h" + +static LIST_HEAD(dsa_tag_drivers_list); +static DEFINE_MUTEX(dsa_tag_drivers_lock); + +/* Determine if we should defer delivery of skb until we have a rx timestamp. + * + * Called from dsa_switch_rcv. For now, this will only work if tagging is + * enabled on the switch. Normally the MAC driver would retrieve the hardware + * timestamp when it reads the packet out of the hardware. However in a DSA + * switch, the DSA driver owning the interface to which the packet is + * delivered is never notified unless we do so here. + */ +static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, + struct sk_buff *skb) +{ + struct dsa_switch *ds = p->dp->ds; + unsigned int type; + + if (skb_headroom(skb) < ETH_HLEN) + return false; + + __skb_push(skb, ETH_HLEN); + + type = ptp_classify_raw(skb); + + __skb_pull(skb, ETH_HLEN); + + if (type == PTP_CLASS_NONE) + return false; + + if (likely(ds->ops->port_rxtstamp)) + return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type); + + return false; +} + +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *unused) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct sk_buff *nskb = NULL; + struct dsa_slave_priv *p; + + if (unlikely(!cpu_dp)) { + kfree_skb(skb); + return 0; + } + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return 0; + + if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { + unsigned int port = md_dst->u.port_info.port_id; + + skb_dst_drop(skb); + if (!skb_has_extensions(skb)) + skb->slow_gro = 0; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (likely(skb->dev)) { + dsa_default_offload_fwd_mark(skb); + nskb = skb; + } + } else { + nskb = cpu_dp->rcv(skb, dev); + } + + if (!nskb) { + kfree_skb(skb); + return 0; + } + + skb = nskb; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + if (unlikely(!dsa_slave_dev_check(skb->dev))) { + /* Packet is to be injected directly on an upper + * device, e.g. a team/bond, so skip all DSA-port + * specific actions. 
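+		 * In particular, plain netif_rx() is used below instead of
+		 * gro_cells_receive(), because the gro_cells context lives
+		 * in dsa_slave_priv, which a non-DSA upper does not have.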
+ */ + netif_rx(skb); + return 0; + } + + p = netdev_priv(skb->dev); + + if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { + nskb = dsa_untag_bridge_pvid(skb); + if (!nskb) { + kfree_skb(skb); + return 0; + } + skb = nskb; + } + + dev_sw_netstats_rx_add(skb->dev, skb->len); + + if (dsa_skb_defer_rx_timestamp(p, skb)) + return 0; + + gro_cells_receive(&p->gcells, skb); + + return 0; +} + +struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, +}; + +static void dsa_tag_driver_register(struct dsa_tag_driver *dsa_tag_driver, + struct module *owner) +{ + dsa_tag_driver->owner = owner; + + mutex_lock(&dsa_tag_drivers_lock); + list_add_tail(&dsa_tag_driver->list, &dsa_tag_drivers_list); + mutex_unlock(&dsa_tag_drivers_lock); +} + +void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count, struct module *owner) +{ + unsigned int i; + + for (i = 0; i < count; i++) + dsa_tag_driver_register(dsa_tag_driver_array[i], owner); +} + +static void dsa_tag_driver_unregister(struct dsa_tag_driver *dsa_tag_driver) +{ + mutex_lock(&dsa_tag_drivers_lock); + list_del(&dsa_tag_driver->list); + mutex_unlock(&dsa_tag_drivers_lock); +} +EXPORT_SYMBOL_GPL(dsa_tag_drivers_register); + +void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], + unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + dsa_tag_driver_unregister(dsa_tag_driver_array[i]); +} +EXPORT_SYMBOL_GPL(dsa_tag_drivers_unregister); + +const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) +{ + return ops->name; +}; + +/* Function takes a reference on the module owning the tagger, + * so dsa_tag_driver_put must be called afterwards. + */ +const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name) +{ + const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT); + struct dsa_tag_driver *dsa_tag_driver; + + request_module("%s%s", DSA_TAG_DRIVER_ALIAS, name); + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + const struct dsa_device_ops *tmp = dsa_tag_driver->ops; + + if (strcmp(name, tmp->name)) + continue; + + if (!try_module_get(dsa_tag_driver->owner)) + break; + + ops = tmp; + break; + } + mutex_unlock(&dsa_tag_drivers_lock); + + return ops; +} + +const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol) +{ + struct dsa_tag_driver *dsa_tag_driver; + const struct dsa_device_ops *ops; + bool found = false; + + request_module("%sid-%d", DSA_TAG_DRIVER_ALIAS, tag_protocol); + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + ops = dsa_tag_driver->ops; + if (ops->proto == tag_protocol) { + found = true; + break; + } + } + + if (found) { + if (!try_module_get(dsa_tag_driver->owner)) + ops = ERR_PTR(-ENOPROTOOPT); + } else { + ops = ERR_PTR(-ENOPROTOOPT); + } + + mutex_unlock(&dsa_tag_drivers_lock); + + return ops; +} + +void dsa_tag_driver_put(const struct dsa_device_ops *ops) +{ + struct dsa_tag_driver *dsa_tag_driver; + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + if (dsa_tag_driver->ops == ops) { + module_put(dsa_tag_driver->owner); + break; + } + } + mutex_unlock(&dsa_tag_drivers_lock); +} diff --git a/net/dsa/tag.h b/net/dsa/tag.h new file mode 100644 index 000000000000..7cfbca824f1c --- /dev/null +++ b/net/dsa/tag.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef 
__DSA_TAG_H +#define __DSA_TAG_H + +#include <linux/if_vlan.h> +#include <linux/list.h> +#include <linux/types.h> +#include <net/dsa.h> + +#include "port.h" +#include "slave.h" + +struct dsa_tag_driver { + const struct dsa_device_ops *ops; + struct list_head list; + struct module *owner; +}; + +extern struct packet_type dsa_pack_type; + +const struct dsa_device_ops *dsa_tag_driver_get_by_id(int tag_protocol); +const struct dsa_device_ops *dsa_tag_driver_get_by_name(const char *name); +void dsa_tag_driver_put(const struct dsa_device_ops *ops); +const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); + +static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) +{ + return ops->needed_headroom + ops->needed_tailroom; +} + +static inline struct net_device *dsa_master_find_slave(struct net_device *dev, + int device, int port) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dp->ds->index == device && dp->index == port && + dp->type == DSA_PORT_TYPE_USER) + return dp->slave; + + return NULL; +} + +/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged + * frames as untagged, since the bridge will not untag them. + */ +static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + struct net_device *dev = skb->dev; + struct net_device *upper_dev; + u16 vid, pvid, proto; + int err; + + if (!br || br_vlan_enabled(br)) + return skb; + + err = br_vlan_get_proto(br, &proto); + if (err) + return skb; + + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + /* We already run under an RCU read-side critical section since + * we are called from netif_receive_skb_list_internal(). + */ + err = br_vlan_get_pvid_rcu(dev, &pvid); + if (err) + return skb; + + if (vid != pvid) + return skb; + + /* The sad part about attempting to untag from DSA is that we + * don't know, unless we check, if the skb will end up in + * the bridge's data path - br_allowed_ingress() - or not. + * For example, there might be an 8021q upper for the + * default_pvid of the bridge, which will steal VLAN-tagged traffic + * from the bridge's data path. This is a configuration that DSA + * supports because vlan_filtering is 0. In that case, we should + * definitely keep the tag, to make sure it keeps working. + */ + upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); + if (upper_dev) + return skb; + + __vlan_hwaccel_clear_tag(skb); + + return skb; +} + +/* For switches without hardware support for DSA tagging to be able + * to support termination through the bridge. 
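+ * The designated port is a bridged user port in the LEARNING or
+ * FORWARDING STP state that keeps affinity with this CPU port and is
+ * a member of the given VLAN. A tagger whose header carries no source
+ * port ID can then resolve, illustratively,
+ * skb->dev = dsa_find_designated_bridge_port_by_vid(master, vid)
+ * in its rcv path.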
+ */
+static inline struct net_device *
+dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+{
+	struct dsa_port *cpu_dp = master->dsa_ptr;
+	struct dsa_switch_tree *dst = cpu_dp->dst;
+	struct bridge_vlan_info vinfo;
+	struct net_device *slave;
+	struct dsa_port *dp;
+	int err;
+
+	list_for_each_entry(dp, &dst->ports, list) {
+		if (dp->type != DSA_PORT_TYPE_USER)
+			continue;
+
+		if (!dp->bridge)
+			continue;
+
+		if (dp->stp_state != BR_STATE_LEARNING &&
+		    dp->stp_state != BR_STATE_FORWARDING)
+			continue;
+
+		/* Since the bridge might learn this packet, keep the CPU port
+		 * affinity with the port that will be used for the reply on
+		 * xmit.
+		 */
+		if (dp->cpu_dp != cpu_dp)
+			continue;
+
+		slave = dp->slave;
+
+		err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+		if (err)
+			continue;
+
+		return slave;
+	}
+
+	return NULL;
+}
+
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward it twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+	skb->offload_fwd_mark = !!(dp->bridge);
+}
+
+/* Helper for removing DSA header tags from packets in the RX path.
+ * Must not be called before skb_pull(len).
+ *                                                                 skb->data
+ *                                                                         |
+ *                                                                         v
+ * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * +-----------------------+-----------------------+---------------+-------+
+ * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ *                                                 |               |
+ * <----- len ----->                               <----- len ----->
+ * |
+ * >>>>>>>   v
+ * >>>>>>>   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * >>>>>>>   +-----------------------+-----------------------+-------+
+ * >>>>>>>   |    Destination MAC    |      Source MAC       | EType |
+ *           +-----------------------+-----------------------+-------+
+ *                                   ^
+ *                                   |
+ *                                   skb->data
+ */
+static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
+{
+	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
+}
+
+/* Helper for creating space for DSA header tags in TX path packets.
+ * Must not be called before skb_push(len).
+ *
+ * Before:
+ *
+ *       <<<<<<<   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * ^     <<<<<<<   +-----------------------+-----------------------+-------+
+ * |     <<<<<<<   |    Destination MAC    |      Source MAC       | EType |
+ * |               +-----------------------+-----------------------+-------+
+ * <----- len ----->
+ * |
+ * |
+ * skb->data
+ *
+ * After:
+ *
+ * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * +-----------------------+-----------------------+---------------+-------+
+ * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * ^                                               |               |
+ * |                                               <----- len ----->
+ * skb->data
+ */
+static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
+{
+	memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
+}
+
+/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+ * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
+ * what the DSA master perceives as the EtherType (the beginning of the L3
+ * protocol). Since DSA EtherType header taggers treat the EtherType as part of
+ * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
+ * is located 2 bytes behind skb->data. Note that EtherType in this context
+ * means the first 2 bytes of the DSA header, not the encapsulated EtherType
+ * that will become visible after the DSA header is stripped.
+ */
+static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
+{
+	return skb->data - 2;
+}
+
+/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
+ * header taggers start exactly where the EtherType is (the EtherType is
+ * treated as part of the DSA header).
+ */
+static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
+{
+	return skb->data + 2 * ETH_ALEN;
+}
+
+/* Create 2 modaliases per tagging protocol, one to auto-load the module
+ * given the ID reported by get_tag_protocol(), and the other by name.
+ */
+#define DSA_TAG_DRIVER_ALIAS "dsa_tag:"
+#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto, __name) \
+	MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __name); \
+	MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS "id-" \
+		     __stringify(__proto##_VALUE))
+
+void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
+			      unsigned int count,
+			      struct module *owner);
+void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
+				unsigned int count);
+
+#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count)	\
+static int __init dsa_tag_driver_module_init(void)			\
+{									\
+	dsa_tag_drivers_register(__dsa_tag_drivers_array, __count,	\
+				 THIS_MODULE);				\
+	return 0;							\
+}									\
+module_init(dsa_tag_driver_module_init);				\
+									\
+static void __exit dsa_tag_driver_module_exit(void)			\
+{									\
+	dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count);	\
+}									\
+module_exit(dsa_tag_driver_module_exit)
+
+/**
+ * module_dsa_tag_drivers() - Helper macro for registering DSA tag
+ * drivers
+ * @__ops_array: Array of tag driver structures
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dsa_tag_drivers(__ops_array) \
+dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array))
+
+#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops
+
+/* Create a static structure from which we can build a linked list of
+ * dsa_tag drivers
+ */
+#define DSA_TAG_DRIVER(__ops) \
+static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \
+	.ops = &__ops, \
+}
+
+/**
+ * module_dsa_tag_driver() - Helper macro for registering a single DSA tag
+ * driver
+ * @__ops: Single tag driver structure
+ *
+ * Helper macro for DSA tag drivers which do not do anything special
+ * in module init/exit. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
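+ *
+ * A minimal user looks roughly like this (the "foo" tagger and the
+ * DSA_TAG_PROTO_FOO value are hypothetical, for illustration only):
+ *
+ *	static const struct dsa_device_ops foo_netdev_ops = {
+ *		.name	= "foo",
+ *		.proto	= DSA_TAG_PROTO_FOO,
+ *		.xmit	= foo_tag_xmit,
+ *		.rcv	= foo_tag_rcv,
+ *	};
+ *
+ *	module_dsa_tag_driver(foo_netdev_ops);
+ *	MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_FOO, "foo");
+ *	MODULE_LICENSE("GPL");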
+ */ +#define module_dsa_tag_driver(__ops) \ +DSA_TAG_DRIVER(__ops); \ + \ +static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ + &DSA_TAG_DRIVER_NAME(__ops) \ +}; \ +module_dsa_tag_drivers(dsa_tag_driver_array) + +#endif diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 34e5ec5d3e23..b1263917fcb2 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -7,7 +7,10 @@ #include <linux/if_vlan.h> #include <linux/dsa/8021q.h> -#include "dsa_priv.h" +#include "port.h" +#include "switch.h" +#include "tag.h" +#include "tag_8021q.h" /* Binary structure of the fake 12-bit VID field (when the TPID is * ETH_P_DSA_8021Q): @@ -60,6 +63,20 @@ #define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \ DSA_8021Q_PORT_MASK) +struct dsa_tag_8021q_vlan { + struct list_head list; + int port; + u16 vid; + refcount_t refcount; +}; + +struct dsa_8021q_context { + struct dsa_switch *ds; + struct list_head vlans; + /* EtherType of RX VID, used for filtering on master interface */ + __be16 proto; +}; + u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num) { /* The VBID value of 0 is reserved for precise TX, but it is also diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h new file mode 100644 index 000000000000..b75cbaa028ef --- /dev/null +++ b/net/dsa/tag_8021q.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_TAG_8021Q_H +#define __DSA_TAG_8021Q_H + +#include <net/dsa.h> + +#include "switch.h" + +struct sk_buff; +struct net_device; + +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci); + +void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, + int *vbid); + +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, + int vbid); + +int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); +int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, + struct dsa_notifier_tag_8021q_vlan_info *info); + +#endif diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index bfa161a4f502..7f3b7d730b85 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -7,7 +7,7 @@ #include <linux/bitfield.h> #include <linux/etherdevice.h> -#include "dsa_priv.h" +#include "tag.h" #define AR9331_NAME "ar9331" diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 9e7477ed70f1..10239daa5745 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -10,7 +10,7 @@ #include <linux/list.h> #include <linux/slab.h> -#include "dsa_priv.h" +#include "tag.h" #define BRCM_NAME "brcm" #define BRCM_LEGACY_NAME "brcm-legacy" diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 9fe77f5cc759..1fd7fa26db64 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -50,7 +50,7 @@ #include <linux/list.h> #include <linux/slab.h> -#include "dsa_priv.h" +#include "tag.h" #define DSA_NAME "dsa" #define EDSA_NAME "edsa" diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 020050dff3e4..e279cd9057b0 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -10,7 +10,7 @@ #include <linux/skbuff.h> #include <net/dsa.h> -#include "dsa_priv.h" +#include "tag.h" #define GSWIP_NAME "gswip" diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 03fd5f2877c8..71884296fc70 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -11,7 +11,7 @@ #include <linux/skbuff.h> #include <net/dsa.h> -#include "dsa_priv.h" +#include "tag.h" #define HELLCREEK_NAME "hellcreek" diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c 
index 37db5156f9a3..0f6ae143afc9 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -7,7 +7,8 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <net/dsa.h> -#include "dsa_priv.h" + +#include "tag.h" #define KSZ8795_NAME "ksz8795" #define KSZ9477_NAME "ksz9477" diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 4118292ed218..c25f5536706b 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -7,7 +7,7 @@ #include <linux/list.h> #include <linux/slab.h> -#include "dsa_priv.h" +#include "tag.h" /* To define the outgoing port and to discover the incoming port a regular * VLAN tag is used by the LAN9303. But its VID meaning is 'special': diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index ba37495ab5f4..40af80452747 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -8,7 +8,7 @@ #include <linux/etherdevice.h> #include <linux/if_vlan.h> -#include "dsa_priv.h" +#include "tag.h" #define MTK_NAME "mtk" @@ -27,6 +27,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, u8 xmit_tpid; u8 *mtk_tag; + skb_set_queue_mapping(skb, dp->index); + /* Build the special tag after the MAC Source Address. If VLAN header * is present, it's required that VLAN header and special tag is * being combined. Only in this way we can allow the switch can parse diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c new file mode 100644 index 000000000000..d2fd179c4227 --- /dev/null +++ b/net/dsa/tag_none.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * net/dsa/tag_none.c - Traffic handling for switches with no tag + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <[email protected]> + * + * WARNING: do not use this for new switches. In case of no hardware + * tagging support, look at tag_8021q.c instead. 
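+ * With no tag there is also no way to demultiplex received frames to
+ * the originating user port, whereas tag_8021q can encode the source
+ * port in a VLAN tag.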
+ */ + +#include "tag.h" + +#define NONE_NAME "none" + +static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* Just return the original SKB */ + return skb; +} + +static const struct dsa_device_ops none_ops = { + .name = NONE_NAME, + .proto = DSA_TAG_PROTO_NONE, + .xmit = dsa_slave_notag_xmit, +}; + +module_dsa_tag_driver(none_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_NONE, NONE_NAME); +MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 8cc31ab47e28..28ebecafdd24 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -2,7 +2,8 @@ /* Copyright 2019 NXP */ #include <linux/dsa/ocelot.h> -#include "dsa_priv.h" + +#include "tag.h" #define OCELOT_NAME "ocelot" #define SEVILLE_NAME "seville" diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index d1ec68001487..1f0b8c20eba5 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -10,7 +10,9 @@ */ #include <linux/dsa/8021q.h> #include <linux/dsa/ocelot.h> -#include "dsa_priv.h" + +#include "tag.h" +#include "tag_8021q.h" #define OCELOT_8021Q_NAME "ocelot-8021q" diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 73d6e111228d..e757c8de06f1 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -8,7 +8,7 @@ #include <net/dsa.h> #include <linux/dsa/tag_qca.h> -#include "dsa_priv.h" +#include "tag.h" #define QCA_NAME "qca" diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 18b52d77d200..c327314b95e3 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -18,7 +18,7 @@ #include <linux/etherdevice.h> #include <linux/bits.h> -#include "dsa_priv.h" +#include "tag.h" #define RTL4_A_NAME "rtl4a" diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index 030a8cf0ad48..4f67834fd121 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -77,7 +77,7 @@ #include <linux/bits.h> #include <linux/etherdevice.h> -#include "dsa_priv.h" +#include "tag.h" /* Protocols supported: * diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c index b9135069f9fc..437a6820ac42 100644 --- a/net/dsa/tag_rzn1_a5psw.c +++ b/net/dsa/tag_rzn1_a5psw.c @@ -10,7 +10,7 @@ #include <linux/if_ether.h> #include <net/dsa.h> -#include "dsa_priv.h" +#include "tag.h" /* To define the outgoing port and to discover the incoming port a TAG is * inserted after Src MAC : diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 3b6e642a90e9..f14f51b41491 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -5,7 +5,9 @@ #include <linux/dsa/sja1105.h> #include <linux/dsa/8021q.h> #include <linux/packing.h> -#include "dsa_priv.h" + +#include "tag.h" +#include "tag_8021q.h" #define SJA1105_NAME "sja1105" #define SJA1110_NAME "sja1110" diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 8754dfe680f6..7361b9106382 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -8,7 +8,7 @@ #include <linux/list.h> #include <linux/slab.h> -#include "dsa_priv.h" +#include "tag.h" #define TRAILER_NAME "trailer" diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index dc935dd90f98..af19969f9bc4 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -7,7 +7,7 @@ #include <linux/bitops.h> -#include "dsa_priv.h" +#include "tag.h" #define XRS700X_NAME "xrs700x" diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 99272a67525c..c2f1a542e6fa 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2013,7 +2013,8 @@ static int ethtool_phys_id(struct net_device *dev, void 
__user *useraddr) } else { /* Driver expects to be called at twice the frequency in rc */ int n = rc * 2, interval = HZ / n; - u64 count = n * id.data, i = 0; + u64 count = mul_u32_u32(n, id.data); + u64 i = 0; do { rtnl_lock(); diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index de476a417631..1a195efc79cd 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/errno.h> #include <linux/debugfs.h> -#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" @@ -21,7 +20,6 @@ hsr_node_table_show(struct seq_file *sfp, void *data) { struct hsr_priv *priv = (struct hsr_priv *)sfp->private; struct hsr_node *node; - int i; seq_printf(sfp, "Node Table entries for (%s) device\n", (priv->prot_version == PRP_V1 ? "PRP" : "HSR")); @@ -33,28 +31,22 @@ hsr_node_table_show(struct seq_file *sfp, void *data) seq_puts(sfp, "DAN-H\n"); rcu_read_lock(); - - for (i = 0 ; i < priv->hash_buckets; i++) { - hlist_for_each_entry_rcu(node, &priv->node_db[i], mac_list) { - /* skip self node */ - if (hsr_addr_is_self(priv, node->macaddress_A)) - continue; - seq_printf(sfp, "%pM ", &node->macaddress_A[0]); - seq_printf(sfp, "%pM ", &node->macaddress_B[0]); - seq_printf(sfp, "%10lx, ", - node->time_in[HSR_PT_SLAVE_A]); - seq_printf(sfp, "%10lx, ", - node->time_in[HSR_PT_SLAVE_B]); - seq_printf(sfp, "%14x, ", node->addr_B_port); - - if (priv->prot_version == PRP_V1) - seq_printf(sfp, "%5x, %5x, %5x\n", - node->san_a, node->san_b, - (node->san_a == 0 && - node->san_b == 0)); - else - seq_printf(sfp, "%5x\n", 1); - } + list_for_each_entry_rcu(node, &priv->node_db, mac_list) { + /* skip self node */ + if (hsr_addr_is_self(priv, node->macaddress_A)) + continue; + seq_printf(sfp, "%pM ", &node->macaddress_A[0]); + seq_printf(sfp, "%pM ", &node->macaddress_B[0]); + seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]); + seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]); + seq_printf(sfp, "%14x, ", node->addr_B_port); + + if (priv->prot_version == PRP_V1) + seq_printf(sfp, "%5x, %5x, %5x\n", + node->san_a, node->san_b, + (node->san_a == 0 && node->san_b == 0)); + else + seq_printf(sfp, "%5x\n", 1); } rcu_read_unlock(); return 0; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 6ffef47e9be5..5a236aae2366 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -219,7 +219,9 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb->dev = master->dev; skb_reset_mac_header(skb); skb_reset_mac_len(skb); + spin_lock_bh(&hsr->seqnr_lock); hsr_forward_skb(skb, master); + spin_unlock_bh(&hsr->seqnr_lock); } else { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); @@ -278,7 +280,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master, __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; - unsigned long irqflags; struct sk_buff *skb; *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); @@ -299,7 +300,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); /* From HSRv1 on we have separate supervision sequence numbers. 
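+	 * (HSRv0 reuses the ordinary frame counter, hsr->sequence_nr, as the
+	 * else branch below shows.)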
*/ - spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); + spin_lock_bh(&hsr->seqnr_lock); if (hsr->prot_version > 0) { hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; @@ -307,7 +308,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_stag->sequence_nr = htons(hsr->sequence_nr); hsr->sequence_nr++; } - spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->tlv.HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ @@ -318,11 +318,13 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); - if (skb_put_padto(skb, ETH_ZLEN)) + if (skb_put_padto(skb, ETH_ZLEN)) { + spin_unlock_bh(&hsr->seqnr_lock); return; + } hsr_forward_skb(skb, master); - + spin_unlock_bh(&hsr->seqnr_lock); return; } @@ -332,7 +334,6 @@ static void send_prp_supervision_frame(struct hsr_port *master, struct hsr_priv *hsr = master->hsr; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; - unsigned long irqflags; struct sk_buff *skb; skb = hsr_init_skb(master); @@ -347,7 +348,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); /* From HSRv1 on we have separate supervision sequence numbers. */ - spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); + spin_lock_bh(&hsr->seqnr_lock); hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; @@ -358,13 +359,12 @@ static void send_prp_supervision_frame(struct hsr_port *master, ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); if (skb_put_padto(skb, ETH_ZLEN)) { - spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); + spin_unlock_bh(&hsr->seqnr_lock); return; } - spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); - hsr_forward_skb(skb, master); + spin_unlock_bh(&hsr->seqnr_lock); } /* Announce (supervision frame) timer function @@ -444,7 +444,7 @@ void hsr_dev_setup(struct net_device *dev) dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); - dev->priv_flags |= IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; dev->needs_free_netdev = true; @@ -485,16 +485,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], { bool unregister = false; struct hsr_priv *hsr; - int res, i; + int res; hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); - INIT_HLIST_HEAD(&hsr->self_node_db); - hsr->hash_buckets = HSR_HSIZE; - get_random_bytes(&hsr->hash_seed, sizeof(hsr->hash_seed)); - for (i = 0; i < hsr->hash_buckets; i++) - INIT_HLIST_HEAD(&hsr->node_db[i]); - + INIT_LIST_HEAD(&hsr->node_db); spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a50429a62f74..629daacc9607 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; 
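+		/* netif_rx() may already have freed the skb, which is why
+		 * its length was sampled into recv_len beforehand instead
+		 * of being read back here.
+		 */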
if (was_multicast_frame) dev->stats.multicast++; } @@ -499,7 +500,6 @@ static void handle_std_frame(struct sk_buff *skb, { struct hsr_port *port = frame->port_rcv; struct hsr_priv *hsr = port->hsr; - unsigned long irqflags; frame->skb_hsr = NULL; frame->skb_prp = NULL; @@ -509,10 +509,9 @@ static void handle_std_frame(struct sk_buff *skb, frame->is_from_san = true; } else { /* Sequence nr for the master node */ - spin_lock_irqsave(&hsr->seqnr_lock, irqflags); + lockdep_assert_held(&hsr->seqnr_lock); frame->sequence_nr = hsr->sequence_nr; hsr->sequence_nr++; - spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags); } } @@ -570,23 +569,20 @@ static int fill_frame_info(struct hsr_frame_info *frame, struct ethhdr *ethhdr; __be16 proto; int ret; - u32 hash; /* Check if skb contains ethhdr */ if (skb->mac_len < sizeof(struct ethhdr)) return -EINVAL; memset(frame, 0, sizeof(*frame)); - - ethhdr = (struct ethhdr *)skb_mac_header(skb); - hash = hsr_mac_hash(port->hsr, ethhdr->h_source); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, &hsr->node_db[hash], skb, + frame->node_src = hsr_get_node(port, &hsr->node_db, skb, frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; proto = ethhdr->h_proto; @@ -616,11 +612,13 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) { struct hsr_frame_info frame; + rcu_read_lock(); if (fill_frame_info(&frame, skb, port) < 0) goto out_drop; hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); hsr_forward_do(&frame); + rcu_read_unlock(); /* Gets called for ingress frames as well as egress from master port. * So check and increment stats for master port only here. */ @@ -635,6 +633,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) return; out_drop: + rcu_read_unlock(); port->dev->stats.tx_dropped++; kfree_skb(skb); } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 584e21788799..00db74d96583 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -15,37 +15,10 @@ #include <linux/etherdevice.h> #include <linux/slab.h> #include <linux/rculist.h> -#include <linux/jhash.h> #include "hsr_main.h" #include "hsr_framereg.h" #include "hsr_netlink.h" -#ifdef CONFIG_LOCKDEP -int lockdep_hsr_is_held(spinlock_t *lock) -{ - return lockdep_is_held(lock); -} -#endif - -u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr) -{ - u32 hash = jhash(addr, ETH_ALEN, hsr->hash_seed); - - return reciprocal_scale(hash, hsr->hash_buckets); -} - -struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock) -{ - struct hlist_node *first; - - first = rcu_dereference_bh_check(hlist_first_rcu(head), - lockdep_hsr_is_held(lock)); - if (first) - return hlist_entry(first, struct hsr_node, mac_list); - - return NULL; -} - /* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, * false otherwise. 
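+ * E.g. seq_nr_after(1, 0xFFFF) is true: sequence numbers compare
+ * modulo 2^16, so 1 comes shortly after 0xFFFF across the wrap.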
*/ @@ -65,30 +38,32 @@ static bool seq_nr_after(u16 a, u16 b) bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { - struct hsr_node *node; + struct hsr_self_node *sn; + bool ret = false; - node = hsr_node_get_first(&hsr->self_node_db, &hsr->list_lock); - if (!node) { + rcu_read_lock(); + sn = rcu_dereference(hsr->self_node); + if (!sn) { WARN_ONCE(1, "HSR: No self node\n"); - return false; + goto out; } - if (ether_addr_equal(addr, node->macaddress_A)) - return true; - if (ether_addr_equal(addr, node->macaddress_B)) - return true; - - return false; + if (ether_addr_equal(addr, sn->macaddress_A) || + ether_addr_equal(addr, sn->macaddress_B)) + ret = true; +out: + rcu_read_unlock(); + return ret; } /* Search for mac entry. Caller must hold rcu read lock. */ -static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db, +static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, const unsigned char addr[ETH_ALEN]) { struct hsr_node *node; - hlist_for_each_entry_rcu(node, node_db, mac_list) { + list_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, addr)) return node; } @@ -96,58 +71,51 @@ static struct hsr_node *find_node_by_addr_A(struct hlist_head *node_db, return NULL; } -/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize +/* Helper for device init; the self_node is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], const unsigned char addr_b[ETH_ALEN]) { - struct hlist_head *self_node_db = &hsr->self_node_db; - struct hsr_node *node, *oldnode; + struct hsr_self_node *sn, *old; - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) + sn = kmalloc(sizeof(*sn), GFP_KERNEL); + if (!sn) return -ENOMEM; - ether_addr_copy(node->macaddress_A, addr_a); - ether_addr_copy(node->macaddress_B, addr_b); + ether_addr_copy(sn->macaddress_A, addr_a); + ether_addr_copy(sn->macaddress_B, addr_b); spin_lock_bh(&hsr->list_lock); - oldnode = hsr_node_get_first(self_node_db, &hsr->list_lock); - if (oldnode) { - hlist_replace_rcu(&oldnode->mac_list, &node->mac_list); - spin_unlock_bh(&hsr->list_lock); - kfree_rcu(oldnode, rcu_head); - } else { - hlist_add_tail_rcu(&node->mac_list, self_node_db); - spin_unlock_bh(&hsr->list_lock); - } + old = rcu_replace_pointer(hsr->self_node, sn, + lockdep_is_held(&hsr->list_lock)); + spin_unlock_bh(&hsr->list_lock); + if (old) + kfree_rcu(old, rcu_head); return 0; } void hsr_del_self_node(struct hsr_priv *hsr) { - struct hlist_head *self_node_db = &hsr->self_node_db; - struct hsr_node *node; + struct hsr_self_node *old; spin_lock_bh(&hsr->list_lock); - node = hsr_node_get_first(self_node_db, &hsr->list_lock); - if (node) { - hlist_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - } + old = rcu_replace_pointer(hsr->self_node, NULL, + lockdep_is_held(&hsr->list_lock)); spin_unlock_bh(&hsr->list_lock); + if (old) + kfree_rcu(old, rcu_head); } -void hsr_del_nodes(struct hlist_head *node_db) +void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; - struct hlist_node *tmp; + struct hsr_node *tmp; - hlist_for_each_entry_safe(node, tmp, node_db, mac_list) - kfree_rcu(node, rcu_head); + list_for_each_entry_safe(node, tmp, node_db, mac_list) + kfree(node); } void prp_handle_san_frame(bool san, enum hsr_port_type port, @@ -168,7 +136,7 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, * originating from the newly added node. 
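+ * (This duplicate filtering is performed by hsr_register_frame_out(),
+ * which compares against the per-port seq_out values initialized here.)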
*/ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, - struct hlist_head *node_db, + struct list_head *node_db, unsigned char addr[], u16 seq_out, bool san, enum hsr_port_type rx_port) @@ -182,6 +150,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, return NULL; ether_addr_copy(new_node->macaddress_A, addr); + spin_lock_init(&new_node->seq_out_lock); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). @@ -198,14 +167,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, hsr->proto_ops->handle_san_frame(san, rx_port, new_node); spin_lock_bh(&hsr->list_lock); - hlist_for_each_entry_rcu(node, node_db, mac_list, - lockdep_hsr_is_held(&hsr->list_lock)) { + list_for_each_entry_rcu(node, node_db, mac_list, + lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) goto out; } - hlist_add_tail_rcu(&new_node->mac_list, node_db); + list_add_tail_rcu(&new_node->mac_list, node_db); spin_unlock_bh(&hsr->list_lock); return new_node; out: @@ -225,7 +194,7 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup) /* Get the hsr_node from which 'skb' was sent. */ -struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, +struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port) { @@ -241,7 +210,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, ethhdr = (struct ethhdr *)skb_mac_header(skb); - hlist_for_each_entry_rcu(node, node_db, mac_list) { + list_for_each_entry_rcu(node, node_db, mac_list) { if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { if (hsr->proto_ops->update_san_info) hsr->proto_ops->update_san_info(node, is_sup); @@ -291,12 +260,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) struct hsr_sup_tlv *hsr_sup_tlv; struct hsr_node *node_real; struct sk_buff *skb = NULL; - struct hlist_head *node_db; + struct list_head *node_db; struct ethhdr *ethhdr; int i; unsigned int pull_size = 0; unsigned int total_pull_size = 0; - u32 hash; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol @@ -334,13 +302,11 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Merge node_curr (registered on macaddress_B) into node_real */ - node_db = port_rcv->hsr->node_db; - hash = hsr_mac_hash(hsr, hsr_sp->macaddress_A); - node_real = find_node_by_addr_A(&node_db[hash], hsr_sp->macaddress_A); + node_db = &port_rcv->hsr->node_db; + node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A); if (!node_real) /* No frame received from AddrA of this node yet */ - node_real = hsr_add_node(hsr, &node_db[hash], - hsr_sp->macaddress_A, + node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, HSR_SEQNR_START - 1, true, port_rcv->type); if (!node_real) @@ -374,14 +340,14 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) hsr_sp = (struct hsr_sup_payload *)skb->data; /* Check if redbox mac and node mac are equal. */ - if (!ether_addr_equal(node_real->macaddress_A, - hsr_sp->macaddress_A)) { + if (!ether_addr_equal(node_real->macaddress_A, hsr_sp->macaddress_A)) { /* This is a redbox supervision frame for a VDAN! 
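+		 * The RedBox only proxies supervision for that node, so
+		 * node_curr must not be merged into node_real.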
*/ goto done; } } ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); + spin_lock_bh(&node_real->seq_out_lock); for (i = 0; i < HSR_PT_PORTS; i++) { if (!node_curr->time_in_stale[i] && time_after(node_curr->time_in[i], node_real->time_in[i])) { @@ -392,12 +358,16 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) node_real->seq_out[i] = node_curr->seq_out[i]; } + spin_unlock_bh(&node_real->seq_out_lock); node_real->addr_B_port = port_rcv->type; spin_lock_bh(&hsr->list_lock); - hlist_del_rcu(&node_curr->mac_list); + if (!node_curr->removed) { + list_del_rcu(&node_curr->mac_list); + node_curr->removed = true; + kfree_rcu(node_curr, rcu_head); + } spin_unlock_bh(&hsr->list_lock); - kfree_rcu(node_curr, rcu_head); done: /* Push back here */ @@ -433,7 +403,6 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, struct hsr_port *port) { struct hsr_node *node_dst; - u32 hash; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); @@ -443,8 +412,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) return; - hash = hsr_mac_hash(port->hsr, eth_hdr(skb)->h_dest); - node_dst = find_node_by_addr_A(&port->hsr->node_db[hash], + node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { if (net_ratelimit()) @@ -484,13 +452,17 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr) { + spin_lock_bh(&node->seq_out_lock); if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && time_is_after_jiffies(node->time_out[port->type] + - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) { + spin_unlock_bh(&node->seq_out_lock); return 1; + } node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; + spin_unlock_bh(&node->seq_out_lock); return 0; } @@ -520,71 +492,60 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, void hsr_prune_nodes(struct timer_list *t) { struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); - struct hlist_node *tmp; struct hsr_node *node; + struct hsr_node *tmp; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; - int i; spin_lock_bh(&hsr->list_lock); + list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) { + /* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A] + * nor time_in[HSR_PT_SLAVE_B], will ever be updated for + * the master port. Thus the master node will be repeatedly + * pruned leading to packet loss. + */ + if (hsr_addr_is_self(hsr, node->macaddress_A)) + continue; + + /* Shorthand */ + time_a = node->time_in[HSR_PT_SLAVE_A]; + time_b = node->time_in[HSR_PT_SLAVE_B]; + + /* Check for timestamps old enough to risk wrap-around */ + if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_A] = true; + if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) + node->time_in_stale[HSR_PT_SLAVE_B] = true; + + /* Get age of newest frame from node. 
+ * At least one time_in is OK here; nodes get pruned long + * before both time_ins can get stale + */ + timestamp = time_a; + if (node->time_in_stale[HSR_PT_SLAVE_A] || + (!node->time_in_stale[HSR_PT_SLAVE_B] && + time_after(time_b, time_a))) + timestamp = time_b; + + /* Warn of ring error only as long as we get frames at all */ + if (time_is_after_jiffies(timestamp + + msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { + rcu_read_lock(); + port = get_late_port(hsr, node); + if (port) + hsr_nl_ringerror(hsr, node->macaddress_A, port); + rcu_read_unlock(); + } - for (i = 0; i < hsr->hash_buckets; i++) { - hlist_for_each_entry_safe(node, tmp, &hsr->node_db[i], - mac_list) { - /* Don't prune own node. - * Neither time_in[HSR_PT_SLAVE_A] - * nor time_in[HSR_PT_SLAVE_B], will ever be updated - * for the master port. Thus the master node will be - * repeatedly pruned leading to packet loss. - */ - if (hsr_addr_is_self(hsr, node->macaddress_A)) - continue; - - /* Shorthand */ - time_a = node->time_in[HSR_PT_SLAVE_A]; - time_b = node->time_in[HSR_PT_SLAVE_B]; - - /* Check for timestamps old enough to - * risk wrap-around - */ - if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_A] = true; - if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET / 2)) - node->time_in_stale[HSR_PT_SLAVE_B] = true; - - /* Get age of newest frame from node. - * At least one time_in is OK here; nodes get pruned - * long before both time_ins can get stale - */ - timestamp = time_a; - if (node->time_in_stale[HSR_PT_SLAVE_A] || - (!node->time_in_stale[HSR_PT_SLAVE_B] && - time_after(time_b, time_a))) - timestamp = time_b; - - /* Warn of ring error only as long as we get - * frames at all - */ - if (time_is_after_jiffies(timestamp + - msecs_to_jiffies(1.5 * MAX_SLAVE_DIFF))) { - rcu_read_lock(); - port = get_late_port(hsr, node); - if (port) - hsr_nl_ringerror(hsr, - node->macaddress_A, - port); - rcu_read_unlock(); - } - - /* Prune old entries */ - if (time_is_before_jiffies(timestamp + - msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr, node->macaddress_A); - hlist_del_rcu(&node->mac_list); - /* Note that we need to free this - * entry later: - */ + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); + if (!node->removed) { + list_del_rcu(&node->mac_list); + node->removed = true; + /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); } } @@ -600,20 +561,17 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { struct hsr_node *node; - u32 hash; - - hash = hsr_mac_hash(hsr, addr); if (!_pos) { - node = hsr_node_get_first(&hsr->node_db[hash], - &hsr->list_lock); + node = list_first_or_null_rcu(&hsr->node_db, + struct hsr_node, mac_list); if (node) ether_addr_copy(addr, node->macaddress_A); return node; } node = _pos; - hlist_for_each_entry_continue_rcu(node, mac_list) { + list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { ether_addr_copy(addr, node->macaddress_A); return node; } @@ -633,11 +591,8 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_node *node; struct hsr_port *port; unsigned long tdiff; - u32 hash; - - hash = hsr_mac_hash(hsr, addr); - node = find_node_by_addr_A(&hsr->node_db[hash], addr); + node = find_node_by_addr_A(&hsr->node_db, addr); if (!node) return -ENOENT; diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index f3762e9e42b5..b23556251d62 100644 --- 
a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -28,17 +28,9 @@ struct hsr_frame_info { bool is_from_san; }; -#ifdef CONFIG_LOCKDEP -int lockdep_hsr_is_held(spinlock_t *lock); -#else -#define lockdep_hsr_is_held(lock) 1 -#endif - -u32 hsr_mac_hash(struct hsr_priv *hsr, const unsigned char *addr); -struct hsr_node *hsr_node_get_first(struct hlist_head *head, spinlock_t *lock); void hsr_del_self_node(struct hsr_priv *hsr); -void hsr_del_nodes(struct hlist_head *node_db); -struct hsr_node *hsr_get_node(struct hsr_port *port, struct hlist_head *node_db, +void hsr_del_nodes(struct list_head *node_db); +struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, struct sk_buff *skb, bool is_sup, enum hsr_port_type rx_port); void hsr_handle_sup_frame(struct hsr_frame_info *frame); @@ -76,7 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, void prp_update_san_info(struct hsr_node *node, bool is_sup); struct hsr_node { - struct hlist_node mac_list; + struct list_head mac_list; + /* Protect R/W access to seq_out */ + spinlock_t seq_out_lock; unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; /* Local slave through which AddrB frames are received from this node */ @@ -88,6 +82,7 @@ struct hsr_node { bool san_a; bool san_b; u16 seq_out[HSR_PT_PORTS]; + bool removed; struct rcu_head rcu_head; }; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index b158ba409f9a..5584c80a5c79 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -47,9 +47,6 @@ #define HSR_V1_SUP_LSDUSIZE 52 -#define HSR_HSIZE_SHIFT 8 -#define HSR_HSIZE BIT(HSR_HSIZE_SHIFT) - /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or * equivalently, the 4 most significant bits of HSR tag byte 14). @@ -185,11 +182,17 @@ struct hsr_proto_ops { void (*update_san_info)(struct hsr_node *node, bool is_sup); }; +struct hsr_self_node { + unsigned char macaddress_A[ETH_ALEN]; + unsigned char macaddress_B[ETH_ALEN]; + struct rcu_head rcu_head; +}; + struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; - struct hlist_head node_db[HSR_HSIZE]; /* Known HSR nodes */ - struct hlist_head self_node_db; /* MACs of slaves */ + struct list_head node_db; /* Known HSR nodes */ + struct hsr_self_node __rcu *self_node; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list prune_timer; int announce_count; @@ -199,8 +202,6 @@ struct hsr_priv { spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; - u32 hash_buckets; - u32 hash_seed; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. 
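[Annotation] The header changes above replace the one-entry self_node_db hlist with a single RCU-managed hsr_self_node pointer. A hypothetical writer-side sketch, assuming the update is published with rcu_replace_pointer() under hsr->list_lock and the old record is freed after a grace period (the helper name is invented; the real update path may differ):

#include <linux/etherdevice.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static int demo_set_self_node(struct hsr_priv *hsr,
                              const unsigned char addr_a[ETH_ALEN],
                              const unsigned char addr_b[ETH_ALEN])
{
        struct hsr_self_node *sn, *old;

        sn = kmalloc(sizeof(*sn), GFP_KERNEL);
        if (!sn)
                return -ENOMEM;
        ether_addr_copy(sn->macaddress_A, addr_a);
        ether_addr_copy(sn->macaddress_B, addr_b);

        spin_lock_bh(&hsr->list_lock);
        old = rcu_replace_pointer(hsr->self_node, sn,
                                  lockdep_is_held(&hsr->list_lock));
        spin_unlock_bh(&hsr->list_lock);

        if (old)
                kfree_rcu(old, rcu_head);       /* readers may still hold it */
        return 0;
}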
Bit 0 is set * based on SLAVE_A or SLAVE_B */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 7174a9092900..78fe40eb9f01 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -105,7 +105,6 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static void hsr_dellink(struct net_device *dev, struct list_head *head) { struct hsr_priv *hsr = netdev_priv(dev); - int i; del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); @@ -114,8 +113,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) hsr_del_ports(hsr); hsr_del_self_node(hsr); - for (i = 0; i < hsr->hash_buckets; i++) - hsr_del_nodes(&hsr->node_db[i]); + hsr_del_nodes(&hsr->node_db); unregister_netdevice_queue(dev, head); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 378bcd777514..ab4a06be489b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -522,9 +522,9 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Make sure we are allowed to bind here. */ if (snum || !(inet->bind_address_no_port || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { - if (sk->sk_prot->get_port(sk, snum)) { + err = sk->sk_prot->get_port(sk, snum); + if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; - err = -EADDRINUSE; goto out_release_sock; } if (!(flags & BIND_FROM_BPF)) { @@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { - struct inet_bind_hashbucket *prev_addr_hashbucket; struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; @@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; } - prev_addr_hashbucket = - inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, - sock_net(sk), inet->inet_num); - - inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; - - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { - inet->inet_saddr = old_saddr; - inet->inet_rcv_saddr = old_saddr; ip_rt_put(rt); return err; } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 6da16ae6a962..4517d2bd186a 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -61,7 +61,9 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) return false; - if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) + if (base_type(info->reg_type) == PTR_TO_BTF_ID && + !bpf_type_has_unsafe_modifiers(info->reg_type) && + info->btf_id == sock_id) /* promote it to tcp_sock */ info->btf_id = tcp_sock_id; @@ -69,18 +71,17 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, } static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) { + const struct btf_type *t; size_t end; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, - flag); + return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); + t = btf_type_by_id(reg->btf, reg->btf_id); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); return -EACCES; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 170152772d33..3969fa805679 
100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f721c308248b..19a662003eef 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 452ff177e4da..74d403dbd2b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct key_vector { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ - struct key_vector __rcu *tnode[0]; + DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode); }; }; @@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); - if (WARN_ON_ONCE(!l)) + if (WARN_ON_ONCE(!l)) { + err = -ENOENT; goto out_free_new_fa; + } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4e84ed21d16f..4a34bc7cb15e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -471,11 +471,11 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; bool found_port = false, check_bind_conflict = true; bool bhash_created = false, bhash2_created = false; + int ret = -EADDRINUSE, port = snum, l3mdev; struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2 = NULL; struct inet_bind_bucket *tb = NULL; bool head2_lock_acquired = false; - int ret = 1, port = snum, l3mdev; struct net *net = sock_net(sk); l3mdev = inet_sk_bound_l3mdev(sk); @@ -1186,7 +1186,7 @@ int inet_csk_listen_start(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - int err = -EADDRINUSE; + int err; reqsk_queue_alloc(&icsk->icsk_accept_queue); @@ -1202,7 +1202,8 @@ int inet_csk_listen_start(struct sock *sk) * after validation is complete. 
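[Annotation] The esp_xmit() hunk earlier in this file's diff (and its esp6_xmit() twin further below) fixes 32-bit ESN truncation for GSO: after advancing seq.low by gso_segs, a wrap is detected by the low word ending up below its starting value, and the carry is propagated into seq.hi. A standalone, runnable illustration of the wrap test:

#include <stdint.h>
#include <stdio.h>

struct esn { uint32_t lo, hi; };

/* Advance the low 32 bits by 'segs'; an unsigned wrap (lo ends up
 * below its pre-increment value) carries into the high word -- the
 * same test the offload paths gain in this series.
 */
static void esn_advance(struct esn *x, uint32_t segs)
{
        uint32_t before = x->lo;

        x->lo += segs;
        if (x->lo < before)
                x->hi++;
}

int main(void)
{
        struct esn x = { .lo = 0xfffffffeu, .hi = 0 };

        esn_advance(&x, 4);
        printf("lo=0x%08x hi=%u\n", x.lo, x.hi);  /* lo=0x00000002 hi=1 */
        return 0;
}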
*/ inet_sk_state_store(sk, TCP_LISTEN); - if (!sk->sk_prot->get_port(sk, inet->inet_num)) { + err = sk->sk_prot->get_port(sk, inet->inet_num); + if (!err) { inet->inet_sport = htons(inet->inet_num); sk_dst_reset(sk); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 033bf3c2538f..3cec471a2cd2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -858,34 +858,80 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +static void inet_update_saddr(struct sock *sk, void *saddr, int family) +{ + if (family == AF_INET) { + inet_sk(sk)->inet_saddr = *(__be32 *)saddr; + sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else { + sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; + } +#endif +} + +static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); - struct inet_bind_hashbucket *head2; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + int bhash; + + if (!inet_csk(sk)->icsk_bind2_hash) { + /* Not bind()ed before. */ + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + return 0; + } /* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); - if (!new_tb2) + if (!new_tb2) { + if (reset) { + /* The (INADDR_ANY, port) bucket might have already + * been freed, then we cannot fixup icsk_bind2_hash, + * so we give up and unlink sk from bhash/bhash2 not + * to leave inconsistency in bhash2. + */ + inet_put_port(sk); + inet_reset_saddr(sk); + } + return -ENOMEM; + } + bhash = inet_bhashfn(net, port, hinfo->bhash_size); + head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); - if (prev_saddr) { - spin_lock_bh(&prev_saddr->lock); - __sk_del_bind2_node(sk); - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); - } + /* If we change saddr locklessly, another thread + * iterating over bhash might see corrupted address. 
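[Annotation] The __inet_bhash2_update_saddr() rework above keeps bhash2 consistent two ways: the replacement bind bucket is allocated before any lock is taken, so the locked section cannot fail halfway, and the bhash bucket lock is held across the whole address change with the bhash2 bucket locks nested inside. A skeletal sketch, with demo_* types and trivial stubs standing in for the real bhash/bhash2 structures and helpers:

struct demo_bucket;
struct demo_sock;
struct demo_tables {
        struct kmem_cache *bucket_cachep;
        spinlock_t bhash_lock;          /* per-port bucket */
        spinlock_t old_bhash2_lock;     /* per-(port, old addr) bucket */
        spinlock_t new_bhash2_lock;     /* per-(port, new addr) bucket */
};

/* Stand-ins for the real link/unlink/addr helpers */
static inline void demo_unlink_bhash2(struct demo_sock *sk) { }
static inline bool demo_link_bhash2(struct demo_sock *sk,
                                    struct demo_bucket *tb2) { return true; }
static inline void demo_set_saddr(struct demo_sock *sk, const void *sa) { }

static int demo_update_saddr(struct demo_tables *t, struct demo_sock *sk,
                             const void *new_saddr)
{
        struct demo_bucket *new_tb2;

        new_tb2 = kmem_cache_alloc(t->bucket_cachep, GFP_ATOMIC);
        if (!new_tb2)
                return -ENOMEM;         /* no shared state touched yet */

        spin_lock_bh(&t->bhash_lock);   /* excludes bhash walkers */

        spin_lock(&t->old_bhash2_lock);
        demo_unlink_bhash2(sk);
        spin_unlock(&t->old_bhash2_lock);

        demo_set_saddr(sk, new_saddr);  /* no walker sees this half-done */

        spin_lock(&t->new_bhash2_lock);
        if (!demo_link_bhash2(sk, new_tb2))     /* existing bucket reused */
                kmem_cache_free(t->bucket_cachep, new_tb2);
        spin_unlock(&t->new_bhash2_lock);

        spin_unlock_bh(&t->bhash_lock);
        return 0;
}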
+ */ + spin_lock_bh(&head->lock); - spin_lock_bh(&head2->lock); + spin_lock(&head2->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); + spin_unlock(&head2->lock); + + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; @@ -893,15 +939,29 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc } sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; - spin_unlock_bh(&head2->lock); + spin_unlock(&head2->lock); + + spin_unlock_bh(&head->lock); if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); return 0; } + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +{ + return __inet_bhash2_update_saddr(sk, saddr, family, false); +} EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); +void inet_bhash2_reset_saddr(struct sock *sk) +{ + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + __inet_bhash2_update_saddr(sk, NULL, 0, true); +} +EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b512390b3cf..e880ce77322a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; + } else { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) + IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f8e176c77d1c..b3cc416ed292 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + WRITE_ONCE(ct->mark, hash); break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: @@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) #ifdef DEBUG nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); if (!clusterip_responsible(cipinfo->config, hash)) { pr_debug("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bde333b24837..bb9854c2b7a1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -138,7 +138,7 @@ next_port: fail: spin_unlock(&ping_table.lock); - return 1; + return -EADDRINUSE; } EXPORT_SYMBOL_GPL(ping_get_port); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4a69c5fcfedc..001947136b0a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); @@ -4465,11 +4464,8 @@ bool tcp_alloc_md5sig_pool(void) if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool_populated) { + if 
(!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); - if (tcp_md5sig_pool_populated) - static_branch_inc(&tcp_md5_needed); - } mutex_unlock(&tcp_md5sig_mutex); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0ae291e53eab..1efacbe948da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6845,7 +6845,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) xchg(&queue->synflood_warned, 1) == 0) { if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) { net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n", - proto, &sk->sk_v6_rcv_saddr, + proto, inet6_rcv_saddr(sk), sk->sk_num, msg); } else { net_info_ratelimited("%s: Possible SYN flooding on port %pI4:%u. %s.\n", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f0343538d1f8..7fae586405cf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_timewait_death_row *tcp_death_row; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct ip_options_rcu *inet_opt; struct net *net = sock_net(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!inet->inet_saddr) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { @@ -343,6 +331,7 @@ failure: * if necessary. */ tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; @@ -1064,7 +1053,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) * We need to maintain these in the sk structure. 
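[Annotation] tcp_md5_needed becomes a deferred static key just below: the last key holder no longer patches the branch off immediately; the HZ timeout batches the disable so short-lived MD5 sockets do not trigger repeated code patching. A minimal usage sketch (demo_* names are illustrative; the bool return of static_branch_inc() matches how this series uses it):

#include <linux/jump_label_ratelimit.h>

/* Flips on with the first user, flips off at most once per HZ
 * after the last user is gone.
 */
static DEFINE_STATIC_KEY_DEFERRED_FALSE(demo_needed, HZ);

static int demo_add_user(void)
{
        if (!static_branch_inc(&demo_needed.key))
                return -EUSERS;         /* reference counter saturated */
        return 0;
}

static void demo_del_user(void)
{
        static_branch_slow_dec_deferred(&demo_needed);
}

static bool demo_fast_path_enabled(void)
{
        return static_branch_unlikely(&demo_needed.key);
}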
*/ -DEFINE_STATIC_KEY_FALSE(tcp_md5_needed); +DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); EXPORT_SYMBOL(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) @@ -1172,10 +1161,25 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, } EXPORT_SYMBOL(tcp_v4_md5_lookup); +static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_info *md5sig; + + md5sig = kmalloc(sizeof(*md5sig), gfp); + if (!md5sig) + return -ENOMEM; + + sk_gso_disable(sk); + INIT_HLIST_HEAD(&md5sig->head); + rcu_assign_pointer(tp->md5sig_info, md5sig); + return 0; +} + /* This can be called on a newly created socket, from other files */ -int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, u8 prefixlen, int l3index, u8 flags, - const u8 *newkey, u8 newkeylen, gfp_t gfp) +static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, u8 prefixlen, int l3index, u8 flags, + const u8 *newkey, u8 newkeylen, gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; @@ -1204,15 +1208,6 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); - if (!md5sig) { - md5sig = kmalloc(sizeof(*md5sig), gfp); - if (!md5sig) - return -ENOMEM; - - sk_gso_disable(sk); - INIT_HLIST_HEAD(&md5sig->head); - rcu_assign_pointer(tp->md5sig_info, md5sig); - } key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) @@ -1234,8 +1229,59 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } + +int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, u8 prefixlen, int l3index, u8 flags, + const u8 *newkey, u8 newkeylen) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { + if (tcp_md5sig_info_add(sk, GFP_KERNEL)) + return -ENOMEM; + + if (!static_branch_inc(&tcp_md5_needed.key)) { + struct tcp_md5sig_info *md5sig; + + md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); + rcu_assign_pointer(tp->md5sig_info, NULL); + kfree_rcu(md5sig); + return -EUSERS; + } + } + + return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, + newkey, newkeylen, GFP_KERNEL); +} EXPORT_SYMBOL(tcp_md5_do_add); +int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, + int family, u8 prefixlen, int l3index, + struct tcp_md5sig_key *key) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { + if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) + return -ENOMEM; + + if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { + struct tcp_md5sig_info *md5sig; + + md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); + net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); + rcu_assign_pointer(tp->md5sig_info, NULL); + kfree_rcu(md5sig); + return -EUSERS; + } + } + + return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, + key->flags, key->key, key->keylen, + sk_gfp_mask(sk, GFP_ATOMIC)); +} +EXPORT_SYMBOL(tcp_md5_key_copy); + int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) { @@ -1322,7 +1368,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; return 
tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, - cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + cmd.tcpm_key, cmd.tcpm_keylen); } static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, @@ -1573,14 +1619,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, addr = (union tcp_md5_addr *)&newinet->inet_daddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); if (key) { - /* - * We're using one, so create a matching key - * on the newsk structure. If we fail to get - * memory, then we end up not copying the key - * across. Shucks. - */ - tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags, - key->key, key->keylen, GFP_ATOMIC); + if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key)) + goto put_and_exit; sk_gso_disable(newsk); } #endif @@ -2272,6 +2312,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_clear_md5_list(sk); kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); tp->md5sig_info = NULL; + static_branch_slow_dec_deferred(&tcp_md5_needed); } #endif diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c375f603a16c..e002f2e1d4f2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -240,6 +240,40 @@ kill: } EXPORT_SYMBOL(tcp_timewait_state_process); +static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) +{ +#ifdef CONFIG_TCP_MD5SIG + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; + + /* + * The timewait bucket does not have the key DB from the + * sock structure. We just make a quick copy of the + * md5 key being used (if indeed we are using one) + * so the timewait ack generating code has the key. + */ + tcptw->tw_md5_key = NULL; + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return; + + key = tp->af_specific->md5_lookup(sk, sk); + if (key) { + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); + if (!tcptw->tw_md5_key) + return; + if (!tcp_alloc_md5sig_pool()) + goto out_free; + if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) + goto out_free; + } + return; +out_free: + WARN_ON_ONCE(1); + kfree(tcptw->tw_md5_key); + tcptw->tw_md5_key = NULL; +#endif +} + /* * Move a socket to time-wait or dead fin-wait-2 state. */ @@ -282,26 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } #endif -#ifdef CONFIG_TCP_MD5SIG - /* - * The timewait bucket does not have the key DB from the - * sock structure. We just make a quick copy of the - * md5 key being used (if indeed we are using one) - * so the timewait ack generating code has the key. - */ - do { - tcptw->tw_md5_key = NULL; - if (static_branch_unlikely(&tcp_md5_needed)) { - struct tcp_md5sig_key *key; - - key = tp->af_specific->md5_lookup(sk, sk); - if (key) { - tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); - } - } - } while (0); -#endif + tcp_time_wait_init(sk, tcptw); /* Get the TIME_WAIT timeout firing. 
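[Annotation] tcp_time_wait_init() above replaces the old BUG_ON() with step-by-step unwinding: the key copy, the MD5 pool reference and the static-key reference are acquired in order and released in reverse if any step fails. Reduced to a hypothetical helper (the real function additionally warns and stores the result in tcptw->tw_md5_key):

static struct tcp_md5sig_key *demo_tw_md5_copy(const struct tcp_md5sig_key *key)
{
        struct tcp_md5sig_key *copy;

        copy = kmemdup(key, sizeof(*key), GFP_ATOMIC);
        if (!copy)
                return NULL;            /* tw_md5_key simply stays NULL */
        if (!tcp_alloc_md5sig_pool())
                goto out_free;          /* pool allocation failed */
        if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key))
                goto out_free;          /* key counter saturated */
        return copy;

out_free:
        kfree(copy);
        return NULL;
}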
*/ if (timeo < rto) @@ -337,11 +352,13 @@ EXPORT_SYMBOL(tcp_time_wait); void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed)) { + if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) + if (twsk->tw_md5_key) { kfree_rcu(twsk->tw_md5_key, rcu); + static_branch_slow_dec_deferred(&tcp_md5_needed); + } } #endif } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 894410dc9293..71d01cf3c13e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -766,7 +766,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed.key) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { @@ -922,7 +922,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed.key) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1fb7d1ed1cb1..9592fe3e444a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -240,7 +240,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; struct net *net = sock_net(sk); - int error = 1; + int error = -EADDRINUSE; if (!snum) { DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index bc3a043a5d5c..029219749785 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -624,6 +624,8 @@ __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, continue; nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); + if (!nest) + return -EMSGSIZE; if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, utn->entries[table][j].port) || diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 68075295d587..fee9163382c2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -410,10 +410,10 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Make sure we are allowed to bind here. 
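[Annotation] The __inet6_bind() hunk continuing below matches the IPv4 change earlier in this series: ->get_port() now reports 0 or a negative errno (ping and UDP above return -EADDRINUSE directly), so callers forward the real reason instead of hard-coding one. Sketch of the resulting caller pattern (demo_bind() is hypothetical):

static int demo_bind(struct sock *sk, unsigned short snum)
{
        int err = sk->sk_prot->get_port(sk, snum);

        if (err) {
                inet_reset_saddr(sk);
                return err;     /* previously: hard-coded -EADDRINUSE */
        }
        return 0;
}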
*/ if (snum || !(inet->bind_address_no_port || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { - if (sk->sk_prot->get_port(sk, snum)) { + err = sk->sk_prot->get_port(sk, snum); + if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); - err = -EADDRINUSE; goto out; } if (!(flags & BIND_FROM_BPF)) { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 79d43548279c..75c02992c520 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -56,12 +56,11 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, __be32 seq; __be32 spi; int nhoff; - int err; if (!pskb_pull(skb, offset)) return NULL; - if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0) goto out; xo = xfrm_offload(skb); @@ -346,6 +345,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f676be14e6b6..11b736a76bd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ @@ -359,6 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); + inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; @@ -677,12 +665,11 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, l3index, flags, - cmd.tcpm_key, cmd.tcpm_keylen, - GFP_KERNEL); + cmd.tcpm_key, cmd.tcpm_keylen); return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, AF_INET6, prefixlen, l3index, flags, - cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); + cmd.tcpm_key, cmd.tcpm_keylen); } static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, @@ -1377,14 +1364,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); if (key) { - /* We're using one, so create a matching key - * on the newsk structure. If we fail to get - * memory, then we end up not copying the key - * across. Shucks. 
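[Annotation] In tcp_v6_syn_recv_sock() above, a failed MD5 key copy is no longer shrugged off ("Shucks"): a child that cannot carry the key would only fail signature checks later, so it is torn down immediately. The new failure path, isolated into a hypothetical helper built from the calls visible in the hunk:

static struct sock *demo_md5_copy_or_close(struct sock *newsk,
                                           const union tcp_md5_addr *addr,
                                           int l3index,
                                           struct tcp_md5sig_key *key)
{
        if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
                /* no key, no usable child: unwind instead of limping on */
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                return NULL;
        }
        return newsk;
}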
- */ - tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, - AF_INET6, 128, l3index, key->flags, key->key, key->keylen, - sk_gfp_mask(sk, GFP_ATOMIC)); + const union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto out; + } } #endif diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a4b0e49ec92..ea435eba3053 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,9 +287,13 @@ int __init xfrm6_init(void) if (ret) goto out_state; - register_pernet_subsys(&xfrm6_net_ops); + ret = register_pernet_subsys(&xfrm6_net_ops); + if (ret) + goto out_protocol; out: return ret; +out_protocol: + xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: diff --git a/net/key/af_key.c b/net/key/af_key.c index c85df5b958d2..2bdbcec781cd 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1377,13 +1377,13 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ max_spi = range->sadb_spirange_max; } - err = verify_spi_info(x->id.proto, min_spi, max_spi); + err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL); if (err) { xfrm_state_put(x); return err; } - err = xfrm_alloc_spi(x, min_spi, max_spi); + err = xfrm_alloc_spi(x, min_spi, max_spi, NULL); resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { @@ -2626,7 +2626,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net, NULL, 0); + kma ? &k : NULL, net, NULL, 0, NULL); out: return err; @@ -2905,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2923,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2934,16 +2934,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2971,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } -static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -3019,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy 
*xp, const struct km_event *c) @@ -3150,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3161,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } - skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -3224,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { @@ -3382,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; - hdr->sadb_msg_seq = x->km.seq = get_acqseq(); + hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 754fdda8a5f5..9a1415fe3fa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1474,11 +1474,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1507,8 +1508,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1522,16 +1521,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2e66598fac79..e8ebd343e2bf 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 6e7df47c9584..a3829ce548f9 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,7 
@@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o + mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c new file mode 100644 index 000000000000..d237d142171c --- /dev/null +++ b/net/mptcp/fastopen.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* MPTCP Fast Open Mechanism + * + * Copyright (c) 2021-2022, Dmytro SHYTYI + */ + +#include "protocol.h" + +void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, + struct request_sock *req) +{ + struct sock *ssk = subflow->tcp_sock; + struct sock *sk = subflow->conn; + struct sk_buff *skb; + struct tcp_sock *tp; + + tp = tcp_sk(ssk); + + subflow->is_mptfo = 1; + + skb = skb_peek(&ssk->sk_receive_queue); + if (WARN_ON_ONCE(!skb)) + return; + + /* dequeue the skb from sk receive queue */ + __skb_unlink(skb, &ssk->sk_receive_queue); + skb_ext_reset(skb); + skb_orphan(skb); + + /* We copy the fastopen data, but that don't belong to the mptcp sequence + * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset() + */ + tp->copied_seq += skb->len; + subflow->ssn_offset += skb->len; + + /* initialize a dummy sequence number, we will update it at MPC + * completion, if needed + */ + MPTCP_SKB_CB(skb)->map_seq = -skb->len; + MPTCP_SKB_CB(skb)->end_seq = 0; + MPTCP_SKB_CB(skb)->offset = 0; + MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + + mptcp_data_lock(sk); + + mptcp_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + + sk->sk_data_ready(sk); + + mptcp_data_unlock(sk); +} + +void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) +{ + struct sock *sk = (struct sock *)msk; + struct sk_buff *skb; + + mptcp_data_lock(sk); + skb = skb_peek_tail(&sk->sk_receive_queue); + if (skb) { + WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); + pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk, + MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, + MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); + MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; + MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; + } + + pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); + mptcp_data_unlock(sk); +} diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 30d289044e71..5ded85e2c374 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, { u8 subtype = *ptr >> 4; int expected_opsize; + u16 subopt; u8 version; u8 flags; u8 i; @@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA; else expected_opsize = TCPOLEN_MPTCP_MPC_ACK; + subopt = OPTION_MPTCP_MPC_ACK; } else { - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) { expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK; - else + subopt = OPTION_MPTCP_MPC_SYNACK; + } else { expected_opsize = TCPOLEN_MPTCP_MPC_SYN; + subopt = OPTION_MPTCP_MPC_SYN; + } } /* Cfr RFC 8684 Section 3.3.0: @@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0); - mp_opt->suboptions |= OPTIONS_MPTCP_MPC; + 
mp_opt->suboptions |= subopt; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { mp_opt->sndr_key = get_unaligned_be64(ptr); ptr += 8; @@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && !subflow->request_join) tcp_send_ack(ssk); - goto fully_established; + goto check_notify; } /* we must process OoO packets before the first subflow is fully @@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) { if (subflow->mp_join) goto reset; + if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) + goto set_fully_established; return subflow->mp_capable; } - if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) { + if (subflow->remote_key_valid && + (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); - goto fully_established; + goto check_notify; } /* If the first established packet does not contain MP_CAPABLE + data @@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); +set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); mptcp_subflow_fully_established(subflow, mp_opt); -fully_established: +check_notify: /* if the subflow is not already linked into the conn_list, we can't * notify the PM: this subflow is still on the listener queue * and the PM possibly acquiring the subflow lock could race with diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9813ed0fde9b..eef69d0e44ec 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -912,10 +912,14 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, */ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) pernet->next_id = 1; - if (pernet->addrs >= MPTCP_PM_ADDR_MAX) + if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { + ret = -ERANGE; goto out; - if (test_bit(entry->addr.id, pernet->id_bitmap)) + } + if (test_bit(entry->addr.id, pernet->id_bitmap)) { + ret = -EBUSY; goto out; + } /* do not insert duplicate address, differentiate on port only * singled addresses @@ -929,8 +933,10 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, * endpoint is an implicit one and the user-space * did not provide an endpoint id */ - if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) + if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { + ret = -EEXIST; goto out; + } if (entry->addr.id) goto out; @@ -1003,16 +1009,12 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return err; msk = mptcp_sk(entry->lsk->sk); - if (!msk) { - err = -EINVAL; - goto out; - } + if (!msk) + return -EINVAL; ssock = __mptcp_nmpc_socket(msk); - if (!ssock) { - err = -EINVAL; - goto out; - } + if (!ssock) + return -EINVAL; mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1020,22 +1022,16 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, addrlen = sizeof(struct sockaddr_in6); #endif err = kernel_bind(ssock, (struct sockaddr *)&addr, 
addrlen); - if (err) { - pr_warn("kernel_bind error, err=%d", err); - goto out; - } + if (err) + return err; err = kernel_listen(ssock, backlog); - if (err) { - pr_warn("kernel_listen error, err=%d", err); - goto out; - } + if (err) + return err; - return 0; + mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); -out: - sock_release(entry->lsk); - return err; + return 0; } int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) @@ -1327,7 +1323,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - entry = kmalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); + entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); return -ENOMEM; @@ -1337,23 +1333,22 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) if (entry->addr.port) { ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); if (ret) { - GENL_SET_ERR_MSG(info, "create listen socket error"); - kfree(entry); - return ret; + GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); + goto out_free; } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) { - GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); - return ret; + GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); + goto out_free; } mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); - return 0; + +out_free: + __mptcp_pm_release_addr_entry(entry); + return ret; } int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, @@ -2159,6 +2154,58 @@ nla_put_failure: kfree_skb(skb); } +void mptcp_event_pm_listener(const struct sock *ssk, + enum mptcp_event_type event) +{ + const struct inet_sock *issk = inet_sk(ssk); + struct net *net = sock_net(ssk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event); + if (!nlh) + goto nla_put_failure; + + if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) + goto nla_put_failure; + + if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) + goto nla_put_failure; + + switch (ssk->sk_family) { + case AF_INET: + if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *np = inet6_sk(ssk); + + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) + goto nla_put_failure; + break; + } +#endif + default: + WARN_ON_ONCE(1); + goto nla_put_failure; + } + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, GFP_KERNEL); + return; + +nla_put_failure: + kfree_skb(skb); +} + void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { @@ -2204,6 +2251,9 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, if (mptcp_event_sub_closed(skb, msk, ssk) < 0) goto nla_put_failure; break; + case MPTCP_EVENT_LISTENER_CREATED: + case MPTCP_EVENT_LISTENER_CLOSED: + break; } genlmsg_end(skb, nlh); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3796d1bfef6b..f6f93957275b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -36,15 +36,6 @@ struct mptcp6_sock { }; #endif -struct mptcp_skb_cb { - u64 map_seq; - u64 
end_seq; - u32 offset; - u8 has_rxtstamp:1; -}; - -#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) - enum { MPTCP_CMSG_TS = BIT(0), MPTCP_CMSG_INQ = BIT(1), @@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb) mptcp_rmem_uncharge(sk, len); } -static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) +void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); skb->sk = sk; @@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int ret = 0; long timeo; - /* we don't support FASTOPEN yet */ - if (msg->msg_flags & MSG_FASTOPEN) - return -EOPNOTSUPP; - /* silently ignore everything else */ - msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL; + msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); - if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { + if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect || + msg->msg_flags & MSG_FASTOPEN))) { int copied_syn = 0; ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn); @@ -2352,12 +2340,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - /* if we are invoked by the msk cleanup code, the subflow is - * already orphaned - */ - if (ssk->sk_socket) - sock_orphan(ssk); - + sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2372,6 +2355,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, tcp_set_state(ssk, TCP_CLOSE); mptcp_subflow_queue_clean(ssk); inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); } __tcp_close(ssk, 0); @@ -2940,7 +2924,11 @@ cleanup: if (ssk == msk->first) subflow->fail_tout = 0; - sock_orphan(ssk); + /* detach from the parent socket, but allow data_ready to + * push incoming data into the mptcp stack, to properly ack it + */ + ssk->sk_socket = NULL; + ssk->sk_wq = NULL; unlock_sock_fast(ssk, slow); } sock_orphan(sk); @@ -3049,7 +3037,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); struct mptcp_sock *msk; - u64 ack_seq; if (!nsk) return NULL; @@ -3075,15 +3062,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; - if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { - msk->can_ack = true; - msk->remote_key = mp_opt->sndr_key; - mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); - ack_seq++; - WRITE_ONCE(msk->ack_seq, ack_seq); - atomic64_set(&msk->rcv_wnd_sent, ack_seq); - } - sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); @@ -3356,7 +3334,6 @@ void mptcp_finish_connect(struct sock *ssk) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk; struct sock *sk; - u64 ack_seq; subflow = mptcp_subflow_ctx(ssk); sk = subflow->conn; @@ -3364,22 +3341,16 @@ void mptcp_finish_connect(struct sock *ssk) pr_debug("msk=%p, token=%u", sk, subflow->token); - mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); - ack_seq++; - subflow->map_seq = ack_seq; + subflow->map_seq = subflow->iasn; subflow->map_subflow_seq = 1; /* the socket is not connected yet, no msk/subflow ops can access/race * accessing the field below */ - WRITE_ONCE(msk->remote_key, 
subflow->remote_key); WRITE_ONCE(msk->local_key, subflow->local_key); WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); - WRITE_ONCE(msk->ack_seq, ack_seq); - WRITE_ONCE(msk->can_ack, 1); WRITE_ONCE(msk->snd_una, msk->write_seq); - atomic64_set(&msk->rcv_wnd_sent, ack_seq); mptcp_pm_new_connection(msk, ssk, 0); @@ -3677,6 +3648,8 @@ static int mptcp_listen(struct socket *sock, int backlog) if (!err) mptcp_copy_inaddrs(sock->sk, ssock->sk); + mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); + unlock: release_sock(sock->sk); return err; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6a09ab99a12d..955fb3d88eb3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -126,6 +126,15 @@ #define MPTCP_CONNECTED 6 #define MPTCP_RESET_SCHEDULER 7 +struct mptcp_skb_cb { + u64 map_seq; + u64 end_seq; + u32 offset; + u8 has_rxtstamp:1; +}; + +#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) + static inline bool before64(__u64 seq1, __u64 seq2) { return (__s64)(seq1 - seq2) < 0; @@ -467,17 +476,22 @@ struct mptcp_subflow_context { send_fastclose : 1, send_infinite_map : 1, rx_eof : 1, - can_ack : 1, /* only after processing the remote a key */ + remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1, /* local_id is correctly initialized */ - valid_csum_seen : 1; /* at least one csum validated */ + valid_csum_seen : 1, /* at least one csum validated */ + is_mptfo : 1, /* subflow is doing TFO */ + __unused : 8; enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; u32 local_nonce; u32 remote_token; - u8 hmac[MPTCPOPT_HMAC_LEN]; + union { + u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ + u64 iasn; /* initial ack sequence number, MPC subflows only */ + }; u8 local_id; u8 remote_id; u8 reset_seen:1; @@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - struct mptcp_options_received *mp_opt); + const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); @@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); +void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); @@ -824,8 +839,15 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); +void mptcp_event_pm_listener(const struct sock *ssk, + enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); +void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); +void mptcp_fastopen_subflow_synack_set_params(struct 
mptcp_subflow_context *subflow, + struct request_sock *req); + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f62f6483ef77..a47423ebb33a 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -559,7 +559,9 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return true; } @@ -569,9 +571,6 @@ static bool mptcp_supported_sockopt(int level, int optname) /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ - /* TCP_FASTOPEN_KEY, TCP_FASTOPEN are not supported because - * fastopen for the listener side is currently unsupported - */ } return false; } @@ -801,7 +800,9 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); return 0; + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); @@ -1166,7 +1167,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_INFO: case TCP_CC_INFO: case TCP_DEFER_ACCEPT: + case TCP_FASTOPEN: case TCP_FASTOPEN_CONNECT: + case TCP_FASTOPEN_KEY: case TCP_FASTOPEN_NO_COOKIE: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 437a283ba6ea..29904303f5c2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -307,7 +307,48 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, return NULL; } +static void subflow_prep_synack(const struct sock *sk, struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct inet_request_sock *ireq = inet_rsk(req); + + /* clear tstamp_ok, as needed depending on cookie */ + if (foc && foc->len > -1) + ireq->tstamp_ok = 0; + + if (synack_type == TCP_SYNACK_FASTOPEN) + mptcp_fastopen_subflow_synack_set_params(subflow, req); +} + +static int subflow_v4_send_synack(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) +{ + subflow_prep_synack(sk, req, foc, synack_type); + + return tcp_request_sock_ipv4_ops.send_synack(sk, dst, fl, req, foc, + synack_type, syn_skb); +} + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) +{ + subflow_prep_synack(sk, req, foc, synack_type); + + return tcp_request_sock_ipv6_ops.send_synack(sk, dst, fl, req, foc, + synack_type, syn_skb); +} + static struct dst_entry *subflow_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, @@ -392,11 +433,33 @@ static void mptcp_set_connected(struct sock *sk) mptcp_data_unlock(sk); } +static void subflow_set_remote_key(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) 
+{ + /* active MPC subflow will reach here multiple times: + * at subflow_finish_connect() time and at 4th ack time + */ + if (subflow->remote_key_valid) + return; + + subflow->remote_key_valid = 1; + subflow->remote_key = mp_opt->sndr_key; + mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn); + subflow->iasn++; + + WRITE_ONCE(msk->remote_key, subflow->remote_key); + WRITE_ONCE(msk->ack_seq, subflow->iasn); + WRITE_ONCE(msk->can_ack, true); + atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; + struct mptcp_sock *msk; subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -404,6 +467,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (subflow->conn_finished) return; + msk = mptcp_sk(parent); mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; @@ -416,19 +480,16 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); mptcp_do_fallback(sk); - pr_fallback(mptcp_sk(subflow->conn)); + pr_fallback(msk); goto fallback; } if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD) - WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); + WRITE_ONCE(msk->csum_enabled, true); if (mp_opt.deny_join_id0) - WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); + WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; - subflow->can_ack = 1; - subflow->remote_key = mp_opt.sndr_key; - pr_debug("subflow=%p, remote_key=%llu", subflow, - subflow->remote_key); + subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); mptcp_set_connected(parent); @@ -466,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); - if (subflow_use_different_dport(mptcp_sk(parent), sk)) { + if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(parent)->inet_dport)); @@ -474,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_rcv_space_init(mptcp_sk(parent), sk); + mptcp_rcv_space_init(msk, sk); mptcp_set_connected(parent); } return; @@ -637,14 +698,16 @@ static void subflow_drop_ctx(struct sock *ssk) } void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - struct mptcp_options_received *mp_opt) + const struct mptcp_options_received *mp_opt) { struct mptcp_sock *msk = mptcp_sk(subflow->conn); - subflow->remote_key = mp_opt->sndr_key; + subflow_set_remote_key(msk, subflow, mp_opt); subflow->fully_established = 1; - subflow->can_ack = 1; WRITE_ONCE(msk->fully_established, true); + + if (subflow->is_mptfo) + mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -760,7 +823,7 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTIONS_MPTCP_MPC) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) mptcp_subflow_fully_established(ctx, &mp_opt); } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -1198,16 +1261,8 @@ static bool 
subflow_check_data_avail(struct sock *ssk) if (WARN_ON_ONCE(!skb)) goto no_data; - /* if msk lacks the remote key, this subflow must provide an - * MP_CAPABLE-based mapping - */ - if (unlikely(!READ_ONCE(msk->can_ack))) { - if (!subflow->mpc_map) - goto fallback; - WRITE_ONCE(msk->remote_key, subflow->remote_key); - WRITE_ONCE(msk->ack_seq, subflow->map_seq); - WRITE_ONCE(msk->can_ack, true); - } + if (unlikely(!READ_ONCE(msk->can_ack))) + goto fallback; old_ack = READ_ONCE(msk->ack_seq); ack_seq = mptcp_subflow_get_mapped_dsn(subflow); @@ -1480,6 +1535,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, &flags, &ifindex); + subflow->remote_key_valid = 1; subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; @@ -1747,16 +1803,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow, do_cancel_work; + bool do_cancel_work; sock_hold(sk); - slow = lock_sock_fast_nested(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; do_cancel_work = __mptcp_close(sk, 0); - unlock_sock_fast(sk, slow); + release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); sock_put(sk); @@ -1873,6 +1929,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; new_ctx->fully_established = 1; + new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; new_ctx->remote_id = subflow_req->remote_id; new_ctx->token = subflow_req->token; @@ -1929,6 +1986,7 @@ void __init mptcp_subflow_init(void) subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; + subflow_request_sock_ipv4_ops.send_synack = subflow_v4_send_synack; subflow_specific = ipv4_specific; subflow_specific.conn_request = subflow_v4_conn_request; @@ -1942,6 +2000,7 @@ void __init mptcp_subflow_init(void) #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; + subflow_request_sock_ipv6_ops.send_synack = subflow_v6_send_synack; subflow_v6_specific = ipv6_specific; subflow_v6_specific.conn_request = subflow_v6_conn_request; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3adc291d9ce1..7499192af586 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -916,7 +916,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; - else if (h->bucketsize < multi) + else if (h->bucketsize <= multi) h->bucketsize += AHASH_INIT_SIZE; #endif if (n->size >= AHASH_MAX(h)) { diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index dd30c03d5a23..75d556d71652 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -151,18 +151,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) return -ERANGE; - if (retried) { + if (retried) ip = ntohl(h->next.ip); - e.ip = htonl(ip); - } for (; ip <= ip_to;) { + e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return 
ret; ip += hosts; - e.ip = htonl(ip); - if (e.ip == 0) + if (ip == 0) return 0; ret = 0; diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 8639e7efd0e2..24002bc61e07 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -191,19 +191,16 @@ BTF_ID(struct, nf_conn___init) /* Check writes into `struct nf_conn` */ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, - int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag) { - const struct btf_type *ncit; - const struct btf_type *nct; + const struct btf_type *ncit, *nct, *t; size_t end; - ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]); - nct = btf_type_by_id(btf, btf_nf_conn_ids[0]); - + ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]); + nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]); + t = btf_type_by_id(reg->btf, reg->btf_id); if (t != nct && t != ncit) { bpf_log(log, "only read is supported\n"); return -EACCES; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 057ebdcc25d7..b96338b4bf36 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1777,7 +1777,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7562b215b932..d71150a40fb0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,9 +328,9 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) { - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -543,7 +543,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -722,6 +722,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; + u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -826,8 +827,9 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & (1 << IPCT_MARK) || ct->mark) - && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if ((events & (1 << IPCT_MARK) || mark) && + ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1154,7 +1156,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((ct->mark & filter->mark.mask) != filter->mark.val) + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif status = (u32)READ_ONCE(ct->status); @@ -2002,9 +2004,9 @@ 
static void ctnetlink_change_mark(struct nf_conn *ct, mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; + if (newmark != READ_ONCE(ct->mark)) + WRITE_ONCE(ct->mark, newmark); } #endif @@ -2669,6 +2671,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; + u32 mark; zone = nf_ct_zone(ct); @@ -2730,7 +2733,8 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if (mark && ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ffe84c5a82c..bca839ab1ae8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) - seq_printf(s, "mark=%u ", ct->mark); + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b04645ced89b..00b522890d77 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1098,6 +1098,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_cb *block_cb, *next; int err = 0; + down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); @@ -1112,6 +1113,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, WARN_ON_ONCE(1); err = -EOPNOTSUPP; } + up_write(&flowtable->flow_block_lock); return err; } @@ -1168,7 +1170,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); + down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + up_write(&flowtable->flow_block_lock); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2fa52b8d5ce1..6269b0d9977c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6006,7 +6006,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &timeout); if (err) return err; - } else if (set->flags & NFT_SET_TIMEOUT) { + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = set->timeout; } @@ -6072,7 +6073,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EOPNOTSUPP; goto err_set_elem_expr; } - } else if (set->num_exprs > 0) { + } else if (set->num_exprs > 0 && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a0696d7ea10c..c68e2151defe 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -98,7 +98,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - *dest = ct->mark; + *dest = 
READ_ONCE(ct->mark); return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -297,8 +297,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - if (ct->mark != value) { - ct->mark = value; + if (READ_ONCE(ct->mark) != value) { + WRITE_ONCE(ct->mark, value); nf_conntrack_event_cache(IPCT_MARK, ct); } break; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e5ebc0810675..ad3c033db64e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; + u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) @@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) switch (info->mode) { case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + oldmark = READ_ONCE(ct->mark); + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; @@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) else new_targetmark <<= info->shift_bits; - newmark = (ct->mark & ~info->ctmask) ^ + newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ new_targetmark; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - new_targetmark = (ct->mark & info->ctmask); + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else @@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - return ((ct->mark & info->mask) == info->mark) ^ info->invert; + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9ebdf3262015..d73091f6bb0f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2514,7 +2514,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, if (!nlmsg_append(skb, nlmsg_len(nlh))) goto err_bad_put; - memcpy(errmsg->msg.nlmsg_data, nlh->nlmsg_data, + memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh), nlmsg_len(nlh)); } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index dbe5258e13ff..fff755dde30d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -543,7 +543,7 @@ static int nci_open_device(struct nci_dev *ndev) skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); - ndev->flags = 0; + ndev->flags &= BIT(NCI_UNREG); } done: diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index aa5e712adf07..3d36ea5701f0 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_plen(skb->data)); conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return; + } /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); diff --git 
a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4348321856af..d78f0fc4337d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -152,7 +152,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) static u32 ovs_ct_get_mark(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - return ct ? ct->mark : 0; + return ct ? READ_ONCE(ct->mark) : 0; #else return 0; #endif @@ -340,9 +340,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) u32 new_mark; - new_mark = ct_mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 44f20cf8a0c0..41c4ccc3a5d6 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2294,8 +2294,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3521,8 +3520,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0273a9029229..f5c538ce3e23 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -405,6 +405,7 @@ enum rxrpc_conn_proto_state { struct rxrpc_bundle { struct rxrpc_conn_parameters params; refcount_t ref; + atomic_t active; /* Number of active users */ unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f020f308ed9e..f11c97e28d2a 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. 
The @@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle) dead = __refcount_dec_and_test(&bundle->ref, &r); - _debug("PUT B=%x %d", d, r); + _debug("PUT B=%x %d", d, r - 1); if (dead) rxrpc_free_bundle(bundle); } @@ -338,6 +341,7 @@ found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); return bundle; @@ -436,6 +440,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; + atomic_inc(&bundle->active); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -726,6 +731,7 @@ granted_channel: smp_rmb(); out_put_bundle: + rxrpc_deactivate_bundle(bundle); rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); @@ -901,9 +907,8 @@ out: static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; - struct rxrpc_local *local = bundle->params.local; unsigned int bindex; - bool need_drop = false, need_put = false; + bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -922,15 +927,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) } spin_unlock(&bundle->channel_lock); - /* If there are no more connections, remove the bundle */ - if (!bundle->avail_chans) { - _debug("maybe unbundle"); - spin_lock(&local->client_bundles_lock); + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) - if (bundle->conns[i]) - break; - if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -940,10 +952,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_put) rxrpc_put_bundle(bundle); } - - if (need_drop) - rxrpc_put_connection(conn); - _leave(""); } /* diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1e8ab4749c6c..4662a6ce8a7e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE + depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. 
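A note on the dependency rewrite just above: "(!NF_NAT || NF_NAT)" is the standard Kconfig idiom for depending on an optional tristate. The expression evaluates to y when NF_NAT is y or n, but only to m when NF_NAT is m, so NET_ACT_CT is capped at m whenever nf_nat is modular: a built-in act_ct can then never reference symbols that exist only in the nf_nat module, while act_ct stays selectable when NF_NAT is switched off entirely. A minimal sketch of the pattern, with illustrative symbol names (FOO and BAR are not from this patch):

    config FOO
            tristate "example consumer of an optional tristate BAR"
            # (!BAR || BAR) is y for BAR=y or BAR=n, but m for BAR=m,
            # so FOO can be built-in only if BAR is built-in or absent.
            depends on (!BAR || BAR)

The conntrack hunks earlier in this series and the tc action hunks that follow (act_connmark, act_ct, act_ctinfo, together with the nft_ct, xt_connmark and openvswitch changes above) all make one and the same conversion: ct->mark is read and written without a lock from packet-processing, netlink and tc contexts, so plain accesses become READ_ONCE()/WRITE_ONCE() to prevent load/store tearing under concurrent updates. A condensed sketch of the read-modify-write shape these hunks converge on (not a function from the patch; kernel conntrack context assumed):

    /* Lockless ct->mark update: READ_ONCE()/WRITE_ONCE() stop the
     * compiler from tearing or re-reading the field; the conntrack
     * event cache is only touched when the mark actually changes.
     */
    static void ct_mark_update(struct nf_conn *ct, u32 value, u32 mask)
    {
            u32 newmark = value | (READ_ONCE(ct->mark) & ~mask);

            if (READ_ONCE(ct->mark) != newmark) {
                    WRITE_ONCE(ct->mark, newmark);
                    nf_conntrack_event_cache(IPCT_MARK, ct);
            }
    }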
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 66b143bb04ac..d41002e4613f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -61,7 +61,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_get(skb, &ctinfo); if (c) { - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; @@ -81,7 +81,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); nf_ct_put(c); out: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index da0b7f665277..dd5ae7551956 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -179,7 +179,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -955,9 +955,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index d4102f0a9abd..eaa02f098d1c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -32,7 +32,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, { u8 dscp, newdscp; - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { @@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct sk_buff *skb) { ca->stats_cpmark_set++; - skb->mark = ct->mark & cp->cpmarkmask; + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, @@ -130,7 +130,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, } if (cp->mode & CTINFO_MODE_DSCP) - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + sched->free_sid(stream, sid); + kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. 
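The sctp_stream_free_ext() helper added above pairs with the scheduler rework in the hunks below: the per-sid state kept by the stream scheduler must be dropped through the new free_sid() callback before the ext block itself is kfree()d, which the old whole-stream free() hook could miss when streams were shrunk or migrated. In sctp_sched_free_sched() further down, memset_after(soute, 0, outq) then wipes everything in the ext that follows the outq member, giving the next scheduler a clean slate, as the in-code comment notes. An abridged sketch of the ops shift, showing only the affected hooks and renamed to mark it as illustration:

    /* Before: one ->free() tore down scheduler state for the whole
     * stream.  After: ->free_sid() tears down one sid's state and is
     * called exactly once per sid from sctp_stream_free_ext().
     */
    struct sctp_sched_ops_sketch {
            int  (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
            void (*free_sid)(struct sctp_stream *stream, __u16 sid);
    };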
@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ad565ed5627..330067002deb 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,7 +46,7 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } -static void sctp_sched_fcfs_free(struct sctp_stream *stream) +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) { } @@ -96,7 +96,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, - .free = sctp_sched_fcfs_free, + .free_sid = sctp_sched_fcfs_free_sid, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, .dequeue_done = sctp_sched_fcfs_dequeue_done, @@ -126,6 +126,23 @@ void sctp_sched_ops_init(void) sctp_sched_ops_rr_init(); } +static void sctp_sched_free_sched(struct sctp_stream *stream) +{ + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + struct sctp_stream_out_ext *soute; + int i; + + sched->unsched_all(stream); + for (i = 0; i < stream->outcnt; i++) { + soute = SCTP_SO(stream, i)->ext; + if (!soute) + continue; + sched->free_sid(stream, i); + /* Give the next scheduler a clean slate. */ + memset_after(soute, 0, outq); + } +} + int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) { @@ -141,18 +158,8 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (sched > SCTP_SS_MAX) return -EINVAL; - if (old) { - old->free(&asoc->stream); - - /* Give the next scheduler a clean slate. 
*/ - for (i = 0; i < asoc->stream.outcnt; i++) { - struct sctp_stream_out_ext *ext = SCTP_SO(&asoc->stream, i)->ext; - - if (!ext) - continue; - memset_after(ext, 0, outq); - } - } + if (old) + sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = n; n->init(&asoc->stream); @@ -176,7 +183,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, return ret; err: - n->free(&asoc->stream); + sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ return ret; diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..42d4800f263d 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,30 +204,22 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } -static void sctp_sched_prio_free(struct sctp_stream *stream) +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) { - struct sctp_stream_priorities *prio, *n; - LIST_HEAD(list); + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; int i; - /* As we don't keep a list of priorities, to avoid multiple - * frees we have to do it in 3 steps: - * 1. unsched everyone, so the lists are free to use in 2. - * 2. build the list of the priorities - * 3. free the list - */ - sctp_sched_prio_unsched_all(stream); + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; for (i = 0; i < stream->outcnt; i++) { - if (!SCTP_SO(stream, i)->ext) - continue; - prio = SCTP_SO(stream, i)->ext->prio_head; - if (prio && list_empty(&prio->prio_sched)) - list_add(&prio->prio_sched, &list); - } - list_for_each_entry_safe(prio, n, &list, prio_sched) { - list_del_init(&prio->prio_sched); - kfree(prio); + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; } + + kfree(prio); } static void sctp_sched_prio_enqueue(struct sctp_outq *q, @@ -323,7 +315,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, - .free = sctp_sched_prio_free, + .free_sid = sctp_sched_prio_free_sid, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, .dequeue_done = sctp_sched_prio_dequeue_done, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..1f235e7f643a 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,9 +90,8 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } -static void sctp_sched_rr_free(struct sctp_stream *stream) +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) { - sctp_sched_rr_unsched_all(stream); } static void sctp_sched_rr_enqueue(struct sctp_outq *q, @@ -177,7 +176,7 @@ static struct sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, - .free = sctp_sched_rr_free, + .free_sid = sctp_sched_rr_free_sid, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, .dequeue_done = sctp_sched_rr_dequeue_done, diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a9035f..d67440de011e 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1971,6 +1971,9 @@ rcv: /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted 
*/ skb_cb->decrypted = 1; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index e8630707901e..e8dcdf267c0c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d92ec92f0b71..e3b427a70398 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + conn_get(con); + spin_unlock_bh(&s->idr_lock); return con; } @@ -467,7 +469,7 @@ static void tipc_topsrv_accept(struct work_struct *work) ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -479,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } @@ -577,17 +579,17 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 8cf405776dde..790bc31cf82e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -327,7 +327,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. 
*/ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ @@ -2519,10 +2520,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index acc8e52a4f5f..771d0fa90ef5 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -231,9 +231,9 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags) +static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) { - return __bpf_xdp_redirect_map(map, ifindex, flags, 0, + return __bpf_xdp_redirect_map(map, index, flags, 0, __xsk_map_lookup_elem); } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f5aafd418af..21269e8f2db4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) } } +static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 seq = xo->seq.low; + + seq += skb_shinfo(skb)->gso_segs; + if (unlikely(seq < xo->seq.low)) + return true; + + return false; +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. 
*/ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 97074f6f2bde..c06e54a10540 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -671,6 +671,7 @@ resume: x->curlft.bytes += skb->len; x->curlft.packets++; + x->lastused = ktime_get_real_seconds(); spin_unlock(&x->lock); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9a5e79a38c67..78cb8d0a6a18 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -209,8 +209,6 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) __skb_pull(skb, hdr_len); memmove(ipv6_hdr(skb), iph, hdr_len); - x->lastused = ktime_get_real_seconds(); - return 0; #else WARN_ON_ONCE(1); @@ -534,6 +532,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) x->curlft.bytes += skb->len; x->curlft.packets++; + x->lastused = ktime_get_real_seconds(); spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e392d8d05e0c..9b9e2765363d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -605,7 +605,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(struct net *net, int total) +static void xfrm_byidx_resize(struct net *net) { unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); @@ -683,7 +683,7 @@ static void xfrm_hash_resize(struct work_struct *work) xfrm_bydst_resize(net, dir); } if (xfrm_byidx_should_resize(net, total)) - xfrm_byidx_resize(net, total); + xfrm_byidx_resize(net); mutex_unlock(&hash_resize_mutex); } @@ -4333,7 +4333,8 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm /* update endpoint address(es) of template(s) */ static int xfrm_policy_migrate(struct xfrm_policy *pol, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct netlink_ext_ack *extack) { struct xfrm_migrate *mp; int i, j, n = 0; @@ -4341,6 +4342,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, write_lock_bh(&pol->lock); if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ + NL_SET_ERR_MSG(extack, "Target policy not found"); write_unlock_bh(&pol->lock); return -ENOENT; } @@ -4372,17 +4374,22 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, return 0; } -static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate, + struct netlink_ext_ack *extack) { int i, j; - if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) + if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) { + NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)"); return -EINVAL; + } for (i = 0; i < num_migrate; i++) { if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || - xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) + xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) { + NL_SET_ERR_MSG(extack, "Addresses in the MIGRATE attribute's list cannot be null"); return -EINVAL; + } /* check if there is any duplicated entry */ for (j = i + 1; j < num_migrate; j++) { @@ -4393,8 +4400,10 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) m[i].proto == m[j].proto && m[i].mode == m[j].mode && m[i].reqid == m[j].reqid && - m[i].old_family == m[j].old_family) + m[i].old_family == m[j].old_family) { + NL_SET_ERR_MSG(extack, "Entries in the MIGRATE attribute's list must be unique"); return 
-EINVAL; + } } } @@ -4404,7 +4413,8 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap, u32 if_id) + struct xfrm_encap_tmpl *encap, u32 if_id, + struct netlink_ext_ack *extack) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -4414,16 +4424,20 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *mp; /* Stage 0 - sanity checks */ - if ((err = xfrm_migrate_check(m, num_migrate)) < 0) + err = xfrm_migrate_check(m, num_migrate, extack); + if (err < 0) goto out; if (dir >= XFRM_POLICY_MAX) { + NL_SET_ERR_MSG(extack, "Invalid policy direction"); err = -EINVAL; goto out; } /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) { + pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); + if (!pol) { + NL_SET_ERR_MSG(extack, "Target policy not found"); err = -ENOENT; goto out; } @@ -4445,7 +4459,8 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 3 - update policy */ - if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) + err = xfrm_policy_migrate(pol, m, num_migrate, extack); + if (err < 0) goto restore_state; /* Stage 4 - delete old state(s) */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9f4d42eb090f..ce56d659c55a 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(xo->seq.low < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; replay_esn->oseq_hi = oseq_hi; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3d2fe7712ac5..9ec481fbfb63 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2017,7 +2017,7 @@ u32 xfrm_get_acqseq(void) } EXPORT_SYMBOL(xfrm_get_acqseq); -int verify_spi_info(u8 proto, u32 min, u32 max) +int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack) { switch (proto) { case IPPROTO_AH: @@ -2026,22 +2026,28 @@ int verify_spi_info(u8 proto, u32 min, u32 max) case IPPROTO_COMP: /* IPCOMP spi is 16-bits. 
*/ - if (max >= 0x10000) + if (max >= 0x10000) { + NL_SET_ERR_MSG(extack, "IPCOMP SPI must be <= 65535"); return -EINVAL; + } break; default: + NL_SET_ERR_MSG(extack, "Invalid protocol, must be one of AH, ESP, IPCOMP"); return -EINVAL; } - if (min > max) + if (min > max) { + NL_SET_ERR_MSG(extack, "Invalid SPI range: min > max"); return -EINVAL; + } return 0; } EXPORT_SYMBOL(verify_spi_info); -int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) +int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, + struct netlink_ext_ack *extack) { struct net *net = xs_net(x); unsigned int h; @@ -2053,8 +2059,10 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_DEAD) + if (x->km.state == XFRM_STATE_DEAD) { + NL_SET_ERR_MSG(extack, "Target ACQUIRE is in DEAD state"); goto unlock; + } err = 0; if (x->id.spi) @@ -2065,6 +2073,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (minspi == maxspi) { x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { + NL_SET_ERR_MSG(extack, "Requested SPI is already in use"); xfrm_state_put(x0); goto unlock; } @@ -2089,6 +2098,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; + } else { + NL_SET_ERR_MSG(extack, "No SPI available in the requested range"); } unlock: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e73f9efc54c1..0eb4696661c8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -515,7 +515,8 @@ static int attach_aead(struct xfrm_state *x, struct nlattr *rta, } static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, - struct nlattr *rp) + struct nlattr *rp, + struct netlink_ext_ack *extack) { struct xfrm_replay_state_esn *up; unsigned int ulen; @@ -528,13 +529,25 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es /* Check the overall length and the internal bitmap length to avoid * potential overflow. 
*/ - if (nla_len(rp) < (int)ulen || - xfrm_replay_state_esn_len(replay_esn) != ulen || - replay_esn->bmp_len != up->bmp_len) + if (nla_len(rp) < (int)ulen) { + NL_SET_ERR_MSG(extack, "ESN attribute is too short"); return -EINVAL; + } + + if (xfrm_replay_state_esn_len(replay_esn) != ulen) { + NL_SET_ERR_MSG(extack, "New ESN size doesn't match the existing SA's ESN size"); + return -EINVAL; + } - if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) + if (replay_esn->bmp_len != up->bmp_len) { + NL_SET_ERR_MSG(extack, "New ESN bitmap size doesn't match the existing SA's ESN bitmap"); return -EINVAL; + } + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) { + NL_SET_ERR_MSG(extack, "ESN replay window is longer than the bitmap"); + return -EINVAL; + } return 0; } @@ -862,12 +875,12 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; if (xfrm_state_kern(x)) { + NL_SET_ERR_MSG(extack, "SA is in use by tunnels"); err = -EPERM; goto out; } err = xfrm_state_delete(x); - if (err < 0) goto out; @@ -1354,20 +1367,28 @@ static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_SPD_IPV4_HTHRESH]) { struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH]; - if (nla_len(rta) < sizeof(*thresh4)) + if (nla_len(rta) < sizeof(*thresh4)) { + NL_SET_ERR_MSG(extack, "Invalid SPD_IPV4_HTHRESH attribute length"); return -EINVAL; + } thresh4 = nla_data(rta); - if (thresh4->lbits > 32 || thresh4->rbits > 32) + if (thresh4->lbits > 32 || thresh4->rbits > 32) { + NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 32 for IPv4)"); return -EINVAL; + } } if (attrs[XFRMA_SPD_IPV6_HTHRESH]) { struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH]; - if (nla_len(rta) < sizeof(*thresh6)) + if (nla_len(rta) < sizeof(*thresh6)) { + NL_SET_ERR_MSG(extack, "Invalid SPD_IPV6_HTHRESH attribute length"); return -EINVAL; + } thresh6 = nla_data(rta); - if (thresh6->lbits > 128 || thresh6->rbits > 128) + if (thresh6->lbits > 128 || thresh6->rbits > 128) { + NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 128 for IPv6)"); return -EINVAL; + } } if (thresh4 || thresh6) { @@ -1510,7 +1531,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, u32 if_id = 0; p = nlmsg_data(nlh); - err = verify_spi_info(p->info.id.proto, p->min, p->max); + err = verify_spi_info(p->info.id.proto, p->min, p->max, extack); if (err) goto out_noput; @@ -1538,10 +1559,12 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, &p->info.saddr, 1, family); err = -ENOENT; - if (x == NULL) + if (!x) { + NL_SET_ERR_MSG(extack, "Target ACQUIRE not found"); goto out_noput; + } - err = xfrm_alloc_spi(x, p->min, p->max); + err = xfrm_alloc_spi(x, p->min, p->max, extack); if (err) goto out; @@ -2433,12 +2456,16 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; - if (!lt && !rp && !re && !et && !rt) + if (!lt && !rp && !re && !et && !rt) { + NL_SET_ERR_MSG(extack, "Missing required attribute for AE"); return err; + } /* pedantic mode - thou shalt sayeth replaceth */ - if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) { + NL_SET_ERR_MSG(extack, "NLM_F_REPLACE flag is required"); return err; + } mark = xfrm_mark_get(attrs, &m); @@ -2446,10 +2473,12 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (x == NULL) return -ESRCH; - if (x->km.state != XFRM_STATE_VALID) + if (x->km.state != 
XFRM_STATE_VALID) { + NL_SET_ERR_MSG(extack, "SA must be in VALID state"); goto out; + } - err = xfrm_replay_verify_len(x->replay_esn, re); + err = xfrm_replay_verify_len(x->replay_esn, re, extack); if (err) goto out; @@ -2584,8 +2613,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); err = -EINVAL; - if (x->km.state != XFRM_STATE_VALID) + if (x->km.state != XFRM_STATE_VALID) { + NL_SET_ERR_MSG(extack, "SA must be in VALID state"); goto out; + } + km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { @@ -2665,7 +2697,8 @@ nomem: #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, struct xfrm_kmaddress *k, - struct nlattr **attrs, int *num) + struct nlattr **attrs, int *num, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; @@ -2684,8 +2717,10 @@ static int copy_from_user_migrate(struct xfrm_migrate *ma, um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); - if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) + if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) { + NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)"); return -EINVAL; + } for (i = 0; i < num_migrate; i++, um++, ma++) { memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr)); @@ -2718,8 +2753,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_encap_tmpl *encap = NULL; u32 if_id = 0; - if (attrs[XFRMA_MIGRATE] == NULL) + if (!attrs[XFRMA_MIGRATE]) { + NL_SET_ERR_MSG(extack, "Missing required MIGRATE attribute"); return -EINVAL; + } kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; @@ -2727,7 +2764,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); + err = copy_from_user_migrate(m, kmp, attrs, &n, extack); if (err) return err; @@ -2744,7 +2781,8 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id); + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, + if_id, extack); kfree(encap); diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh index 12faf5847e22..395573be6ae8 100755 --- a/samples/bpf/test_cgrp2_tc.sh +++ b/samples/bpf/test_cgrp2_tc.sh @@ -115,7 +115,7 @@ do_exit() { if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] then echo "------ DEBUG ------" - echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo + echo "mount: "; mount | grep -E '(cgroup2|bpf)'; echo echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo if [ -d "$BPF_FS_TC_SHARE" ] then diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 683913bbf279..9d41db09c480 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -162,7 +162,7 @@ static void read_route(struct nlmsghdr *nh, int nll) __be32 gw; } *prefix_value; - prefix_key = alloca(sizeof(*prefix_key) + 3); + prefix_key = alloca(sizeof(*prefix_key) + 4); prefix_value = alloca(sizeof(*prefix_value)); prefix_key->prefixlen = 32; diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 933194257a24..dd4e53ae9b73 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -191,7 +191,7 @@ function extend_addr6() fi # if shrink '::' occurs multiple, 
it's malformed. - shrink=( $(egrep -o "$sep{2,}" <<< $addr) ) + shrink=( $(grep -E -o "$sep{2,}" <<< $addr) ) if [[ ${#shrink[@]} -ne 0 ]]; then if [[ ${#shrink[@]} -gt 1 || ( ${shrink[0]} != $sep2 ) ]]; then err 5 "Invalid IP6 address: $1" diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c22..2a90139ecbe1 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -30,8 +30,8 @@ KBUILD_PKG_ROOTCMD ?="fakeroot -u" export KDEB_SOURCENAME # Include only those top-level files that are needed by make, plus the GPL copy TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \ - include init io_uring ipc kernel lib mm net samples scripts \ - security sound tools usr virt \ + include init io_uring ipc kernel lib mm net rust \ + samples scripts security sound tools usr virt \ .config .scmversion Makefile \ Kbuild Kconfig COPYING $(wildcard localversion*) MKSPEC := $(srctree)/scripts/package/mkspec diff --git a/scripts/faddr2line b/scripts/faddr2line index 5514c23f45c2..0e73aca4f908 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -74,7 +74,8 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | + ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -178,7 +179,7 @@ __faddr2line() { found=2 break fi - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) if [[ $found = 0 ]]; then warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -259,7 +260,7 @@ __faddr2line() { DONE=1 - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 60a2a63a5e90..a3ac5a716e9f 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -90,7 +90,7 @@ if [ -n "$KDEB_PKGVERSION" ]; then packageversion=$KDEB_PKGVERSION revision=${packageversion##*-} else - revision=$(cat .version 2>/dev/null||echo 1) + revision=$($srctree/init/build-version) packageversion=$version-$revision fi sourcename=$KDEB_SOURCENAME diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index b7aee23fc387..47ef6bc30c0e 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); * expand the variable length event to linear buffer space. 
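The snd_seq_expand_var_event() conversion here removes the casts of seq_copy_in_kernel()/seq_copy_in_user() to snd_seq_dump_func_t: calling a function through a pointer of a mismatched type is undefined behaviour and trips kernel CFI, so both helpers now take void * and recover the real type internally. A minimal user-space sketch of the resulting pattern (names are illustrative, not the kernel's):

#include <stdio.h>
#include <string.h>

/* One callback type for every copy target, like snd_seq_dump_func_t above */
typedef int (*dump_func_t)(void *ptr, void *src, int size);

static int copy_to_linear(void *ptr, void *src, int size)
{
        char **bufptr = ptr;    /* recover the real type here, not via a cast at the call site */

        memcpy(*bufptr, src, size);
        *bufptr += size;
        return 0;
}

static int dump_all(dump_func_t fn, void *ptr, char **chunks, int n, int chunk_sz)
{
        for (int i = 0; i < n; i++) {
                int err = fn(ptr, chunks[i], chunk_sz);

                if (err < 0)
                        return err;
        }
        return 0;
}

int main(void)
{
        char out[16] = "", *cursor = out;
        char *chunks[] = { "abcd", "efgh" };

        dump_all(copy_to_linear, &cursor, chunks, 2, 4);
        printf("%s\n", out);    /* prints "abcdefgh" */
        return 0;
}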
*/ -static int seq_copy_in_kernel(char **bufptr, const void *src, int size) +static int seq_copy_in_kernel(void *ptr, void *src, int size) { + char **bufptr = ptr; + memcpy(*bufptr, src, size); *bufptr += size; return 0; } -static int seq_copy_in_user(char __user **bufptr, const void *src, int size) +static int seq_copy_in_user(void *ptr, void *src, int size) { + char __user **bufptr = ptr; + if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; @@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char return newlen; } err = snd_seq_dump_var_event(event, - in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : - (snd_seq_dump_func_t)seq_copy_in_user, + in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf); return err < 0 ? err : newlen; } diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index b9eb3208f288..ae31bb127594 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -321,6 +321,11 @@ static const struct config_entry config_table[] = { } }, { + .flags = FLAG_SOF, + .device = 0x34c8, + .codec_hid = &essx_83x6, + }, + { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x34c8, }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e18499dd14f0..e5c036385666 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9436,6 +9436,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 6c0f1de10429..d9715bea965e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -206,6 +206,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"), + } + }, {} }; diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index fc19c34ca00e..b65560981abb 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -14,7 +14,7 @@ enum { HDAC_HDMI_1_DAI_ID, HDAC_HDMI_2_DAI_ID, HDAC_HDMI_3_DAI_ID, - HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, + HDAC_DAI_ID_NUM }; struct hdac_hda_pcm { @@ -24,7 +24,7 @@ struct hdac_hda_pcm { struct hdac_hda_priv { struct hda_codec *codec; - struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; + struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; bool need_display_power; }; diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 3e04c7f0cce4..ec0905df65d1 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -549,6 +549,10 @@ static int 
max98373_i2c_probe(struct i2c_client *i2c) max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, sizeof(*max98373->cache), GFP_KERNEL); + if (!max98373->cache) { + ret = -ENOMEM; + return ret; + } for (i = 0; i < max98373->cache_num; i++) max98373->cache[i].reg = max98373_i2c_cache_reg[i]; diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 1a25a3787935..362663abcb89 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -298,13 +298,14 @@ static int rt5514_spi_pcm_new(struct snd_soc_component *component, } static const struct snd_soc_component_driver rt5514_spi_component = { - .name = DRV_NAME, - .probe = rt5514_spi_pcm_probe, - .open = rt5514_spi_pcm_open, - .hw_params = rt5514_spi_hw_params, - .hw_free = rt5514_spi_hw_free, - .pointer = rt5514_spi_pcm_pointer, - .pcm_construct = rt5514_spi_pcm_new, + .name = DRV_NAME, + .probe = rt5514_spi_pcm_probe, + .open = rt5514_spi_pcm_open, + .hw_params = rt5514_spi_hw_params, + .hw_free = rt5514_spi_hw_free, + .pointer = rt5514_spi_pcm_pointer, + .pcm_construct = rt5514_spi_pcm_new, + .legacy_dai_naming = 1, }; /** diff --git a/sound/soc/codecs/rt5677-spi.c b/sound/soc/codecs/rt5677-spi.c index 8f3993a4c1cc..d25703dd7499 100644 --- a/sound/soc/codecs/rt5677-spi.c +++ b/sound/soc/codecs/rt5677-spi.c @@ -396,15 +396,16 @@ static int rt5677_spi_pcm_probe(struct snd_soc_component *component) } static const struct snd_soc_component_driver rt5677_spi_dai_component = { - .name = DRV_NAME, - .probe = rt5677_spi_pcm_probe, - .open = rt5677_spi_pcm_open, - .close = rt5677_spi_pcm_close, - .hw_params = rt5677_spi_hw_params, - .hw_free = rt5677_spi_hw_free, - .prepare = rt5677_spi_prepare, - .pointer = rt5677_spi_pcm_pointer, - .pcm_construct = rt5677_spi_pcm_new, + .name = DRV_NAME, + .probe = rt5677_spi_pcm_probe, + .open = rt5677_spi_pcm_open, + .close = rt5677_spi_pcm_close, + .hw_params = rt5677_spi_hw_params, + .hw_free = rt5677_spi_hw_free, + .prepare = rt5677_spi_prepare, + .pointer = rt5677_spi_pcm_pointer, + .pcm_construct = rt5677_spi_pcm_new, + .legacy_dai_naming = 1, }; /* Select a suitable transfer command for the next transfer to ensure diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 4120842fe699..88a8392a58ed 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -230,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) } /* set the timeout values */ - prop->clk_stop_timeout = 20; + prop->clk_stop_timeout = 700; /* wake-up event */ prop->wake_capable = 1; diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 4b2135eba74d..a916f4619ea3 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1794,6 +1794,7 @@ static void sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 51b87a936179..2e0ed3e68fa5 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -438,20 +438,13 @@ static int tas2764_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - 
left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index b6765235a4b3..8557759acb1f 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -395,21 +395,13 @@ static int tas2770_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) diff --git a/sound/soc/codecs/tas2780.c b/sound/soc/codecs/tas2780.c index a6db6f0e5431..afdf0c863aa1 100644 --- a/sound/soc/codecs/tas2780.c +++ b/sound/soc/codecs/tas2780.c @@ -380,20 +380,13 @@ static int tas2780_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b4b4355c6728..b901e4c65e8a 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2503,6 +2503,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) snd_soc_component_update_bits(component, WM8962_CLOCKING2, WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate + * correct frequency of LRCLK and BCLK. Sometimes the read-only value + * can't be updated timely after enabling SYSCLK. This results in wrong + * calculation values. Delay is introduced here to wait for newest + * value from register. The time of the delay should be at least + * 500~1000us according to test. 
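The tas2764/tas2770/tas2780 hunks above drop the special-cased slots == 1 branch and derive both channels from tx_mask with __ffs(): the lowest set bit becomes the left slot, and a single-bit mask drives both channels (mono). A standalone sketch of that logic, with __ffs() emulated via __builtin_ctz() in a user-space harness (illustrative names):

#include <stdio.h>

/* User-space stand-in for the kernel's __ffs() (lowest set bit, 0-based) */
static unsigned int my_ffs(unsigned int mask)
{
        return __builtin_ctz(mask);
}

/* Mirrors the new slot parsing: lowest set bit is the left slot; if no
 * second bit is set (mono), the right channel reuses the same slot. */
static int parse_tdm_slots(unsigned int tx_mask, int slots,
                           int *left_slot, int *right_slot)
{
        if (tx_mask == 0)
                return -1;

        *left_slot = my_ffs(tx_mask);
        tx_mask &= ~(1u << *left_slot);
        if (tx_mask == 0) {
                *right_slot = *left_slot;
        } else {
                *right_slot = my_ffs(tx_mask);
                tx_mask &= ~(1u << *right_slot);
        }

        /* leftover bits or out-of-range slots are still rejected */
        if (tx_mask != 0 || *left_slot >= slots || *right_slot >= slots)
                return -1;
        return 0;
}

int main(void)
{
        int l, r;

        if (!parse_tdm_slots(0x1, 1, &l, &r))   /* mono now takes the generic path */
                printf("left=%d right=%d\n", l, r);
        return 0;
}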
+ */ + usleep_range(500, 1000); dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 936aef5d2767..e16e7b3fa96c 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -1232,7 +1232,7 @@ static int fsl_asrc_probe(struct platform_device *pdev) } ret = pm_runtime_put_sync(&pdev->dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; ret = devm_snd_soc_register_component(&pdev->dev, &fsl_asrc_component, diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 5c21fc490fce..17fefd27ec90 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -1069,7 +1069,7 @@ static int fsl_esai_probe(struct platform_device *pdev) regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); ret = pm_runtime_put_sync(&pdev->dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; /* diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 81f89f6767a2..e60c7b344562 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1446,7 +1446,7 @@ static int fsl_sai_probe(struct platform_device *pdev) } ret = pm_runtime_put_sync(dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; /* diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 6432b83f616f..a935c5fd9edb 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -443,6 +443,13 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { | BYT_CHT_ES8316_INTMIC_IN2_MAP | BYT_CHT_ES8316_JD_INVERTED), }, + { /* Nanote UMPC-01 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"), + }, + .driver_data = (void *)BYT_CHT_ES8316_INTMIC_IN1_MAP, + }, { /* Teclast X98 Plus II */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index fbb42e54947a..70713e4b07dc 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -63,6 +63,7 @@ struct sof_es8336_private { struct snd_soc_jack jack; struct list_head hdmi_pcm_list; bool speaker_en; + struct delayed_work pcm_pop_work; }; struct sof_hdmi_pcm { @@ -111,6 +112,46 @@ static void log_quirks(struct device *dev) dev_info(dev, "quirk headset at mic1 port enabled\n"); } +static void pcm_pop_work_events(struct work_struct *work) +{ + struct sof_es8336_private *priv = + container_of(work, struct sof_es8336_private, pcm_pop_work.work); + + gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en); + + if (quirk & SOF_ES8336_HEADPHONE_GPIO) + gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en); + +} + +static int sof_8336_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_card *card = rtd->card; + struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + case SNDRV_PCM_TRIGGER_RESUME: + break; + + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_STOP: + if (priv->speaker_en == false) + if (substream->stream == 0) { + cancel_delayed_work(&priv->pcm_pop_work); + gpiod_set_value_cansleep(priv->gpio_speakers, true); + } + break; + default: + return 
-EINVAL; + } + + return 0; +} + static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -122,19 +163,7 @@ static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, priv->speaker_en = !SND_SOC_DAPM_EVENT_ON(event); - if (SND_SOC_DAPM_EVENT_ON(event)) - msleep(70); - - gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en); - - if (!(quirk & SOF_ES8336_HEADPHONE_GPIO)) - return 0; - - if (SND_SOC_DAPM_EVENT_ON(event)) - msleep(70); - - gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en); - + queue_delayed_work(system_wq, &priv->pcm_pop_work, msecs_to_jiffies(70)); return 0; } @@ -344,6 +373,7 @@ static int sof_es8336_hw_params(struct snd_pcm_substream *substream, /* machine stream operations */ static struct snd_soc_ops sof_es8336_ops = { .hw_params = sof_es8336_hw_params, + .trigger = sof_8336_trigger, }; static struct snd_soc_dai_link_component platform_component[] = { @@ -723,7 +753,8 @@ static int sof_es8336_probe(struct platform_device *pdev) } INIT_LIST_HEAD(&priv->hdmi_pcm_list); - + INIT_DELAYED_WORK(&priv->pcm_pop_work, + pcm_pop_work_events); snd_soc_card_set_drvdata(card, priv); if (mach->mach_params.dmic_num > 0) { @@ -752,6 +783,7 @@ static int sof_es8336_remove(struct platform_device *pdev) struct snd_soc_card *card = platform_get_drvdata(pdev); struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card); + cancel_delayed_work(&priv->pcm_pop_work); gpiod_put(priv->gpio_speakers); device_remove_software_node(priv->codec_dev); put_device(priv->codec_dev); diff --git a/sound/soc/intel/common/soc-acpi-intel-icl-match.c b/sound/soc/intel/common/soc-acpi-intel-icl-match.c index b032bc07de8b..d0062f2cd256 100644 --- a/sound/soc/intel/common/soc-acpi-intel-icl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-icl-match.c @@ -10,6 +10,11 @@ #include <sound/soc-acpi-intel-match.h> #include "../skylake/skl.h" +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static struct skl_machine_pdata icl_pdata = { .use_tplg_pcm = true, }; @@ -27,6 +32,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_icl_machines[] = { .drv_name = "sof_rt5682", .sof_tplg_filename = "sof-icl-rt5682.tplg", }, + { + .comp_ids = &essx_83x6, + .drv_name = "sof-essx8336", + .sof_tplg_filename = "sof-icl-es8336", /* the tplg suffix is added at run time */ + .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | + SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | + SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, + }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_icl_machines); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 12a82f5a3ff6..a409fbed8f34 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3477,10 +3477,23 @@ EXPORT_SYMBOL_GPL(snd_soc_of_get_dai_link_cpus); static int __init snd_soc_init(void) { + int ret; + snd_soc_debugfs_init(); - snd_soc_util_init(); + ret = snd_soc_util_init(); + if (ret) + goto err_util_init; - return platform_driver_register(&soc_driver); + ret = platform_driver_register(&soc_driver); + if (ret) + goto err_register; + return 0; + +err_register: + snd_soc_util_exit(); +err_util_init: + snd_soc_debugfs_exit(); + return ret; } module_init(snd_soc_init); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d515e7a78ea8..879cf1be67a9 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3645,7 +3645,7 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, switch (w->id) 
{ case snd_soc_dapm_regulator_supply: - w->regulator = devm_regulator_get(dapm->dev, w->name); + w->regulator = devm_regulator_get(dapm->dev, widget->name); if (IS_ERR(w->regulator)) { ret = PTR_ERR(w->regulator); goto request_failed; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index fb87d6d23408..35a16c3f9591 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -822,11 +822,6 @@ static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_startup(dai, substream); if (ret < 0) goto err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - dai->tx_mask = 0; - else - dai->rx_mask = 0; } /* Dynamic PCM DAI links compat checks use dynamic capabilities */ @@ -1252,6 +1247,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, return; be_substream = snd_soc_dpcm_get_substream(be, stream); + if (!be_substream) + return; for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index a3b6df2378b4..a4dba0b751e7 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -264,7 +264,7 @@ int __init snd_soc_util_init(void) return ret; } -void __exit snd_soc_util_exit(void) +void snd_soc_util_exit(void) { platform_driver_unregister(&soc_dummy_driver); platform_device_unregister(soc_dummy_dev); diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index c148715aa0f9..0720e1eae084 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2275,6 +2275,7 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif struct sof_ipc_fw_version *v = &sdev->fw_ready.version; struct snd_sof_widget *swidget; struct snd_sof_route *sroute; + bool dyn_widgets = false; int ret; /* @@ -2284,12 +2285,14 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif * topology loading the sound card unavailable to open PCMs. */ list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->dynamic_pipeline_widget) + if (swidget->dynamic_pipeline_widget) { + dyn_widgets = true; continue; + } - /* Do not free widgets for static pipelines with FW ABI older than 3.19 */ + /* Do not free widgets for static pipelines with FW older than SOF2.2 */ if (!verify && !swidget->dynamic_pipeline_widget && - v->abi_version < SOF_ABI_VER(3, 19, 0)) { + SOF_FW_VER(v->major, v->minor, v->micro) < SOF_FW_VER(2, 2, 0)) { swidget->use_count = 0; swidget->complete = 0; continue; @@ -2303,9 +2306,11 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif /* * Tear down all pipelines associated with PCMs that did not get suspended * and unset the prepare flag so that they can be set up again during resume. - * Skip this step for older firmware. + * Skip this step for older firmware unless topology has any + * dynamic pipeline (in which case the step is mandatory). 
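The pipeline-teardown gate in this ipc3-topology hunk switches from the IPC ABI version to the firmware release, compared via SOF_FW_VER(). A sketch of the idea, assuming only that SOF_FW_VER() packs major/minor/micro so releases compare as plain integers (the field widths below are invented for illustration, not the kernel's):

#include <stdio.h>

/* Hypothetical packing; the real SOF_FW_VER() may use different widths */
#define FW_VER(maj, min, mic) (((maj) << 24) | ((min) << 12) | (mic))

int main(void)
{
        unsigned int running = FW_VER(2, 1, 9);

        /* firmware older than SOF 2.2 keeps the legacy static-pipeline handling */
        if (running < FW_VER(2, 2, 0))
                printf("skip pipeline teardown (legacy firmware)\n");
        return 0;
}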
*/ - if (!verify && v->abi_version >= SOF_ABI_VER(3, 19, 0)) { + if (!verify && (dyn_widgets || SOF_FW_VER(v->major, v->minor, v->micro) >= + SOF_FW_VER(2, 2, 0))) { ret = sof_tear_down_left_over_pipelines(sdev); if (ret < 0) { dev_err(sdev->dev, "failed to tear down paused pipelines\n"); diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 38855dd60617..6a0e7f3b5023 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1344,16 +1344,6 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, break; } - if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { - swidget->core = SOF_DSP_PRIMARY_CORE; - } else { - int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, - swidget->num_tuples); - - if (core >= 0) - swidget->core = core; - } - /* check token parsing reply */ if (ret < 0) { dev_err(scomp->dev, @@ -1365,6 +1355,16 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, return ret; } + if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { + swidget->core = SOF_DSP_PRIMARY_CORE; + } else { + int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, + swidget->num_tuples); + + if (core >= 0) + swidget->core = core; + } + /* bind widget to external event */ if (tw->event_type) { if (widget_ops[w->id].bind_event) { diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 643fc8a17018..837c1848d9bf 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -304,6 +304,11 @@ static int stm32_adfsdm_dummy_cb(const void *data, void *private) return 0; } +static void stm32_adfsdm_cleanup(void *data) +{ + iio_channel_release_all_cb(data); +} + static struct snd_soc_component_driver stm32_adfsdm_soc_platform = { .open = stm32_adfsdm_pcm_open, .close = stm32_adfsdm_pcm_close, @@ -350,6 +355,12 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) if (IS_ERR(priv->iio_cb)) return PTR_ERR(priv->iio_cb); + ret = devm_add_action_or_reset(&pdev->dev, stm32_adfsdm_cleanup, priv->iio_cb); + if (ret < 0) { + dev_err(&pdev->dev, "Unable to add action\n"); + return ret; + } + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index ce7f6942308f..f3dd9f8e621c 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -1077,7 +1077,7 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, if (irq < 0) return irq; - ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, 0, dev_name(&pdev->dev), i2s); if (ret) { dev_err(&pdev->dev, "irq request returned %d\n", ret); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index bbff0923d264..2839f6b6f09b 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1133,10 +1133,8 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) port = &umidi->endpoints[i].out->ports[j]; break; } - if (!port) { - snd_BUG(); + if (!port) return -ENXIO; - } substream->runtime->private_data = port; port->state = STATE_UNKNOWN; diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 05350a1aadf9..30df7a707f02 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -23,12 +23,3 @@ Print all logs available, even debug-level information. 
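The stm32_adfsdm hunk above registers iio_channel_release_all_cb() as a devm action right after allocating the IIO callback buffer, so the buffer is released both on a later probe failure and at driver detach. A minimal module sketch of that pattern (the resource and all names are stand-ins, not the STM32 code):

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

/* Stand-in for iio_channel_release_all_cb(); receives the pointer
 * registered with the devm action below. */
static void demo_cleanup(void *data)
{
        kfree(data);
}

static int demo_probe(struct platform_device *pdev)
{
        void *res;
        int ret;

        res = kzalloc(64, GFP_KERNEL);          /* stand-in resource */
        if (!res)
                return -ENOMEM;

        /* On failure this frees res immediately; on success demo_cleanup()
         * runs automatically when the device is unbound. */
        ret = devm_add_action_or_reset(&pdev->dev, demo_cleanup, res);
        if (ret)
                return ret;

        return 0;
}

static struct platform_driver demo_driver = {
        .probe = demo_probe,
        .driver = { .name = "devm-action-demo" },
};
module_platform_driver(demo_driver);
MODULE_LICENSE("GPL");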
This includes logs from libbpf as well as from the verifier, when attempting to load programs. - --l, --legacy - Use legacy libbpf mode which has more relaxed BPF program - requirements. By default, bpftool has more strict requirements - about section names, changes pinning logic and doesn't support - some of the older non-BTF map declarations. - - See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 - for details. diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst index ccf1ffa0686c..827e3ffb1766 100644 --- a/tools/bpf/bpftool/Documentation/substitutions.rst +++ b/tools/bpf/bpftool/Documentation/substitutions.rst @@ -1,3 +1,3 @@ .. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } +.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 2957b42cab67..35f26f7c1124 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf --legacy' + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index b87e4a7fd689..352290ba7b29 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -467,9 +467,8 @@ static int dump_btf_c(const struct btf *btf, int err = 0, i; d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -512,11 +511,9 @@ static struct btf *get_vmlinux_btf_from_sysfs(void) struct btf *base; base = btf__parse(sysfs_vmlinux, NULL); - if (libbpf_get_error(base)) { - p_err("failed to parse vmlinux BTF at '%s': %ld\n", - sysfs_vmlinux, libbpf_get_error(base)); - base = NULL; - } + if (!base) + p_err("failed to parse vmlinux BTF at '%s': %d\n", + sysfs_vmlinux, -errno); return base; } @@ -559,7 +556,7 @@ static int do_dump(int argc, char **argv) __u32 btf_id = -1; const char *src; int fd = -1; - int err; + int err = 0; if (!REQ_ARGS(2)) { usage(); @@ -634,8 +631,8 @@ static int do_dump(int argc, char **argv) base = get_vmlinux_btf_from_sysfs(); btf = btf__parse_split(*argv, base ?: base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("failed to load BTF from %s: %s", *argv, strerror(errno)); goto done; @@ -681,8 +678,8 @@ static int do_dump(int argc, char **argv) } btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); - err = libbpf_get_error(btf); if (!btf) { + err = -errno; p_err("get btf by id (%u): %s", btf_id, strerror(errno)); goto done; } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 19924b6ce796..eda71fdfe95a 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -75,7 +75,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, goto print; prog_btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(prog_btf)) + if (!prog_btf) goto print; func_type = btf__type_by_id(prog_btf, finfo.type_id); if (!func_type || 
!btf_is_func(func_type)) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 01bb8d8f5568..2883660d6b67 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -252,9 +252,8 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) int err = 0; d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); - err = libbpf_get_error(d); - if (err) - return err; + if (!d) + return -errno; bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ @@ -976,13 +975,12 @@ static int do_skeleton(int argc, char **argv) /* log_level1 + log_level2 + stats, but not stable UAPI */ opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { char err_buf[256]; + err = -errno; libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); - obj = NULL; goto out; } diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index a3e6b167153d..9a1d2365a297 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -4,6 +4,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <errno.h> #include <unistd.h> #include <linux/err.h> #include <bpf/libbpf.h> @@ -48,8 +49,8 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - err = libbpf_get_error(obj); - if (err) { + if (!obj) { + err = -errno; p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -62,13 +63,14 @@ static int do_pin(int argc, char **argv) prog = bpf_object__next_program(obj, NULL); if (!prog) { + err = -errno; p_err("can't find bpf program in objfile %s", objfile); goto close_obj; } link = bpf_program__attach_iter(prog, &iter_opts); - err = libbpf_get_error(link); - if (err) { + if (!link) { + err = -errno; p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 741e50ee0b6c..08d0ac543c67 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,7 +31,6 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; -bool legacy_libbpf; struct btf *base_btf; struct hashmap *refs_table; @@ -160,7 +159,6 @@ static int do_version(int argc, char **argv) jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); jsonw_bool_field(json_wtr, "llvm", has_llvm); - jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_bool_field(json_wtr, "bootstrap", bootstrap); jsonw_end_object(json_wtr); /* features */ @@ -179,7 +177,6 @@ static int do_version(int argc, char **argv) printf("features:"); print_feature("libbfd", has_libbfd, &nb_features); print_feature("llvm", has_llvm, &nb_features); - print_feature("libbpf_strict", !legacy_libbpf, &nb_features); print_feature("skeletons", has_skeletons, &nb_features); print_feature("bootstrap", bootstrap, &nb_features); printf("\n"); @@ -337,12 +334,12 @@ static int do_batch(int argc, char **argv) if (argc < 2) { p_err("too few parameters for batch"); return -1; - } else if (!is_prefix(*argv, "file")) { - p_err("expected 'file', got: %s", *argv); - return -1; } else if (argc > 2) { p_err("too many parameters for batch"); return -1; + } else if (!is_prefix(*argv, "file")) { + p_err("expected 'file', got: %s", *argv); + return -1; } NEXT_ARG(); @@ -451,7 +448,6 @@ int main(int argc, char **argv) { "debug", 
no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, - { "legacy", no_argument, NULL, 'l' }, { 0 } }; bool version_requested = false; @@ -514,19 +510,15 @@ int main(int argc, char **argv) break; case 'B': base_btf = btf__parse(optarg, NULL); - if (libbpf_get_error(base_btf)) { - p_err("failed to parse base BTF at '%s': %ld\n", - optarg, libbpf_get_error(base_btf)); - base_btf = NULL; + if (!base_btf) { + p_err("failed to parse base BTF at '%s': %d\n", + optarg, -errno); return -1; } break; case 'L': use_loader = true; break; - case 'l': - legacy_libbpf = true; - break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -536,14 +528,6 @@ int main(int argc, char **argv) } } - if (!legacy_libbpf) { - /* Allow legacy map definitions for skeleton generation. - * It will still be rejected if users use LIBBPF_STRICT_ALL - * mode for loading generated skeleton. - */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); - } - argc -= optind; argv += optind; if (argc < 0) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index d4e8a1aef787..a84224b6a604 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -82,7 +82,6 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; -extern bool legacy_libbpf; extern struct btf *base_btf; extern struct hashmap *refs_table; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index d884070a2314..88911d3aa2d9 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -786,18 +786,18 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - err = libbpf_get_error(btf_vmlinux); - if (err) { + if (!btf_vmlinux) { p_err("failed to get kernel btf"); - return err; + return -errno; } } *btf = btf_vmlinux; } else if (info->btf_value_type_id) { *btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(*btf); - if (err) + if (!*btf) { + err = -errno; p_err("failed to get btf"); + } } else { *btf = NULL; } @@ -807,16 +807,10 @@ static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) static void free_map_kv_btf(struct btf *btf) { - if (!libbpf_get_error(btf) && btf != btf_vmlinux) + if (btf != btf_vmlinux) btf__free(btf); } -static void free_btf_vmlinux(void) -{ - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); -} - static int map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, bool show_header) @@ -953,7 +947,7 @@ exit_close: close(fds[i]); exit_free: free(fds); - free_btf_vmlinux(); + btf__free(btf_vmlinux); return err; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9d32ffb9f22e..cfc9fdc1e863 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -322,7 +322,7 @@ static void show_prog_metadata(int fd, __u32 num_maps) return; btf = btf__load_from_kernel_by_id(map_info.btf_id); - if (libbpf_get_error(btf)) + if 
(!btf) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -726,7 +726,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (info->btf_id) { btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to get btf"); return -1; } @@ -1663,7 +1663,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_free_reuse_maps; } @@ -1802,11 +1802,6 @@ err_unpin: else bpf_object__unpin_programs(obj, pinfile); err_close_obj: - if (!legacy_libbpf) { - p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" - "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); - } - bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) @@ -1887,7 +1882,7 @@ static int do_loader(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) { + if (!obj) { p_err("failed to open object file"); goto err_close_obj; } @@ -2204,7 +2199,7 @@ static char *profile_target_name(int tgt_fd) } btf = btf__load_from_kernel_by_id(info.btf_id); - if (libbpf_get_error(btf)) { + if (!btf) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index e08a6ff2866c..903b80ff4e9a 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + if (!btf_vmlinux) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return "<btf_vmlinux_not_found>"; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -63,10 +63,8 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) { - map_info_type_id = PTR_ERR(kern_btf); - return map_info_type_id; - } + if (!kern_btf) + return 0; map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info", BTF_KIND_STRUCT); @@ -415,7 +413,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (libbpf_get_error(kern_btf)) + if (!kern_btf) return -1; if (!json_output) { @@ -498,7 +496,7 @@ static int do_register(int argc, char **argv) open_opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_file(file, &open_opts); - if (libbpf_get_error(obj)) + if (!obj) return -1; set_max_rlimit(); @@ -513,10 +511,9 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (libbpf_get_error(link)) { + if (!link) { p_err("can't register struct_ops %s: %s", - bpf_map__name(map), - strerror(-PTR_ERR(link))); + bpf_map__name(map), strerror(errno)); nr_errs++; continue; } @@ -593,8 +590,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!libbpf_get_error(btf_vmlinux)) - btf__free(btf_vmlinux); + btf__free(btf_vmlinux); return err; } diff --git 
a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 2491c54a5e4f..f8deae4e26a1 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -715,12 +715,12 @@ int main(int argc, char **argv) continue; } - toread = buf_len; } else { usleep(timedelay); - toread = 64; } + toread = buf_len; + read_size = read(buf_fd, data, toread * scan_size); if (read_size < 0) { if (errno == EAGAIN) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fb4c911d2a03..f89de51a45db 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2584,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2647,7 +2652,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -2808,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * @@ -6888,6 +6891,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
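The bpftool conversions above all follow the libbpf 1.0 error convention: pointer-returning APIs yield NULL and set errno, so callers test the pointer instead of round-tripping the return value through libbpf_get_error(). A minimal user-space sketch (assumes libbpf >= 1.0 and a kernel that exposes /sys/kernel/btf/vmlinux):

#include <errno.h>
#include <stdio.h>
#include <bpf/btf.h>

int main(void)
{
        struct btf *btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);

        if (!btf) {
                /* errno carries the failure reason; no libbpf_get_error() */
                fprintf(stderr, "failed to parse vmlinux BTF: %d\n", -errno);
                return 1;
        }

        printf("loaded %u BTF types\n", btf__type_cnt(btf));
        btf__free(btf);
        return 0;
}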
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3bd812bf88ff..71e165b09ed5 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1724,7 +1724,8 @@ err_out: memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); /* and now restore original strings section size; types data size - * wasn't modified, so doesn't need restoring, see big comment above */ + * wasn't modified, so doesn't need restoring, see big comment above + */ btf->hdr->str_len = old_strs_len; hashmap__free(p.str_off_map); @@ -2329,7 +2330,7 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value|| !value[0]) + if (!value || !value[0]) return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 12f7039e0ab2..deb2bc9a0a7b 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1543,7 +1543,7 @@ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, if (!new_name) return 1; - hashmap__find(name_map, orig_name, &dup_cnt); + (void)hashmap__find(name_map, orig_name, &dup_cnt); dup_cnt++; err = hashmap__set(name_map, new_name, dup_cnt, &old_name, NULL); @@ -1989,7 +1989,7 @@ static int btf_dump_struct_data(struct btf_dump *d, { const struct btf_member *m = btf_members(t); __u16 n = btf_vlen(t); - int i, err; + int i, err = 0; /* note that we increment depth before calling btf_dump_print() below; * this is intentional. btf_dump_data_newline() will not print a diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1d263885d635..2a82f49ce16f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -347,7 +347,8 @@ enum sec_def_flags { SEC_ATTACHABLE = 2, SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ + * other BPF program's BTF object + */ SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, @@ -488,7 +489,7 @@ struct bpf_map { char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF + * name. This is important to be able to find corresponding BTF * DATASEC information. 
*/ char *real_name; @@ -1863,12 +1864,20 @@ static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, return -ERANGE; } switch (ext->kcfg.sz) { - case 1: *(__u8 *)ext_val = value; break; - case 2: *(__u16 *)ext_val = value; break; - case 4: *(__u32 *)ext_val = value; break; - case 8: *(__u64 *)ext_val = value; break; - default: - return -EINVAL; + case 1: + *(__u8 *)ext_val = value; + break; + case 2: + *(__u16 *)ext_val = value; + break; + case 4: + *(__u32 *)ext_val = value; + break; + case 8: + *(__u64 *)ext_val = value; + break; + default: + return -EINVAL; } ext->is_set = true; return 0; @@ -2770,7 +2779,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) m->type = enum64_placeholder_id; m->offset = 0; } - } + } } return 0; @@ -3502,7 +3511,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RELO; sec_desc->shdr = sh; sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || + str_has_pfx(name, BSS_SEC "."))) { sec_desc->sec_type = SEC_BSS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3518,7 +3528,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } /* sort BPF programs by section name and in-section instruction offset - * for faster search */ + * for faster search + */ if (obj->nr_programs) qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); @@ -3817,7 +3828,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -EINVAL; } ext->kcfg.type = find_kcfg_type(obj->btf, t->type, - &ext->kcfg.is_signed); + &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; @@ -4965,9 +4976,9 @@ bpf_object__reuse_map(struct bpf_map *map) err = bpf_map__reuse_fd(map, pin_fd); close(pin_fd); - if (err) { + if (err) return err; - } + map->pinned = true; pr_debug("reused pinned map at '%s'\n", map->pin_path); @@ -5485,7 +5496,7 @@ static int load_module_btfs(struct bpf_object *obj) } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, - sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) goto err_out; @@ -6237,7 +6248,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * prog; each main prog can have a different set of * subprograms appended (potentially in different order as * well), so position of any subprog can be different for - * different main programs */ + * different main programs + */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", @@ -10995,7 +11007,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, - usdt_provider, usdt_name, usdt_cookie); + usdt_provider, usdt_name, usdt_cookie); err = libbpf_get_error(link); if (err) return libbpf_err_ptr(err); @@ -11223,7 +11235,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ @@ -12304,7 +12316,7 @@ int 
bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) btf = bpf_object__btf(s->obj); if (!btf) { pr_warn("subskeletons require BTF at runtime (object %s)\n", - bpf_object__name(s->obj)); + bpf_object__name(s->obj)); return libbpf_err(-errno); } diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index bdb83d467f9a..b44fcbb4b42e 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -235,7 +235,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..47855af25f3b 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -128,9 +128,13 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, /* Map read-only producer page and data pages. We map twice as big * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. - * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + */ + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); @@ -220,7 +224,7 @@ static inline int roundup_len(__u32 len) return (len + 7) / 8 * 8; } -static int64_t ringbuf_process_ring(struct ring* r) +static int64_t ringbuf_process_ring(struct ring *r) { int *len_ptr, len, err; /* 64-bit to avoid overflow in case of extreme application behavior */ @@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. 
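Both ring-buffer mappings in these hunks compute the double-mapped size in 64 bits and reject any value that does not survive a round trip through size_t, so a huge max_entries cannot be silently truncated before reaching mmap() on 32-bit userspace. A standalone sketch of the guard:

#include <stdint.h>
#include <stdio.h>

/* Compute the double-mapped ring size in 64 bits, then make sure it
 * survives the round trip through size_t before mmap() would see it. */
static int check_mmap_size(uint64_t page_size, uint32_t max_entries)
{
        uint64_t mmap_sz = page_size + 2 * (uint64_t)max_entries;

        if (mmap_sz != (uint64_t)(size_t)mmap_sz) {
                fprintf(stderr, "ring buffer size (%u) is too big\n", max_entries);
                return -1;      /* libbpf returns -E2BIG here */
        }
        return 0;
}

int main(void)
{
        /* a 2 GiB ring double-maps to over 4 GiB: fine on 64-bit,
         * rejected where size_t is 32 bits */
        printf("%d\n", check_mmap_size(4096, 1u << 31));
        return 0;
}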
*/ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", @@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. */ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f07aef7c592c..b57b091d8026 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -48,6 +48,7 @@ TARGETS += nci TARGETS += net TARGETS += net/af_unix TARGETS += net/forwarding +TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += netfilter diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 09416d5d2e33..8e77515d56f6 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -38,12 +38,14 @@ kprobe_multi_test/skel_api # kprobe_multi__attach unexpect ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +linked_list lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) lru_bug # lru_bug__attach unexpected error: -524 (errno 524) modify_return # modify_return__attach failed unexpected error: -524 (errno 524) module_attach # skel_attach skeleton attach failed: -524 mptcp/base # run_test mptcp unexpected error: -524 (errno 524) netcnt # packets unexpected packets: actual 10001 != expected 10000 +rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 recursion # skel_attach unexpected error: -524 (errno 524) ringbuf # skel_attach skeleton attachment failed: -1 setget_sockopt # attach_cgroup unexpected error: -524 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index be4e3d47ea3e..648a8a1b6b78 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -10,6 +10,7 @@ bpf_nf # JIT does not support calling kernel f bpf_tcp_ca # JIT does not support calling kernel function (kfunc) cb_refs # expected error message unexpected error: -524 (trampoline) cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) +cgrp_kfunc # JIT does not support calling kernel function cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) @@ -33,6 +34,7 @@ ksyms_module # test_ksyms_module__open_and_load unex ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) 
libbpf_get_fd_by_id_opts # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +linked_list # JIT does not support calling kernel function (kfunc) lookup_key # JIT does not support calling kernel function (kfunc) lru_bug # prog 'printk': failed to auto-attach: -524 map_kptr # failed to open_and_load program: -524 (trampoline) @@ -41,6 +43,7 @@ module_attach # skel_attach skeleton attach failed: - mptcp netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) probe_user # check_kprobe_res wrong kprobe res from probe read (?) +rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) recursion # skel_attach unexpected error: -524 (trampoline) ringbuf # skel_load skeleton load failed (?) select_reuseport # intermittently fails on new s390x setup @@ -53,6 +56,7 @@ skc_to_unix_sock # could not attach BPF object unexpecte socket_cookie # prog_attach unexpected error: -524 (trampoline) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_kfunc # JIT does not support calling kernel function task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) test_bpffs # bpffs test failed 255 (iterator) test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) @@ -69,6 +73,7 @@ trace_printk # trace_printk__load unexpected error: trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) tracing_struct # failed to auto-attach: -524 (trampoline) trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +type_cast # JIT does not support calling kernel function unpriv_bpf_disabled # fentry user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f3cd17026ee5..6a0f043dc410 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -201,7 +201,7 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool @@ -310,9 +310,9 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - </dev/null 2>&1 \ +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef # Determine target endianness. 
@@ -320,7 +320,11 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) @@ -542,7 +546,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,) $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) endif diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h new file mode 100644 index 000000000000..424f7bbbfe9b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -0,0 +1,68 @@ +#ifndef __BPF_EXPERIMENTAL__ +#define __BPF_EXPERIMENTAL__ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node))) + +/* Description + * Allocates an object of the type represented by 'local_type_id' in + * program BTF. User may use the bpf_core_type_id_local macro to pass the + * type ID of a struct in program BTF. + * + * The 'local_type_id' parameter must be a known constant. + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * A pointer to an object of the type corresponding to the passed in + * 'local_type_id', or NULL on failure. + */ +extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_new_impl */ +#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) + +/* Description + * Free an allocated object. All fields of the object that require + * destruction will be destructed before the storage is freed. + * + * The 'meta' parameter is a hidden argument that is ignored. + * Returns + * Void. + */ +extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_obj_drop_impl */ +#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) + +/* Description + * Add a new entry to the beginning of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Add a new entry to the end of the BPF linked list. + * Returns + * Void. + */ +extern void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; + +/* Description + * Remove the entry at the beginning of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. + */ +extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; + +/* Description + * Remove the entry at the end of the BPF linked list. + * Returns + * Pointer to bpf_list_node of deleted entry, or NULL if list is empty. 
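+ * The popped node is an owning reference: convert it back to the
+ * containing object (container_of) and eventually release it with
+ * bpf_obj_drop(), otherwise the verifier reports an unreleased
+ * reference.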
+ */ +extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index dd1aa5afcf5a..9e95b37a7dff 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -333,6 +333,25 @@ int get_root_cgroup(void) return fd; } +/* + * remove_cgroup() - Remove a cgroup + * @relative_path: The cgroup path, relative to the workdir, to remove + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup(const char *relative_path) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 3358734356ab..f099a166c94d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -18,6 +18,7 @@ int write_cgroup_file_parent(const char *relative_path, const char *file, int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); +void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); int join_cgroup(const char *relative_path); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7a99a6728169..f9034ea00bc9 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -8,6 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y CONFIG_BPF_SYSCALL=y +CONFIG_BPF_UNPRIV_DEFAULT_OFF=n CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = 
bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index bec15558fd93..1f37adff7632 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -426,6 +426,10 @@ static int setns_by_fd(int nsfd) if (!ASSERT_OK(err, "mount /sys/fs/bpf")) return err; + err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/kernel/debug")) + return err; + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 95a2b80f0d17..de1b5b9eb93a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3949,6 +3949,20 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid return type", }, { + .descr = "decl_tag test #17, func proto, argument", + .raw_types = { + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), 
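+ /* arg#1 of the func_proto above uses the decl_tag at [1] as its
+ * type, which BTF validation must reject ("Invalid arg#1") */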
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag1\0var"), + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index c4a2adb38da1..e02feb5fae97 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -189,6 +189,80 @@ static void test_walk_self_only(struct cgroup_iter *skel) BPF_CGROUP_ITER_SELF_ONLY, "self_only"); } +static void test_walk_dead_self_only(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + char expected_output[128], buf[128]; + const char *cgrp_name = "/dead"; + union bpf_iter_link_info linfo; + int len, cgrp_fd, iter_fd; + struct bpf_link *link; + size_t left; + char *p; + + cgrp_fd = create_and_get_cgroup(cgrp_name); + if (!ASSERT_GE(cgrp_fd, 0, "create cgrp")) + return; + + /* The cgroup will be dead during read() iteration, so it only has + * epilogue in the output + */ + snprintf(expected_output, sizeof(expected_output), EPILOGUE); + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgrp_fd; + linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + goto close_cgrp; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto free_link; + + /* Close link fd and cgroup fd */ + bpf_link__destroy(link); + close(cgrp_fd); + + /* Remove cgroup to mark it as dead */ + remove_cgroup(cgrp_name); + + /* Two kern_sync_rcu() and usleep() pairs are used to wait for the + * releases of cgroup css, and the last kern_sync_rcu() and usleep() + * pair is used to wait for the free of cgroup itself. + */ + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(8000); + kern_sync_rcu(); + usleep(1000); + + memset(buf, 0, sizeof(buf)); + left = ARRAY_SIZE(buf); + p = buf; + while ((len = read(iter_fd, p, left)) > 0) { + p += len; + left -= len; + } + + ASSERT_STREQ(buf, expected_output, "dead cgroup output"); + + /* read() after iter finishes should be ok. */ + if (len == 0) + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); + + close(iter_fd); + return; +free_link: + bpf_link__destroy(link); +close_cgrp: + close(cgrp_fd); +} + void test_cgroup_iter(void) { struct cgroup_iter *skel = NULL; @@ -217,6 +291,8 @@ void test_cgroup_iter(void) test_early_termination(skel); if (test__start_subtest("cgroup_iter__self_only")) test_walk_self_only(skel); + if (test__start_subtest("cgroup_iter__dead_self_only")) + test_walk_dead_self_only(skel); out: cgroup_iter__destroy(skel); cleanup_cgroups(); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c new file mode 100644 index 000000000000..973f0c5af965 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#define _GNU_SOURCE +#include <cgroup_helpers.h> +#include <test_progs.h> + +#include "cgrp_kfunc_failure.skel.h" +#include "cgrp_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void) +{ + struct cgrp_kfunc_success *skel; + int err; + + skel = cgrp_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = cgrp_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + cgrp_kfunc_success__destroy(skel); + return NULL; +} + +static int mkdir_rm_test_dir(void) +{ + int fd; + const char *cgrp_path = "cgrp_kfunc"; + + fd = create_and_get_cgroup(cgrp_path); + if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd")) + return -1; + + close(fd); + remove_cgroup(cgrp_path); + + return 0; +} + +static void run_success_test(const char *prog_name) +{ + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count"); + if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir")) + goto cleanup; + + ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count"); + ASSERT_OK(skel->bss->err, "post_rmdir_err"); + +cleanup: + bpf_link__destroy(link); + cgrp_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_cgrp_acquire_release_argument", + "test_cgrp_acquire_leave_in_map", + "test_cgrp_xchg_release", + "test_cgrp_get_release", + "test_cgrp_get_ancestors", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_acquire_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"cgrp_kfunc_xchg_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_get_unreleased", "Unreleased reference"}, + {"cgrp_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_fp", "arg#0 pointer type STRUCT cgroup must point"}, + {"cgrp_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"cgrp_kfunc_release_unacquired", "release kernel function bpf_cgroup_release expects"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct cgrp_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = 
cgrp_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "cgrp_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = cgrp_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + cgrp_kfunc_failure__destroy(skel); +} + +void test_cgrp_kfunc(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "cgrp_env_setup")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } + +cleanup: + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 8fc4e6c02bfd..b0c06f821cb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -17,7 +17,7 @@ static struct { {"ringbuf_missing_release2", "Unreleased reference id=2"}, {"ringbuf_missing_release_callback", "Unreleased reference id"}, {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, - {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, + {"ringbuf_invalid_api", "type=mem expected=ringbuf_mem"}, {"add_dynptr_to_map1", "invalid indirect read from stack"}, {"add_dynptr_to_map2", "invalid indirect read from stack"}, {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c new file mode 100644 index 000000000000..0613f3bb8b5e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <net/if.h> +#include "empty_skb.skel.h" + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +void serial_test_empty_skb(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, tattr); + struct empty_skb *bpf_obj = NULL; + struct nstoken *tok = NULL; + struct bpf_program *prog; + char eth_hlen_pp[15]; + char eth_hlen[14]; + int veth_ifindex; + int ipip_ifindex; + int err; + int i; + + struct { + const char *msg; + const void *data_in; + __u32 data_size_in; + int *ifindex; + int err; + int ret; + bool success_on_tc; + } tests[] = { + /* Empty packets are always rejected. 
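+ * bpf_prog_test_run_opts() demands at least ETH_HLEN bytes of
+ * input for skb programs, so both runs below fail with -EINVAL
+ * before the program is ever invoked.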
*/ + + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "veth empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &veth_ifindex, + .err = -EINVAL, + }, + { + /* BPF_PROG_RUN ETH_HLEN size check */ + .msg = "ipip empty ingress packet", + .data_in = NULL, + .data_size_in = 0, + .ifindex = &ipip_ifindex, + .err = -EINVAL, + }, + + /* ETH_HLEN-sized packets: + * - can not be redirected at LWT_XMIT + * - can be redirected at TC to non-tunneling dest + */ + + { + /* __bpf_redirect_common */ + .msg = "veth ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &veth_ifindex, + .ret = -ERANGE, + .success_on_tc = true, + }, + { + /* __bpf_redirect_no_mac + * + * lwt: skb->len=0 <= skb_network_offset=0 + * tc: skb->len=14 <= skb_network_offset=14 + */ + .msg = "ipip ETH_HLEN packet ingress", + .data_in = eth_hlen, + .data_size_in = sizeof(eth_hlen), + .ifindex = &ipip_ifindex, + .ret = -ERANGE, + }, + + /* ETH_HLEN+1-sized packet should be redirected. */ + + { + .msg = "veth ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &veth_ifindex, + }, + { + .msg = "ipip ETH_HLEN+1 packet ingress", + .data_in = eth_hlen_pp, + .data_size_in = sizeof(eth_hlen_pp), + .ifindex = &ipip_ifindex, + }, + }; + + SYS("ip netns add empty_skb"); + tok = open_netns("empty_skb"); + SYS("ip link add veth0 type veth peer veth1"); + SYS("ip link set dev veth0 up"); + SYS("ip link set dev veth1 up"); + SYS("ip addr add 10.0.0.1/8 dev veth0"); + SYS("ip addr add 10.0.0.2/8 dev veth1"); + veth_ifindex = if_nametoindex("veth0"); + + SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS("ip link set ipip0 up"); + SYS("ip addr add 192.168.1.1/16 dev ipip0"); + ipip_ifindex = if_nametoindex("ipip0"); + + bpf_obj = empty_skb__open_and_load(); + if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) + goto out; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + bpf_object__for_each_program(prog, bpf_obj->obj) { + char buf[128]; + bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + + tattr.data_in = tests[i].data_in; + tattr.data_size_in = tests[i].data_size_in; + + tattr.data_size_out = 0; + bpf_obj->bss->ifindex = *tests[i].ifindex; + bpf_obj->bss->ret = 0; + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr); + sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog)); + + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(err, 0, buf); + else + ASSERT_EQ(err, tests[i].err, buf); + sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog)); + if (at_tc && tests[i].success_on_tc) + ASSERT_GE(bpf_obj->bss->ret, 0, buf); + else + ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + } + } + +out: + if (bpf_obj) + empty_skb__destroy(bpf_obj); + if (tok) + close_netns(tok); + system("ip netns del empty_skb"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c index c210657d4d0a..55d641c1f126 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c @@ -22,7 +22,7 @@ static struct { "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0}, {"not_valid_dynptr", "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0}, - {"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0}, + {"not_ptr_to_stack", "arg#0 expected pointer to 
stack", 0}, {"dynptr_data_null", NULL, -EBADMSG}, }; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index eedbf1937fc4..c6f37e825f11 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -360,10 +360,12 @@ static int get_syms(char ***symsp, size_t *cntp) * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) @@ -402,7 +404,7 @@ error: return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -470,6 +472,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c new file mode 100644 index 000000000000..9a7d4c47af63 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -0,0 +1,740 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/btf.h> +#include <test_btf.h> +#include <linux/btf.h> +#include <test_progs.h> +#include <network_helpers.h> + +#include "linked_list.skel.h" +#include "linked_list_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} linked_list_fail_tests[] = { +#define TEST(test, off) \ + { #test "_missing_lock_push_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_push_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_front", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ + { #test "_missing_lock_pop_back", \ + "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, + TEST(kptr, 32) + TEST(global, 16) + TEST(map, 0) + TEST(inner_map, 0) +#undef TEST +#define TEST(test, op) \ + { #test "_kptr_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + { #test "_global_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ + { #test "_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \ + { #test "_inner_map_incorrect_lock_" #op, \ + "held lock and object are not in the same allocation\n" \ + "bpf_spin_lock at off=0 must be held for bpf_list_head" }, + TEST(kptr, push_front) + TEST(kptr, push_back) + TEST(kptr, pop_front) + TEST(kptr, pop_back) + TEST(global, push_front) + TEST(global, push_back) + TEST(global, pop_front) + TEST(global, pop_back) + TEST(map, push_front) + TEST(map, push_back) + TEST(map, pop_front) + TEST(map, pop_back) + TEST(inner_map, push_front) + TEST(inner_map, push_back) + TEST(inner_map, pop_front) + TEST(inner_map, 
pop_back) +#undef TEST + { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" }, + { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" }, + { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, + { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" }, + { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, + { "obj_new_acq", "Unreleased reference id=" }, + { "use_after_drop", "invalid mem access 'scalar'" }, + { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, + { "direct_read_head", "direct access to bpf_list_head is disallowed" }, + { "direct_write_head", "direct access to bpf_list_head is disallowed" }, + { "direct_read_node", "direct access to bpf_list_node is disallowed" }, + { "direct_write_node", "direct access to bpf_list_node is disallowed" }, + { "write_after_push_front", "only read is supported" }, + { "write_after_push_back", "only read is supported" }, + { "use_after_unlock_push_front", "invalid mem access 'scalar'" }, + { "use_after_unlock_push_back", "invalid mem access 'scalar'" }, + { "double_push_front", "arg#1 expected pointer to allocated object" }, + { "double_push_back", "arg#1 expected pointer to allocated object" }, + { "no_node_value_type", "bpf_list_node not found at offset=0" }, + { "incorrect_value_type", + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=0 in struct foo, " + "but arg is at offset=0 in struct bar" }, + { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=1" }, + { "incorrect_node_off2", "arg#1 offset=40, but expected bpf_list_node at offset=0 in struct foo" }, + { "no_head_type", "bpf_list_head not found at offset=0" }, + { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, + { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, + { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, + { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, + { "pop_front_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, + { "pop_back_off", + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, +}; + +static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct linked_list_fail *skel; + struct bpf_program *prog; + int ret; + + skel = linked_list_fail__open_opts(&opts); + 
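+ /* For reference, not part of the patch: the BPF-program pattern these
+ * fail tests police, assuming the bpf_experimental.h helpers introduced
+ * earlier in this series (struct, map and variable names are
+ * illustrative, and container_of is the usual offsetof trick):
+ *
+ *   struct elem { int data; struct bpf_list_node node; };
+ *   struct map_value {
+ *           struct bpf_spin_lock lock;
+ *           struct bpf_list_head head __contains(elem, node);
+ *   };
+ *
+ *   struct map_value *v = bpf_map_lookup_elem(&array_map, &key);
+ *   if (!v) return 0;
+ *   struct elem *e = bpf_obj_new(typeof(*e));
+ *   if (!e) return 0;
+ *
+ *   bpf_spin_lock(&v->lock);          // the lock covering this list
+ *   bpf_list_push_front(&v->head, &e->node);
+ *   struct bpf_list_node *n = bpf_list_pop_front(&v->head);
+ *   bpf_spin_unlock(&v->lock);
+ *   if (n)                            // popped nodes own a reference
+ *           bpf_obj_drop(container_of(n, struct elem, node));
+ *
+ * Omitting the lock, holding a lock from a different allocation, or
+ * leaking e or n produces exactly the verifier messages listed in
+ * linked_list_fail_tests above.
+ */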
if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = linked_list_fail__load(skel); + if (!ASSERT_ERR(ret, "linked_list_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + linked_list_fail__destroy(skel); +} + +static void clear_fields(struct bpf_map *map) +{ + char buf[24]; + int key = 0; + + memset(buf, 0xff, sizeof(buf)); + ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); +} + +enum { + TEST_ALL, + PUSH_POP, + PUSH_POP_MULT, + LIST_IN_LIST, +}; + +static void test_linked_list_success(int mode, bool leave_in_map) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct linked_list *skel; + int ret; + + skel = linked_list__open_and_load(); + if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load")) + return; + + if (mode == LIST_IN_LIST) + goto lil; + if (mode == PUSH_POP_MULT) + goto ppm; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts); + ASSERT_OK(ret, "map_list_push_pop"); + ASSERT_OK(opts.retval, "map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); + ASSERT_OK(ret, "global_list_push_pop"); + ASSERT_OK(opts.retval, "global_list_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP) + goto end; + +ppm: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); + ASSERT_OK(ret, "global_list_push_pop_multiple"); + ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + if (mode == PUSH_POP_MULT) + goto end; + +lil: + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts); + ASSERT_OK(ret, "map_list_in_list"); + ASSERT_OK(opts.retval, "map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.array_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); + ASSERT_OK(ret, "inner_map_list_in_list"); + ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.inner_map); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); + 
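+ /* As in the earlier runners: ret checks the test_run plumbing while
+ * opts.retval carries the BPF program's own return code. When
+ * leave_in_map is false, clear_fields() then rewrites the map value,
+ * which makes the kernel release anything the program left on the
+ * list (the "check_and_free_fields" assert tag above refers to this).
+ */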
ASSERT_OK(ret, "global_list_in_list"); + ASSERT_OK(opts.retval, "global_list_in_list retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); +end: + linked_list__destroy(skel); +} + +#define SPIN_LOCK 2 +#define LIST_HEAD 3 +#define LIST_NODE 4 + +static struct btf *init_btf(void) +{ + int id, lid, hid, nid; + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "btf__new_empty")) + return NULL; + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + if (!ASSERT_EQ(id, 1, "btf__add_int")) + goto end; + lid = btf__add_struct(btf, "bpf_spin_lock", 4); + if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock")) + goto end; + hid = btf__add_struct(btf, "bpf_list_head", 16); + if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head")) + goto end; + nid = btf__add_struct(btf, "bpf_list_node", 16); + if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node")) + goto end; + return btf; +end: + btf__free(btf); + return NULL; +} + +static void test_btf(void) +{ + struct btf *btf = NULL; + int id, err; + + while (test__start_subtest("btf: too many locks")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + err = btf__add_field(btf, "c", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -E2BIG, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing lock")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 16); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_struct foo::a")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: bad offset")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EEXIST, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing contains:")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, 
"btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing struct")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: missing node")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 24); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_HEAD, 64, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c")) + break; + + err = btf__load_into_kernel(btf); + btf__free(btf); + ASSERT_EQ(err, -ENOENT, "check btf"); + break; + } + + while (test__start_subtest("btf: node incorrect type")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 4); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: multiple bpf_list_node with name b")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 52); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0); + if (!ASSERT_OK(err, "btf__add_field foo::d")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -EINVAL, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned AA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = 
btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned ABA cycle")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) + break; + id = btf__add_struct(btf, "bar", 16); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err 
= btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, 0, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 36); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field foo::c")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar:a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar:b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar:c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) + break; + id = btf__add_struct(btf, "baz", 16); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field baz:a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } + + while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) { + btf = init_btf(); + if (!ASSERT_OK_PTR(btf, "init_btf")) + break; + id = btf__add_struct(btf, "foo", 20); + if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field foo::a")) + break; + err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + if (!ASSERT_OK(err, "btf__add_field foo::b")) + break; + id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); + if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) + break; + id = btf__add_struct(btf, "bar", 36); + if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) + break; + err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0); + if (!ASSERT_EQ(id, 8, "btf__add_decl_tag")) + break; + id = btf__add_struct(btf, "baz", 36); + if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) + break; + err 
= btf__add_field(btf, "a", LIST_HEAD, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bar::a")) + break; + err = btf__add_field(btf, "b", LIST_NODE, 128, 0); + if (!ASSERT_OK(err, "btf__add_field bar::b")) + break; + err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + if (!ASSERT_OK(err, "btf__add_field bar::c")) + break; + id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0); + if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a")) + break; + id = btf__add_struct(btf, "bam", 16); + if (!ASSERT_EQ(id, 11, "btf__add_struct bam")) + break; + err = btf__add_field(btf, "a", LIST_NODE, 0, 0); + if (!ASSERT_OK(err, "btf__add_field bam::a")) + break; + + err = btf__load_into_kernel(btf); + ASSERT_EQ(err, -ELOOP, "check btf"); + btf__free(btf); + break; + } +} + +void test_linked_list(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) { + if (!test__start_subtest(linked_list_fail_tests[i].prog_name)) + continue; + test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name, + linked_list_fail_tests[i].err_msg); + } + test_btf(); + test_linked_list_success(PUSH_POP, false); + test_linked_list_success(PUSH_POP, true); + test_linked_list_success(PUSH_POP_MULT, false); + test_linked_list_success(PUSH_POP_MULT, true); + test_linked_list_success(LIST_IN_LIST, false); + test_linked_list_success(LIST_IN_LIST, true); + test_linked_list_success(TEST_ALL, false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 1102e4f42d2d..f117bfef68a1 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -173,10 +173,12 @@ static void test_lsm_cgroup_functional(void) ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); - /* AF_UNIX is prohibited. */ fd = socket(AF_UNIX, SOCK_STREAM, 0); - ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK)) + /* AF_UNIX is prohibited. */ + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); close(fd); /* AF_INET6 gets default policy (sk_priority). */ @@ -233,11 +235,18 @@ static void test_lsm_cgroup_functional(void) /* AF_INET6+SOCK_STREAM * AF_PACKET+SOCK_RAW + * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed * listen_fd * client_fd * accepted_fd */ - ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + if (skel->kconfig->CONFIG_SECURITY_APPARMOR + || skel->kconfig->CONFIG_SECURITY_SELINUX + || skel->kconfig->CONFIG_SECURITY_SMACK) + /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2"); + else + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); /* start_server * bind(ETH_P_ALL) diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c new file mode 100644 index 000000000000..447d8560ecb6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc.
and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include <bpf/btf.h> +#include "rcu_read_lock.skel.h" +#include "cgroup_helpers.h" + +static unsigned long long cgroup_id; + +static void test_success(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.get_cgroup_id, true); + bpf_program__set_autoload(skel->progs.task_succ, true); + bpf_program__set_autoload(skel->progs.no_lock, true); + bpf_program__set_autoload(skel->progs.two_regions, true); + bpf_program__set_autoload(skel->progs.non_sleepable_1, true); + bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val"); + ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id"); +out: + rcu_read_lock__destroy(skel); +} + +static void test_rcuptr_acquire(void) +{ + struct rcu_read_lock *skel; + int err; + + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + bpf_program__set_autoload(skel->progs.task_acquire, true); + err = rcu_read_lock__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + err = rcu_read_lock__attach(skel); + ASSERT_OK(err, "skel_attach"); +out: + rcu_read_lock__destroy(skel); +} + +static const char * const inproper_region_tests[] = { + "miss_lock", + "miss_unlock", + "non_sleepable_rcu_mismatch", + "inproper_sleepable_helper", + "inproper_sleepable_kfunc", + "nested_rcu_region", +}; + +static void test_inproper_region(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +static const char * const rcuptr_misuse_tests[] = { + "task_untrusted_non_rcuptr", + "task_untrusted_rcuptr", + "cross_rcu_region", +}; + +static void test_rcuptr_misuse(void) +{ + struct rcu_read_lock *skel; + struct bpf_program *prog; + int i, err; + + for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) { + skel = rcu_read_lock__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = rcu_read_lock__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + rcu_read_lock__destroy(skel); + } +} + +void test_rcu_read_lock(void) +{ + struct btf *vmlinux_btf; + int cgroup_fd; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) + return; + if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { + test__skip(); + goto out; + } + + cgroup_fd = test__join_cgroup("/rcu_read_lock"); + if 
(!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) + goto out; + + cgroup_id = get_cgroup_id("/rcu_read_lock"); + if (test__start_subtest("success")) + test_success(); + if (test__start_subtest("rcuptr_acquire")) + test_rcuptr_acquire(); + if (test__start_subtest("negative_tests_inproper_region")) + test_inproper_region(); + if (test__start_subtest("negative_tests_rcuptr_misuse")) + test_rcuptr_misuse(); + close(cgroup_fd); +out: + btf__free(vmlinux_btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c new file mode 100644 index 000000000000..d9270bd3d920 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_spin_lock.skel.h" +#include "test_spin_lock_fail.skel.h" + +static char log_buf[1024 * 1024]; + +static struct { + const char *prog_name; + const char *err_msg; +} spin_lock_fail_tests[] = { + { "lock_id_kptr_preserve", + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " + "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=ptr_ expected=percpu_ptr_" }, + { "lock_id_global_zero", + "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mapval_preserve", + "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_innermapval_preserve", + "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) " + "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n" + "R1 type=map_value expected=percpu_ptr_" }, + { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" }, + { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, +}; + +static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_spin_lock_fail *skel; + struct bpf_program *prog; + int ret; + + skel = 
test_spin_lock_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_spin_lock_fail__load(skel); + if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail")) + goto end; + + /* Skip check if JIT does not support kfuncs */ + if (strstr(log_buf, "JIT does not support calling kernel function")) { + test__skip(); + goto end; + } + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_spin_lock_fail__destroy(skel); +} + +static void *spin_lock_thread(void *arg) +{ + int err, prog_fd = *(u32 *) arg; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 10000, + ); + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + pthread_exit(arg); +} + +void test_spin_lock_success(void) +{ + struct test_spin_lock *skel; + pthread_t thread_id[4]; + int prog_fd, i; + void *ret; + + skel = test_spin_lock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load")) + return; + prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test); + for (i = 0; i < 4; i++) { + int err; + + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + goto end; + } + + for (i = 0; i < 4; i++) { + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) + goto end; + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) + goto end; + } +end: + test_spin_lock__destroy(skel); +} + +void test_spin_lock(void) +{ + int i; + + test_spin_lock_success(); + + for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) { + if (!test__start_subtest(spin_lock_fail_tests[i].prog_name)) + continue; + test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name, + spin_lock_fail_tests[i].err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c deleted file mode 100644 index 15eb1372d771..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <test_progs.h> -#include <network_helpers.h> - -static void *spin_lock_thread(void *arg) -{ - int err, prog_fd = *(u32 *) arg; - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), - .repeat = 10000, - ); - - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); - pthread_exit(arg); -} - -void test_spinlock(void) -{ - const char *file = "./test_spin_lock.bpf.o"; - pthread_t thread_id[4]; - struct bpf_object *obj = NULL; - int prog_fd; - int err = 0, i; - void *ret; - - err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); - goto close_prog; - } - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd))) - goto close_prog; - - for (i = 0; i < 4; i++) - if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || - ret != (void *)&prog_fd)) - goto close_prog; -close_prog: - bpf_object__close(obj); -} diff --git 
a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c new file mode 100644 index 000000000000..ffd8ef4303c8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <sys/wait.h> +#include <test_progs.h> +#include <unistd.h> + +#include "task_kfunc_failure.skel.h" +#include "task_kfunc_success.skel.h" + +static size_t log_buf_sz = 1 << 20; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct task_kfunc_success *open_load_task_kfunc_skel(void) +{ + struct task_kfunc_success *skel; + int err; + + skel = task_kfunc_success__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + skel->bss->pid = getpid(); + + err = task_kfunc_success__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + return skel; + +cleanup: + task_kfunc_success__destroy(skel); + return NULL; +} + +static void run_success_test(const char *prog_name) +{ + struct task_kfunc_success *skel; + int status; + pid_t child_pid; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_spawn_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attached_link")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + if (child_pid == 0) + _exit(0); + waitpid(child_pid, &status, 0); + + ASSERT_OK(skel->bss->err, "post_wait_err"); + +cleanup: + bpf_link__destroy(link); + task_kfunc_success__destroy(skel); +} + +static const char * const success_tests[] = { + "test_task_acquire_release_argument", + "test_task_acquire_release_current", + "test_task_acquire_leave_in_map", + "test_task_xchg_release", + "test_task_get_release", + "test_task_current_acquire_release", + "test_task_from_pid_arg", + "test_task_from_pid_current", + "test_task_from_pid_invalid", +}; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} failure_tests[] = { + {"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"}, + {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"}, + {"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_acquire_unreleased", "Unreleased reference"}, + {"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"}, + {"task_kfunc_get_null", "arg#0 expected pointer to map value"}, + {"task_kfunc_xchg_unreleased", "Unreleased reference"}, + {"task_kfunc_get_unreleased", "Unreleased reference"}, + {"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"}, + {"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"}, + {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"}, + {"task_kfunc_from_pid_no_null_check", "arg#0 is 
ptr_or_null_ expected ptr_ or socket"}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct task_kfunc_failure *skel; + int err, i; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = task_kfunc_failure__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + struct bpf_program *prog; + const char *curr_name = failure_tests[i].prog_name; + + prog = bpf_object__find_program_by_name(skel->obj, curr_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name)); + } + + err = task_kfunc_failure__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + task_kfunc_failure__destroy(skel); +} + +void test_task_kfunc(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { + if (!test__start_subtest(success_tests[i])) + continue; + + run_success_test(success_tests[i]); + } + + for (i = 0; i < ARRAY_SIZE(failure_tests); i++) { + if (!test__start_subtest(failure_tests[i].prog_name)) + continue; + + verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c new file mode 100644 index 000000000000..9317d5fa2635 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> +#include <network_helpers.h> +#include "type_cast.skel.h" + +static void test_xdp(void) +{ + struct type_cast *skel; + int err, prog_fd; + char buf[128]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_xdp, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_xdp); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval"); + + ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex"); + ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex"); + ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name"); + ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum"); + +out: + type_cast__destroy(skel); +} + +static void test_tc(void) +{ + struct type_cast *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.md_skb, true); + err = type_cast__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.md_skb); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "tc test_run retval"); + + ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len"); + ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len"); + ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len"); + ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len"); + ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare"); + +out: + type_cast__destroy(skel); +} + +static const char * const negative_tests[] = { + "untrusted_ptr", + "kctx_u64", +}; + +static void test_negative(void) +{ + struct bpf_program *prog; + struct type_cast *skel; + int i, err; + + for (i = 0; i < ARRAY_SIZE(negative_tests); i++) { + skel = type_cast__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + bpf_program__set_autoload(prog, true); + err = type_cast__load(skel); + ASSERT_ERR(err, "skel_load"); +out: + type_cast__destroy(skel); + } +} + +void test_type_cast(void) +{ + if (test__start_subtest("xdp")) + test_xdp(); + if (test__start_subtest("tc")) + test_tc(); + if (test__start_subtest("negative")) + test_negative(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index a50971c6cf4a..9ac6f6a268db 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd) } #define NUM_PKTS 10000 -void test_xdp_do_redirect(void) +void serial_test_xdp_do_redirect(void) { int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst; char data[sizeof(pkt_udp) + sizeof(__u32)]; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index c72083885b6d..13daa3746064 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ 
b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -174,7 +174,7 @@ out: system("ip netns del synproxy"); } -void test_xdp_synproxy(void) +void serial_test_xdp_synproxy(void) { if (test__start_subtest("xdp")) test_synproxy(true); diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h new file mode 100644 index 000000000000..7d30855bfe78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CGRP_KFUNC_COMMON_H +#define _CGRP_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __cgrps_kfunc_map_value { + struct cgroup __kptr_ref * cgrp; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __cgrps_kfunc_map_value); + __uint(max_entries, 1); +} __cgrps_kfunc_map SEC(".maps"); + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; + +static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) +{ + s32 id; + long status; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); +} + +static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return status; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) { + bpf_map_delete_elem(&__cgrps_kfunc_map, &id); + return -ENOENT; + } + + acquired = bpf_cgroup_acquire(cgrp); + old = bpf_kptr_xchg(&v->cgrp, acquired); + if (old) { + bpf_cgroup_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CGRP_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c new file mode 100644 index 000000000000..a1369b5ebcf8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp) +{ + int status; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + return NULL; + + return cgrps_kfunc_map_value_lookup(cgrp); +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. 
*/ + acquired = bpf_cgroup_acquire(v->cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path; + + /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */ + acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("kretprobe/cgroup_destroy_locked") +int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) +{ + struct cgroup *acquired; + + /* Can't acquire an untrusted struct cgroup * pointer. */ + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */ + acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */ + acquired = bpf_cgroup_acquire(NULL); + if (!acquired) + return 0; + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Acquired cgroup is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a non-kptr, even on a valid cgroup. */ + kptr = bpf_cgroup_kptr_get(&cgrp); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr, *acquired; + + acquired = bpf_cgroup_acquire(cgrp); + + /* Cannot use bpf_cgroup_kptr_get() on a non-map-value, even if the kptr was acquired. */ + kptr = bpf_cgroup_kptr_get(&acquired); + bpf_cgroup_release(acquired); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + + /* Cannot use bpf_cgroup_kptr_get() on a NULL pointer. */ + kptr = bpf_cgroup_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. 
*/ + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value *v; + + v = insert_lookup_cgrp(cgrp); + if (!v) + return 0; + + /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */ + bpf_cgroup_release(v->cgrp); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired = (struct cgroup *)&path; + + /* Cannot release random frame pointer. */ + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path) +{ + struct __cgrps_kfunc_map_value local, *v; + long status; + struct cgroup *acquired, *old; + s32 id; + + status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id); + if (status) + return 0; + + local.cgrp = NULL; + status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id); + if (!v) + return -ENOENT; + + acquired = bpf_cgroup_acquire(cgrp); + + old = bpf_kptr_xchg(&v->cgrp, acquired); + + /* old cannot be passed to bpf_cgroup_release() without a NULL check. */ + bpf_cgroup_release(old); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path) +{ + /* Cannot release trusted cgroup pointer which was not acquired. */ + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c new file mode 100644 index 000000000000..0c23ea32df9f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "cgrp_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid, invocations; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(cgroup_mkdir, + * TP_PROTO(struct cgroup *cgrp, const char *path), + * TP_ARGS(cgrp, path) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + bool same = pid == cur_pid; + + if (same) + __sync_fetch_and_add(&invocations, 1); + + return same; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path) +{ + struct cgroup *acquired; + + if (!is_test_kfunc_task()) + return 0; + + acquired = bpf_cgroup_acquire(cgrp); + bpf_cgroup_release(acquired); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->cgrp, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path) +{ + struct cgroup *kptr; + struct __cgrps_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = cgrps_kfunc_map_insert(cgrp); + if (status) { + err = 1; + return 0; + } + + v = cgrps_kfunc_map_value_lookup(cgrp); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_cgroup_kptr_get(&v->cgrp); + if (!kptr) { + err = 3; + return 0; + } + + bpf_cgroup_release(kptr); + + return 0; +} + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) +{ + struct cgroup *self, *ancestor1, *invalid; + + if (!is_test_kfunc_task()) + return 0; + + self = bpf_cgroup_ancestor(cgrp, cgrp->level); + if (!self) { + err = 1; + return 0; + } + + if (self->self.id != cgrp->self.id) { + bpf_cgroup_release(self); + err = 2; + return 0; + } + bpf_cgroup_release(self); + + ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!ancestor1) { + err = 3; + return 0; + } + bpf_cgroup_release(ancestor1); + + invalid = bpf_cgroup_ancestor(cgrp, 10000); + if (invalid) { + bpf_cgroup_release(invalid); + err = 4; + return 0; + } + + invalid = bpf_cgroup_ancestor(cgrp, -1); + if (invalid) { + bpf_cgroup_release(invalid); + err = 5; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c new file mode 100644 index 000000000000..4b0cd6753251 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/empty_skb.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +int ifindex; +int ret; + +SEC("lwt_xmit") +int redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); 
+ return 0; +} + +SEC("lwt_xmit") +int redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} + +SEC("tc") +int tc_redirect_ingress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS); + return 0; +} + +SEC("tc") +int tc_redirect_egress(struct __sk_buff *skb) +{ + ret = bpf_clone_redirect(skb, ifindex, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c new file mode 100644 index 000000000000..2c7b615c6d41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#include "linked_list.h" + +static __always_inline +int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 3; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + bpf_obj_drop(f); + return 4; + } + + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + if (leave_in_map) + return 0; + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 6; + } + + bpf_spin_lock(lock); + f->data = 13; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 7; + f = container_of(n, struct foo, node); + if (f->data != 13) { + bpf_obj_drop(f); + return 8; + } + bpf_obj_drop(f); + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 9; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 10; + } + return 0; +} + + +static __always_inline +int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct foo *f[8], *pf; + int i; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + f[i] = bpf_obj_new(typeof(**f)); + if (!f[i]) + return 2; + f[i]->data = i; + bpf_spin_lock(lock); + bpf_list_push_front(head, &f[i]->node); + bpf_spin_unlock(lock); + } + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 3; + pf = container_of(n, struct foo, node); + if (pf->data != (ARRAY_SIZE(f) - i - 1)) { + bpf_obj_drop(pf); + return 4; + } + bpf_spin_lock(lock); + bpf_list_push_back(head, &pf->node); + bpf_spin_unlock(lock); + } + + if (leave_in_map) + return 0; + + for (i = 0; i < ARRAY_SIZE(f); i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (!n) + return 5; + pf = container_of(n, struct foo, node); + if (pf->data != i) { + bpf_obj_drop(pf); + 
return 6; + } + bpf_obj_drop(pf); + } + bpf_spin_lock(lock); + n = bpf_list_pop_back(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 7; + } + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (n) { + bpf_obj_drop(container_of(n, struct foo, node)); + return 8; + } + return 0; +} + +static __always_inline +int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) +{ + struct bpf_list_node *n; + struct bar *ba[8], *b; + struct foo *f; + int i; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 2; + for (i = 0; i < ARRAY_SIZE(ba); i++) { + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 3; + } + b->data = i; + bpf_spin_lock(&f->lock); + bpf_list_push_back(&f->head, &b->node); + bpf_spin_unlock(&f->lock); + } + + bpf_spin_lock(lock); + f->data = 42; + bpf_list_push_front(head, &f->node); + bpf_spin_unlock(lock); + + if (leave_in_map) + return 0; + + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 4; + f = container_of(n, struct foo, node); + if (f->data != 42) { + bpf_obj_drop(f); + return 5; + } + + for (i = 0; i < ARRAY_SIZE(ba); i++) { + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (!n) { + bpf_obj_drop(f); + return 6; + } + b = container_of(n, struct bar, node); + if (b->data != i) { + bpf_obj_drop(f); + bpf_obj_drop(b); + return 7; + } + bpf_obj_drop(b); + } + bpf_spin_lock(&f->lock); + n = bpf_list_pop_front(&f->head); + bpf_spin_unlock(&f->lock); + if (n) { + bpf_obj_drop(f); + bpf_obj_drop(container_of(n, struct bar, node)); + return 8; + } + bpf_obj_drop(f); + return 0; +} + +static __always_inline +int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop(lock, head, false); + if (ret) + return ret; + return list_push_pop(lock, head, true); +} + +static __always_inline +int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_push_pop_multiple(lock, head, false); + if (ret) + return ret; + return list_push_pop_multiple(lock, head, true); +} + +static __always_inline +int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + int ret; + + ret = list_in_list(lock, head, false); + if (ret) + return ret; + return list_in_list(lock, head, true); +} + +SEC("tc") +int map_list_push_pop(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop(void *ctx) +{ + return test_list_push_pop(&glock, &ghead); +} + +SEC("tc") +int map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_push_pop_multiple(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + 
return test_list_push_pop_multiple(&v->lock, &v->head); +} + +SEC("tc") +int global_list_push_pop_multiple(void *ctx) +{ + int ret; + + ret = list_push_pop_multiple(&glock, &ghead, false); + if (ret) + return ret; + return list_push_pop_multiple(&glock, &ghead, true); +} + +SEC("tc") +int map_list_in_list(void *ctx) +{ + struct map_value *v; + int ret; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int inner_map_list_in_list(void *ctx) +{ + struct map_value *v; + void *map; + int ret; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return test_list_in_list(&v->lock, &v->head); +} + +SEC("tc") +int global_list_in_list(void *ctx) +{ + return test_list_in_list(&glock, &ghead); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h new file mode 100644 index 000000000000..3fb2412552fc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef LINKED_LIST_H +#define LINKED_LIST_H + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct bar { + struct bpf_list_node node; + int data; +}; + +struct foo { + struct bpf_list_node node; + struct bpf_list_head head __contains(bar, node); + struct bpf_spin_lock lock; + int data; + struct bpf_list_node node2; +}; + +struct map_value { + struct bpf_spin_lock lock; + int data; + struct bpf_list_head head __contains(foo, node); +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +}; + +struct array_map array_map SEC(".maps"); +struct array_map inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +#define private(name) SEC(".bss." 
#name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(foo, node); +private(B) struct bpf_spin_lock glock2; + +#endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c new file mode 100644 index 000000000000..1d9017240e19 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +#include "linked_list.h" + +#define INIT \ + struct map_value *v, *v2, *iv, *iv2; \ + struct foo *f, *f1, *f2; \ + struct bar *b; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \ + if (!map) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v) \ + return 0; \ + v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \ + if (!v2) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv) \ + return 0; \ + iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \ + if (!iv2) \ + return 0; \ + f = bpf_obj_new(typeof(*f)); \ + if (!f) \ + return 0; \ + f1 = f; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + b = bpf_obj_new(typeof(*b)); \ + if (!b) { \ + bpf_obj_drop(f2); \ + bpf_obj_drop(f1); \ + return 0; \ + } + +#define CHECK(test, op, hexpr) \ + SEC("?tc") \ + int test##_missing_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + p(hexpr); \ + return 0; \ + } + +CHECK(kptr, push_front, &f->head); +CHECK(kptr, push_back, &f->head); +CHECK(kptr, pop_front, &f->head); +CHECK(kptr, pop_back, &f->head); + +CHECK(global, push_front, &ghead); +CHECK(global, push_back, &ghead); +CHECK(global, pop_front, &ghead); +CHECK(global, pop_back, &ghead); + +CHECK(map, push_front, &v->head); +CHECK(map, push_back, &v->head); +CHECK(map, pop_front, &v->head); +CHECK(map, pop_back, &v->head); + +CHECK(inner_map, push_front, &iv->head); +CHECK(inner_map, push_back, &iv->head); +CHECK(inner_map, pop_front, &iv->head); +CHECK(inner_map, pop_back, &iv->head); + +#undef CHECK + +#define CHECK(test, op, lexpr, hexpr) \ + SEC("?tc") \ + int test##_incorrect_lock_##op(void *ctx) \ + { \ + INIT; \ + void (*p)(void *) = (void *)&bpf_list_##op; \ + bpf_spin_lock(lexpr); \ + p(hexpr); \ + return 0; \ + } + +#define CHECK_OP(op) \ + CHECK(kptr_kptr, op, &f1->lock, &f2->head); \ + CHECK(kptr_global, op, &f1->lock, &ghead); \ + CHECK(kptr_map, op, &f1->lock, &v->head); \ + CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \ + \ + CHECK(global_global, op, &glock2, &ghead); \ + CHECK(global_kptr, op, &glock, &f1->head); \ + CHECK(global_map, op, &glock, &v->head); \ + CHECK(global_inner_map, op, &glock, &iv->head); \ + \ + CHECK(map_map, op, &v->lock, &v2->head); \ + CHECK(map_kptr, op, &v->lock, &f2->head); \ + CHECK(map_global, op, &v->lock, &ghead); \ + CHECK(map_inner_map, op, &v->lock, &iv->head); \ + \ + CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \ + CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \ + CHECK(inner_map_global, op, &iv->lock, &ghead); \ + CHECK(inner_map_map, op, &iv->lock, &v->head); + +CHECK_OP(push_front); +CHECK_OP(push_back); +CHECK_OP(pop_front); +CHECK_OP(pop_back); + +#undef CHECK +#undef CHECK_OP +#undef INIT + +SEC("?kprobe/xyz") +int map_compat_kprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + 
return 0; +} + +SEC("?kretprobe/xyz") +int map_compat_kretprobe(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tracepoint/xyz") +int map_compat_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?perf_event") +int map_compat_perf(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp/xyz") +int map_compat_raw_tp(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?raw_tp.w/xyz") +int map_compat_raw_tp_w(void *ctx) +{ + bpf_list_push_front(&ghead, NULL); + return 0; +} + +SEC("?tc") +int obj_type_id_oor(void *ctx) +{ + bpf_obj_new_impl(~0UL, NULL); + return 0; +} + +SEC("?tc") +int obj_new_no_composite(void *ctx) +{ + bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42); + return 0; +} + +SEC("?tc") +int obj_new_no_struct(void *ctx) +{ + + bpf_obj_new(union { int data; unsigned udata; }); + return 0; +} + +SEC("?tc") +int obj_drop_non_zero_off(void *ctx) +{ + void *f; + + f = bpf_obj_new(struct foo); + if (!f) + return 0; + bpf_obj_drop(f+1); + return 0; +} + +SEC("?tc") +int new_null_ret(void *ctx) +{ + return bpf_obj_new(struct foo)->data; +} + +SEC("?tc") +int obj_new_acq(void *ctx) +{ + bpf_obj_new(struct foo); + return 0; +} + +SEC("?tc") +int use_after_drop(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_obj_drop(f); + return f->data; +} + +SEC("?tc") +int ptr_walk_scalar(void *ctx) +{ + struct test1 { + struct test2 { + struct test2 *next; + } *ptr; + } *p; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_this_cpu_ptr(p->ptr); + return 0; +} + +SEC("?tc") +int direct_read_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->lock; +} + +SEC("?tc") +int direct_write_lock(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->lock = 0; + return 0; +} + +SEC("?tc") +int direct_read_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->head; +} + +SEC("?tc") +int direct_write_head(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->head = 0; + return 0; +} + +SEC("?tc") +int direct_read_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + return *(int *)&f->node; +} + +SEC("?tc") +int direct_write_node(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + *(int *)&f->node = 0; + return 0; +} + +static __always_inline +int write_after_op(void (*push_op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + push_op(&ghead, &f->node); + f->data = 42; + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int write_after_push_front(void *ctx) +{ + return write_after_op((void *)bpf_list_push_front); +} + +SEC("?tc") +int write_after_push_back(void *ctx) +{ + return write_after_op((void *)bpf_list_push_back); +} + +static __always_inline +int use_after_unlock(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + f->data = 42; + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return f->data; +} + +SEC("?tc") +int use_after_unlock_push_front(void *ctx) +{ + return use_after_unlock((void *)bpf_list_push_front); +} + +SEC("?tc") +int use_after_unlock_push_back(void *ctx) +{ + return 
use_after_unlock((void *)bpf_list_push_back); +} + +static __always_inline +int list_double_add(void (*op)(void *head, void *node)) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + op(&ghead, &f->node); + op(&ghead, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int double_push_front(void *ctx) +{ + return list_double_add((void *)bpf_list_push_front); +} + +SEC("?tc") +int double_push_back(void *ctx) +{ + return list_double_add((void *)bpf_list_push_back); +} + +SEC("?tc") +int no_node_value_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(struct { int data; }); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, p); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_value_type(void *ctx) +{ + struct bar *b; + + b = bpf_obj_new(typeof(*b)); + if (!b) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &b->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_var_off(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + ctx->protocol); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off1(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, (void *)&f->node + 1); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_node_off2(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(&ghead, &f->node2); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int no_head_type(void *ctx) +{ + void *p; + + p = bpf_obj_new(typeof(struct { int data; })); + if (!p) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front(p, NULL); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off1(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_var_off2(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off1(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + b = bpf_obj_new(typeof(*b)); + if (!b) { + bpf_obj_drop(f); + return 0; + } + + bpf_spin_lock(&f->lock); + bpf_list_push_front((void *)&f->head + 1, &b->node); + bpf_spin_unlock(&f->lock); + + return 0; +} + +SEC("?tc") +int incorrect_head_off2(void *ctx) +{ + struct foo *f; + struct bar *b; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + + bpf_spin_lock(&glock); + bpf_list_push_front((void *)&ghead + 1, &f->node); + bpf_spin_unlock(&glock); + + return 0; +} + +static __always_inline +int pop_ptr_off(void *(*op)(void *head)) +{ + struct { + struct bpf_list_head head __contains(foo, node2); + struct bpf_spin_lock lock; + } *p; + struct bpf_list_node *n; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 0; + bpf_spin_lock(&p->lock); + n = op(&p->head); + bpf_spin_unlock(&p->lock); + + bpf_this_cpu_ptr(n); + return 0; +} + +SEC("?tc") +int 
pop_front_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_front); +} + +SEC("?tc") +int pop_back_off(void *ctx) +{ + return pop_ptr_off((void *)bpf_list_pop_back); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index 4f2d60b87b75..02c11d16b692 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -7,6 +7,10 @@ char _license[] SEC("license") = "GPL"; +extern bool CONFIG_SECURITY_SELINUX __kconfig __weak; +extern bool CONFIG_SECURITY_SMACK __kconfig __weak; +extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak; + #ifndef AF_PACKET #define AF_PACKET 17 #endif @@ -140,6 +144,10 @@ SEC("lsm_cgroup/sk_alloc_security") int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) { called_socket_alloc++; + /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */ + if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR) + return 1; + if (family == AF_UNIX) return 0; /* EPERM */ diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c new file mode 100644 index 000000000000..94a970076b98 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +__u32 user_data, key_serial, target_pid; +__u64 flags, task_storage_val, cgroup_id; + +struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +void bpf_key_put(struct bpf_key *key) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int get_cgroup_id(void *ctx) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* simulate bpf_get_current_cgroup_id() helper */ + bpf_rcu_read_lock(); + cgroup_id = task->cgroups->dfl_cgrp->kn->id; + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_succ(void *ctx) +{ + struct task_struct *task, *real_parent; + long init_val = 2; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + bpf_rcu_read_lock(); + /* region including helper using rcu ptr real_parent */ + real_parent = task->real_parent; + ptr = bpf_task_storage_get(&map_a, real_parent, &init_val, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + goto out; + ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (!ptr) + goto out; + task_storage_val = *ptr; +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int no_lock(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* no bpf_rcu_read_lock(), old code still works */ + task = bpf_get_current_task_btf(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + 
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int two_regions(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* two regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_1(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_2(void *ctx) +{ + struct task_struct *task, *real_parent; + + bpf_rcu_read_lock(); + task = bpf_get_current_task_btf(); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int task_acquire(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + /* acquire a reference which can be used outside rcu read lock region */ + real_parent = bpf_task_acquire(real_parent); + bpf_rcu_read_unlock(); + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_task_release(real_parent); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_lock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_lock(): the second bpf_rcu_read_unlock() below has no matching lock */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int miss_unlock(void *ctx) +{ + struct task_struct *task; + struct css_set *cgroups; + struct cgroup *dfl_cgrp; + + /* missing bpf_rcu_read_unlock() */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, task, 0, 0); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +int non_sleepable_rcu_mismatch(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + /* non-sleepable: missing bpf_rcu_read_unlock() in one path */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + if (real_parent) + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int inproper_sleepable_helper(void *ctx) +{ + struct task_struct *task, *real_parent; + struct pt_regs *regs; + __u32 value = 0; + void *ptr; + + task = bpf_get_current_task_btf(); + /* sleepable helper in rcu read lock region */ + bpf_rcu_read_lock(); + real_parent = task->real_parent; + regs = (struct pt_regs *)bpf_task_pt_regs(real_parent); + if (!regs) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = (void *)PT_REGS_IP(regs); + (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0); + user_data = value; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?lsm.s/bpf") +int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct bpf_key *bkey; + + /* sleepable kfunc in rcu read lock region */ + bpf_rcu_read_lock(); + bkey = 
bpf_lookup_user_key(key_serial, flags); + bpf_rcu_read_unlock(); + if (!bkey) + return -1; + bpf_key_put(bkey); + + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_non_rcuptr(void *ctx) +{ + struct task_struct *task, *last_wakee; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + /* the pointer last_wakee marked as untrusted */ + last_wakee = task->real_parent->last_wakee; + (void)bpf_task_storage_get(&map_a, last_wakee, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int task_untrusted_rcuptr(void *ctx) +{ + struct task_struct *task, *real_parent; + + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + /* helper use of rcu ptr outside the rcu read lock region */ + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int cross_rcu_region(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* rcu ptr define/use in different regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + bpf_rcu_read_unlock(); + bpf_rcu_read_lock(); + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h new file mode 100644 index 000000000000..c0ffd171743e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#ifndef _TASK_KFUNC_COMMON_H +#define _TASK_KFUNC_COMMON_H + +#include <errno.h> +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct __tasks_kfunc_map_value { + struct task_struct __kptr_ref * task; +}; + +struct hash_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct __tasks_kfunc_map_value); + __uint(max_entries, 1); +} __tasks_kfunc_map SEC(".maps"); + +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; + +static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p) +{ + s32 pid; + long status; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return NULL; + + return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); +} + +static inline int tasks_kfunc_map_insert(struct task_struct *p) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid); + if (status) + return status; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) { + bpf_map_delete_elem(&__tasks_kfunc_map, &pid); + return -ENOENT; + } + + acquired = bpf_task_acquire(p); + old = bpf_kptr_xchg(&v->task, acquired); + if (old) { + bpf_task_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _TASK_KFUNC_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c new file mode 100644 index 000000000000..e310473190d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task) +{ + int status; + + status = tasks_kfunc_map_insert(task); + if (status) + return NULL; + + return tasks_kfunc_map_value_lookup(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_acquire() on an untrusted pointer. */ + acquired = bpf_task_acquire(v->task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags; + + /* Can't invoke bpf_task_acquire() on a random frame pointer. 
*/ + acquired = bpf_task_acquire((struct task_struct *)&stack_task); + bpf_task_release(acquired); + + return 0; +} + +SEC("kretprobe/free_task") +int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + /* Can't release a bpf_task_acquire()'d task without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */ + acquired = bpf_task_acquire(task->last_wakee); + bpf_task_release(acquired); + + return 0; +} + + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + /* Can't invoke bpf_task_acquire() on a NULL pointer. */ + acquired = bpf_task_acquire(NULL); + if (!acquired) + return 0; + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + + /* Acquired task is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */ + kptr = bpf_task_kptr_get(&task); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr, *acquired; + + acquired = bpf_task_acquire(task); + + /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */ + kptr = bpf_task_kptr_get(&acquired); + bpf_task_release(acquired); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + + /* Cannot use bpf_task_kptr_get() on a NULL pointer. */ + kptr = bpf_task_kptr_get(NULL); + if (!kptr) + return 0; + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) + return 0; + + /* Kptr retrieved from map is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) + return 0; + + /* Kptr acquired above is never released. */ + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *v; + + v = insert_lookup_task(task); + if (!v) + return 0; + + /* Can't invoke bpf_task_release() on an untrusted pointer. 
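+	 * A kptr read directly out of a map value (v->task) is untrusted
+	 * until it is promoted, e.g. via bpf_task_kptr_get() or
+	 * bpf_kptr_xchg().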
*/ + bpf_task_release(v->task); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = (struct task_struct *)&clone_flags; + + /* Cannot release random frame pointer. */ + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value local, *v; + long status; + struct task_struct *acquired, *old; + s32 pid; + + status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid); + if (status) + return 0; + + local.task = NULL; + status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST); + if (status) + return status; + + v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid); + if (!v) + return -ENOENT; + + acquired = bpf_task_acquire(task); + + old = bpf_kptr_xchg(&v->task, acquired); + + /* old cannot be passed to bpf_task_release() without a NULL check. */ + bpf_task_release(old); + bpf_task_release(old); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags) +{ + /* Cannot release trusted task pointer which was not acquired. */ + bpf_task_release(task); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(task->pid); + + /* Releasing bpf_task_from_pid() lookup without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c new file mode 100644 index 000000000000..60c7ead41cfc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int err, pid; + +/* Prototype for all of the program trace events below: + * + * TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ + +static bool is_test_kfunc_task(void) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + return pid == cur_pid; +} + +static int test_acquire_release(struct task_struct *task) +{ + struct task_struct *acquired; + + acquired = bpf_task_acquire(task); + bpf_task_release(acquired); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(task); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags) +{ + if (!is_test_kfunc_task()) + return 0; + + return test_acquire_release(bpf_get_current_task_btf()); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags) +{ + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) + err = 1; + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_kptr_xchg(&v->task, NULL); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *kptr; + struct __tasks_kfunc_map_value *v; + long status; + + if (!is_test_kfunc_task()) + return 0; + + status = tasks_kfunc_map_insert(task); + if (status) { + err = 1; + return 0; + } + + v = tasks_kfunc_map_value_lookup(task); + if (!v) { + err = 2; + return 0; + } + + kptr = bpf_task_kptr_get(&v->task); + if (!kptr) { + err = 3; + return 0; + } + + bpf_task_release(kptr); + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + current = bpf_get_current_task_btf(); + acquired = bpf_task_acquire(current); + bpf_task_release(acquired); + + return 0; +} + +static void lookup_compare_pid(const struct task_struct *p) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(p->pid); + if (!acquired) { + err = 1; + return; + } + + if (acquired->pid != p->pid) + err = 2; + bpf_task_release(acquired); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(task); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *current, *acquired; + + if (!is_test_kfunc_task()) + return 0; + + lookup_compare_pid(bpf_get_current_task_btf()); + return 0; +} + +static int is_pid_lookup_valid(s32 pid) 
+{ + struct task_struct *acquired; + + acquired = bpf_task_from_pid(pid); + if (acquired) { + bpf_task_release(acquired); + return 1; + } + + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + if (!is_test_kfunc_task()) + return 0; + + if (is_pid_lookup_valid(-1)) { + err = 1; + return 0; + } + + if (is_pid_lookup_valid(0xcafef00d)) { + err = 2; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 7e88309d3229..5bd10409285b 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -45,8 +45,8 @@ struct { #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) -SEC("tc") -int bpf_sping_lock_test(struct __sk_buff *skb) +SEC("cgroup_skb/ingress") +int bpf_spin_lock_test(struct __sk_buff *skb) { volatile int credit = 0, max_credit = 100, pkt_len = 64; struct hmap_elem zero = {}, *val; diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c new file mode 100644 index 000000000000..86cd183ef6dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +struct foo { + struct bpf_spin_lock lock; + int data; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct foo); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct array_map); +} map_of_maps SEC(".maps") = { + .values = { + [0] = &array_map, + }, +}; + +SEC(".data.A") struct bpf_spin_lock lockA; +SEC(".data.B") struct bpf_spin_lock lockB; + +SEC("?tc") +int lock_id_kptr_preserve(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_global_zero(void *ctx) +{ + bpf_this_cpu_ptr(&lockA); + return 0; +} + +SEC("?tc") +int lock_id_mapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + + f = bpf_map_lookup_elem(&array_map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +SEC("?tc") +int lock_id_innermapval_preserve(void *ctx) +{ + struct foo *f; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f = bpf_map_lookup_elem(map, &key); + if (!f) + return 0; + bpf_this_cpu_ptr(f); + return 0; +} + +#define CHECK(test, A, B) \ + SEC("?tc") \ + int lock_id_mismatch_##test(void *ctx) \ + { \ + struct foo *f1, *f2, *v, *iv; \ + int key = 0; \ + void *map; \ + \ + map = bpf_map_lookup_elem(&map_of_maps, &key); \ + if (!map) \ + return 0; \ + iv = bpf_map_lookup_elem(map, &key); \ + if (!iv) \ + return 0; \ + v = bpf_map_lookup_elem(&array_map, &key); \ + if (!v) \ + return 0; \ + f1 = bpf_obj_new(typeof(*f1)); \ + if (!f1) \ + return 0; \ + f2 = bpf_obj_new(typeof(*f2)); \ + if (!f2) { \ + bpf_obj_drop(f1); \ + return 0; \ + } \ + bpf_spin_lock(A); \ + bpf_spin_unlock(B); \ + return 0; \ + } + +CHECK(kptr_kptr, &f1->lock, &f2->lock); +CHECK(kptr_global, &f1->lock, &lockA); +CHECK(kptr_mapval, &f1->lock, &v->lock); +CHECK(kptr_innermapval, &f1->lock, 
&iv->lock); + +CHECK(global_global, &lockA, &lockB); +CHECK(global_kptr, &lockA, &f1->lock); +CHECK(global_mapval, &lockA, &v->lock); +CHECK(global_innermapval, &lockA, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_mapval_mapval(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + + f1 = bpf_map_lookup_elem(&array_map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(&array_map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(mapval_kptr, &v->lock, &f1->lock); +CHECK(mapval_global, &v->lock, &lockB); +CHECK(mapval_innermapval, &v->lock, &iv->lock); + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval1(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +SEC("?tc") +int lock_id_mismatch_innermapval_innermapval2(void *ctx) +{ + struct foo *f1, *f2; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f1 = bpf_map_lookup_elem(map, &key); + if (!f1) + return 0; + map = bpf_map_lookup_elem(&map_of_maps, &key); + if (!map) + return 0; + f2 = bpf_map_lookup_elem(map, &key); + if (!f2) + return 0; + + bpf_spin_lock(&f1->lock); + f1->data = 42; + bpf_spin_unlock(&f2->lock); + + return 0; +} + +CHECK(innermapval_kptr, &iv->lock, &f1->lock); +CHECK(innermapval_global, &iv->lock, &lockA); +CHECK(innermapval_mapval, &iv->lock, &v->lock); + +#undef CHECK + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c new file mode 100644 index 000000000000..eb78e6f03129 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/type_cast.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} enter_id SEC(".maps"); + +#define IFNAMSIZ 16 + +int ifindex, ingress_ifindex; +char name[IFNAMSIZ]; +unsigned int inum; +unsigned int meta_len, frag0_len, kskb_len, kskb2_len; + +void *bpf_cast_to_kern_ctx(void *) __ksym; +void *bpf_rdonly_cast(void *, __u32) __ksym; + +SEC("?xdp") +int md_xdp(struct xdp_md *ctx) +{ + struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx); + struct net_device *dev; + + dev = kctx->rxq->dev; + ifindex = dev->ifindex; + inum = dev->nd_net.net->ns.inum; + __builtin_memcpy(name, dev->name, IFNAMSIZ); + ingress_ifindex = ctx->ingress_ifindex; + return XDP_PASS; +} + +SEC("?tc") +int md_skb(struct __sk_buff *skb) +{ + struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb); + struct skb_shared_info *shared_info; + struct sk_buff *kskb2; + + kskb_len = kskb->len; + + /* Simulate the following kernel macro: + * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) + */ + shared_info = bpf_rdonly_cast(kskb->head + kskb->end, + bpf_core_type_id_kernel(struct skb_shared_info)); + meta_len = shared_info->meta_len; + frag0_len = shared_info->frag_list->len; + + /* kskb2 should be equal to kskb */ + kskb2 = bpf_rdonly_cast(kskb, bpf_core_type_id_kernel(struct sk_buff)); + kskb2_len = kskb2->len; + return 0; +} + +SEC("?tp_btf/sys_enter") +int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id) +{ + struct task_struct *task, *task_dup; + long *ptr; + + task = bpf_get_current_task_btf(); + task_dup = bpf_rdonly_cast(task, bpf_core_type_id_kernel(struct task_struct)); + (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0); + return 0; +} + +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int kctx_u64(void *ctx) +{ + u64 *kctx = bpf_rdonly_cast(ctx, bpf_core_type_id_kernel(u64)); + + (void)kctx; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 9fe4c9336c6f..0cfece7ff4f8 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -309,11 +309,11 @@ class MainHeaderFileExtractor(SourceFileExtractor): commands), which looks to the lists of options in other source files but has different start and end markers: - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile(f'"OPTIONS :=') pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') @@ -336,7 +336,7 @@ class ManSubstitutionsExtractor(SourceFileExtractor): Return a set containing all options, such as: - {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + {'-p', '-d', '--pretty', '--debug', '--json', '-j'} """ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') pattern = re.compile('\*\*([\w/-]+)\*\*') diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index e1a937277b54..3193915c5ee6 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -109,7 
+109,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_acquire", 3 }, { "bpf_kfunc_call_test_release", 5 }, diff --git a/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c new file mode 100644 index 000000000000..67a1c07ead34 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c @@ -0,0 +1,174 @@ +{ + /* This is equivalent to the following program: + * + * r6 = skb->sk; + * r7 = sk_fullsock(r6); + * r0 = sk_fullsock(r6); + * if (r0 == 0) return 0; (a) + * if (r0 != r7) return 0; (b) + * *r7->type; (c) + * return 0; + * + * It is safe to dereference r7 at point (c), because of (a) and (b). + * The test verifies that relation r0 == r7 is propagated from (b) to (c). + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch", + .insns = { + /* r6 = skb->sk; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* if (r0 == r7) r0 = *(r7->type); */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R7 pointer comparison", +}, +{ + /* Same as above, but verify that another branch of JNE still + * prohibits access to PTR_MAYBE_NULL. + */ + "jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch", + .insns = { + /* r6 = skb->sk */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9), + /* r7 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* r0 = sk_fullsock(skb); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + /* if (r0 == null) return 0; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + /* if (r0 == r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_0, BPF_REG_7, 1), /* Use ! JNE ! 
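+	 * The load below is reached on the JNE-true path (r0 != r7),
+	 * where equality with the known non-NULL r0 is never proven, so
+	 * r7 keeps its _or_null type and the access must be rejected.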
	 */
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	/* r0 = *(r7->type); */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+	/* return 0 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "R7 invalid mem access 'sock_or_null'",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R7 pointer comparison",
+},
+{
+	/* Same as the first test, but not-null should be inferred for the JEQ branch */
+	"jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch",
+	.insns = {
+	/* r6 = skb->sk; */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	/* if (r6 == null) return 0; */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 9),
+	/* r7 = sk_fullsock(skb); */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	/* r0 = sk_fullsock(skb); */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	/* if (r0 == null) return 0; */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+	/* if (r0 != r7) return 0; */
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	/* r0 = *(r7->type); */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+	/* return 0; */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R7 pointer comparison",
+},
+{
+	/* Same as above, but verify that the other branch of JEQ still
+	 * prohibits access to PTR_MAYBE_NULL.
+	 */
+	"jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch",
+	.insns = {
+	/* r6 = skb->sk; */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	/* if (r6 == null) return 0; */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 8),
+	/* r7 = sk_fullsock(skb); */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	/* r0 = sk_fullsock(skb); */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	/* if (r0 == null) return 0; */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	/* if (r0 != r7) r0 = *(r7->type); */
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_7, 1), /* Use ! JEQ ! */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
+	/* return 0; */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "R7 invalid mem access 'sock_or_null'",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R7 pointer comparison",
+},
+{
+	/* Maps are treated in a different branch of `mark_ptr_not_null_reg`,
+	 * so the maps case gets a separate test.
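+	 * In C terms the program below is:
+	 *
+	 *   r6 = bpf_map_lookup_elem(map, &key);
+	 *   r7 = bpf_map_lookup_elem(map, &key);
+	 *   if (r6 == NULL) return 0;
+	 *   if (r6 != r7) return 0;
+	 *   read *r7;   // safe: r7 == r6, and r6 is known non-NULL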
+ */ + "jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE", + .insns = { + /* r9 = &some stack to use as key */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_9, -8), + /* r8 = process local map */ + BPF_LD_MAP_FD(BPF_REG_8, 0), + /* r6 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r7 = map_lookup_elem(r8, r9); */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* if (r6 == 0) return 0; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2), + /* if (r6 != r7) return 0; */ + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1), + /* read *r7; */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_xdp_sock, queue_id)), + /* return 0; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index fd683a32a276..9540164712b7 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -142,7 +142,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_user_key", 2 }, { "bpf_key_put", 4 }, @@ -163,7 +163,7 @@ .kfunc = "bpf", .expected_attach_type = BPF_LSM_MAC, .flags = BPF_F_SLEEPABLE, - .errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar", + .errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket", .fixup_kfunc_btf_id = { { "bpf_lookup_system_key", 1 }, { "bpf_key_put", 3 }, diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c index b64d33e4833c..84838feba47f 100644 --- a/tools/testing/selftests/bpf/verifier/ringbuf.c +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -28,7 +28,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "dereference of modified alloc_mem ptr R1", + .errstr = "dereference of modified ringbuf_mem ptr R1", }, { "ringbuf: invalid reservation offset 2", diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index e23f07175e1b..9bb302dade23 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -84,7 +84,7 @@ }, .fixup_map_ringbuf = { 1 }, .result = REJECT, - .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", + .errstr = "R0 pointer arithmetic on ringbuf_mem_or_null prohibited", }, { "check corrupted spill/fill", diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 6b8d2e2f23c2..0f3921908b07 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -5,7 +5,9 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ bond-lladdr-target.sh \ - dev_addr_lists.sh + dev_addr_lists.sh \ + mode-1-recovery-updelay.sh \ + mode-2-recovery-updelay.sh 
TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 16c7fb858ac1..2a268b17b61f 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +NAMESPACES="" + # Test that a link aggregation device (bonding, team) removes the hardware # addresses that it adds on its underlying devices. test_LAG_cleanup() @@ -59,3 +61,107 @@ test_LAG_cleanup() log_test "$driver cleanup mode $mode" } + +# Build a generic 2 node net namespace with 2 connections +# between the namespaces +# +# +-----------+ +-----------+ +# | node1 | | node2 | +# | | | | +# | | | | +# | eth0 +-------+ eth0 | +# | | | | +# | eth1 +-------+ eth1 | +# | | | | +# +-----------+ +-----------+ +lag_setup2x2() +{ + local state=${1:-down} + local namespaces="lag_node1 lag_node2" + + # create namespaces + for n in ${namespaces}; do + ip netns add ${n} + done + + # wire up namespaces + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth0 + ip link set dev lag1-end netns lag_node2 $state name eth0 + + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth1 + ip link set dev lag1-end netns lag_node2 $state name eth1 + + NAMESPACES="${namespaces}" +} + +# cleanup all lag related namespaces and remove the bonding module +lag_cleanup() +{ + for n in ${NAMESPACES}; do + ip netns delete ${n} >/dev/null 2>&1 || true + done + modprobe -r bonding +} + +SWITCH="lag_node1" +CLIENT="lag_node2" +CLIENTIP="172.20.2.1" +SWITCHIP="172.20.2.2" + +lag_setup_network() +{ + lag_setup2x2 "down" + + # create switch + ip netns exec ${SWITCH} ip link add br0 up type bridge + ip netns exec ${SWITCH} ip link set eth0 master br0 up + ip netns exec ${SWITCH} ip link set eth1 master br0 up + ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0 +} + +lag_reset_network() +{ + ip netns exec ${CLIENT} ip link del bond0 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 up +} + +create_bond() +{ + # create client + ip netns exec ${CLIENT} ip link set eth0 down + ip netns exec ${CLIENT} ip link set eth1 down + + ip netns exec ${CLIENT} ip link add bond0 type bond $@ + ip netns exec ${CLIENT} ip link set eth0 master bond0 + ip netns exec ${CLIENT} ip link set eth1 master bond0 + ip netns exec ${CLIENT} ip link set bond0 up + ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0 +} + +test_bond_recovery() +{ + RET=0 + + create_bond $@ + + # verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + check_err $? "No connectivity" + + # force the links of the bond down + ip netns exec ${SWITCH} ip link set eth0 down + sleep 2 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 down + + # re-verify connectivity + ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + + local rc=$? 
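+	# check_err records a nonzero rc as a failure in RET; log_test below
+	# then reports pass/fail for this mode/updelay combination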
+	check_err $rc "Bond failed to recover"
+	log_test "$1 ($2) bond recovery"
+	lag_reset_network
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
new file mode 100755
index 000000000000..ad4c845a4ac7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0

+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps, if no members remain up the bond should
+# immediately select a member to bring up. (from bonding.txt
+# section 13.1 paragraph 4)
+#
+# +-------------+ +-----------+
+# | client | | switch |
+# | | | |
+# | +--------| link1 |-----+ |
+# | | +-------+ | |
+# | | | | | |
+# | | +-------+ | |
+# | | bond | link2 | Br0 | |
+# +-------------+ +-----------+
+# 172.20.2.1 172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/net_forwarding_lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+	lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 1 miimon 100 updelay 0
+test_bond_recovery mode 1 miimon 100 updelay 200
+test_bond_recovery mode 1 miimon 100 updelay 500
+test_bond_recovery mode 1 miimon 100 updelay 1000
+test_bond_recovery mode 1 miimon 100 updelay 2000
+test_bond_recovery mode 1 miimon 100 updelay 5000
+test_bond_recovery mode 1 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
new file mode 100755
index 000000000000..2330d37453f9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0

+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps, if no members remain up the bond should
+# immediately select a member to bring up. 
(from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 2 miimon 100 updelay 0 +test_bond_recovery mode 2 miimon 100 updelay 200 +test_bond_recovery mode 2 miimon 100 updelay 500 +test_bond_recovery mode 2 miimon 100 updelay 1000 +test_bond_recovery mode 2 miimon 100 updelay 2000 +test_bond_recovery mode 2 miimon 100 updelay 5000 +test_bond_recovery mode 2 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings index 867e118223cd..6091b45d226b 100644 --- a/tools/testing/selftests/drivers/net/bonding/settings +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -1 +1 @@ -timeout=60 +timeout=120 diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..05d980fb083d 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -41,6 +41,7 @@ /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/svm_nested_soft_inject_test +/x86_64/svm_nested_shutdown_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/tsc_scaling_sync diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..4a2caef2c939 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..5da0c5e2a7af 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -748,6 +748,19 @@ struct ex_regs { uint64_t rflags; }; +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..41c1c73c464d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) } } -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - 
uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..e73fcdef47bb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 70b44f0b52fe..ead5d878a71c 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -3,6 +3,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -20,10 +21,11 @@ static void l2_guest_code(void) : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); } -void l1_guest_code(struct vmx_pages *vmx) -{ #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); @@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + 
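+/* With the SHUTDOWN intercept cleared above, the triple fault injected by
+ * userspace brings down L1 itself, so on SVM main() below expects
+ * KVM_EXIT_SHUTDOWN rather than the ucall sequence used for VMX.
+ */
+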
int main(void) { struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vcpu_events events; - vm_vaddr_t vmx_pages_gva; struct ucall uc; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); run = vcpu->run; - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -78,13 +109,21 @@ int main(void) "No triple fault pending"); vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } + if (has_svm) { + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; } diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 162c41e9bcae..1562aa7d60b0 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -888,6 +888,17 @@ TEST_F(NCI, deinit) &msg); ASSERT_EQ(rc, 0); EXPECT_EQ(get_dev_enable_state(&msg), 0); + + /* Test that operations that normally send packets to the driver + * don't cause issues when the device is already closed. + * Note: the send of NFC_CMD_DEV_UP itself still succeeds it's just + * that the device won't actually be up. 
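+	 * Hence rc is still expected to be 0 below even though the virtual
+	 * device fd has already been closed.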
+ */ + close(self->virtual_nci_fd); + self->virtual_nci_fd = -1; + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ff8807cc9c2e..ee38ca888244 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +csum cmsg_sender fin_ack_lat gro diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 880e6ded6ed5..3007e98a6d64 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -74,6 +74,7 @@ TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum TEST_FILES := settings diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8ccaf8732eb2..a26cb94354f6 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -1,14 +1,51 @@ # SPDX-License-Identifier: GPL-2.0 CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o all: $(TEST_CUSTOM_PROGS) -$(OUTPUT)/%.o: %.c - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@ +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +$(TEST_CUSTOM_PROGS): $(BPFOBJ) + $(CLANG) -O2 -target bpf -c $(@:.o=.c) $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..82a1c1839da6 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. 
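 * The -[RT] flag selects whether an invocation acts as the receiver or
 * the transmitter; run one of each on the two machines.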
+ *
+ * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
+ *
+ * Rx:
+ *
+ * The sender sends packets with a known checksum field using PF_INET(6)
+ * SOCK_RAW sockets.
+ *
+ * good packet: $CMD [-t]
+ * bad packet: $CMD [-t] -E
+ *
+ * The receiver reads UDP packets with a UDP socket. This is not an
+ * option for TCP packets ('-t'). Optionally insert an iptables filter
+ * to avoid these entering the real protocol stack.
+ *
+ * The receiver also reads all packets with a PF_PACKET socket, to
+ * observe whether both good and bad packets arrive on the host. And to
+ * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
+ * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
+ *
+ * Tx:
+ *
+ * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
+ * checksum offload.
+ *
+ * The sender can send packets with a UDP socket.
+ *
+ * Optionally craft a packet that sums up to zero to verify that the
+ * device writes negative zero 0xFFFF in this case to distinguish from
+ * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
+ * by choosing a specific source port.
+ *
+ * good packet: $CMD -U
+ * zero csum: $CMD -U -Z
+ *
+ * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
+ * to cover more protocols. PF_PACKET requires passing src and dst mac
+ * addresses.
+ *
+ * good packet: $CMD -s $smac -d $dmac -p [-t]
+ *
+ * Argument '-z' sends UDP packets with a 0x0000 (checksum disabled)
+ * field, to verify that the NIC passes these packets unmodified.
+ *
+ * Argument '-e' adds a transport mode encapsulation header between
+ * network and transport header. This will fail for devices that parse
+ * headers. Should work on devices that implement protocol-agnostic tx
+ * checksum offload (NETIF_F_HW_CSUM).
+ *
+ * Argument '-r $SEED' optionally randomizes header, payload and length
+ * to increase coverage between packets sent. SEED 1 further chooses a
+ * different seed for each run (and logs this for reproducibility). It
+ * is advised to enable this for extra coverage in continuous testing. 
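+ *
+ * For reference, the checksums below use the RFC 1071 fold: the 32-bit
+ * running sum is folded into 16 bits and inverted, e.g. a sum of
+ * 0x2AB34 folds to 0xAB34 + 0x2 = 0xAB36 and is sent as ~0xAB36 = 0x54C9.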
+ */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 
32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == 
IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, 
errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. + */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && 
!cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = (void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = 
recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D <daddr> and -S <saddr>"); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { 
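/* A usage sketch, not part of the patch (binary name and addresses are
 * illustrative): passing -r 1 derives a time-based seed below and prints it,
 * so a failing randomized run can be replayed by feeding the printed seed
 * back, e.g.:
 *
 *   ./csum -4 -t -S 10.0.0.1 -D 10.0.0.2 -r 0x5eed
 */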
+ /* special case: time-based seed */ + if (cfg_random_seed == 1) + cfg_random_seed = (unsigned int)gettimeofday_ms(); + srand(cfg_random_seed); + fprintf(stderr, "randomization seed: %u\n", cfg_random_seed); + } +} + +static void do_tx(void) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + char *buf; + int fd, len, i; + + buf = build_packet(_buf, sizeof(_buf), &len); + + if (cfg_send_pfpacket) + fd = open_packet(); + else if (cfg_send_udp) + fd = open_inet(SOCK_DGRAM, 0); + else + fd = open_inet(SOCK_RAW, IPPROTO_RAW); + + for (i = 0; i < cfg_num_pkt; i++) { + if (cfg_send_pfpacket) + send_packet(fd, buf, len); + else + send_inet(fd, buf, len); + + /* randomize each packet individually to increase coverage */ + if (cfg_random_seed) { + cfg_payload_len = rand() % MAX_PAYLOAD_LEN; + buf = build_packet(_buf, sizeof(_buf), &len); + } + } + + if (close(fd)) + error(1, errno, "close tx"); +} + +static void do_rx(int fdp, int fdr) +{ + unsigned long count_udp = 0, count_pkt = 0; + long tleft, tstop; + struct pollfd pfd; + + tstop = gettimeofday_ms() + cfg_timeout_ms; + tleft = cfg_timeout_ms; + + do { + pfd.events = POLLIN; + pfd.fd = fdp; + if (poll(&pfd, 1, tleft) == -1) + error(1, errno, "poll"); + + if (pfd.revents & POLLIN) + count_pkt += recv_packet(fdp); + + if (cfg_proto == IPPROTO_UDP) + count_udp += recv_udp(fdr); + + tleft = tstop - gettimeofday_ms(); + } while (tleft > 0); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdp)) + error(1, errno, "close p"); + + if (count_pkt < cfg_num_pkt) + error(1, 0, "rx: missing packets at pf_packet: %lu < %u", + count_pkt, cfg_num_pkt); + + if (cfg_proto == IPPROTO_UDP) { + if (cfg_bad_csum && count_udp) + error(1, 0, "rx: unexpected packets at udp"); + if (!cfg_bad_csum && !count_udp) + error(1, 0, "rx: missing packets at udp"); + } +} + +int main(int argc, char *const argv[]) +{ + int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */ + + parse_args(argc, argv); + + /* open receive sockets before transmitting */ + if (cfg_do_rx) { + fdp = recv_prepare_packet(); + fdr = recv_prepare_udp(); + } + + if (cfg_do_tx) + do_tx(); + + if (cfg_do_rx) + do_rx(fdp, fdr); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 31c3b6ebd388..21ca91473c09 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4196,10 +4196,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 
2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile new file mode 100644 index 000000000000..92c1d9d080cd --- /dev/null +++ b/tools/testing/selftests/net/hsr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := hsr_ping.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config new file mode 100644 index 000000000000..22061204fb69 --- /dev/null +++ b/tools/testing/selftests/net/hsr/config @@ -0,0 +1,4 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_NETEM=m +CONFIG_HSR=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh new file mode 100755 index 000000000000..df9143538708 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +ksft_skip=4 +ipv6=true + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage $0 + exit 1 + ;; +esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" + +cleanup() +{ + local netns + for netns in "$ns1" "$ns2" "$ns3" ;do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +echo "INFO: preparing interfaces." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# +# Interfaces +ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" +ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + +# HSRv0. 
+ +# HSRv0. +ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 +ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 +ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 + +# IP for HSR +ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 +ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad +ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 +ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad +ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 +ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + +# All Links up +ip -net "$ns1" link set ns1eth1 up +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" link set hsr1 up + +ip -net "$ns2" link set ns2eth1 up +ip -net "$ns2" link set ns2eth2 up +ip -net "$ns2" link set hsr2 up + +ip -net "$ns3" link set ns3eth1 up +ip -net "$ns3" link set ns3eth2 up +ip -net "$ns3" link set hsr3 up + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + + +echo "INFO: Initial validation ping." +# Each node has to be able to reach each of the others. +do_ping "$ns1" 100.64.0.2 +do_ping "$ns2" 100.64.0.1 +do_ping "$ns3" 100.64.0.1 +stop_if_error "Initial validation failed." + +do_ping "$ns1" 100.64.0.3 +do_ping "$ns2" 100.64.0.3 +do_ping "$ns3" 100.64.0.2 + +do_ping "$ns1" dead:beef:1::2 +do_ping "$ns1" dead:beef:1::3 +do_ping "$ns2" dead:beef:1::1 +do_ping "$ns2" dead:beef:1::2 +do_ping "$ns3" dead:beef:1::1 +do_ping "$ns3" dead:beef:1::2 + +stop_if_error "Initial validation failed." + +# Wait until all supervision frames have been processed and the node +# entries have been merged. Otherwise duplicate frames will be observed, which is +# valid at this stage. +WAIT=5 +while [ ${WAIT} -gt 0 ] +do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" +done + +# Just a safety delay in case the above check didn't handle it. +sleep 1 + +echo "INFO: Longer ping test." +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" dead:beef:1::2 +do_ping_long "$ns1" 100.64.0.3 +do_ping_long "$ns1" dead:beef:1::3 + +stop_if_error "Longer ping test failed."
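The WAIT loop above polls the HSR debugfs node table until no half-populated (all-zero MAC) entries remain, i.e. until supervision frames from both ring directions have been merged for every peer; only then is the duplicate-free result that do_ping_long asserts expected. To inspect the table by hand (a sketch, not part of the patch, assuming debugfs is mounted at the path the grep above uses):

    cat /sys/kernel/debug/hsr/hsr*/node_table    # debugfs is global, so all three nodes' tables appear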
+ +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" dead:beef:1::1 +do_ping_long "$ns2" 100.64.0.3 +do_ping_long "$ns2" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" dead:beef:1::1 +do_ping_long "$ns3" 100.64.0.2 +do_ping_long "$ns3" dead:beef:1::2 +stop_if_error "Longer ping test failed." + +echo "INFO: Cutting one link." +do_ping_long "$ns1" 100.64.0.3 & + +sleep 3 +ip -net "$ns3" link set ns3eth1 down +wait + +ip -net "$ns3" link set ns3eth1 up + +stop_if_error "Failed with one link down." + +echo "INFO: Delay the link and drop a few packets." +tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms +tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + +do_ping_long "$ns1" 100.64.0.2 +do_ping_long "$ns1" 100.64.0.3 + +stop_if_error "Failed with delay and packet loss." + +do_ping_long "$ns2" 100.64.0.1 +do_ping_long "$ns2" 100.64.0.3 + +stop_if_error "Failed with delay and packet loss." + +do_ping_long "$ns3" 100.64.0.1 +do_ping_long "$ns3" 100.64.0.2 +stop_if_error "Failed with delay and packet loss." + +echo "INFO: All good." +exit $ret diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 515859a5168b..24bcd7b9bdb2 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index e54653ea2ed4..8a8266957bc5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -83,6 +83,7 @@ struct cfg_cmsg_types { struct cfg_sockopt_types { unsigned int transparent:1; + unsigned int mptfo:1; }; struct tcp_inq_state { @@ -90,6 +91,13 @@ struct tcp_inq_state { bool expect_eof; }; +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + static struct tcp_inq_state tcp_inq; static struct cfg_cmsg_types cfg_cmsg_types; @@ -232,6 +240,14 @@ static void set_transparent(int fd, int pf) } } +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + static int do_ulp_so(int sock, const char *name) { return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); @@ -300,6 +316,9 @@ static int sock_listen_mptcp(const char * const listenaddr, if (cfg_sockopt_types.transparent) set_transparent(sock, pf); + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -330,13 +349,15 @@ static int sock_listen_mptcp(const char * const listenaddr, static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto, - struct addrinfo **peer) + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -354,14 +375,34 @@ static int sock_connect_mptcp(const char * const remoteaddr, if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { - *peer = a; - break; /* success */ + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len =
winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; } - - perror("connect()"); - close(sock); - sock = -1; } freeaddrinfo(addr); @@ -571,14 +612,14 @@ static void shut_wr(int fd) shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; set_nonblock(peerfd, true); @@ -638,19 +679,19 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; /* limit the total amount of written data to the trunc value */ - if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) - wlen = cfg_truncate - total_wlen; + if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) + winfo->len = cfg_truncate - total_wlen; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { if (cfg_rcv_trunc) return 0; @@ -658,10 +699,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after return 111; } - woff += bw; - wlen -= bw; + winfo->off += bw; + winfo->len -= bw; total_wlen += bw; - } else if (wlen == 0) { + } else if (winfo->len == 0) { /* We have no more data to send. 
*/ fds.events &= ~POLLOUT; @@ -717,10 +758,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -728,7 +785,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -772,8 +833,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -790,7 +859,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -799,9 +869,9 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; @@ -815,7 +885,7 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size, bool *in_closed_after_out) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -824,9 +894,9 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; @@ -839,7 +909,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; struct timespec start, end; @@ -851,21 +921,24 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) switch (cfg_mode) { case CFG_MODE_POLL: - ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); break; case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); break; case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); + ret = copyfd_io_sendfile(infd, peerfd, 
outfd, file_size, + &in_closed_after_out, winfo); break; default: @@ -999,6 +1072,7 @@ static void maybe_close(int fd) int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; @@ -1033,7 +1107,8 @@ again: SOCK_TEST_TCPULP(remotesock, 0); - copyfd_io(fd, remotesock, 1, true); + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1130,6 +1205,11 @@ static void parse_setsock_options(const char *name) return; } + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); exit(1); } @@ -1166,11 +1246,18 @@ void xdisconnect(int fd, int addrlen) int main_loop(void) { - int fd, ret, fd_in = 0; + int fd = 0, ret, fd_in = 0; struct addrinfo *peer; + struct wstate winfo; + + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } - /* listener is ready. */ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; @@ -1186,14 +1273,13 @@ again: if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); - if (cfg_input) { + if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); if (fd < 0) xerror("can't open %s:%d", cfg_input, errno); } - /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, 0); + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) return ret; @@ -1210,6 +1296,7 @@ again: xerror("can't reconnect: %d", errno); if (cfg_input) close(fd_in); + memset(&winfo, 0, sizeof(winfo)); goto again; } else { close(fd); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 621af6895f4d..a43d3e2f59bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -274,8 +274,7 @@ check_transfer() check_mptcp_disabled() { - local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" + local disabled_ns="ns_disabled-$rndh" ip netns add ${disabled_ns} || exit $ksft_skip # net.mptcp.enabled should be enabled by default @@ -762,9 +761,25 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +run_tests_mptfo() +{ + echo "INFO: with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + echo "INFO: with MPTFO end" +} + run_tests_disconnect() { - local peekmode="$1" local old_cin=$cin local old_sin=$sin @@ -772,7 +787,6 @@ run_tests_disconnect() # force do_transfer to cope with the multiple transmissions sin="$cin.disconnect" - sin_disconnect=$old_sin cin="$cin.disconnect" cin_disconnect="$old_cin" connect_per_transfer=3 @@ -783,7 +797,6 @@ run_tests_disconnect() # restore previous status sin=$old_sin - sin_disconnect="$cout".disconnect cin=$old_cin
cin_disconnect="$cin".disconnect connect_per_transfer=1 @@ -901,6 +914,10 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# MPTFO (MultiPath TCP Fastopen tests) +run_tests_mptfo +stop_if_error "Tests with MPTFO have failed" + # connect to ns4 ip address, ns2 should intercept/proxy run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f3dd5f2a0272..d11d3d566608 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -26,6 +26,10 @@ ip_mptcp=0 check_invert=0 validate_checksum=0 init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 declare -A all_tests declare -a only_tests_ids @@ -59,8 +63,9 @@ init_partial() { capout=$(mktemp) - local rndh - rndh=$(mktemp -u XXXXXX) + local sec rndh + sec=$(date +%s) + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -153,6 +158,8 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) trap cleanup EXIT @@ -164,6 +171,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -319,6 +327,18 @@ reset_with_fail() index 100 || exit 1 } +reset_with_events() +{ + reset "${1}" || return 1 + + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! +} + fail_test() { ret=1 @@ -472,6 +492,12 @@ kill_wait() wait $1 2>/dev/null } +kill_events_pids() +{ + kill_wait $evts_ns1_pid + kill_wait $evts_ns2_pid +} + pm_nl_set_limits() { local ns=$1 @@ -672,10 +698,6 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid local userspace_pm=0 - local evts_ns1 - local evts_ns1_pid - local evts_ns2 - local evts_ns2_pid :> "$cout" :> "$sout" @@ -752,17 +774,6 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi - if [ $userspace_pm -eq 1 ]; then - evts_ns1=$(mktemp) - evts_ns2=$(mktemp) - :> "$evts_ns1" - :> "$evts_ns2" - ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & - evts_ns1_pid=$! - ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & - evts_ns2_pid=$!
- fi - local local_addr if is_v6 "${connect_addr}"; then local_addr="::" @@ -829,7 +840,8 @@ do_transfer() if [ $userspace_pm -eq 0 ]; then pm_nl_add_endpoint $ns1 $addr flags signal else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id @@ -980,12 +992,6 @@ do_transfer() kill $cappid fi - if [ $userspace_pm -eq 1 ]; then - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid - rm -rf $evts_ns1 $evts_ns2 - fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2152,7 +2158,7 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2165,7 +2171,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2178,7 +2184,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2507,6 +2513,57 @@ backup_tests() fi } +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED + +AF_INET=2 +AF_INET6=10 + +verify_listener_events() +{ + local evt=$1 + local e_type=$2 + local e_family=$3 + local e_saddr=$4 + local e_sport=$5 + local type + local family + local saddr + local sport + + if [ $e_type = $LISTENER_CREATED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ + $e_saddr $e_sport + elif [ $e_type = $LISTENER_CLOSED ]; then + stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ + $e_saddr $e_sport + fi + + type=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + if [ $family ] && [ $family = $AF_INET6 ]; then + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + else + saddr=$(grep "type:$e_type," $evt | + sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + fi + + if [ $type ] && [ $type = $e_type ] && + [ $family ] && [ $family = $e_family ] && + [ $saddr ] && [ $saddr = $e_saddr ] && + [ $sport ] && [ $sport = $e_sport ]; then + stdbuf -o0 -e0 printf "[ ok ]\n" + return 0 + fi + fail_test + stdbuf -o0 -e0 printf "[fail]\n" +} + add_addr_ports_tests() { # signal address with port @@ -2531,7 +2588,8 @@ add_addr_ports_tests() fi # single address with port, remove - if reset "remove single address with port"; then + # pm listener events + if reset_with_events "remove single address with port"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 
pm_nl_set_limits $ns2 1 1 @@ -2539,6 +2597,10 @@ add_addr_ports_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100 + kill_events_pids fi # subflow and signal with port, remove @@ -2959,22 +3021,24 @@ userspace_tests() fi # userspace pm add & remove address - if reset "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address"; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + kill_events_pids fi # userspace pm create destroy subflow - if reset "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow"; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 chk_rm_nr 0 1 + kill_events_pids fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 0879da915014..1b70c0a304ce 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -10,13 +10,19 @@ ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" -do_all_tests=1 + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns_sbox="ns_sbox-$rndh" add_mark_rules() { local ns=$1 local m=$2 + local t for t in iptables ip6tables; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -31,12 +37,8 @@ add_mark_rules() init() { - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" - - for netns in "$ns1" "$ns2";do + local netns + for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 @@ -44,6 +46,7 @@ init() ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 done + local i for i in `seq 1 4`; do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i @@ -73,7 +76,8 @@ init() cleanup() { - for netns in "$ns1" "$ns2"; do + local netns + for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done rm -f "$cin" "$cout" @@ -103,15 +107,17 @@ check_mark() local ns=$1 local af=$2 - tables=iptables + local tables=iptables if [ $af -eq 6 ];then tables=ip6tables fi + local counters values counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) values=${counters%DROP*} + local v for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 @@ -131,9 +137,9 @@ print_file_err() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 cmp "$in" "$out" > /dev/null 2>&1 if [ $? 
-ne 0 ] ;then @@ -156,18 +162,18 @@ is_v6() do_transfer() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" - port=12001 + local port=12001 :> "$cout" :> "$sout" - mptcp_connect="./mptcp_connect -r 20" + local mptcp_connect="./mptcp_connect -r 20" local local_addr if is_v6 "${connect_addr}"; then @@ -180,7 +186,7 @@ do_transfer() ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & - spid=$! + local spid=$! sleep 1 @@ -189,12 +195,12 @@ do_transfer() $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & - cpid=$! + local cpid=$! wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 @@ -229,9 +235,9 @@ do_transfer() make_file() { - name=$1 - who=$2 - size=$3 + local name=$1 + local who=$2 + local size=$3 dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" @@ -243,7 +249,7 @@ do_mptcp_sockopt_tests() { local lret=0 - ./mptcp_sockopt + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? if [ $lret -ne 0 ]; then @@ -252,7 +258,7 @@ do_mptcp_sockopt_tests() return fi - ./mptcp_sockopt -6 + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? if [ $lret -ne 0 ]; then @@ -264,9 +270,9 @@ do_mptcp_sockopt_tests() run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" local lret=0 do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} @@ -281,8 +287,8 @@ run_tests() do_tcpinq_test() { - ip netns exec "$ns1" ./mptcp_inq "$@" - lret=$? + ip netns exec "$ns_sbox" ./mptcp_inq "$@" + local lret=$? if [ $lret -ne 0 ];then ret=$lret echo "FAIL: mptcp_inq $@" 1>&2 @@ -297,9 +303,7 @@ do_tcpinq_tests() { local lret=0 - ip netns exec "$ns1" iptables -F - ip netns exec "$ns1" ip6tables -F - + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args lret=$? 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index af70c14e0bf9..9f22f7e5027d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -148,9 +149,6 @@ do_transfer() :> "$sout" :> "$capout" - local addr_port - addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - if $capture; then local capuser if [ -z $SUDO_USER ] ; then @@ -247,9 +245,10 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measured in ms, account for transfer size, affegated link speed + # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) - local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3229725b64b0..a29deb9fa024 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -11,11 +11,17 @@ ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED REMOVED=7 # MPTCP_EVENT_REMOVED SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED +LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED AF_INET=2 AF_INET6=10 -evts_pid=0 +file="" +server_evts="" +client_evts="" +server_evts_pid=0 +client_evts_pid=0 client4_pid=0 server4_pid=0 client6_pid=0 @@ -33,7 +39,7 @@ client_addr_id=${RANDOM:0:2} server_addr_id=${RANDOM:0:2} sec=$(date +%s) -rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) +rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" @@ -47,7 +53,7 @@ cleanup() { echo "cleanup" - rm -rf $file + rm -rf $file $client_evts $server_evts # Terminate the MPTCP connection and related processes if [ $client4_pid -ne 0 ]; then @@ -62,8 +68,11 @@ cleanup() if [ $server6_pid -ne 0 ]; then kill_wait $server6_pid fi - if [ $evts_pid -ne 0 ]; then - kill_wait $evts_pid + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -113,8 +122,9 @@ make_file() make_connection() { - local file - file=$(mktemp) + if [ -z "$file" ]; then + file=$(mktemp) + fi make_file "$file" "client" local is_v6=$1 @@ -132,16 +142,24 @@ make_connection() # Capture netlink events over the two network namespaces running # the MPTCP client and server - local client_evts - client_evts=$(mktemp) + if [ -z "$client_evts" ]; then + client_evts=$(mktemp) + fi :>"$client_evts" + if [ $client_evts_pid -ne 0 ]; then + kill_wait $client_evts_pid + fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & - local client_evts_pid=$! - local server_evts - server_evts=$(mktemp) + client_evts_pid=$! 
+ if [ -z "$server_evts" ]; then + server_evts=$(mktemp) + fi :>"$server_evts" + if [ $server_evts_pid -ne 0 ]; then + kill_wait $server_evts_pid + fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & - local server_evts_pid=$! + server_evts_pid=$! sleep 0.5 # Run the server @@ -159,7 +177,6 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill_wait $client_evts_pid local client_token local client_port @@ -171,11 +188,10 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill_wait $server_evts_pid - server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") - server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$server_evts") - rm -f "$client_evts" "$server_evts" "$file" + server_token=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + server_serverside=$(grep "type:1," "$server_evts" | + sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] @@ -239,13 +255,8 @@ verify_announce_event() test_announce() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # ADD_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1)) @@ -253,7 +264,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t" if [ "$type" = "" ] then @@ -264,71 +275,63 @@ test_announce() fi # ADD_ADDR from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\ + verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" # ADD_ADDR6 from the client to server machine reusing the subflow port - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" # ADD_ADDR from the client to server machine using a new port - :>"$evts" + :>"$server_evts" client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" 
$client_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ + verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill_wait $evts_pid - # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$client_evts" # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" # ADD_ADDR6 from the server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" # ADD_ADDR from the server to client machine using a new port - :>"$evts" + :>"$client_evts" server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1 stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id sleep 0.5 - verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - - kill_wait $evts_pid - rm -f "$evts" } verify_remove_event() @@ -356,14 +359,8 @@ verify_remove_event() test_remove() { - local evts - evts=$(mktemp) - # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$server_evts" # RM_ADDR using an invalid token should result in no action local invalid_token=$(( client4_token - 1 )) @@ -372,7 +369,7 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\ $client_addr_id local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -386,7 +383,7 @@ test_remove() $invalid_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\ $invalid_id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then stdbuf -o0 -e0 printf "[OK]\n" @@ -395,40 +392,35 @@ test_remove() fi # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR from the client to server machine - :>"$evts" + :>"$server_evts" client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id" + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR6 from the client to server machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\ $client_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - - kill_wait $evts_pid + verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" # Capture events on the network namespace running the client - :>"$evts" - ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! 
- sleep 0.5 + :>"$client_evts" # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ @@ -436,27 +428,24 @@ test_remove() stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\ $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR from the server to client machine - :>"$evts" + :>"$client_evts" server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id" + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR6 from the server to client machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id sleep 0.5 - verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - - kill_wait $evts_pid - rm -f "$evts" + verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } verify_subflow_events() @@ -532,13 +521,8 @@ verify_subflow_events() test_subflows() { - local evts - evts=$(mktemp) # Capture events on the network namespace running the server - :>"$evts" - ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 & - evts_pid=$! - sleep 0.5 + :>"$server_evts" # Attempt to add a listener at 10.0.2.2:<subflow-port> ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ @@ -551,25 +535,25 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ rport "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") # DESTROY_SUBFLOW from server to client machine - :>"$evts" + :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ "$client4_port" token "$server4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine @@ -583,31 +567,31 @@ test_subflows() listener_pid=$! 
 	# RM_ADDR from client to server machine
@@ -583,31 +567,31 @@ test_subflows()
 	listener_pid=$!

 	# ADD_ADDR6 from client to server machine reusing the subflow port
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
 	   $client_addr_id > /dev/null 2>&1
 	sleep 0.5

 	# CREATE_SUBFLOW6 from server to client machine
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
 	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+	verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
 	   "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
 	   "$client_addr_id" "ns1" "ns2"

 	# Delete the listener from the client ns, if one was created
 	kill_wait $listener_pid

-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")

 	# DESTROY_SUBFLOW6 from server to client machine
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
 	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
 	   "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
 	   "$client_addr_id" "ns1" "ns2"

@@ -622,44 +606,39 @@ test_subflows()
 	listener_pid=$!

 	# ADD_ADDR from client to server machine using a new port
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
 	   $client_addr_id port $new4_port > /dev/null 2>&1
 	sleep 0.5

 	# CREATE_SUBFLOW from server to client machine
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
 	   $new4_port token "$server4_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+	verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
 	   "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
 	   "$client_addr_id" "ns1" "ns2"

 	# Delete the listener from the client ns, if one was created
 	kill_wait $listener_pid

-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")

 	# DESTROY_SUBFLOW from server to client machine
-	:>"$evts"
+	:>"$server_evts"
 	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
 	   $new4_port token "$server4_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
 	   "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"

 	# RM_ADDR from client to server machine
 	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
 	   "$client4_token" > /dev/null 2>&1
-	kill_wait $evts_pid
-
 	# Capture events on the network namespace running the client
-	:>"$evts"
-	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
-	evts_pid=$!
- sleep 0.5 + :>"$client_evts" # Attempt to add a listener at 10.0.2.1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ @@ -672,24 +651,24 @@ test_subflows() sleep 0.5 # CREATE_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ $app4_port token "$client4_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine @@ -703,17 +682,17 @@ test_subflows() listener_pid=$! # ADD_ADDR6 from server to client machine reusing the subflow port - :>"$evts" + :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id > /dev/null 2>&1 sleep 0.5 # CREATE_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\ + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET6" "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23"\ "$server_addr_id" "ns2" "ns1" @@ -721,14 +700,14 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") # DESTROY_SUBFLOW6 from client to server machine - :>"$evts" + :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 sleep 0.5 - verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\ + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR6 from server to client machine @@ -742,38 +721,35 @@ test_subflows() listener_pid=$! 

 	# ADD_ADDR from server to client machine using a new port
-	:>"$evts"
+	:>"$client_evts"
 	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
 	   $server_addr_id port $new4_port > /dev/null 2>&1
 	sleep 0.5

 	# CREATE_SUBFLOW from client to server machine
-	:>"$evts"
+	:>"$client_evts"
 	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
 	   $new4_port token "$client4_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+	verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
 	   "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"

 	# Delete the listener from the server ns, if one was created
 	kill_wait $listener_pid

-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")

 	# DESTROY_SUBFLOW from client to server machine
-	:>"$evts"
+	:>"$client_evts"
 	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
 	   $new4_port token "$client4_token" > /dev/null 2>&1
 	sleep 0.5
-	verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+	verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
 	   "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"

 	# RM_ADDR from server to client machine
 	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
 	   "$server4_token" > /dev/null 2>&1
-
-	kill_wait $evts_pid
-	rm -f "$evts"
 }

 test_prio()
@@ -807,11 +783,85 @@ test_prio()
 	fi
 }

+verify_listener_events()
+{
+	local evt=$1
+	local e_type=$2
+	local e_family=$3
+	local e_saddr=$4
+	local e_sport=$5
+	local type
+	local family
+	local saddr
+	local sport
+
+	if [ $e_type = $LISTENER_CREATED ]; then
+		stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\
+			$e_saddr $e_sport
+	elif [ $e_type = $LISTENER_CLOSED ]; then
+		stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\
+			$e_saddr $e_sport
+	fi
+
+	type=$(grep "type:$e_type," $evt |
+	       sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
+	family=$(grep "type:$e_type," $evt |
+		 sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
+	sport=$(grep "type:$e_type," $evt |
+	        sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+	if [ $family ] && [ $family = $AF_INET6 ]; then
+		saddr=$(grep "type:$e_type," $evt |
+			sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+	else
+		saddr=$(grep "type:$e_type," $evt |
+			sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+	fi
+
+	if [ $type ] && [ $type = $e_type ] &&
+	   [ $family ] && [ $family = $e_family ] &&
+	   [ $saddr ] && [ $saddr = $e_saddr ] &&
+	   [ $sport ] && [ $sport = $e_sport ]; then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
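Unlike the earlier helpers, which only ever read the first record of a freshly truncated capture, verify_listener_events() must find its record in a file that may already hold other events, so every field extraction is prefixed with a grep for the expected type. A standalone sketch against a hypothetical capture (the numeric event type and the field layout are assumptions for illustration):

    #!/bin/bash
    evts=$(mktemp)
    # Hypothetical capture contents; the real file is written by "pm_nl_ctl events".
    cat > "$evts" <<'EOF'
    type:1,token:823274047,family:2,saddr4:10.0.2.2,sport:46286
    type:15,family:2,saddr4:10.0.2.2,sport:10020
    EOF

    e_type=15    # assumed LISTENER_CREATED value, for this sketch only
    # Keep only the record(s) of the expected type, then pull one field out;
    # the trailing q makes sed stop after the first matching record.
    sport=$(grep "type:$e_type," "$evts" |
            sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
    echo "$sport"    # prints 10020
    rm -f "$evts"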
+
+test_listener()
+{
+	# Capture events on the network namespace running the client
+	:>$client_evts
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\
+		$client4_port > /dev/null 2>&1 &
+	local listener_pid=$!
+
+	verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\
+		$client_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+		rport $client4_port token $server4_token > /dev/null 2>&1
+	sleep 0.5
+
+	# Delete the listener from the client ns, if one was created
+	kill_wait $listener_pid
+
+	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
+}
+
 make_connection
 make_connection "v6"
 test_announce
 test_remove
 test_subflows
 test_prio
+test_listener

 exit 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 736e358dc549..dfe3d287f01d 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -686,10 +686,12 @@ setup_xfrm() {
 }

 setup_nettest_xfrm() {
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		echo "'nettest' command not found; skipping tests"
-		return 1
+	if ! which nettest >/dev/null; then
+		PATH=$PWD:$PATH
+		if ! which nettest >/dev/null; then
+			echo "'nettest' command not found; skipping tests"
+			return 1
+		fi
 	fi

 	[ ${1} -eq 6 ] && proto="-6" || proto=""
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 6a443ca3cd3a..0c743752669a 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -5,6 +5,8 @@

 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"

+BPF_FILE="../bpf/xdp_dummy.bpf.o"
+
 # set global exit status, but never reset nonzero one.
 check_err()
 {
@@ -34,7 +36,7 @@ cfg_veth() {
 	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
 	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
 	ip -netns "${PEER_NS}" link set dev veth1 up
-	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 }

 run_one() {
@@ -195,8 +197,8 @@ run_all() {
 	return $ret
 }

-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Build bpf selftest first"
 	exit -1
 fi
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 8a1109a545db..894972877e8b 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -5,6 +5,8 @@

 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"

+BPF_FILE="../bpf/xdp_dummy.bpf.o"
+
 cleanup() {
 	local -r jobs="$(jobs -p)"
 	local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -34,7 +36,7 @@ run_one() {
 	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
 	ip -netns "${PEER_NS}" link set dev veth1 up

-	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
@@ -80,8 +82,8 @@ run_all() {
 	run_udp "${ipv6_args}"
 }

-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Build bpf selftest first"
 	exit -1
 fi
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 7fe85ba51075..c9c4b9d65839 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -5,6 +5,8 @@

 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"

+BPF_FILE="../bpf/xdp_dummy.bpf.o"
+
 cleanup() {
 	local -r jobs="$(jobs -p)"
 	local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -36,7 +38,7 @@ run_one() {

 	ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on

-	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
 	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action
 	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
@@ -81,8 +83,8 @@ run_all() {
 	run_udp "${ipv6_args}"
 }

-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Build bpf selftest first"
 	exit -1
 fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 1bcd82e1f662..c079565add39 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0

+BPF_FILE="../bpf/xdp_dummy.bpf.o"
 readonly BASE="ns-$(mktemp -u XXXXXX)"
 readonly SRC=2
 readonly DST=1
@@ -46,7 +47,7 @@ create_ns() {
 		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
 		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
 	done
-	ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
+	ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
 }

 create_vxlan_endpoint() {
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 430895d1a2b6..2d073595c620 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0

+BPF_FILE="../bpf/xdp_dummy.bpf.o"
 readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
 readonly BASE=`basename $STATS`
 readonly SRC=2
@@ -216,8 +217,8 @@ while getopts "hs:" option; do
 	esac
 done

-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Build bpf selftest first"
 	exit 1
 fi

@@ -288,14 +289,14 @@ if [ $CPUS -gt 1 ]; then
 	ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
 	ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
 	printf "%-60s" "bad setting: XDP with RX nr less than TX"
-	ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+	ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \
 		section xdp 2>/dev/null &&\
 		echo "fail - set operation successful ?!?" || echo " ok "
|| echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ @@ -311,7 +312,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25d7872b29c1..fab4d3790578 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) goto out_err_no_arch_destroy_vm; } - kvm->max_halt_poll_ns = halt_poll_ns; - r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -3377,9 +3375,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > vcpu->kvm->max_halt_poll_ns) - val = vcpu->kvm->max_halt_poll_ns; - vcpu->halt_poll_ns = val; out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); @@ -3483,6 +3478,24 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, } } +static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + if (kvm->override_halt_poll_ns) { + /* + * Ensure kvm->max_halt_poll_ns is not read before + * kvm->override_halt_poll_ns. + * + * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. + */ + smp_rmb(); + return READ_ONCE(kvm->max_halt_poll_ns); + } + + return READ_ONCE(halt_poll_ns); +} + /* * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt * polling is enabled, busy wait for a short time before blocking to avoid the @@ -3491,12 +3504,18 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, */ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { + unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); - bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; ktime_t start, cur, poll_end; bool waited = false; + bool do_halt_poll; u64 halt_ns; + if (vcpu->halt_poll_ns > max_halt_poll_ns) + vcpu->halt_poll_ns = max_halt_poll_ns; + + do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; + start = cur = poll_end = ktime_get(); if (do_halt_poll) { ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); @@ -3535,18 +3554,21 @@ out: update_halt_poll_stats(vcpu, start, poll_end, !waited); if (halt_poll_allowed) { + /* Recompute the max halt poll time in case it changed. 
+		max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
+
 		if (!vcpu_valid_wakeup(vcpu)) {
 			shrink_halt_poll_ns(vcpu);
-		} else if (vcpu->kvm->max_halt_poll_ns) {
+		} else if (max_halt_poll_ns) {
 			if (halt_ns <= vcpu->halt_poll_ns)
 				;
 			/* we had a long block, shrink polling */
 			else if (vcpu->halt_poll_ns &&
-				 halt_ns > vcpu->kvm->max_halt_poll_ns)
+				 halt_ns > max_halt_poll_ns)
 				shrink_halt_poll_ns(vcpu);
 			/* we had a short halt and our poll time is too small */
-			else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
-				 halt_ns < vcpu->kvm->max_halt_poll_ns)
+			else if (vcpu->halt_poll_ns < max_halt_poll_ns &&
+				 halt_ns < max_halt_poll_ns)
 				grow_halt_poll_ns(vcpu);
 		} else {
 			vcpu->halt_poll_ns = 0;
@@ -4581,6 +4603,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 			return -EINVAL;

 		kvm->max_halt_poll_ns = cap->args[0];
+
+		/*
+		 * Ensure kvm->override_halt_poll_ns does not become visible
+		 * before kvm->max_halt_poll_ns.
+		 *
+		 * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns().
+		 */
+		smp_wmb();
+		kvm->override_halt_poll_ns = true;
+
 		return 0;
 	}
 	case KVM_CAP_DIRTY_LOG_RING:
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 346e47f15572..7c248193ca26 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -297,7 +297,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 	if (!gpc->valid || old_uhva != gpc->uhva) {
 		ret = hva_to_pfn_retry(kvm, gpc);
 	} else {
-		/* If the HVA→PFN mapping was already valid, don't unmap it. */
+		/*
+		 * If the HVA→PFN mapping was already valid, don't unmap it.
+		 * But do update gpc->khva because the offset within the page
+		 * may have changed.
+		 */
+		gpc->khva = old_khva + page_offset;
 		old_pfn = KVM_PFN_ERR_FAULT;
 		old_khva = NULL;
 		ret = 0;