359 files changed, 11559 insertions, 4736 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 2090bfc69aa5..c941b214e0b7 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -320,7 +320,7 @@ waiting for a network device refcount to drop to 0 during device unregistration. A lower value may be useful during bisection to detect a leaked reference faster. A larger value may be useful to prevent false warnings on slow/loaded systems. -Default value is 10, minimum 0, maximum 3600. +Default value is 10, minimum 1, maximum 3600. optmem_max ---------- diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt index 2cd452419ed0..b8e4894bc634 100644 --- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -42,11 +42,23 @@ Optional properties: support both 1000BaseX and SGMII modes. If set, the phy-mode should be set to match the mode selected on core reset (i.e. by the basex_or_sgmii core input line). -- clocks : AXI bus clock for the device. Refer to common clock bindings. - Used to calculate MDIO clock divisor. If not specified, it is - auto-detected from the CPU clock (but only on platforms where - this is possible). New device trees should specify this - the - auto detection is only for backward compatibility. +- clock-names: Tuple listing input clock names. Possible clocks: + s_axi_lite_clk: Clock for AXI register slave interface + axis_clk: AXI4-Stream clock for TXD RXD TXC and RXS interfaces + ref_clk: Ethernet reference clock, used by signal delay + primitives and transceivers + mgt_clk: MGT reference clock (used by optional internal + PCS/PMA PHY) + + Note that if s_axi_lite_clk is not specified by name, the + first clock of any name is used for this. If that is also not + specified, the clock rate is auto-detected from the CPU clock + (but only on platforms where this is possible). New device + trees should specify all applicable clocks by name - the + fallbacks to an unnamed clock or to CPU clock are only for + backward compatibility. +- clocks: Phandles to input clocks matching clock-names. Refer to common + clock bindings. - axistream-connected: Reference to another node which contains the resources for the AXI DMA controller used by this device. 
If this is specified, the DMA-related resources from that @@ -62,7 +74,8 @@ Example: device_type = "network"; interrupt-parent = <&microblaze_0_axi_intc>; interrupts = <2 0 1>; - clocks = <&axi_clk>; + clock-names = "s_axi_lite_clk", "axis_clk", "ref_clk", "mgt_clk"; + clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>; phy-mode = "mii"; reg = <0x40c00000 0x40000 0x50c00000 0x40000>; xlnx,rxcsum = <0x2>; diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 05073482db05..4bdb4298f178 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -208,6 +208,8 @@ Userspace to kernel: ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info + ``ETHTOOL_MSG_FEC_GET`` get FEC settings + ``ETHTOOL_MSG_FEC_SET`` set FEC settings ===================================== ================================ Kernel to userspace: @@ -242,6 +244,8 @@ Kernel to userspace: ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info + ``ETHTOOL_MSG_FEC_GET_REPLY`` FEC settings + ``ETHTOOL_MSG_FEC_NTF`` FEC settings ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1280,6 +1284,60 @@ Kernel response contents: For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that the table contains static entries, hard-coded by the NIC. +FEC_GET +======= + +Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ``ETHTOOL_A_FEC_ACTIVE`` u32 index of active FEC mode + ===================================== ====== ========================== + +``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently +active on the interface. This attribute may not be present if the device does +not support FEC. + +``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when +autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero, the driver will +select the FEC mode automatically based on the parameters of the SFP module. +This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface. +``ETHTOOL_A_FEC_MODES`` carries the current FEC configuration using link mode +bits (rather than the old ``ETHTOOL_FEC_*`` bits). + +FEC_SET +======= + +Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_FEC_HEADER`` nested request header + ``ETHTOOL_A_FEC_MODES`` bitset configured modes + ``ETHTOOL_A_FEC_AUTO`` bool FEC mode auto selection + ===================================== ====== ========================== + +``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise +FEC mode is selected as part of autonegotiation.
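As a concrete illustration of the request layout, here is a minimal userspace sketch that sends ``ETHTOOL_MSG_FEC_SET`` with ``ETHTOOL_A_FEC_AUTO`` enabled, using libmnl. This is an illustrative assumption rather than part of the patch set: the ``ethtool`` generic netlink family id is taken as already resolved (e.g. via ``CTRL_CMD_GETFAMILY``), and error handling as well as the ``ETHTOOL_A_FEC_MODES`` bitset encoding are omitted::

    /* Hypothetical helper, for illustration only. */
    #include <stdint.h>
    #include <libmnl/libmnl.h>
    #include <linux/genetlink.h>
    #include <linux/ethtool_netlink.h>

    static ssize_t fec_set_auto(struct mnl_socket *nl, uint16_t family,
                                const char *ifname)
    {
            char buf[MNL_SOCKET_BUFFER_SIZE];
            struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
            struct genlmsghdr *genl;
            struct nlattr *hdr;

            nlh->nlmsg_type = family;       /* resolved "ethtool" genl family */
            nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

            genl = mnl_nlmsg_put_extra_header(nlh, sizeof(*genl));
            genl->cmd = ETHTOOL_MSG_FEC_SET;
            genl->version = ETHTOOL_GENL_VERSION;

            /* ETHTOOL_A_FEC_HEADER: nested request header naming the device */
            hdr = mnl_attr_nest_start(nlh, ETHTOOL_A_FEC_HEADER);
            mnl_attr_put_strz(nlh, ETHTOOL_A_HEADER_DEV_NAME, ifname);
            mnl_attr_nest_end(nlh, hdr);

            /* bool attribute: ask the driver to pick FEC from SFP parameters */
            mnl_attr_put_u8(nlh, ETHTOOL_A_FEC_AUTO, 1);

            return mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
    }

On success the kernel acknowledges the request and, per the message tables above, may also broadcast an ``ETHTOOL_MSG_FEC_NTF`` notification carrying the updated settings.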
+ +``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. It's recommended +to set only one bit; if multiple bits are set, the driver may choose between them +in an implementation-specific way. + +``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP +module parameters. This does not mean autonegotiation. + Request translation =================== @@ -1373,8 +1431,8 @@ are netlink only. ``ETHTOOL_MSG_LINKMODES_SET`` ``ETHTOOL_PHY_GTUNABLE`` n/a ``ETHTOOL_PHY_STUNABLE`` n/a - ``ETHTOOL_GFECPARAM`` n/a - ``ETHTOOL_SFECPARAM`` n/a + ``ETHTOOL_GFECPARAM`` ``ETHTOOL_MSG_FEC_GET`` + ``ETHTOOL_SFECPARAM`` ``ETHTOOL_MSG_FEC_SET`` n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index b8a29997d433..e9ce55992aa9 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -76,6 +76,7 @@ Contents: netdevices netfilter-sysctl netif-msg + nexthop-group-resilient nf_conntrack-sysctl nf_flowtable openvswitch diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7952ac5bd2f..9701906f63f9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1073,7 +1073,9 @@ ip_local_reserved_ports - list of comma separated ranges although this is redundant. However such a setting is useful if later the port range is changed to a value that will - include the reserved ports. + include the reserved ports. Also keep in mind that overlapping + of these ranges may affect the probability of selecting ephemeral + ports which are right after the block of reserved ports. Default: Empty @@ -1143,6 +1145,12 @@ icmp_echo_ignore_all - BOOLEAN Default: 0 +icmp_echo_enable_probe - BOOLEAN + If set to one, then the kernel will respond to RFC 8335 PROBE + requests sent to it. + + Default: 0 + icmp_echo_ignore_broadcasts - BOOLEAN If set non-zero, then the kernel will ignore all ICMP ECHO and TIMESTAMP requests sent to it via broadcast/multicast. diff --git a/Documentation/networking/nexthop-group-resilient.rst b/Documentation/networking/nexthop-group-resilient.rst new file mode 100644 index 000000000000..fabecee24d85 --- /dev/null +++ b/Documentation/networking/nexthop-group-resilient.rst @@ -0,0 +1,293 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +Resilient Next-hop Groups +========================= + +Resilient groups are a type of next-hop group that is aimed at minimizing +disruption in flow routing across changes to the group composition and +weights of constituent next hops. + +The idea behind resilient hashing groups is best explained in contrast to +the legacy multipath next-hop group, which uses the hash-threshold +algorithm, described in RFC 2992. + +To select a next hop, the hash-threshold algorithm first assigns a range of +hashes to each next hop in the group, and then selects the next hop by +comparing the SKB hash with the individual ranges. When a next hop is +removed from the group, the ranges are recomputed, which leads to +reassignment of parts of the hash space from one next hop to another. RFC 2992 +illustrates it thus:: + + +-------+-------+-------+-------+-------+ + | 1 | 2 | 3 | 4 | 5 | + +-------+-+-----+---+---+-----+-+-------+ + | 1 | 2 | 4 | 5 | + +---------+---------+---------+---------+ + + Before and after deletion of next hop 3 + under the hash-threshold algorithm.
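As an aside, the two selection schemes can be sketched in a few lines of illustrative, non-kernel C; the bucket-table scheme is described in detail below, and the 16-bit hash space and all names here are assumptions made for the example::

    #include <stdint.h>

    #define HASH_SPACE 65536u       /* illustrative 16-bit hash space */

    /* RFC 2992 hash-threshold: next hop k owns the hash range
     * [k * HASH_SPACE / n, (k + 1) * HASH_SPACE / n). Removing a next
     * hop shifts every boundary, so parts of the space move between
     * the remaining next hops.
     */
    static unsigned int hash_threshold_select(uint32_t hash, unsigned int n)
    {
            return (hash % HASH_SPACE) * n / HASH_SPACE;
    }

    /* Resilient: one extra indirection through a bucket table. Deleting
     * a next hop only rewrites the table entries that pointed at it;
     * every other flow keeps its bucket and therefore its next hop.
     */
    static unsigned int resilient_select(uint32_t hash,
                                         const unsigned int *bucket_to_nh,
                                         unsigned int num_buckets)
    {
            return bucket_to_nh[hash % num_buckets];
    }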
+ +Note in the figure above how next hop 2 gave up part of the hash space in favor of next hop 1, +and 4 in favor of 5. While there will usually be some overlap between the +previous and the new distribution, some traffic flows change the next hop +that they resolve to. + +If a multipath group is used for load-balancing between multiple servers, +this hash space reassignment causes an issue: packets from a single +flow suddenly end up arriving at a server that does not expect them. This +can result in TCP connections being reset. + +If a multipath group is used for load-balancing among available paths to +the same server, the issue is that different latencies and reordering along +the way cause the packets to arrive in the wrong order, resulting in +degraded application performance. + +To mitigate the above-mentioned flow redirection, resilient next-hop groups +insert another layer of indirection between the hash space and its +constituent next hops: a hash table. The selection algorithm uses the SKB hash +to choose a hash table bucket, then reads the next hop that this bucket +contains, and forwards traffic there. + +This indirection brings an important feature. In the hash-threshold +algorithm, the range of hashes associated with a next hop must be +continuous. With a hash table, the mapping between the hash table buckets and +the individual next hops is arbitrary. Therefore, when a next hop is deleted, +the buckets that held it are simply reassigned to other next hops:: + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |1|1|1|1|2|2|2|2|3|3|3|3|4|4|4|4|5|5|5|5| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + v v v v + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |1|1|1|1|2|2|2|2|1|2|4|5|4|4|4|4|5|5|5|5| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Before and after deletion of next hop 3 + under the resilient hashing algorithm. + +When weights of next hops in a group are altered, it may be possible to +choose a subset of buckets that are currently not used for forwarding +traffic, and use those to satisfy the new next-hop distribution demands, +keeping the "busy" buckets intact. This way, established flows ideally +keep being forwarded to the same endpoints through the same paths as before +the next-hop group change. + +Algorithm +--------- + +In a nutshell, the algorithm works as follows. Each next hop deserves a +certain number of buckets, according to its weight and the number of +buckets in the hash table. In accordance with the source code, we will call +this number a "wants count" of a next hop. In case of an event that might +cause a bucket allocation change, the wants counts for individual next hops +are updated. + +Next hops that have fewer buckets than their wants count are called +"underweight". Those that have more are "overweight". If there are no +overweight (and therefore no underweight) next hops in the group, it is +said to be "balanced". + +Each bucket maintains a last-used timer. Every time a packet is forwarded +through a bucket, this timer is updated to the current jiffies value. One +attribute of a resilient group is then the "idle timer", which is the +amount of time that a bucket must not be hit by traffic in order for it to +be considered "idle". Buckets that are not idle are busy. + +After assigning wants counts to next hops, an "upkeep" algorithm runs.
For + buckets: + +1) that have no assigned next hop, or +2) whose next hop has been removed, or +3) that are idle and their next hop is overweight, + +upkeep changes the next hop that the bucket references to one of the +underweight next hops. If, after considering all buckets in this manner, +there are still underweight next hops, another upkeep run is scheduled for a +future time. + +There may not be enough "idle" buckets to satisfy the updated wants counts +of all next hops. Another attribute of a resilient group is the "unbalanced +timer". This timer can be set to 0, in which case the table will stay out +of balance until idle buckets do appear, which may be never. If set to a +non-zero value, the value represents the period of time that the table is +permitted to stay out of balance. + +With this in mind, we update the above list of conditions with one more +item. Thus buckets: + +4) whose next hop is overweight, and the amount of time that the table has + been out of balance exceeds the unbalanced timer, if that is non-zero, + +\... are migrated as well. + +Offloading & Driver Feedback +---------------------------- + +When offloading resilient groups, the algorithm that distributes buckets +among next hops is still the one in SW. Drivers are notified of updates to +next hop groups in the following three ways: + +- Full group notification with the type + ``NH_NOTIFIER_INFO_TYPE_RES_TABLE``. This is used just after the group is + created and buckets populated for the first time. + +- Single-bucket notifications of the type + ``NH_NOTIFIER_INFO_TYPE_RES_BUCKET``, which is used for notifications of + individual migrations within an already-established group. + +- Pre-replace notification, ``NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE``. This + is sent before the group is replaced, and is a way for the driver to veto + the group before committing anything to the HW. + +Some single-bucket notifications are forced, as indicated by the "force" +flag in the notification. Those are used for cases where, e.g., the next +hop associated with the bucket was removed, and the bucket really must be +migrated. + +Non-forced notifications can be overridden by the driver by returning an +error code. The use case for this is that the driver notifies the HW that a +bucket should be migrated, but the HW discovers that the bucket has in fact +been hit by traffic. + +A second way for the HW to report that a bucket is busy is through the +``nexthop_res_grp_activity_update()`` API. The buckets identified this way +as busy are treated as if traffic hit them. + +Offloaded buckets should be flagged as either "offload" or "trap". This is +done through the ``nexthop_bucket_set_hw_flags()`` API. + +Netlink UAPI +------------ + +Resilient Group Replacement +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Resilient groups are configured using the ``RTM_NEWNEXTHOP`` message in the +same manner as other multipath groups. The following changes apply to the +attributes passed in the netlink message: + + =================== ========================================================= + ``NHA_GROUP_TYPE`` Should be ``NEXTHOP_GRP_TYPE_RES`` for a resilient group. + ``NHA_RES_GROUP`` A nest that contains attributes specific to resilient + groups. + =================== ========================================================= + +``NHA_RES_GROUP`` payload: + + =================================== ========================================= + ``NHA_RES_GROUP_BUCKETS`` Number of buckets in the hash table.
+ ``NHA_RES_GROUP_IDLE_TIMER`` Idle timer in units of clock_t. + ``NHA_RES_GROUP_UNBALANCED_TIMER`` Unbalanced timer in units of clock_t. + =================================== ========================================= + +Next Hop Get +^^^^^^^^^^^^ + +Requests to get resilient next-hop groups use the ``RTM_GETNEXTHOP`` +message in exactly the same way as other next hop get requests. The +response attributes match the replacement attributes cited above, except the +``NHA_RES_GROUP`` payload will include the following attribute: + + =================================== ========================================= + ``NHA_RES_GROUP_UNBALANCED_TIME`` How long the resilient group has been out + of balance, in units of clock_t. + =================================== ========================================= + +Bucket Get +^^^^^^^^^^ + +The message ``RTM_GETNEXTHOPBUCKET`` without the ``NLM_F_DUMP`` flag is +used to request a single bucket. The attributes recognized in get requests +are: + + =================== ========================================================= + ``NHA_ID`` ID of the next-hop group that the bucket belongs to. + ``NHA_RES_BUCKET`` A nest that contains attributes specific to the bucket. + =================== ========================================================= + +``NHA_RES_BUCKET`` payload: + + ======================== ==================================================== + ``NHA_RES_BUCKET_INDEX`` Index of bucket in the resilient table. + ======================== ==================================================== + +Bucket Dumps +^^^^^^^^^^^^ + +The message ``RTM_GETNEXTHOPBUCKET`` with the ``NLM_F_DUMP`` flag is used +to request a dump of matching buckets. The attributes recognized in dump +requests are: + + =================== ========================================================= + ``NHA_ID`` If specified, limits the dump to just the next-hop group + with this ID. + ``NHA_OIF`` If specified, limits the dump to buckets that contain + next hops that use the device with this ifindex. + ``NHA_MASTER`` If specified, limits the dump to buckets that contain + next hops that use a device in the VRF with this ifindex. + ``NHA_RES_BUCKET`` A nest that contains attributes specific to the bucket. + =================== ========================================================= + +``NHA_RES_BUCKET`` payload: + + ======================== ==================================================== + ``NHA_RES_BUCKET_NH_ID`` If specified, limits the dump to just the buckets + that contain the next hop with this ID. + ======================== ==================================================== + +Usage +----- + +To illustrate the usage, consider the following commands:: + + # ip nexthop add id 1 via 192.0.2.2 dev eth0 + # ip nexthop add id 2 via 192.0.2.3 dev eth0 + # ip nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 60 unbalanced_timer 300 + +The last command creates a resilient next-hop group. It will have 8 buckets +(which is an unusually low number, used here for demonstration purposes +only), each bucket will be considered idle when no traffic hits it for at +least 60 seconds, and if the table remains out of balance for 300 seconds, +it will be forcefully brought into balance.
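For intuition about the wants counts that drive bucket allocation, here is a sketch of a simple proportional allocation; this is an assumption made for illustration, and the kernel's exact rounding rules may differ. With 8 buckets and the equal weights used above, each next hop wants 4 buckets; after the 3:1 reweighting shown next, next hop 1 wants 6 and next hop 2 wants 2::

    /* Illustrative wants-count computation, not the kernel's code. */
    static void compute_wants(const unsigned int *weights, unsigned int n,
                              unsigned int num_buckets, unsigned int *wants)
    {
            unsigned int total = 0, i;

            for (i = 0; i < n; i++)
                    total += weights[i];

            for (i = 0; i < n; i++)         /* proportional share per hop */
                    wants[i] = num_buckets * weights[i] / total;
    }

    /* weights {1, 1}, 8 buckets -> wants {4, 4}
     * weights {3, 1}, 8 buckets -> wants {6, 2}
     */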
+ +Changing next-hop weights leads to a change in bucket allocation:: + + # ip nexthop replace id 10 group 1,3/2 type resilient + +This can be confirmed by looking at individual buckets:: + + # ip nexthop bucket show id 10 + id 10 index 0 idle_time 5.59 nhid 1 + id 10 index 1 idle_time 5.59 nhid 1 + id 10 index 2 idle_time 8.74 nhid 2 + id 10 index 3 idle_time 8.74 nhid 2 + id 10 index 4 idle_time 8.74 nhid 1 + id 10 index 5 idle_time 8.74 nhid 1 + id 10 index 6 idle_time 8.74 nhid 1 + id 10 index 7 idle_time 8.74 nhid 1 + +Note the two buckets that have a shorter idle time. Those are the ones that +were migrated after the next-hop replace command to satisfy the new demand +that next hop 1 be given 6 buckets instead of 4. + +Netdevsim +--------- + +The netdevsim driver implements a mock offload of resilient groups, and +exposes a debugfs interface that allows marking individual buckets as busy. +For example, the following will mark bucket 23 in next-hop group 10 as +active:: + + # echo 10 23 > /sys/kernel/debug/netdevsim/netdevsim10/fib/nexthop_bucket_activity + +In addition, another debugfs interface can be used to make the +next attempt to migrate a bucket fail:: + + # echo 1 > /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_nexthop_bucket_replace + +Besides serving as an example, the interfaces that netdevsim exposes are +useful in automated testing, and +``tools/testing/selftests/drivers/net/netdevsim/nexthop.sh`` makes use of +them to test the algorithm. diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst index d87f253b9d39..d757c21c10f2 100644 --- a/Documentation/networking/nf_flowtable.rst +++ b/Documentation/networking/nf_flowtable.rst @@ -112,6 +112,7 @@ You can identify offloaded flows through the [OFFLOAD] tag when listing your connection tracking table. :: + # conntrack -L tcp 6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2 @@ -138,6 +139,7 @@ allows the flowtable to define a fastpath bypass between the bridge ports device (represented as eth0) in your switch/router. :: + fastpath bypass .-------------------------. / \ @@ -168,12 +170,12 @@ connection tracking entry by specifying the counter statement in your flowtable definition, e.g. :: + table inet x { flowtable f { hook ingress priority 0; devices = { eth0, eth1 }; counter } - ... } Counter support is available since Linux kernel 5.7. @@ -185,12 +187,12 @@ If your network device provides hardware offload support, you can turn it on by means of the 'offload' flag in your flowtable definition, e.g. :: + table inet x { flowtable f { hook ingress priority 0; devices = { eth0, eth1 }; flags offload; } - ... } There is a workqueue that adds the flows to the hardware.
Note that a few diff --git a/MAINTAINERS b/MAINTAINERS index 11177892c373..217c7470bfa9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10910,8 +10910,7 @@ T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* MCAN MMIO DEVICE DRIVER -M: Dan Murphy <[email protected]> -M: Pankaj Sharma <[email protected]> +M: Chandrasekar Ramakrishnan <[email protected]> S: Maintained F: Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -17983,13 +17982,6 @@ L: [email protected] (moderated for non-subscribers) S: Odd Fixes F: sound/soc/codecs/tas571x* -TI TCAN4X5X DEVICE DRIVER -M: Dan Murphy <[email protected]> -S: Maintained -F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt -F: drivers/net/can/m_can/tcan4x5x* - TI TRF7970A NFC DRIVER M: Mark Greer <[email protected]> diff --git a/arch/mips/sgi-ip27/TODO b/arch/mips/sgi-ip27/TODO deleted file mode 100644 index 160857ff1483..000000000000 --- a/arch/mips/sgi-ip27/TODO +++ /dev/null @@ -1,19 +0,0 @@ -1. Need to figure out why PCI writes to the IOC3 hang, and if it is okay -not to write to the IOC3 ever. -2. Need to figure out RRB allocation in bridge_startup(). -3. Need to figure out why address swaizzling is needed in inw/outw for -Qlogic scsi controllers. -4. Need to integrate ip27-klconfig.c:find_lboard and -ip27-init.c:find_lbaord_real. DONE -5. Is it okay to set calias space on all nodes as 0, instead of 8k as -in irix? -6. Investigate why things do not work without the setup_test() call -being invoked on all nodes in ip27-memory.c. -8. Too many do_page_faults invoked - investigate. -9. start_thread must turn off UX64 ... and define tlb_refill_debug. -10. Need a bad pmd table, bad pte table. __bad_pmd_table/__bad_pagetable -does not agree with pgd_bad/pmd_bad. -11. All intrs (ip27_do_irq handlers) are targeted at cpu A on the node. -This might need to change later. Only the timer intr is set up to be -received on both Cpu A and B. (ip27_do_irq()/bridge_startup()) -13. Cache flushing (specially the SMP version) has to be investigated. 
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 14092152b786..4f7eaa17fb27 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -173,13 +173,13 @@ #define MAX_FRAGS (32 * MAX_CARDS) static LIST_HEAD(HFClist); -static spinlock_t HFClock; /* global hfc list lock */ +static DEFINE_SPINLOCK(HFClock); /* global hfc list lock */ static void ph_state_change(struct dchannel *); static struct hfc_multi *syncmaster; static int plxsd_master; /* if we have a master card (yet) */ -static spinlock_t plx_lock; /* may not acquire other lock inside */ +static DEFINE_SPINLOCK(plx_lock); /* may not acquire other lock inside */ #define TYP_E1 1 #define TYP_4S 4 @@ -5480,9 +5480,6 @@ HFCmulti_init(void) printk(KERN_DEBUG "%s: IRQ_DEBUG IS ENABLED!\n", __func__); #endif - spin_lock_init(&HFClock); - spin_lock_init(&plx_lock); - if (debug & DEBUG_HFCMULTI_INIT) printk(KERN_DEBUG "%s: init entered\n", __func__); diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 4946ea14bf74..386084530c2f 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -176,9 +176,9 @@ MODULE_LICENSE("GPL"); /*int spinnest = 0;*/ -spinlock_t dsp_lock; /* global dsp lock */ -struct list_head dsp_ilist; -struct list_head conf_ilist; +DEFINE_SPINLOCK(dsp_lock); /* global dsp lock */ +LIST_HEAD(dsp_ilist); +LIST_HEAD(conf_ilist); int dsp_debug; int dsp_options; int dsp_poll, dsp_tics; @@ -1169,10 +1169,6 @@ static int __init dsp_init(void) printk(KERN_INFO "mISDN_dsp: DSP clocks every %d samples. This equals " "%d jiffies.\n", dsp_poll, dsp_tics); - spin_lock_init(&dsp_lock); - INIT_LIST_HEAD(&dsp_ilist); - INIT_LIST_HEAD(&conf_ilist); - /* init conversion tables */ dsp_audio_generate_law_tables(); dsp_silence = (dsp_options & DSP_OPT_ULAW) ? 0xff : 0x2a; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index facbd886ee1c..2c40412466e6 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -229,8 +229,8 @@ static const char *l1oip_revision = "2.00"; static int l1oip_cnt; -static spinlock_t l1oip_lock; -static struct list_head l1oip_ilist; +static DEFINE_SPINLOCK(l1oip_lock); +static LIST_HEAD(l1oip_ilist); #define MAX_CARDS 16 static u_int type[MAX_CARDS]; @@ -1440,9 +1440,6 @@ l1oip_init(void) printk(KERN_INFO "mISDN: Layer-1-over-IP driver Rev. %s\n", l1oip_revision); - INIT_LIST_HEAD(&l1oip_ilist); - spin_lock_init(&l1oip_lock); - if (l1oip_4bit_alloc(ulaw)) return -ENOMEM; diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 7511bca9c15e..edfad93e7b68 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -218,6 +218,7 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port) if (err < 0) return ERR_PTR(err); + udp_allow_gso(sock->sk); return sock; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c3091e00dd5f..3455f2cc13f2 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1098,7 +1098,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, * If @slave's permanent hw address is different both from its current * address and from @bond's address, then somewhere in the bond there's * a slave that has @slave's permanet address as its current address. - * We'll make sure that that slave no longer uses @slave's permanent address. 
+ * We'll make sure that slave no longer uses @slave's permanent address. * * Caller must hold RTNL and no other locks */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 74cbbb22470b..d5ca38aa8aa9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -964,7 +964,7 @@ static bool bond_should_notify_peers(struct bonding *bond) } /** - * change_active_interface - change the active slave into the specified one + * bond_change_active_slave - change the active slave into the specified one * @bond: our bonding struct * @new_active: the new slave to make the active one * diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 1c28eade6bec..e355d3974977 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -103,7 +103,7 @@ config CAN_FLEXCAN config CAN_GRCAN tristate "Aeroflex Gaisler GRCAN and GRHCAN CAN devices" - depends on OF && HAS_DMA + depends on OF && HAS_DMA && HAS_IOMEM help Say Y here if you want to use Aeroflex Gaisler GRCAN or GRHCAN. Note that the driver supports little endian, even though little diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 6958830cb983..313793f6922d 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -132,7 +132,6 @@ /* For the high buffers we clear the interrupt bit and newdat */ #define IF_COMM_RCV_HIGH (IF_COMM_RCV_LOW | IF_COMM_CLR_NEWDAT) - /* Receive setup of message objects */ #define IF_COMM_RCV_SETUP (IF_COMM_MASK | IF_COMM_ARB | IF_COMM_CONTROL) @@ -161,9 +160,7 @@ #define IF_MCONT_TX (IF_MCONT_TXIE | IF_MCONT_EOB) -/* - * Use IF1 for RX and IF2 for TX - */ +/* Use IF1 for RX and IF2 for TX */ #define IF_RX 0 #define IF_TX 1 @@ -173,9 +170,6 @@ /* Wait for ~1 sec for INIT bit */ #define INIT_WAIT_MS 1000 -/* napi related */ -#define C_CAN_NAPI_WEIGHT C_CAN_MSG_OBJ_RX_NUM - /* c_can lec values */ enum c_can_lec_type { LEC_NO_ERROR = 0, @@ -189,8 +183,7 @@ enum c_can_lec_type { LEC_MASK = LEC_UNUSED, }; -/* - * c_can error types: +/* c_can error types: * Bus errors (BUS_OFF, ERROR_WARNING, ERROR_PASSIVE) are supported */ enum c_can_bus_error_types { @@ -253,7 +246,6 @@ static void c_can_obj_update(struct net_device *dev, int iface, u32 cmd, u32 obj udelay(1); } netdev_err(dev, "Updating object timed out\n"); - } static inline void c_can_object_get(struct net_device *dev, int iface, @@ -268,8 +260,7 @@ static inline void c_can_object_put(struct net_device *dev, int iface, c_can_obj_update(dev, iface, cmd | IF_COMM_WR, obj); } -/* - * Note: According to documentation clearing TXIE while MSGVAL is set +/* Note: According to documentation clearing TXIE while MSGVAL is set * is not allowed, but works nicely on C/DCAN. And that lowers the I/O * load significantly. */ @@ -285,8 +276,7 @@ static void c_can_inval_msg_object(struct net_device *dev, int iface, int obj) { struct c_can_priv *priv = netdev_priv(dev); - priv->write_reg(priv, C_CAN_IFACE(ARB1_REG, iface), 0); - priv->write_reg(priv, C_CAN_IFACE(ARB2_REG, iface), 0); + priv->write_reg32(priv, C_CAN_IFACE(ARB1_REG, iface), 0); c_can_inval_tx_object(dev, iface, obj); } @@ -309,12 +299,11 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface, if (!rtr) arb |= IF_ARB_TRANSMIT; - /* - * If we change the DIR bit, we need to invalidate the buffer + /* If we change the DIR bit, we need to invalidate the buffer * first, i.e. clear the MSGVAL flag in the arbiter. 
*/ if (rtr != (bool)test_bit(idx, &priv->tx_dir)) { - u32 obj = idx + C_CAN_MSG_OBJ_TX_FIRST; + u32 obj = idx + priv->msg_obj_tx_first; c_can_inval_msg_object(dev, iface, obj); change_bit(idx, &priv->tx_dir); @@ -447,18 +436,16 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; - /* - * This is not a FIFO. C/D_CAN sends out the buffers + /* This is not a FIFO. C/D_CAN sends out the buffers * prioritized. The lowest buffer number wins. */ idx = fls(atomic_read(&priv->tx_active)); - obj = idx + C_CAN_MSG_OBJ_TX_FIRST; + obj = idx + priv->msg_obj_tx_first; /* If this is the last buffer, stop the xmit queue */ - if (idx == C_CAN_MSG_OBJ_TX_NUM - 1) + if (idx == priv->msg_obj_tx_num - 1) netif_stop_queue(dev); - /* - * Store the message in the interface so we can call + /* Store the message in the interface so we can call * can_put_echo_skb(). We must do this before we enable * transmit as we might race against do_tx(). */ @@ -467,7 +454,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, can_put_echo_skb(skb, dev, idx, 0); /* Update the active bits */ - atomic_add((1 << idx), &priv->tx_active); + atomic_add(BIT(idx), &priv->tx_active); /* Start transmission */ c_can_object_put(dev, IF_TX, obj, IF_COMM_TX); @@ -511,7 +498,7 @@ static int c_can_set_bittiming(struct net_device *dev) reg_brpe = brpe & BRP_EXT_BRPE_MASK; netdev_info(dev, - "setting BTR=%04x BRPE=%04x\n", reg_btr, reg_brpe); + "setting BTR=%04x BRPE=%04x\n", reg_btr, reg_brpe); ctrl_save = priv->read_reg(priv, C_CAN_CTRL_REG); ctrl_save &= ~CONTROL_INIT; @@ -527,8 +514,7 @@ static int c_can_set_bittiming(struct net_device *dev) return c_can_wait_for_ctrl_init(dev, priv, 0); } -/* - * Configure C_CAN message objects for Tx and Rx purposes: +/* Configure C_CAN message objects for Tx and Rx purposes: * C_CAN provides a total of 32 message objects that can be configured * either for Tx or Rx purposes. Here the first 16 message objects are used as * a reception FIFO. 
The end of reception FIFO is signified by the EoB bit @@ -538,17 +524,18 @@ static int c_can_set_bittiming(struct net_device *dev) */ static void c_can_configure_msg_objects(struct net_device *dev) { + struct c_can_priv *priv = netdev_priv(dev); int i; /* first invalidate all message objects */ - for (i = C_CAN_MSG_OBJ_RX_FIRST; i <= C_CAN_NO_OF_OBJECTS; i++) + for (i = priv->msg_obj_rx_first; i <= priv->msg_obj_num; i++) c_can_inval_msg_object(dev, IF_RX, i); /* setup receive message objects */ - for (i = C_CAN_MSG_OBJ_RX_FIRST; i < C_CAN_MSG_OBJ_RX_LAST; i++) + for (i = priv->msg_obj_rx_first; i < priv->msg_obj_rx_last; i++) c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_RCV); - c_can_setup_receive_object(dev, IF_RX, C_CAN_MSG_OBJ_RX_LAST, 0, 0, + c_can_setup_receive_object(dev, IF_RX, priv->msg_obj_rx_last, 0, 0, IF_MCONT_RCV_EOB); } @@ -572,8 +559,7 @@ static int c_can_software_reset(struct net_device *dev) return 0; } -/* - * Configure C_CAN chip: +/* Configure C_CAN chip: * - enable/disable auto-retransmission * - set operating mode * - configure message objects @@ -714,12 +700,21 @@ static void c_can_do_tx(struct net_device *dev) struct net_device_stats *stats = &dev->stats; u32 idx, obj, pkts = 0, bytes = 0, pend, clr; - clr = pend = priv->read_reg(priv, C_CAN_INTPND2_REG); + if (priv->msg_obj_tx_last > 32) + pend = priv->read_reg32(priv, C_CAN_INTPND3_REG); + else + pend = priv->read_reg(priv, C_CAN_INTPND2_REG); + clr = pend; while ((idx = ffs(pend))) { idx--; - pend &= ~(1 << idx); - obj = idx + C_CAN_MSG_OBJ_TX_FIRST; + pend &= ~BIT(idx); + obj = idx + priv->msg_obj_tx_first; + + /* We use IF_RX interface instead of IF_TX because we + * are called from c_can_poll(), which runs inside + * NAPI. We are not transmitting. + */ c_can_inval_tx_object(dev, IF_RX, obj); can_get_echo_skb(dev, idx, NULL); bytes += priv->dlc[idx]; @@ -729,7 +724,7 @@ static void c_can_do_tx(struct net_device *dev) /* Clear the bits in the tx_active mask */ atomic_sub(clr, &priv->tx_active); - if (clr & (1 << (C_CAN_MSG_OBJ_TX_NUM - 1))) + if (clr & BIT(priv->msg_obj_tx_num - 1)) netif_wake_queue(dev); if (pkts) { @@ -739,20 +734,18 @@ static void c_can_do_tx(struct net_device *dev) } } -/* - * If we have a gap in the pending bits, that means we either +/* If we have a gap in the pending bits, that means we either * raced with the hardware or failed to readout all upper * objects in the last run due to quota limit. */ -static u32 c_can_adjust_pending(u32 pend) +static u32 c_can_adjust_pending(u32 pend, u32 rx_mask) { u32 weight, lasts; - if (pend == RECEIVE_OBJECT_BITS) + if (pend == rx_mask) return pend; - /* - * If the last set bit is larger than the number of pending + /* If the last set bit is larger than the number of pending * bits we have a gap. */ weight = hweight32(pend); @@ -762,19 +755,19 @@ static u32 c_can_adjust_pending(u32 pend) if (lasts == weight) return pend; - /* - * Find the first set bit after the gap. We walk backwards + /* Find the first set bit after the gap. We walk backwards * from the last set bit.
*/ - for (lasts--; pend & (1 << (lasts - 1)); lasts--); + for (lasts--; pend & BIT(lasts - 1); lasts--) + ; - return pend & ~((1 << lasts) - 1); + return pend & ~GENMASK(lasts - 1, 0); } static inline void c_can_rx_object_get(struct net_device *dev, struct c_can_priv *priv, u32 obj) { - c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high); + c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high); } static inline void c_can_rx_finalize(struct net_device *dev, @@ -803,8 +796,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv, continue; } - /* - * This really should not happen, but this covers some + /* This really should not happen, but this covers some * odd HW behaviour. Do not remove that unless you * want to brick your machine. */ @@ -825,19 +817,22 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv, static inline u32 c_can_get_pending(struct c_can_priv *priv) { - u32 pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG); + u32 pend; + + if (priv->msg_obj_rx_last > 16) + pend = priv->read_reg32(priv, C_CAN_NEWDAT1_REG); + else + pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG); return pend; } -/* - * theory of operation: +/* theory of operation: * * c_can core saves a received CAN message into the first free message * object it finds free (starting with the lowest). Bits NEWDAT and * INTPND are set for this message object indicating that a new message - * has arrived. To work-around this issue, we keep two groups of message - * objects whose partitioning is defined by C_CAN_MSG_OBJ_RX_SPLIT. + * has arrived. * * We clear the newdat bit right away. * @@ -848,23 +843,16 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) struct c_can_priv *priv = netdev_priv(dev); u32 pkts = 0, pend = 0, toread, n; - /* - * It is faster to read only one 16bit register. This is only possible - * for a maximum number of 16 objects. - */ - BUILD_BUG_ON_MSG(C_CAN_MSG_OBJ_RX_LAST > 16, - "Implementation does not support more message objects than 16"); - while (quota > 0) { if (!pend) { pend = c_can_get_pending(priv); if (!pend) break; - /* - * If the pending field has a gap, handle the + /* If the pending field has a gap, handle the * bits above the gap first. */ - toread = c_can_adjust_pending(pend); + toread = c_can_adjust_pending(pend, + priv->msg_obj_rx_mask); } else { toread = pend; } @@ -883,7 +871,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) } static int c_can_handle_state_change(struct net_device *dev, - enum c_can_bus_error_types error_type) + enum c_can_bus_error_types error_type) { unsigned int reg_err_counter; unsigned int rx_err_passive; @@ -979,8 +967,7 @@ static int c_can_handle_bus_err(struct net_device *dev, struct can_frame *cf; struct sk_buff *skb; - /* - * early exit if no lec update or no error. + /* early exit if no lec update or no error. * no lec update means that no CAN bus event has been detected * since CPU wrote 0x7 value to status reg. 
*/ @@ -999,8 +986,7 @@ static int c_can_handle_bus_err(struct net_device *dev, if (unlikely(!skb)) return 0; - /* - * check for 'last error code' which tells us the + /* check for 'last error code' which tells us the * type of the last error to occur on the CAN bus */ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; @@ -1049,7 +1035,8 @@ static int c_can_poll(struct napi_struct *napi, int quota) /* Only read the status register if a status interrupt was pending */ if (atomic_xchg(&priv->sie_pending, 0)) { - priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); + priv->last_status = priv->read_reg(priv, C_CAN_STS_REG); + curr = priv->last_status; /* Ack status on C_CAN. D_CAN is self clearing */ if (priv->type != BOSCH_D_CAN) priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); @@ -1147,7 +1134,7 @@ static int c_can_open(struct net_device *dev) /* register interrupt handler */ err = request_irq(dev->irq, &c_can_isr, IRQF_SHARED, dev->name, - dev); + dev); if (err < 0) { netdev_err(dev, "failed to request interrupt\n"); goto exit_irq_fail; @@ -1195,17 +1182,31 @@ static int c_can_close(struct net_device *dev) return 0; } -struct net_device *alloc_c_can_dev(void) +struct net_device *alloc_c_can_dev(int msg_obj_num) { struct net_device *dev; struct c_can_priv *priv; + int msg_obj_tx_num = msg_obj_num / 2; - dev = alloc_candev(sizeof(struct c_can_priv), C_CAN_MSG_OBJ_TX_NUM); + dev = alloc_candev(struct_size(priv, dlc, msg_obj_tx_num), + msg_obj_tx_num); if (!dev) return NULL; priv = netdev_priv(dev); - netif_napi_add(dev, &priv->napi, c_can_poll, C_CAN_NAPI_WEIGHT); + priv->msg_obj_num = msg_obj_num; + priv->msg_obj_rx_num = msg_obj_num - msg_obj_tx_num; + priv->msg_obj_rx_first = 1; + priv->msg_obj_rx_last = + priv->msg_obj_rx_first + priv->msg_obj_rx_num - 1; + priv->msg_obj_rx_mask = GENMASK(priv->msg_obj_rx_num - 1, 0); + + priv->msg_obj_tx_num = msg_obj_tx_num; + priv->msg_obj_tx_first = priv->msg_obj_rx_last + 1; + priv->msg_obj_tx_last = + priv->msg_obj_tx_first + priv->msg_obj_tx_num - 1; + + netif_napi_add(dev, &priv->napi, c_can_poll, priv->msg_obj_rx_num); priv->dev = dev; priv->can.bittiming_const = &c_can_bittiming_const; @@ -1239,7 +1240,7 @@ int c_can_power_down(struct net_device *dev) /* Wait for the PDA bit to get set */ time_out = jiffies + msecs_to_jiffies(INIT_WAIT_MS); while (!(priv->read_reg(priv, C_CAN_STS_REG) & STATUS_PDA) && - time_after(time_out, jiffies)) + time_after(time_out, jiffies)) cpu_relax(); if (time_after(jiffies, time_out)) @@ -1280,7 +1281,7 @@ int c_can_power_up(struct net_device *dev) /* Wait for the PDA bit to get clear */ time_out = jiffies + msecs_to_jiffies(INIT_WAIT_MS); while ((priv->read_reg(priv, C_CAN_STS_REG) & STATUS_PDA) && - time_after(time_out, jiffies)) + time_after(time_out, jiffies)) cpu_relax(); if (time_after(jiffies, time_out)) { diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 92213d3d96eb..8acedd9e63a7 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -22,23 +22,6 @@ #ifndef C_CAN_H #define C_CAN_H -/* message object split */ -#define C_CAN_NO_OF_OBJECTS 32 -#define C_CAN_MSG_OBJ_RX_NUM 16 -#define C_CAN_MSG_OBJ_TX_NUM 16 - -#define C_CAN_MSG_OBJ_RX_FIRST 1 -#define C_CAN_MSG_OBJ_RX_LAST (C_CAN_MSG_OBJ_RX_FIRST + \ - C_CAN_MSG_OBJ_RX_NUM - 1) - -#define C_CAN_MSG_OBJ_TX_FIRST (C_CAN_MSG_OBJ_RX_LAST + 1) -#define C_CAN_MSG_OBJ_TX_LAST (C_CAN_MSG_OBJ_TX_FIRST + \ - C_CAN_MSG_OBJ_TX_NUM - 1) - -#define C_CAN_MSG_OBJ_RX_SPLIT 9 -#define C_CAN_MSG_RX_LOW_LAST 
(C_CAN_MSG_OBJ_RX_SPLIT - 1) -#define RECEIVE_OBJECT_BITS 0x0000ffff - enum reg { C_CAN_CTRL_REG = 0, C_CAN_CTRL_EX_REG, @@ -76,6 +59,7 @@ enum reg { C_CAN_NEWDAT2_REG, C_CAN_INTPND1_REG, C_CAN_INTPND2_REG, + C_CAN_INTPND3_REG, C_CAN_MSGVAL1_REG, C_CAN_MSGVAL2_REG, C_CAN_FUNCTION_REG, @@ -137,6 +121,7 @@ static const u16 __maybe_unused reg_map_d_can[] = { [C_CAN_NEWDAT2_REG] = 0x9E, [C_CAN_INTPND1_REG] = 0xB0, [C_CAN_INTPND2_REG] = 0xB2, + [C_CAN_INTPND3_REG] = 0xB4, [C_CAN_MSGVAL1_REG] = 0xC4, [C_CAN_MSGVAL2_REG] = 0xC6, [C_CAN_IF1_COMREQ_REG] = 0x100, @@ -176,6 +161,7 @@ struct raminit_bits { struct c_can_driver_data { enum c_can_dev_id id; + unsigned int msg_obj_num; /* RAMINIT register description. Optional. */ const struct raminit_bits *raminit_bits; /* Array of START/DONE bit positions */ @@ -197,26 +183,34 @@ struct c_can_priv { struct napi_struct napi; struct net_device *dev; struct device *device; + unsigned int msg_obj_num; + unsigned int msg_obj_rx_num; + unsigned int msg_obj_tx_num; + unsigned int msg_obj_rx_first; + unsigned int msg_obj_rx_last; + unsigned int msg_obj_tx_first; + unsigned int msg_obj_tx_last; + u32 msg_obj_rx_mask; atomic_t tx_active; atomic_t sie_pending; unsigned long tx_dir; int last_status; - u16 (*read_reg) (const struct c_can_priv *priv, enum reg index); - void (*write_reg) (const struct c_can_priv *priv, enum reg index, u16 val); - u32 (*read_reg32) (const struct c_can_priv *priv, enum reg index); - void (*write_reg32) (const struct c_can_priv *priv, enum reg index, u32 val); + u16 (*read_reg)(const struct c_can_priv *priv, enum reg index); + void (*write_reg)(const struct c_can_priv *priv, enum reg index, u16 val); + u32 (*read_reg32)(const struct c_can_priv *priv, enum reg index); + void (*write_reg32)(const struct c_can_priv *priv, enum reg index, u32 val); void __iomem *base; const u16 *regs; void *priv; /* for board-specific data */ enum c_can_dev_id type; struct c_can_raminit raminit_sys; /* RAMINIT via syscon regmap */ - void (*raminit) (const struct c_can_priv *priv, bool enable); + void (*raminit)(const struct c_can_priv *priv, bool enable); u32 comm_rcv_high; u32 rxmasked; - u32 dlc[C_CAN_MSG_OBJ_TX_NUM]; + u32 dlc[]; }; -struct net_device *alloc_c_can_dev(void); +struct net_device *alloc_c_can_dev(int msg_obj_num); void free_c_can_dev(struct net_device *dev); int register_c_can_dev(struct net_device *dev); void unregister_c_can_dev(struct net_device *dev); diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index 7efb60b50876..bf2f8c3da1c1 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -31,6 +31,8 @@ enum c_can_pci_reg_align { struct c_can_pci_data { /* Specify if is C_CAN or D_CAN */ enum c_can_dev_id type; + /* Number of message objects */ + unsigned int msg_obj_num; /* Set the register alignment in the memory */ enum c_can_pci_reg_align reg_align; /* Set the frequency */ @@ -41,32 +43,31 @@ struct c_can_pci_data { void (*init)(const struct c_can_priv *priv, bool enable); }; -/* - * 16-bit c_can registers can be arranged differently in the memory +/* 16-bit c_can registers can be arranged differently in the memory * architecture of different implementations. For example: 16-bit * registers can be aligned to a 16-bit boundary or 32-bit boundary etc. * Handle the same by providing a common read/write interface. 
*/ static u16 c_can_pci_read_reg_aligned_to_16bit(const struct c_can_priv *priv, - enum reg index) + enum reg index) { return readw(priv->base + priv->regs[index]); } static void c_can_pci_write_reg_aligned_to_16bit(const struct c_can_priv *priv, - enum reg index, u16 val) + enum reg index, u16 val) { writew(val, priv->base + priv->regs[index]); } static u16 c_can_pci_read_reg_aligned_to_32bit(const struct c_can_priv *priv, - enum reg index) + enum reg index) { return readw(priv->base + 2 * priv->regs[index]); } static void c_can_pci_write_reg_aligned_to_32bit(const struct c_can_priv *priv, - enum reg index, u16 val) + enum reg index, u16 val) { writew(val, priv->base + 2 * priv->regs[index]); } @@ -88,13 +89,13 @@ static u32 c_can_pci_read_reg32(const struct c_can_priv *priv, enum reg index) u32 val; val = priv->read_reg(priv, index); - val |= ((u32) priv->read_reg(priv, index + 1)) << 16; + val |= ((u32)priv->read_reg(priv, index + 1)) << 16; return val; } static void c_can_pci_write_reg32(const struct c_can_priv *priv, enum reg index, - u32 val) + u32 val) { priv->write_reg(priv, index + 1, val >> 16); priv->write_reg(priv, index, val); @@ -142,14 +143,13 @@ static int c_can_pci_probe(struct pci_dev *pdev, pci_resource_len(pdev, c_can_pci_data->bar)); if (!addr) { dev_err(&pdev->dev, - "device has no PCI memory resources, " - "failing adapter\n"); + "device has no PCI memory resources, failing adapter\n"); ret = -ENOMEM; goto out_release_regions; } /* allocate the c_can device */ - dev = alloc_c_can_dev(); + dev = alloc_c_can_dev(c_can_pci_data->msg_obj_num); if (!dev) { ret = -ENOMEM; goto out_iounmap; @@ -217,7 +217,7 @@ static int c_can_pci_probe(struct pci_dev *pdev, } dev_dbg(&pdev->dev, "%s device registered (regs=%p, irq=%d)\n", - KBUILD_MODNAME, priv->regs, dev->irq); + KBUILD_MODNAME, priv->regs, dev->irq); return 0; @@ -252,8 +252,9 @@ static void c_can_pci_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static const struct c_can_pci_data c_can_sta2x11= { +static const struct c_can_pci_data c_can_sta2x11 = { .type = BOSCH_C_CAN, + .msg_obj_num = 32, .reg_align = C_CAN_REG_ALIGN_32, .freq = 52000000, /* 52 Mhz */ .bar = 0, @@ -261,6 +262,7 @@ static const struct c_can_pci_data c_can_sta2x11= { static const struct c_can_pci_data c_can_pch = { .type = BOSCH_C_CAN, + .msg_obj_num = 32, .reg_align = C_CAN_REG_32, .freq = 50000000, /* 50 MHz */ .init = c_can_pci_reset_pch, @@ -269,7 +271,7 @@ static const struct c_can_pci_data c_can_pch = { #define C_CAN_ID(_vend, _dev, _driverdata) { \ PCI_DEVICE(_vend, _dev), \ - .driver_data = (unsigned long)&_driverdata, \ + .driver_data = (unsigned long)&(_driverdata), \ } static const struct pci_device_id c_can_pci_tbl[] = { @@ -279,6 +281,7 @@ static const struct pci_device_id c_can_pci_tbl[] = { c_can_pch), {}, }; + static struct pci_driver c_can_pci_driver = { .name = KBUILD_MODNAME, .id_table = c_can_pci_tbl, diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 47b251b1607c..36950363682f 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -193,10 +193,12 @@ static void c_can_hw_raminit(const struct c_can_priv *priv, bool enable) static const struct c_can_driver_data c_can_drvdata = { .id = BOSCH_C_CAN, + .msg_obj_num = 32, }; static const struct c_can_driver_data d_can_drvdata = { .id = BOSCH_D_CAN, + .msg_obj_num = 32, }; static const struct raminit_bits dra7_raminit_bits[] = { @@ -206,6 +208,7 @@ static const struct raminit_bits 
dra7_raminit_bits[] = { static const struct c_can_driver_data dra7_dcan_drvdata = { .id = BOSCH_D_CAN, + .msg_obj_num = 64, .raminit_num = ARRAY_SIZE(dra7_raminit_bits), .raminit_bits = dra7_raminit_bits, .raminit_pulse = true, @@ -218,6 +221,7 @@ static const struct raminit_bits am3352_raminit_bits[] = { static const struct c_can_driver_data am3352_dcan_drvdata = { .id = BOSCH_D_CAN, + .msg_obj_num = 64, .raminit_num = ARRAY_SIZE(am3352_raminit_bits), .raminit_bits = am3352_raminit_bits, }; @@ -294,7 +298,7 @@ static int c_can_plat_probe(struct platform_device *pdev) } /* allocate the c_can device */ - dev = alloc_c_can_dev(); + dev = alloc_c_can_dev(drvdata->msg_obj_num); if (!dev) { ret = -ENOMEM; goto exit; diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c index f7fe226bb395..f49170eadd54 100644 --- a/drivers/net/can/dev/bittiming.c +++ b/drivers/net/can/dev/bittiming.c @@ -81,9 +81,9 @@ int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, if (bt->sample_point) { sample_point_nominal = bt->sample_point; } else { - if (bt->bitrate > 800000) + if (bt->bitrate > 800 * CAN_KBPS) sample_point_nominal = 750; - else if (bt->bitrate > 500000) + else if (bt->bitrate > 500 * CAN_KBPS) sample_point_nominal = 800; else sample_point_nominal = 875; @@ -174,6 +174,30 @@ int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, return 0; } + +void can_calc_tdco(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + const struct can_bittiming *dbt = &priv->data_bittiming; + struct can_tdc *tdc = &priv->tdc; + const struct can_tdc_const *tdc_const = priv->tdc_const; + + if (!tdc_const) + return; + + /* As specified in ISO 11898-1 section 11.3.3 "Transmitter + * delay compensation" (TDC) is only applicable if data BRP is + * one or two. 
+ */ + if (dbt->brp == 1 || dbt->brp == 2) { + /* Reuse "normal" sample point and convert it to time quanta */ + u32 sample_point_in_tq = can_bit_time(dbt) * dbt->sample_point / 1000; + + tdc->tdco = min(sample_point_in_tq, tdc_const->tdco_max); + } else { + tdc->tdco = 0; + } +} #endif /* CONFIG_CAN_CALC_BITTIMING */ /* Checks the validity of the specified bit-timing parameters prop_seg, diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index f5d79e6e5483..e38c2566aff4 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -8,20 +8,17 @@ #include <net/rtnetlink.h> static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { - [IFLA_CAN_STATE] = { .type = NLA_U32 }, - [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, - [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, - [IFLA_CAN_RESTART] = { .type = NLA_U32 }, - [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, - [IFLA_CAN_BITTIMING_CONST] - = { .len = sizeof(struct can_bittiming_const) }, - [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, - [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, - [IFLA_CAN_DATA_BITTIMING] - = { .len = sizeof(struct can_bittiming) }, - [IFLA_CAN_DATA_BITTIMING_CONST] - = { .len = sizeof(struct can_bittiming_const) }, - [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, + [IFLA_CAN_STATE] = { .type = NLA_U32 }, + [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, + [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, + [IFLA_CAN_RESTART] = { .type = NLA_U32 }, + [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, + [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, + [IFLA_CAN_DATA_BITTIMING] = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], @@ -189,6 +186,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], memcpy(&priv->data_bittiming, &dbt, sizeof(dbt)); + can_calc_tdco(dev); + if (priv->do_set_data_bittiming) { /* Finally, set the bit-timing registers */ err = priv->do_set_data_bittiming(dev); diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 6a64fe410987..387c0bc0fb9c 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -45,7 +45,7 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, BUG_ON(idx >= priv->echo_skb_max); /* check flag whether this packet has to be looped back */ - if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK || + if (!(dev->flags & IFF_ECHO) || (skb->protocol != htons(ETH_P_CAN) && skb->protocol != htons(ETH_P_CANFD))) { kfree_skb(skb); @@ -58,7 +58,6 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, return -ENOMEM; /* make settings for echo to reduce code in irq context */ - skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; skb->dev = dev; @@ -111,6 +110,13 @@ __can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, priv->echo_skb[idx] = NULL; + if (skb->pkt_type == PACKET_LOOPBACK) { + skb->pkt_type = PACKET_BROADCAST; + } else { + dev_consume_skb_any(skb); + return NULL; + } + return skb; } @@ -147,14 +153,25 @@ EXPORT_SYMBOL_GPL(can_get_echo_skb); * * The function is 
typically called when TX failed. */ -void can_free_echo_skb(struct net_device *dev, unsigned int idx) +void can_free_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr) { struct can_priv *priv = netdev_priv(dev); - BUG_ON(idx >= priv->echo_skb_max); + if (idx >= priv->echo_skb_max) { + netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", + __func__, idx, priv->echo_skb_max); + return; + } if (priv->echo_skb[idx]) { - dev_kfree_skb_any(priv->echo_skb[idx]); + struct sk_buff *skb = priv->echo_skb[idx]; + struct can_skb_priv *can_skb_priv = can_skb_prv(skb); + + if (frame_len_ptr) + *frame_len_ptr = can_skb_priv->frame_len; + + dev_kfree_skb_any(skb); priv->echo_skb[idx] = NULL; } } diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 4a8453290530..78e27940b2af 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -520,7 +520,7 @@ static int catch_up_echo_skb(struct net_device *dev, int budget, bool echo) can_get_echo_skb(dev, i, NULL); } else { /* For cleanup of untransmitted messages */ - can_free_echo_skb(dev, i); + can_free_echo_skb(dev, i, NULL); } priv->eskbp = grcan_ring_add(priv->eskbp, GRCAN_MSG_SIZE, diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 0c8d36bc668c..890ed826a355 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -8,6 +8,7 @@ * https://github.com/linux-can/can-doc/tree/master/m_can */ +#include <linux/bitfield.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kernel.h> @@ -148,6 +149,16 @@ enum m_can_reg { #define NBTP_NTSEG2_SHIFT 0 #define NBTP_NTSEG2_MASK (0x7f << NBTP_NTSEG2_SHIFT) +/* Timestamp Counter Configuration Register (TSCC) */ +#define TSCC_TCP_MASK GENMASK(19, 16) +#define TSCC_TSS_MASK GENMASK(1, 0) +#define TSCC_TSS_DISABLE 0x0 +#define TSCC_TSS_INTERNAL 0x1 +#define TSCC_TSS_EXTERNAL 0x2 + +/* Timestamp Counter Value Register (TSCV) */ +#define TSCV_TSC_MASK GENMASK(15, 0) + /* Error Counter Register(ECR) */ #define ECR_RP BIT(15) #define ECR_REC_SHIFT 8 @@ -302,6 +313,7 @@ enum m_can_reg { #define RX_BUF_ANMF BIT(31) #define RX_BUF_FDF BIT(21) #define RX_BUF_BRS BIT(20) +#define RX_BUF_RXTS_MASK GENMASK(15, 0) /* Tx Buffer Element */ /* T0 */ @@ -319,6 +331,7 @@ enum m_can_reg { /* E1 */ #define TX_EVENT_MM_SHIFT TX_BUF_MM_SHIFT #define TX_EVENT_MM_MASK (0xff << TX_EVENT_MM_SHIFT) +#define TX_EVENT_TXTS_MASK GENMASK(15, 0) static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg) { @@ -413,6 +426,20 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_ILE, 0x0); } +/* Retrieve internal timestamp counter from TSCV.TSC, and shift it to 32-bit + * width. + */ +static u32 m_can_get_timestamp(struct m_can_classdev *cdev) +{ + u32 tscv; + u32 tsc; + + tscv = m_can_read(cdev, M_CAN_TSCV); + tsc = FIELD_GET(TSCV_TSC_MASK, tscv); + + return (tsc << 16); +} + static void m_can_clean(struct net_device *net) { struct m_can_classdev *cdev = netdev_priv(net); @@ -425,11 +452,26 @@ static void m_can_clean(struct net_device *net) putidx = ((m_can_read(cdev, M_CAN_TXFQS) & TXFQS_TFQPI_MASK) >> TXFQS_TFQPI_SHIFT); - can_free_echo_skb(cdev->net, putidx); + can_free_echo_skb(cdev->net, putidx, NULL); cdev->tx_skb = NULL; } } +/* For peripherals, pass skb to rx-offload, which will push skb from + * napi. For non-peripherals, RX is done in napi already, so push + * directly. 
timestamp is used to ensure good skb ordering in + * rx-offload and is ignored for non-peripherals. +*/ +static void m_can_receive_skb(struct m_can_classdev *cdev, + struct sk_buff *skb, + u32 timestamp) +{ + if (cdev->is_peripheral) + can_rx_offload_queue_sorted(&cdev->offload, skb, timestamp); + else + netif_receive_skb(skb); +} + static void m_can_read_fifo(struct net_device *dev, u32 rxfs) { struct net_device_stats *stats = &dev->stats; @@ -437,6 +479,7 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs) struct canfd_frame *cf; struct sk_buff *skb; u32 id, fgi, dlc; + u32 timestamp = 0; int i; /* calculate the fifo get index for where to read data */ @@ -485,7 +528,9 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs) stats->rx_packets++; stats->rx_bytes += cf->len; - netif_receive_skb(skb); + timestamp = FIELD_GET(RX_BUF_RXTS_MASK, dlc); + + m_can_receive_skb(cdev, skb, timestamp); } static int m_can_do_rx_poll(struct net_device *dev, int quota) @@ -516,9 +561,11 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) static int m_can_handle_lost_msg(struct net_device *dev) { + struct m_can_classdev *cdev = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; struct sk_buff *skb; struct can_frame *frame; + u32 timestamp = 0; netdev_err(dev, "msg lost in rxf0\n"); @@ -532,7 +579,10 @@ static int m_can_handle_lost_msg(struct net_device *dev) frame->can_id |= CAN_ERR_CRTL; frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; - netif_receive_skb(skb); + if (cdev->is_peripheral) + timestamp = m_can_get_timestamp(cdev); + + m_can_receive_skb(cdev, skb, timestamp); return 1; } @@ -544,6 +594,7 @@ static int m_can_handle_lec_err(struct net_device *dev, struct net_device_stats *stats = &dev->stats; struct can_frame *cf; struct sk_buff *skb; + u32 timestamp = 0; cdev->can.can_stats.bus_error++; stats->rx_errors++; @@ -589,7 +640,11 @@ static int m_can_handle_lec_err(struct net_device *dev, stats->rx_packets++; stats->rx_bytes += cf->len; - netif_receive_skb(skb); + + if (cdev->is_peripheral) + timestamp = m_can_get_timestamp(cdev); + + m_can_receive_skb(cdev, skb, timestamp); return 1; } @@ -647,6 +702,7 @@ static int m_can_handle_state_change(struct net_device *dev, struct sk_buff *skb; struct can_berr_counter bec; unsigned int ecr; + u32 timestamp = 0; switch (new_state) { case CAN_STATE_ERROR_WARNING: @@ -708,7 +764,11 @@ static int m_can_handle_state_change(struct net_device *dev, stats->rx_packets++; stats->rx_bytes += cf->len; - netif_receive_skb(skb); + + if (cdev->is_peripheral) + timestamp = m_can_get_timestamp(cdev); + + m_can_receive_skb(cdev, skb, timestamp); return 1; } @@ -773,6 +833,7 @@ static int m_can_handle_protocol_error(struct net_device *dev, u32 irqstatus) struct m_can_classdev *cdev = netdev_priv(dev); struct can_frame *cf; struct sk_buff *skb; + u32 timestamp = 0; /* propagate the error condition to the CAN stack */ skb = alloc_can_err_skb(dev, &cf); @@ -794,7 +855,11 @@ static int m_can_handle_protocol_error(struct net_device *dev, u32 irqstatus) netdev_dbg(dev, "allocation of skb failed\n"); return 0; } - netif_receive_skb(skb); + + if (cdev->is_peripheral) + timestamp = m_can_get_timestamp(cdev); + + m_can_receive_skb(cdev, skb, timestamp); return 1; } @@ -895,6 +960,29 @@ static int m_can_poll(struct napi_struct *napi, int quota) return work_done; } +/* Echo tx skb and update net stats. Peripherals use rx-offload for + * echo. 
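A standalone re-creation of the timestamp handling may help here: the hardware reports a 16-bit counter (extracted with a FIELD_GET()-style mask, re-created below in plain C), and shifting it into the upper half of a u32 lets rx-offload's 32-bit wrap-around comparison keep frames ordered even across a counter wrap. The register words are invented:

#include <stdio.h>
#include <stdint.h>

#define GENMASK32(h, l)		((~0u << (l)) & (~0u >> (31 - (h))))
#define LOW_BIT(mask)		((mask) & ~((mask) << 1))
#define FIELD_GET32(mask, val)	(((val) & (mask)) / LOW_BIT(mask))

#define RX_BUF_RXTS_MASK	GENMASK32(15, 0)

/* extract the 16-bit hardware timestamp and widen it to 32 bits */
static uint32_t widen(uint32_t dlc_word)
{
	return FIELD_GET32(RX_BUF_RXTS_MASK, dlc_word) << 16;
}

int main(void)
{
	uint32_t before = widen(0x000ffffe);	/* ts 0xfffe, just before wrap */
	uint32_t after  = widen(0x000f0001);	/* ts 0x0001, just after wrap */

	/* the signed 32-bit difference stays small and positive */
	printf("delta = %d ticks\n", (int32_t)(after - before) >> 16);
	return 0;
}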
timestamp is used for peripherals to ensure correct ordering + * by rx-offload, and is ignored for non-peripherals. +*/ +static void m_can_tx_update_stats(struct m_can_classdev *cdev, + unsigned int msg_mark, + u32 timestamp) +{ + struct net_device *dev = cdev->net; + struct net_device_stats *stats = &dev->stats; + + if (cdev->is_peripheral) + stats->tx_bytes += + can_rx_offload_get_echo_skb(&cdev->offload, + msg_mark, + timestamp, + NULL); + else + stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL); + + stats->tx_packets++; +} + static void m_can_echo_tx_event(struct net_device *dev) { u32 txe_count = 0; @@ -904,7 +992,6 @@ static void m_can_echo_tx_event(struct net_device *dev) unsigned int msg_mark; struct m_can_classdev *cdev = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; /* read tx event fifo status */ m_can_txefs = m_can_read(cdev, M_CAN_TXEFS); @@ -914,21 +1001,23 @@ static void m_can_echo_tx_event(struct net_device *dev) /* Get and process all sent elements */ for (i = 0; i < txe_count; i++) { + u32 txe, timestamp = 0; + /* retrieve get index */ fgi = (m_can_read(cdev, M_CAN_TXEFS) & TXEFS_EFGI_MASK) >> TXEFS_EFGI_SHIFT; - /* get message marker */ - msg_mark = (m_can_txe_fifo_read(cdev, fgi, 4) & - TX_EVENT_MM_MASK) >> TX_EVENT_MM_SHIFT; + /* get message marker, timestamp */ + txe = m_can_txe_fifo_read(cdev, fgi, 4); + msg_mark = (txe & TX_EVENT_MM_MASK) >> TX_EVENT_MM_SHIFT; + timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe); /* ack txe element */ m_can_write(cdev, M_CAN_TXEFA, (TXEFA_EFAI_MASK & (fgi << TXEFA_EFAI_SHIFT))); /* update stats */ - stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL); - stats->tx_packets++; + m_can_tx_update_stats(cdev, msg_mark, timestamp); } } @@ -936,7 +1025,6 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct m_can_classdev *cdev = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; u32 ir; if (pm_runtime_suspended(cdev->dev)) @@ -969,8 +1057,12 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) if (cdev->version == 30) { if (ir & IR_TC) { /* Transmission Complete Interrupt*/ - stats->tx_bytes += can_get_echo_skb(dev, 0, NULL); - stats->tx_packets++; + u32 timestamp = 0; + + if (cdev->is_peripheral) + timestamp = m_can_get_timestamp(cdev); + m_can_tx_update_stats(cdev, 0, timestamp); + can_led_event(dev, CAN_LED_EVENT_TX); netif_wake_queue(dev); } @@ -1108,6 +1200,7 @@ static int m_can_set_bittiming(struct net_device *dev) * - >= v3.1.x: TX FIFO is used * - configure mode * - setup bittiming + * - configure timestamp generation */ static void m_can_chip_config(struct net_device *dev) { @@ -1219,6 +1312,10 @@ static void m_can_chip_config(struct net_device *dev) /* set bittiming params */ m_can_set_bittiming(dev); + /* enable internal timestamp generation, with a prescalar of 16. 
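As a back-of-the-envelope check of the timestamp configuration that follows: 0xf in the TSCC.TCP field selects one counter tick per 16 nominal bit times, so resolution and wrap period scale with the bitrate. The 500 kbit/s nominal rate below is only an example:

#include <stdio.h>

int main(void)
{
	double bit_time_us = 1e6 / 500000.0;	/* 2 us per nominal bit */
	double tick_us = 16 * bit_time_us;	/* 32 us per timestamp tick */
	double wrap_s = tick_us * 65536 / 1e6;	/* 16-bit counter wrap */

	printf("tick = %.0f us, counter wraps after %.2f s\n",
	       tick_us, wrap_s);
	return 0;
}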
The + * prescalar is applied to the nominal bit timing */ + m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf)); + m_can_config_endisable(cdev, false); if (cdev->ops->init) @@ -1426,6 +1523,9 @@ static int m_can_close(struct net_device *dev) cdev->tx_wq = NULL; } + if (cdev->is_peripheral) + can_rx_offload_disable(&cdev->offload); + close_candev(dev); can_led_event(dev, CAN_LED_EVENT_STOP); @@ -1624,6 +1724,9 @@ static int m_can_open(struct net_device *dev) goto exit_disable_clks; } + if (cdev->is_peripheral) + can_rx_offload_enable(&cdev->offload); + /* register interrupt handler */ if (cdev->is_peripheral) { cdev->tx_skb = NULL; @@ -1665,6 +1768,8 @@ exit_irq_fail: if (cdev->is_peripheral) destroy_workqueue(cdev->tx_wq); out_wq_fail: + if (cdev->is_peripheral) + can_rx_offload_disable(&cdev->offload); close_candev(dev); exit_disable_clks: m_can_clk_stop(cdev); @@ -1787,11 +1892,6 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, } class_dev = netdev_priv(net_dev); - if (!class_dev) { - dev_err(dev, "Failed to init netdev cdevate"); - goto out; - } - class_dev->net = net_dev; class_dev->dev = dev; SET_NETDEV_DEV(net_dev, dev); @@ -1818,15 +1918,22 @@ int m_can_class_register(struct m_can_classdev *cdev) return ret; } + if (cdev->is_peripheral) { + ret = can_rx_offload_add_manual(cdev->net, &cdev->offload, + M_CAN_NAPI_WEIGHT); + if (ret) + goto clk_disable; + } + ret = m_can_dev_setup(cdev); if (ret) - goto clk_disable; + goto rx_offload_del; ret = register_m_can_dev(cdev->net); if (ret) { dev_err(cdev->dev, "registering %s failed (err=%d)\n", cdev->net->name, ret); - goto clk_disable; + goto rx_offload_del; } devm_can_led_init(cdev->net); @@ -1839,6 +1946,13 @@ int m_can_class_register(struct m_can_classdev *cdev) /* Probe finished * Stop clocks. 
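The error unwinding in m_can_class_register() follows the usual reverse-order goto ladder, with the new rx_offload_del label slotted between clock start and netdev registration. A compilable toy version of the shape, with placeholder step names standing in for the real calls:

#include <stdio.h>

static int step_clock(void)	{ puts("clock on");	return 0; }
static int step_offload(void)	{ puts("offload add");	return 0; }
static int step_register(void)	{ puts("register");	return -1; } /* fails */
static void undo_offload(void)	{ puts("offload del"); }
static void undo_clock(void)	{ puts("clock off"); }

static int example_register(void)
{
	int err;

	err = step_clock();
	if (err)
		return err;

	err = step_offload();
	if (err)
		goto clock_off;

	err = step_register();
	if (err)
		goto offload_del;

	return 0;

offload_del:
	undo_offload();
clock_off:
	undo_clock();
	return err;
}

int main(void)
{
	printf("example_register() = %d\n", example_register());
	return 0;
}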
They will be reactivated once the M_CAN device is opened */ + m_can_clk_stop(cdev); + + return 0; + +rx_offload_del: + if (cdev->is_peripheral) + can_rx_offload_del(&cdev->offload); clk_disable: m_can_clk_stop(cdev); @@ -1848,6 +1962,8 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + if (cdev->is_peripheral) + can_rx_offload_del(&cdev->offload); unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index 3fda84cef351..ace071c3e58c 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -8,6 +8,7 @@ #include <linux/can/core.h> #include <linux/can/led.h> +#include <linux/can/rx-offload.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/dma-mapping.h> @@ -71,6 +72,7 @@ struct m_can_ops { struct m_can_classdev { struct can_priv can; + struct can_rx_offload offload; struct napi_struct napi; struct net_device *net; struct device *dev; diff --git a/drivers/net/can/m_can/tcan4x5x.h b/drivers/net/can/m_can/tcan4x5x.h index c66da829b795..e62c030d3e1e 100644 --- a/drivers/net/can/m_can/tcan4x5x.h +++ b/drivers/net/can/m_can/tcan4x5x.h @@ -11,7 +11,6 @@ #include <linux/gpio/consumer.h> #include <linux/regmap.h> -#include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/spi/spi.h> diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 4870c4ea190a..00e4533c8bdd 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -217,7 +217,7 @@ static void tx_failure_cleanup(struct net_device *ndev) int i; for (i = 0; i < RCAR_CAN_FIFO_DEPTH; i++) - can_free_echo_skb(ndev, i); + can_free_echo_skb(ndev, i, NULL); } static void rcar_can_error(struct net_device *ndev) diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index d8d233e62990..311e6ca3bdc4 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -617,7 +617,7 @@ static void rcar_canfd_tx_failure_cleanup(struct net_device *ndev) u32 i; for (i = 0; i < RCANFD_FIFO_DEPTH; i++) - can_free_echo_skb(ndev, i); + can_free_echo_skb(ndev, i, NULL); } static int rcar_canfd_reset_controller(struct rcar_canfd_global *gpriv) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 9e86488ba55f..3fad54646746 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -525,7 +525,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT && !(status & SR_TCS)) { stats->tx_errors++; - can_free_echo_skb(dev, 0); + can_free_echo_skb(dev, 0, NULL); } else { /* transmission complete */ stats->tx_bytes += diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index c3e020c90111..6f5d6d04a8b9 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -179,7 +179,7 @@ static void hi3110_clean(struct net_device *net) net->stats.tx_errors++; dev_kfree_skb(priv->tx_skb); if (priv->tx_len) - can_free_echo_skb(priv->net, 0); + can_free_echo_skb(priv->net, 0, NULL); priv->tx_skb = NULL; priv->tx_len = 0; } diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index f69fb4238a65..80ab1593ca31 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -276,7 +276,7 @@ static void mcp251x_clean(struct net_device *net) net->stats.tx_errors++; 
dev_kfree_skb(priv->tx_skb); if (priv->tx_len) - can_free_echo_skb(priv->net, 0); + can_free_echo_skb(priv->net, 0, NULL); priv->tx_skb = NULL; priv->tx_len = 0; } diff --git a/drivers/net/can/spi/mcp251xfd/Kconfig b/drivers/net/can/spi/mcp251xfd/Kconfig index f5a147a92cb2..dd0fc0a54be1 100644 --- a/drivers/net/can/spi/mcp251xfd/Kconfig +++ b/drivers/net/can/spi/mcp251xfd/Kconfig @@ -3,6 +3,7 @@ config CAN_MCP251XFD tristate "Microchip MCP251xFD SPI CAN controllers" select REGMAP + select WANT_DEV_COREDUMP help Driver for the Microchip MCP251XFD SPI FD-CAN controller family. diff --git a/drivers/net/can/spi/mcp251xfd/Makefile b/drivers/net/can/spi/mcp251xfd/Makefile index cb71244cbe89..3cba3b9447ea 100644 --- a/drivers/net/can/spi/mcp251xfd/Makefile +++ b/drivers/net/can/spi/mcp251xfd/Makefile @@ -6,3 +6,6 @@ mcp251xfd-objs := mcp251xfd-objs += mcp251xfd-core.o mcp251xfd-objs += mcp251xfd-crc16.o mcp251xfd-objs += mcp251xfd-regmap.o +mcp251xfd-objs += mcp251xfd-timestamp.o + +mcp251xfd-$(CONFIG_DEV_COREDUMP) += mcp251xfd-dump.o diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 799e9d5d3481..142eb4506b55 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2,8 +2,8 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020 Pengutronix, -// Marc Kleine-Budde <[email protected]> +// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Marc Kleine-Budde <[email protected]> // // Based on: // @@ -16,7 +16,6 @@ #include <linux/clk.h> #include <linux/device.h> #include <linux/module.h> -#include <linux/netdevice.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> @@ -330,6 +329,7 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) struct mcp251xfd_tx_ring *tx_ring; struct mcp251xfd_rx_ring *rx_ring, *prev_rx_ring = NULL; struct mcp251xfd_tx_obj *tx_obj; + struct spi_transfer *xfer; u32 val; u16 addr; u8 len; @@ -347,8 +347,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) addr, val, val); for (j = 0; j < ARRAY_SIZE(tef_ring->uinc_xfer); j++) { - struct spi_transfer *xfer; - xfer = &tef_ring->uinc_xfer[j]; xfer->tx_buf = &tef_ring->uinc_buf; xfer->len = len; @@ -357,6 +355,15 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS; } + /* "cs_change == 1" on the last transfer results in an active + * chip select after the complete SPI message. This causes the + * controller to interpret the next register access as + * data. Set "cs_change" of the last transfer to "0" to + * properly deactivate the chip select at the end of the + * message. + */ + xfer->cs_change = 0; + /* TX */ tx_ring = priv->tx; tx_ring->head = 0; @@ -397,8 +404,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) addr, val, val); for (j = 0; j < ARRAY_SIZE(rx_ring->uinc_xfer); j++) { - struct spi_transfer *xfer; - xfer = &rx_ring->uinc_xfer[j]; xfer->tx_buf = &rx_ring->uinc_buf; xfer->len = len; @@ -406,6 +411,15 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) xfer->cs_change_delay.value = 0; xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS; } + + /* "cs_change == 1" on the last transfer results in an + * active chip select after the complete SPI + * message. This causes the controller to interpret + * the next register access as data. 
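The mcp251xfd cs_change fix that follows keeps cs_change set on every uinc transfer except the terminating one, so chip select stays asserted between back-to-back transfers and drops only at the end of the message. A minimal sketch of that pattern, assuming a bound struct spi_device *spi and a one-byte command buffer cmd supplied by the caller:

#include <linux/kernel.h>
#include <linux/spi/spi.h>

static int example_uinc_burst(struct spi_device *spi, const u8 *cmd)
{
	struct spi_transfer xfers[4] = { };
	int i;

	for (i = 0; i < ARRAY_SIZE(xfers); i++) {
		xfers[i].tx_buf = cmd;
		xfers[i].len = 1;
		xfers[i].cs_change = 1;	/* keep CS asserted in between */
	}
	/* deassert CS after the very last transfer of the message */
	xfers[ARRAY_SIZE(xfers) - 1].cs_change = 0;

	return spi_sync_transfer(spi, xfers, ARRAY_SIZE(xfers));
}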
Set "cs_change" + * of the last transfer to "0" to properly deactivate + * the chip select at the end of the message. + */ + xfer->cs_change = 0; } } @@ -1097,6 +1111,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) return 0; out_chip_stop: + mcp251xfd_dump(priv); mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED); return err; @@ -1250,7 +1265,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, const struct mcp251xfd_hw_tef_obj *hw_tef_obj) { struct net_device_stats *stats = &priv->ndev->stats; - u32 seq, seq_masked, tef_tail_masked; + struct sk_buff *skb; + u32 seq, seq_masked, tef_tail_masked, tef_tail; seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, hw_tef_obj->flags); @@ -1266,9 +1282,13 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, if (seq_masked != tef_tail_masked) return mcp251xfd_handle_tefif_recover(priv, seq); + tef_tail = mcp251xfd_get_tef_tail(priv); + skb = priv->can.echo_skb[tef_tail]; + if (skb) + mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts); stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, - mcp251xfd_get_tef_tail(priv), + tef_tail, hw_tef_obj->ts, NULL); stats->tx_packets++; priv->tef->tail++; @@ -1365,25 +1385,20 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) if (len) { struct mcp251xfd_tef_ring *ring = priv->tef; struct mcp251xfd_tx_ring *tx_ring = priv->tx; - struct spi_transfer *last_xfer; + int offset; /* Increment the TEF FIFO tail pointer 'len' times in * a single SPI message. * * Note: - * - * "cs_change == 1" on the last transfer results in an - * active chip select after the complete SPI - * message. This causes the controller to interpret - * the next register access as data. Temporary set - * "cs_change" of the last transfer to "0" to properly - * deactivate the chip select at the end of the - * message. + * Calculate offset, so that the SPI transfer ends on + * the last message of the uinc_xfer array, which has + * "cs_change == 0", to properly deactivate the chip + * select. */ - last_xfer = &ring->uinc_xfer[len - 1]; - last_xfer->cs_change = 0; - err = spi_sync_transfer(priv->spi, ring->uinc_xfer, len); - last_xfer->cs_change = 1; + offset = ARRAY_SIZE(ring->uinc_xfer) - len; + err = spi_sync_transfer(priv->spi, + ring->uinc_xfer + offset, len); if (err) return err; @@ -1432,7 +1447,7 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv, } static void -mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, +mcp251xfd_hw_rx_obj_to_skb(struct mcp251xfd_priv *priv, const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj, struct sk_buff *skb) { @@ -1475,6 +1490,8 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) memcpy(cfd->data, hw_rx_obj->data, cfd->len); + + mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts); } static int @@ -1535,7 +1552,7 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, return err; while ((len = mcp251xfd_get_rx_linear_len(ring))) { - struct spi_transfer *last_xfer; + int offset; rx_tail = mcp251xfd_get_rx_tail(ring); @@ -1556,19 +1573,14 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, * single SPI message. * * Note: - * - * "cs_change == 1" on the last transfer results in an - * active chip select after the complete SPI - * message. This causes the controller to interpret - * the next register access as data. Temporary set - * "cs_change" of the last transfer to "0" to properly - * deactivate the chip select at the end of the - * message. 
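Because only the final array element now carries cs_change == 0, a transfer of len elements must end on that element; starting at ARRAY_SIZE(...) - len guarantees it for every possible len. A small standalone check of the index arithmetic:

#include <stdio.h>

#define RING_SIZE 8	/* stand-in for ARRAY_SIZE(ring->uinc_xfer) */

int main(void)
{
	unsigned int len;

	for (len = 1; len <= RING_SIZE; len++) {
		unsigned int offset = RING_SIZE - len;

		/* the last index is always RING_SIZE - 1 */
		printf("len=%u -> xfers[%u..%u]\n",
		       len, offset, offset + len - 1);
	}
	return 0;
}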
+ * Calculate offset, so that the SPI transfer ends on + * the last message of the uinc_xfer array, which has + * "cs_change == 0", to properly deactivate the chip + * select. */ - last_xfer = &ring->uinc_xfer[len - 1]; - last_xfer->cs_change = 0; - err = spi_sync_transfer(priv->spi, ring->uinc_xfer, len); - last_xfer->cs_change = 1; + offset = ARRAY_SIZE(ring->uinc_xfer) - len; + err = spi_sync_transfer(priv->spi, + ring->uinc_xfer + offset, len); if (err) return err; @@ -1592,23 +1604,22 @@ static int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv) return 0; } -static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv, - u32 *timestamp) -{ - return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp); -} - static struct sk_buff * -mcp251xfd_alloc_can_err_skb(const struct mcp251xfd_priv *priv, +mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv, struct can_frame **cf, u32 *timestamp) { + struct sk_buff *skb; int err; err = mcp251xfd_get_timestamp(priv, timestamp); if (err) return NULL; - return alloc_can_err_skb(priv->ndev, cf); + skb = alloc_can_err_skb(priv->ndev, cf); + if (skb) + mcp251xfd_skb_set_timestamp(priv, skb, *timestamp); + + return skb; } static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv) @@ -1760,6 +1771,7 @@ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv) if (!cf) return 0; + mcp251xfd_skb_set_timestamp(priv, skb, timestamp); err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); if (err) stats->rx_fifo_errors++; @@ -2277,6 +2289,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) out_fail: netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n", err, priv->regs_status.intf); + mcp251xfd_dump(priv); mcp251xfd_chip_interrupts_disable(priv); return handled; @@ -2493,6 +2506,7 @@ static int mcp251xfd_open(struct net_device *ndev) if (err) goto out_transceiver_disable; + mcp251xfd_timestamp_init(priv); can_rx_offload_enable(&priv->offload); err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq, @@ -2513,6 +2527,7 @@ static int mcp251xfd_open(struct net_device *ndev) free_irq(spi->irq, priv); out_can_rx_offload_disable: can_rx_offload_disable(&priv->offload); + mcp251xfd_timestamp_stop(priv); out_transceiver_disable: mcp251xfd_transceiver_disable(priv); out_mcp251xfd_ring_free: @@ -2534,6 +2549,7 @@ static int mcp251xfd_stop(struct net_device *ndev) mcp251xfd_chip_interrupts_disable(priv); free_irq(ndev->irq, priv); can_rx_offload_disable(&priv->offload); + mcp251xfd_timestamp_stop(priv); mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED); mcp251xfd_transceiver_disable(priv); mcp251xfd_ring_free(priv); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c new file mode 100644 index 000000000000..ffae8fdd3af0 --- /dev/null +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// mcp251xfd - Microchip MCP251xFD Family CAN controller driver +// +// Copyright (c) 2020, 2021 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// Copyright (C) 2015-2018 Etnaviv Project +// + +#include <linux/devcoredump.h> + +#include "mcp251xfd.h" +#include "mcp251xfd-dump.h" + +struct mcp251xfd_dump_iter { + void *start; + struct mcp251xfd_dump_object_header *hdr; + void *data; +}; + +struct mcp251xfd_dump_reg_space { + u16 base; + u16 size; +}; + +struct mcp251xfd_dump_ring { + enum mcp251xfd_dump_object_ring_key key; + u32 val; +}; + +static const struct mcp251xfd_dump_reg_space 
mcp251xfd_dump_reg_space[] = { + { + .base = MCP251XFD_REG_CON, + .size = MCP251XFD_REG_FLTOBJ(32) - MCP251XFD_REG_CON, + }, { + .base = MCP251XFD_RAM_START, + .size = MCP251XFD_RAM_SIZE, + }, { + .base = MCP251XFD_REG_OSC, + .size = MCP251XFD_REG_DEVID - MCP251XFD_REG_OSC, + }, +}; + +static void mcp251xfd_dump_header(struct mcp251xfd_dump_iter *iter, + enum mcp251xfd_dump_object_type object_type, + const void *data_end) +{ + struct mcp251xfd_dump_object_header *hdr = iter->hdr; + unsigned int len; + + len = data_end - iter->data; + if (!len) + return; + + hdr->magic = cpu_to_le32(MCP251XFD_DUMP_MAGIC); + hdr->type = cpu_to_le32(object_type); + hdr->offset = cpu_to_le32(iter->data - iter->start); + hdr->len = cpu_to_le32(len); + + iter->hdr++; + iter->data += len; +} + +static void mcp251xfd_dump_registers(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter) +{ + const int val_bytes = regmap_get_val_bytes(priv->map_rx); + struct mcp251xfd_dump_object_reg *reg = iter->data; + unsigned int i, j; + int err; + + for (i = 0; i < ARRAY_SIZE(mcp251xfd_dump_reg_space); i++) { + const struct mcp251xfd_dump_reg_space *reg_space; + void *buf; + + reg_space = &mcp251xfd_dump_reg_space[i]; + + buf = kmalloc(reg_space->size, GFP_KERNEL); + if (!buf) + goto out; + + err = regmap_bulk_read(priv->map_reg, reg_space->base, + buf, reg_space->size / val_bytes); + if (err) { + kfree(buf); + continue; + } + + for (j = 0; j < reg_space->size; j += sizeof(u32), reg++) { + reg->reg = cpu_to_le32(reg_space->base + j); + reg->val = cpu_to_le32p(buf + j); + } + + kfree(buf); + } + + out: + mcp251xfd_dump_header(iter, MCP251XFD_DUMP_OBJECT_TYPE_REG, reg); +} + +static void mcp251xfd_dump_ring(struct mcp251xfd_dump_iter *iter, + enum mcp251xfd_dump_object_type object_type, + const struct mcp251xfd_dump_ring *dump_ring, + unsigned int len) +{ + struct mcp251xfd_dump_object_reg *reg = iter->data; + unsigned int i; + + for (i = 0; i < len; i++, reg++) { + reg->reg = cpu_to_le32(dump_ring[i].key); + reg->val = cpu_to_le32(dump_ring[i].val); + } + + mcp251xfd_dump_header(iter, object_type, reg); +} + +static void mcp251xfd_dump_tef_ring(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter) +{ + const struct mcp251xfd_tef_ring *tef = priv->tef; + const struct mcp251xfd_tx_ring *tx = priv->tx; + const struct mcp251xfd_dump_ring dump_ring[] = { + { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD, + .val = tef->head, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL, + .val = tef->tail, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE, + .val = 0, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR, + .val = 0, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR, + .val = 0, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM, + .val = tx->obj_num, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE, + .val = sizeof(struct mcp251xfd_hw_tef_obj), + }, + }; + + mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_TEF, + dump_ring, ARRAY_SIZE(dump_ring)); +} + +static void mcp251xfd_dump_rx_ring_one(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter, + const struct mcp251xfd_rx_ring *rx) +{ + const struct mcp251xfd_dump_ring dump_ring[] = { + { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD, + .val = rx->head, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL, + .val = rx->tail, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE, + .val = rx->base, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR, + .val = rx->nr, + }, { + .key = 
MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR, + .val = rx->fifo_nr, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM, + .val = rx->obj_num, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE, + .val = rx->obj_size, + }, + }; + + mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_RX, + dump_ring, ARRAY_SIZE(dump_ring)); +} + +static void mcp251xfd_dump_rx_ring(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter) +{ + struct mcp251xfd_rx_ring *rx_ring; + unsigned int i; + + mcp251xfd_for_each_rx_ring(priv, rx_ring, i) + mcp251xfd_dump_rx_ring_one(priv, iter, rx_ring); +} + +static void mcp251xfd_dump_tx_ring(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter) +{ + const struct mcp251xfd_tx_ring *tx = priv->tx; + const struct mcp251xfd_dump_ring dump_ring[] = { + { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD, + .val = tx->head, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL, + .val = tx->tail, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE, + .val = tx->base, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR, + .val = 0, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR, + .val = MCP251XFD_TX_FIFO, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM, + .val = tx->obj_num, + }, { + .key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE, + .val = tx->obj_size, + }, + }; + + mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_TX, + dump_ring, ARRAY_SIZE(dump_ring)); +} + +static void mcp251xfd_dump_end(const struct mcp251xfd_priv *priv, + struct mcp251xfd_dump_iter *iter) +{ + struct mcp251xfd_dump_object_header *hdr = iter->hdr; + + hdr->magic = cpu_to_le32(MCP251XFD_DUMP_MAGIC); + hdr->type = cpu_to_le32(MCP251XFD_DUMP_OBJECT_TYPE_END); + hdr->offset = cpu_to_le32(0); + hdr->len = cpu_to_le32(0); + + /* provoke NULL pointer access, if used after END object */ + iter->hdr = NULL; +} + +void mcp251xfd_dump(const struct mcp251xfd_priv *priv) +{ + struct mcp251xfd_dump_iter iter; + unsigned int rings_num, obj_num; + unsigned int file_size = 0; + unsigned int i; + + /* register space + end marker */ + obj_num = 2; + + /* register space */ + for (i = 0; i < ARRAY_SIZE(mcp251xfd_dump_reg_space); i++) + file_size += mcp251xfd_dump_reg_space[i].size / sizeof(u32) * + sizeof(struct mcp251xfd_dump_object_reg); + + /* TEF ring, RX ring, TX rings */ + rings_num = 1 + priv->rx_ring_num + 1; + obj_num += rings_num; + file_size += rings_num * __MCP251XFD_DUMP_OBJECT_RING_KEY_MAX * + sizeof(struct mcp251xfd_dump_object_reg); + + /* size of the headers */ + file_size += sizeof(*iter.hdr) * obj_num; + + /* allocate the file in vmalloc memory, it's likely to be big */ + iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | + __GFP_ZERO | __GFP_NORETRY); + if (!iter.start) { + netdev_warn(priv->ndev, "Failed to allocate devcoredump file.\n"); + return; + } + + /* point the data member after the headers */ + iter.hdr = iter.start; + iter.data = &iter.hdr[obj_num]; + + mcp251xfd_dump_registers(priv, &iter); + mcp251xfd_dump_tef_ring(priv, &iter); + mcp251xfd_dump_rx_ring(priv, &iter); + mcp251xfd_dump_tx_ring(priv, &iter); + mcp251xfd_dump_end(priv, &iter); + + dev_coredumpv(&priv->spi->dev, iter.start, + iter.data - iter.start, GFP_KERNEL); +} diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h new file mode 100644 index 000000000000..e7560b0712eb --- /dev/null +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * mcp251xfd - Microchip 
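The resulting devcoredump blob is a run of object headers at the front, each pointing into the data area behind them via offset/len, terminated by an END header. A host-side sketch of how a tool might walk it; endianness conversion is omitted and little-endian is assumed, matching the __le32 fields:

#include <stdint.h>
#include <stdio.h>

#define DUMP_MAGIC	0x1825434d
#define TYPE_END	0xffffffffu	/* MCP251XFD_DUMP_OBJECT_TYPE_END */

struct dump_hdr {
	uint32_t magic;
	uint32_t type;
	uint32_t offset;
	uint32_t len;
};

static void walk_dump(const void *buf, size_t size)
{
	const struct dump_hdr *hdr = buf;

	while ((const char *)hdr + sizeof(*hdr) <= (const char *)buf + size) {
		if (hdr->magic != DUMP_MAGIC || hdr->type == TYPE_END)
			break;
		printf("object type %u: %u bytes at offset %u\n",
		       hdr->type, hdr->len, hdr->offset);
		hdr++;
	}
}

int main(void)
{
	/* a fabricated two-header blob: one REG object, then END */
	struct dump_hdr hdrs[2] = {
		{ DUMP_MAGIC, 0, sizeof(hdrs), 8 },
		{ DUMP_MAGIC, TYPE_END, 0, 0 },
	};

	walk_dump(hdrs, sizeof(hdrs));
	return 0;
}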
MCP251xFD Family CAN controller driver + * + * Copyright (c) 2019, 2020, 2021 Pengutronix, + * Marc Kleine-Budde <[email protected]> + */ + +#ifndef _MCP251XFD_DUMP_H +#define _MCP251XFD_DUMP_H + +#define MCP251XFD_DUMP_MAGIC 0x1825434d + +enum mcp251xfd_dump_object_type { + MCP251XFD_DUMP_OBJECT_TYPE_REG, + MCP251XFD_DUMP_OBJECT_TYPE_TEF, + MCP251XFD_DUMP_OBJECT_TYPE_RX, + MCP251XFD_DUMP_OBJECT_TYPE_TX, + MCP251XFD_DUMP_OBJECT_TYPE_END = -1, +}; + +enum mcp251xfd_dump_object_ring_key { + MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD, + MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL, + MCP251XFD_DUMP_OBJECT_RING_KEY_BASE, + MCP251XFD_DUMP_OBJECT_RING_KEY_NR, + MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR, + MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM, + MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE, + __MCP251XFD_DUMP_OBJECT_RING_KEY_MAX, +}; + +struct mcp251xfd_dump_object_header { + __le32 magic; + __le32 type; + __le32 offset; + __le32 len; +}; + +struct mcp251xfd_dump_object_reg { + __le32 reg; + __le32 val; +}; + +#endif diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c new file mode 100644 index 000000000000..ed3169274d24 --- /dev/null +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// mcp251xfd - Microchip MCP251xFD Family CAN controller driver +// +// Copyright (c) 2021 Pengutronix, +// Marc Kleine-Budde <[email protected]> +// + +#include <linux/clocksource.h> +#include <linux/workqueue.h> + +#include "mcp251xfd.h" + +static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc) +{ + struct mcp251xfd_priv *priv; + u32 timestamp = 0; + int err; + + priv = container_of(cc, struct mcp251xfd_priv, cc); + err = mcp251xfd_get_timestamp(priv, ×tamp); + if (err) + netdev_err(priv->ndev, + "Error %d while reading timestamp. 
HW timestamps may be inaccurate.", + err); + + return timestamp; +} + +static void mcp251xfd_timestamp_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mcp251xfd_priv *priv; + + priv = container_of(delayed_work, struct mcp251xfd_priv, timestamp); + timecounter_read(&priv->tc); + + schedule_delayed_work(&priv->timestamp, + MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ); +} + +void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv, + struct sk_buff *skb, u32 timestamp) +{ + struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); + u64 ns; + + ns = timecounter_cyc2time(&priv->tc, timestamp); + hwtstamps->hwtstamp = ns_to_ktime(ns); +} + +void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv) +{ + struct cyclecounter *cc = &priv->cc; + + cc->read = mcp251xfd_timestamp_read; + cc->mask = CYCLECOUNTER_MASK(32); + cc->shift = 1; + cc->mult = clocksource_hz2mult(priv->can.clock.freq, cc->shift); + + timecounter_init(&priv->tc, &priv->cc, ktime_get_real_ns()); + + INIT_DELAYED_WORK(&priv->timestamp, mcp251xfd_timestamp_work); + schedule_delayed_work(&priv->timestamp, + MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ); +} + +void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv) +{ + cancel_delayed_work_sync(&priv->timestamp); +} diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 480bd4480bdf..1002f3902ad2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -15,9 +15,12 @@ #include <linux/can/rx-offload.h> #include <linux/gpio/consumer.h> #include <linux/kernel.h> +#include <linux/netdevice.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/spi/spi.h> +#include <linux/timecounter.h> +#include <linux/workqueue.h> /* MPC251x registers */ @@ -394,6 +397,9 @@ #define MCP251XFD_SYSCLOCK_HZ_MAX 40000000 #define MCP251XFD_SYSCLOCK_HZ_MIN 1000000 #define MCP251XFD_SPICLOCK_HZ_MAX 20000000 +#define MCP251XFD_TIMESTAMP_WORK_DELAY_SEC 45 +static_assert(MCP251XFD_TIMESTAMP_WORK_DELAY_SEC < + CYCLECOUNTER_MASK(32) / MCP251XFD_SYSCLOCK_HZ_MAX / 2); #define MCP251XFD_OSC_PLL_MULTIPLIER 10 #define MCP251XFD_OSC_STAB_SLEEP_US (3 * USEC_PER_MSEC) #define MCP251XFD_OSC_STAB_TIMEOUT_US (10 * MCP251XFD_OSC_STAB_SLEEP_US) @@ -595,6 +601,10 @@ struct mcp251xfd_priv { struct mcp251xfd_ecc ecc; struct mcp251xfd_regs_status regs_status; + struct cyclecounter cc; + struct timecounter tc; + struct delayed_work timestamp; + struct gpio_desc *rx_int; struct clk *clk; struct regulator *reg_vdd; @@ -727,6 +737,12 @@ mcp251xfd_spi_cmd_write(const struct mcp251xfd_priv *priv, return data; } +static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv, + u32 *timestamp) +{ + return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp); +} + static inline u16 mcp251xfd_get_tef_obj_addr(u8 n) { return MCP251XFD_RAM_START + @@ -837,5 +853,17 @@ int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv); u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size, const void *data, size_t data_size); u16 mcp251xfd_crc16_compute(const void *data, size_t data_size); +void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv, + struct sk_buff *skb, u32 timestamp); +void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv); +void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv); + +#if IS_ENABLED(CONFIG_DEV_COREDUMP) +void mcp251xfd_dump(const struct mcp251xfd_priv *priv); +#else +static inline void mcp251xfd_dump(const 
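The 45 s work delay pairs with the static_assert in mcp251xfd.h below: timecounter_read() must run at least twice per 32-bit counter wrap so no wrap goes unnoticed. At the 40 MHz maximum SYSCLOCK the margin works out as follows:

#include <stdio.h>

int main(void)
{
	double wrap_s = 4294967296.0 / 40000000.0;	/* 2^32 / 40 MHz */

	printf("wrap every %.1f s, half-period %.1f s, work delay 45 s\n",
	       wrap_s, wrap_s / 2);	/* 107.4 s and 53.7 s > 45 s */
	return 0;
}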
struct mcp251xfd_priv *priv) +{ +} +#endif #endif diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 18f40eb20360..5af69787d9d5 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -807,7 +807,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err)) { - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); usb_unanchor_urb(urb); usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 562acbf454fd..65b58f8fc328 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -360,7 +360,7 @@ static void esd_usb2_tx_done_msg(struct esd_usb2_net_priv *priv, can_get_echo_skb(netdev, context->echo_index, NULL); } else { stats->tx_errors++; - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); } /* Release context */ @@ -793,7 +793,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); atomic_dec(&priv->active_tx_jobs); usb_unanchor_urb(urb); diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index a00dc1904415..5e892bef46b0 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -533,7 +533,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, if (unlikely(rc)) { /* usb send failed */ atomic_dec(&dev->active_tx_urbs); - can_free_echo_skb(netdev, idx); + can_free_echo_skb(netdev, idx, NULL); gs_free_tx_context(txc); usb_unanchor_urb(urb); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 4e97da8434ab..90ebcae13409 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -593,7 +593,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (unlikely(err)) { spin_lock_irqsave(&priv->tx_contexts_lock, flags); - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(netdev); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 1f649d178010..029e77dfa773 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -364,7 +364,7 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; xmit_failed: - can_free_echo_skb(priv->netdev, ctx->ndx); + can_free_echo_skb(priv->netdev, ctx->ndx, NULL); mcba_usb_free_ctx(ctx); dev_kfree_skb(skb); stats->tx_dropped++; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index e393e8457d77..ba509aed7b4c 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/usb.h> #include <linux/module.h> +#include <linux/ethtool.h> #include <linux/can.h> #include <linux/can/dev.h> @@ -40,6 +41,7 @@ #define PCAN_USB_CMD_REGISTER 9 #define PCAN_USB_CMD_EXT_VCC 10 #define PCAN_USB_CMD_ERR_FR 11 +#define PCAN_USB_CMD_LED 12 /* PCAN_USB_CMD_SET_BUS number arg */ #define PCAN_USB_BUS_XCVER 2 @@ -248,6 +250,15 @@ static int 
pcan_usb_set_ext_vcc(struct peak_usb_device *dev, u8 onoff) return pcan_usb_send_cmd(dev, PCAN_USB_CMD_EXT_VCC, PCAN_USB_SET, args); } +static int pcan_usb_set_led(struct peak_usb_device *dev, u8 onoff) +{ + u8 args[PCAN_USB_CMD_ARGS_LEN] = { + [0] = !!onoff, + }; + + return pcan_usb_send_cmd(dev, PCAN_USB_CMD_LED, PCAN_USB_SET, args); +} + /* * set bittiming value to can */ @@ -971,6 +982,40 @@ static int pcan_usb_probe(struct usb_interface *intf) return 0; } +static int pcan_usb_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct peak_usb_device *dev = netdev_priv(netdev); + int err = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + /* call ON/OFF twice a second */ + return 2; + + case ETHTOOL_ID_OFF: + err = pcan_usb_set_led(dev, 0); + break; + + case ETHTOOL_ID_ON: + fallthrough; + + case ETHTOOL_ID_INACTIVE: + /* restore LED default */ + err = pcan_usb_set_led(dev, 1); + break; + + default: + break; + } + + return err; +} + +static const struct ethtool_ops pcan_usb_ethtool_ops = { + .set_phys_id = pcan_usb_set_phys_id, +}; + /* * describe the PCAN-USB adapter */ @@ -1001,6 +1046,8 @@ const struct peak_usb_adapter pcan_usb = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb), + .ethtool_ops = &pcan_usb_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 16, .ts_period = 24575, /* calibration period in ts. */ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 573b11559d73..ad006edf474d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/usb.h> +#include <linux/ethtool.h> #include <linux/can.h> #include <linux/can/dev.h> @@ -371,7 +372,7 @@ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb, err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); usb_unanchor_urb(urb); @@ -820,6 +821,9 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, netdev->flags |= IFF_ECHO; /* we support local echo */ + /* add ethtool support */ + netdev->ethtool_ops = peak_usb_adapter->ethtool_ops; + init_usb_anchor(&dev->rx_submitted); init_usb_anchor(&dev->tx_submitted); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index 4b1528a42a7b..e15b4c78f309 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -46,6 +46,8 @@ struct peak_usb_adapter { const struct can_bittiming_const * const data_bittiming_const; unsigned int ctrl_count; + const struct ethtool_ops *ethtool_ops; + int (*intf_probe)(struct usb_interface *intf); int (*dev_init)(struct peak_usb_device *dev); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index bae078579c0d..6f62b6f51051 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/usb.h> #include <linux/module.h> +#include <linux/ethtool.h> #include <linux/can.h> #include <linux/can/dev.h> @@ -773,6 +774,10 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev, tx_msg_flags |= PUCAN_MSG_RTR; } + /* Single-Shot frame */ + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + tx_msg_flags |= 
PUCAN_MSG_SINGLE_SHOT; + tx_msg->flags = cpu_to_le16(tx_msg_flags); tx_msg->channel_dlc = PUCAN_MSG_CHANNEL_DLC(dev->ctrl_idx, dlc); memcpy(tx_msg->d, cfd->data, cfd->len); @@ -1006,6 +1011,31 @@ static void pcan_usb_fd_free(struct peak_usb_device *dev) } } +/* blink LED's */ +static int pcan_usb_fd_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct peak_usb_device *dev = netdev_priv(netdev); + int err = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_FAST); + break; + case ETHTOOL_ID_INACTIVE: + err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_DEF); + break; + default: + break; + } + + return err; +} + +static const struct ethtool_ops pcan_usb_fd_ethtool_ops = { + .set_phys_id = pcan_usb_fd_set_phys_id, +}; + /* describes the PCAN-USB FD adapter */ static const struct can_bittiming_const pcan_usb_fd_const = { .name = "pcan_usb_fd", @@ -1037,7 +1067,7 @@ const struct peak_usb_adapter pcan_usb_fd = { .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_CC_LEN8_DLC, + CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, @@ -1047,6 +1077,8 @@ const struct peak_usb_adapter pcan_usb_fd = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + .ethtool_ops = &pcan_usb_fd_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 32, .ts_period = 1000000, /* calibration period in ts. */ @@ -1110,7 +1142,7 @@ const struct peak_usb_adapter pcan_usb_chip = { .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_CC_LEN8_DLC, + CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, @@ -1120,6 +1152,8 @@ const struct peak_usb_adapter pcan_usb_chip = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + .ethtool_ops = &pcan_usb_fd_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 32, .ts_period = 1000000, /* calibration period in ts. */ @@ -1183,7 +1217,7 @@ const struct peak_usb_adapter pcan_usb_pro_fd = { .ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_CC_LEN8_DLC, + CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, @@ -1193,6 +1227,8 @@ const struct peak_usb_adapter pcan_usb_pro_fd = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + .ethtool_ops = &pcan_usb_fd_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 32, .ts_period = 1000000, /* calibration period in ts. */ @@ -1256,7 +1292,7 @@ const struct peak_usb_adapter pcan_usb_x6 = { .ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_CC_LEN8_DLC, + CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, @@ -1266,6 +1302,8 @@ const struct peak_usb_adapter pcan_usb_x6 = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), + .ethtool_ops = &pcan_usb_fd_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 32, .ts_period = 1000000, /* calibration period in ts. 
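For reference, the set_phys_id contract these implementations rely on: returning a positive count from ETHTOOL_ID_ACTIVE asks the ethtool core to do the toggling itself, calling back with ETHTOOL_ID_ON/ETHTOOL_ID_OFF that many times per second (the plain PCAN-USB path), while returning 0 means the hardware blinks on its own until ETHTOOL_ID_INACTIVE restores the default (the FD and Pro paths). Userspace triggers it with e.g. "ethtool -p can0 5". A generic handler shape, not tied to any of these devices:

#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int example_set_phys_id(struct net_device *netdev,
			       enum ethtool_phys_id_state state)
{
	switch (state) {
	case ETHTOOL_ID_ACTIVE:
		return 2;	/* core calls back ON/OFF twice a second */
	case ETHTOOL_ID_ON:
		return 0;	/* switch identification LED on */
	case ETHTOOL_ID_OFF:
		return 0;	/* switch identification LED off */
	case ETHTOOL_ID_INACTIVE:
		return 0;	/* restore normal LED behaviour */
	}
	return -EOPNOTSUPP;
}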
*/ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index 18fa180ecc81..2d1b645af76c 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -9,6 +9,7 @@ #include <linux/netdevice.h> #include <linux/usb.h> #include <linux/module.h> +#include <linux/ethtool.h> #include <linux/can.h> #include <linux/can/dev.h> @@ -36,6 +37,7 @@ #define PCAN_USBPRO_RTR 0x01 #define PCAN_USBPRO_EXT 0x02 +#define PCAN_USBPRO_SS 0x08 #define PCAN_USBPRO_CMD_BUFFER_SIZE 512 @@ -776,9 +778,13 @@ static int pcan_usb_pro_encode_msg(struct peak_usb_device *dev, flags = 0; if (cf->can_id & CAN_EFF_FLAG) - flags |= 0x02; + flags |= PCAN_USBPRO_EXT; if (cf->can_id & CAN_RTR_FLAG) - flags |= 0x01; + flags |= PCAN_USBPRO_RTR; + + /* Single-Shot frame */ + if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + flags |= PCAN_USBPRO_SS; pcan_msg_add_rec(&usb_msg, data_type, 0, flags, len, cf->can_id, cf->data); @@ -906,7 +912,7 @@ static int pcan_usb_pro_init(struct peak_usb_device *dev) usb_if->dev[dev->ctrl_idx] = dev; /* set LED in default state (end of init phase) */ - pcan_usb_pro_set_led(dev, 0, 1); + pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_DEVICE, 1); kfree(bi); kfree(fi); @@ -990,6 +996,35 @@ int pcan_usb_pro_probe(struct usb_interface *intf) return 0; } +static int pcan_usb_pro_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct peak_usb_device *dev = netdev_priv(netdev); + int err = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + /* fast blinking forever */ + err = pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_BLINK_FAST, + 0xffffffff); + break; + + case ETHTOOL_ID_INACTIVE: + /* restore LED default */ + err = pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_DEVICE, 1); + break; + + default: + break; + } + + return err; +} + +static const struct ethtool_ops pcan_usb_pro_ethtool_ops = { + .set_phys_id = pcan_usb_pro_set_phys_id, +}; + /* * describe the PCAN-USB Pro adapter */ @@ -1009,7 +1044,8 @@ const struct peak_usb_adapter pcan_usb_pro = { .name = "PCAN-USB Pro", .device_id = PCAN_USBPRO_PRODUCT_ID, .ctrl_count = PCAN_USBPRO_CHANNEL_COUNT, - .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY, + .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | + CAN_CTRLMODE_ONE_SHOT, .clock = { .freq = PCAN_USBPRO_CRYSTAL_HZ, }, @@ -1018,6 +1054,8 @@ const struct peak_usb_adapter pcan_usb_pro = { /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_pro_device), + .ethtool_ops = &pcan_usb_pro_ethtool_ops, + /* timestamps usage */ .ts_used_bits = 32, .ts_period = 1000000, /* calibration period in ts. 
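The one-shot additions in these hunks reduce to a common shape: advertise CAN_CTRLMODE_ONE_SHOT in ctrlmode_supported at probe time, then flag each outgoing frame if the user enabled the mode (e.g. via "ip link set can0 type can bitrate 500000 one-shot on"). A sketch with a hypothetical device flag standing in for PUCAN_MSG_SINGLE_SHOT or PCAN_USBPRO_SS:

#include <linux/can/dev.h>

#define EXAMPLE_SS_FLAG 0x08	/* hypothetical single-shot frame bit */

static u32 example_encode_flags(const struct can_priv *can, u32 flags)
{
	if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT)
		flags |= EXAMPLE_SS_FLAG;	/* no retransmission */
	return flags;
}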
*/ diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h index 6bb12357d078..6f4504300e23 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h @@ -115,6 +115,12 @@ struct __packed pcan_usb_pro_devid { __le32 serial_num; }; +#define PCAN_USBPRO_LED_DEVICE 0x00 +#define PCAN_USBPRO_LED_BLINK_FAST 0x01 +#define PCAN_USBPRO_LED_BLINK_SLOW 0x02 +#define PCAN_USBPRO_LED_ON 0x03 +#define PCAN_USBPRO_LED_OFF 0x04 + struct __packed pcan_usb_pro_setled { u8 data_type; u8 channel; diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index fa403c080871..1679cbe45ded 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -246,7 +246,7 @@ struct ucan_message_in { */ struct ucan_tx_complete_entry_t can_tx_complete_msg[0]; } __aligned(0x4) msg; -} __packed; +} __packed __aligned(0x4); /* Macros to calculate message lengths */ #define UCAN_OUT_HDR_SIZE offsetof(struct ucan_message_out, msg) @@ -675,7 +675,7 @@ static void ucan_tx_complete_msg(struct ucan_priv *up, can_get_echo_skb(up->netdev, echo_index, NULL); } else { up->netdev->stats.tx_dropped++; - can_free_echo_skb(up->netdev, echo_index); + can_free_echo_skb(up->netdev, echo_index, NULL); } spin_unlock_irqrestore(&up->echo_skb_lock, flags); } @@ -843,7 +843,7 @@ static void ucan_write_bulk_callback(struct urb *urb) /* update counters an cleanup */ spin_lock_irqsave(&up->echo_skb_lock, flags); - can_free_echo_skb(up->netdev, context - up->context_array); + can_free_echo_skb(up->netdev, context - up->context_array, NULL); spin_unlock_irqrestore(&up->echo_skb_lock, flags); up->netdev->stats.tx_dropped++; @@ -1157,7 +1157,7 @@ static netdev_tx_t ucan_start_xmit(struct sk_buff *skb, * frees the skb */ spin_lock_irqsave(&up->echo_skb_lock, flags); - can_free_echo_skb(up->netdev, echo_index); + can_free_echo_skb(up->netdev, echo_index, NULL); spin_unlock_irqrestore(&up->echo_skb_lock, flags); if (ret == -ENODEV) { diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index e8c42430a4fc..b6e7ef0d5bc6 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -691,7 +691,7 @@ nofreecontext: return NETDEV_TX_BUSY; failed: - can_free_echo_skb(netdev, context->echo_index); + can_free_echo_skb(netdev, context->echo_index, NULL); usb_unanchor_urb(urb); usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 37fa19c62d73..3b883e607d8b 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1772,17 +1772,15 @@ static int xcan_probe(struct platform_device *pdev) /* Getting the CAN can_clk info */ priv->can_clk = devm_clk_get(&pdev->dev, "can_clk"); if (IS_ERR(priv->can_clk)) { - if (PTR_ERR(priv->can_clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "Device clock not found.\n"); - ret = PTR_ERR(priv->can_clk); + ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->can_clk), + "device clock not found\n"); goto err_free; } priv->bus_clk = devm_clk_get(&pdev->dev, devtype->bus_clk_name); if (IS_ERR(priv->bus_clk)) { - if (PTR_ERR(priv->bus_clk) != -EPROBE_DEFER) - dev_err(&pdev->dev, "bus clock not found\n"); - ret = PTR_ERR(priv->bus_clk); + ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->bus_clk), + "bus clock not found\n"); goto err_free; } diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 413158275db8..ecb9f7f6b335 100644 --- a/drivers/net/dsa/b53/b53_spi.c 
+++ b/drivers/net/dsa/b53/b53_spi.c @@ -335,6 +335,7 @@ static const struct of_device_id b53_spi_of_match[] = { { .compatible = "brcm,bcm53128" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, b53_spi_of_match); static struct spi_driver b53_spi_driver = { .driver = { diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 918be7eb626f..4d78219da253 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1842,10 +1842,8 @@ static int hellcreek_probe(struct platform_device *pdev) } hellcreek->base = devm_ioremap_resource(dev, res); - if (IS_ERR(hellcreek->base)) { - dev_err(dev, "No memory available!\n"); + if (IS_ERR(hellcreek->base)) return PTR_ERR(hellcreek->base); - } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ptp"); if (!res) { @@ -1854,10 +1852,8 @@ static int hellcreek_probe(struct platform_device *pdev) } hellcreek->ptp_base = devm_ioremap_resource(dev, res); - if (IS_ERR(hellcreek->ptp_base)) { - dev_err(dev, "No memory available!\n"); + if (IS_ERR(hellcreek->ptp_base)) return PTR_ERR(hellcreek->ptp_base); - } ret = hellcreek_detect(hellcreek); if (ret) { diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c442a5885fca..2bd1bab71497 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -67,6 +67,11 @@ static const struct mt7530_mib_desc mt7530_mib[] = { MIB_DESC(1, 0xb8, "RxArlDrop"), }; +/* Since phy_device has not yet been created and + * phy_{read,write}_mmd_indirect is not available, we provide our own + * core_{read,write}_mmd_indirect with core_{clear,write,set} wrappers + * to complete this function. + */ static int core_read_mmd_indirect(struct mt7530_priv *priv, int prtad, int devad) { @@ -435,19 +440,13 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), TD_DM_DRVP(8) | TD_DM_DRVN(8)); - /* Setup core clock for MT7530 */ - /* Disable MT7530 core clock */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); + /* Disable MT7530 core and TRGMII Tx clocks */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, + REG_GSWCK_EN | REG_TRGMIICK_EN); - /* Disable PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we - * provide our own core_write_mmd_indirect to complete this - * function. 
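The probe cleanups above lean on helpers that already log: devm_ioremap_resource() prints its own diagnostic on failure, so the duplicated dev_err() calls can go, and dev_err_probe() both logs (except for -EPROBE_DEFER, where it records the deferral reason in debugfs instead of spamming the log) and returns the error in one statement. A condensed sketch of the two idioms, with an assumed clock name:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int example_probe_resources(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct resource *res;
	void __iomem *base;
	struct clk *clk;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);	/* helper already logged the reason */

	clk = devm_clk_get(dev, "can_clk");
	if (IS_ERR(clk))
		return dev_err_probe(dev, PTR_ERR(clk),
				     "device clock not found\n");
	return 0;
}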
- */ - core_write_mmd_indirect(priv, - CORE_GSWPLL_GRP1, - MDIO_MMD_VEND2, - 0); + /* Setup core clock for MT7530 */ + /* Disable PLL */ + core_write(priv, CORE_GSWPLL_GRP1, 0); /* Set core clock into 500Mhz */ core_write(priv, CORE_GSWPLL_GRP2, @@ -460,11 +459,7 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) RG_GSWPLL_POSDIV_200M(2) | RG_GSWPLL_FBKDIV_200M(32)); - /* Enable MT7530 core clock */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - /* Setup the MT7530 TRGMII Tx Clock */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0)); core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta)); @@ -478,6 +473,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) core_write(priv, CORE_PLL_GROUP7, RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) | RG_LCDDS_PWDB | RG_LCDDS_ISO_EN); + + /* Enable MT7530 core and TRGMII Tx clocks */ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN | REG_TRGMIICK_EN); diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 5ff623ee76a6..789fe08cae50 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1057,10 +1057,8 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) res.end += felix->imdio_base; imdio_regs = devm_ioremap_resource(dev, &res); - if (IS_ERR(imdio_regs)) { - dev_err(dev, "failed to map internal MDIO registers\n"); + if (IS_ERR(imdio_regs)) return PTR_ERR(imdio_regs); - } hw = enetc_hw_alloc(dev, imdio_regs); if (IS_ERR(hw)) { diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 02087d443e73..764852ead1d6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -863,7 +863,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) if (unlikely(i == timeout)) { netdev_err(ena_dev->net_device, - "Reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + "Reading reg failed for timeout. 
expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n", mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off); ret = ENA_MMIO_READ_TIMEOUT; @@ -2396,7 +2396,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, if (key) { if (key_len != sizeof(hash_key->key)) { netdev_err(ena_dev->net_device, - "key len (%hu) doesn't equal the supported size (%zu)\n", + "key len (%u) doesn't equal the supported size (%zu)\n", key_len, sizeof(hash_key->key)); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 5c062c51b4cb..881f88754bf6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3975,7 +3975,7 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); - /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + /* 1 IRQ for mgmnt and 1 IRQs for each IO direction */ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); if (unlikely(!max_num_io_queues)) { dev_err(&pdev->dev, "The device doesn't have io queues\n"); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 960d483e8997..4a1220cc6f10 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -100,19 +100,19 @@ static int amd8111e_read_phy(struct amd8111e_priv *lp, { void __iomem *mmio = lp->mmio; unsigned int reg_val; - unsigned int repeat= REPEAT_CNT; + unsigned int repeat = REPEAT_CNT; reg_val = readl(mmio + PHY_ACCESS); while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); + reg_val = readl(mmio + PHY_ACCESS); - writel( PHY_RD_CMD | ((phy_id & 0x1f) << 21) | - ((reg & 0x1f) << 16), mmio +PHY_ACCESS); - do{ + writel(PHY_RD_CMD | ((phy_id & 0x1f) << 21) | + ((reg & 0x1f) << 16), mmio + PHY_ACCESS); + do { reg_val = readl(mmio + PHY_ACCESS); udelay(30); /* It takes 30 us to read/write data */ } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - if(reg_val & PHY_RD_ERR) + if (reg_val & PHY_RD_ERR) goto err_phy_read; *val = reg_val & 0xffff; @@ -133,17 +133,17 @@ static int amd8111e_write_phy(struct amd8111e_priv *lp, reg_val = readl(mmio + PHY_ACCESS); while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); + reg_val = readl(mmio + PHY_ACCESS); - writel( PHY_WR_CMD | ((phy_id & 0x1f) << 21) | + writel(PHY_WR_CMD | ((phy_id & 0x1f) << 21) | ((reg & 0x1f) << 16)|val, mmio + PHY_ACCESS); - do{ + do { reg_val = readl(mmio + PHY_ACCESS); udelay(30); /* It takes 30 us to read/write the data */ } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - if(reg_val & PHY_RD_ERR) + if (reg_val & PHY_RD_ERR) goto err_phy_write; return 0; @@ -159,7 +159,7 @@ static int amd8111e_mdio_read(struct net_device *dev, int phy_id, int reg_num) struct amd8111e_priv *lp = netdev_priv(dev); unsigned int reg_val; - amd8111e_read_phy(lp,phy_id,reg_num,®_val); + amd8111e_read_phy(lp, phy_id, reg_num, ®_val); return reg_val; } @@ -179,17 +179,17 @@ static void amd8111e_mdio_write(struct net_device *dev, static void amd8111e_set_ext_phy(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - u32 bmcr,advert,tmp; + u32 bmcr, advert, tmp; /* Determine mii register values to set the speed */ advert = amd8111e_mdio_read(dev, lp->ext_phy_addr, MII_ADVERTISE); tmp = 
advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); - switch (lp->ext_phy_option){ + switch (lp->ext_phy_option) { default: case SPEED_AUTONEG: /* advertise all values */ - tmp |= ( ADVERTISE_10HALF|ADVERTISE_10FULL| - ADVERTISE_100HALF|ADVERTISE_100FULL) ; + tmp |= (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL); break; case SPEED10_HALF: tmp |= ADVERTISE_10HALF; @@ -224,20 +224,20 @@ static int amd8111e_free_skbs(struct net_device *dev) int i; /* Freeing transmit skbs */ - for(i = 0; i < NUM_TX_BUFFERS; i++){ - if(lp->tx_skbuff[i]){ + for (i = 0; i < NUM_TX_BUFFERS; i++) { + if (lp->tx_skbuff[i]) { dma_unmap_single(&lp->pci_dev->dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, DMA_TO_DEVICE); - dev_kfree_skb (lp->tx_skbuff[i]); + dev_kfree_skb(lp->tx_skbuff[i]); lp->tx_skbuff[i] = NULL; lp->tx_dma_addr[i] = 0; } } /* Freeing previously allocated receive buffers */ - for (i = 0; i < NUM_RX_BUFFERS; i++){ + for (i = 0; i < NUM_RX_BUFFERS; i++) { rx_skbuff = lp->rx_skbuff[i]; - if(rx_skbuff != NULL){ + if (rx_skbuff != NULL) { dma_unmap_single(&lp->pci_dev->dev, lp->rx_dma_addr[i], lp->rx_buff_len - 2, DMA_FROM_DEVICE); @@ -258,13 +258,13 @@ static inline void amd8111e_set_rx_buff_len(struct net_device *dev) struct amd8111e_priv *lp = netdev_priv(dev); unsigned int mtu = dev->mtu; - if (mtu > ETH_DATA_LEN){ + if (mtu > ETH_DATA_LEN) { /* MTU + ethernet header + FCS * + optional VLAN tag + skb reserve space 2 */ lp->rx_buff_len = mtu + ETH_HLEN + 10; lp->options |= OPTION_JUMBO_ENABLE; - } else{ + } else { lp->rx_buff_len = PKT_BUFF_SZ; lp->options &= ~OPTION_JUMBO_ENABLE; } @@ -285,11 +285,11 @@ static int amd8111e_init_ring(struct net_device *dev) lp->tx_ring_idx = 0; - if(lp->opened) + if (lp->opened) /* Free previously allocated transmit and receive skbs */ amd8111e_free_skbs(dev); - else{ + else { /* allocate the tx and rx descriptors */ lp->tx_ring = dma_alloc_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR, @@ -312,12 +312,12 @@ static int amd8111e_init_ring(struct net_device *dev) lp->rx_skbuff[i] = netdev_alloc_skb(dev, lp->rx_buff_len); if (!lp->rx_skbuff[i]) { - /* Release previos allocated skbs */ - for(--i; i >= 0 ;i--) - dev_kfree_skb(lp->rx_skbuff[i]); - goto err_free_rx_ring; + /* Release previos allocated skbs */ + for (--i; i >= 0; i--) + dev_kfree_skb(lp->rx_skbuff[i]); + goto err_free_rx_ring; } - skb_reserve(lp->rx_skbuff[i],2); + skb_reserve(lp->rx_skbuff[i], 2); } /* Initilaizing receive descriptors */ for (i = 0; i < NUM_RX_BUFFERS; i++) { @@ -375,40 +375,40 @@ static int amd8111e_set_coalesce(struct net_device *dev, enum coal_mode cmod) case RX_INTR_COAL : timeout = coal_conf->rx_timeout; event_count = coal_conf->rx_event_count; - if( timeout > MAX_TIMEOUT || - event_count > MAX_EVENT_COUNT ) + if (timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT) return -EINVAL; timeout = timeout * DELAY_TIMER_CONV; writel(VAL0|STINTEN, mmio+INTEN0); - writel((u32)DLY_INT_A_R0|( event_count<< 16 )|timeout, - mmio+DLY_INT_A); + writel((u32)DLY_INT_A_R0 | (event_count << 16) | + timeout, mmio + DLY_INT_A); break; - case TX_INTR_COAL : + case TX_INTR_COAL: timeout = coal_conf->tx_timeout; event_count = coal_conf->tx_event_count; - if( timeout > MAX_TIMEOUT || - event_count > MAX_EVENT_COUNT ) + if (timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT) return -EINVAL; timeout = timeout * DELAY_TIMER_CONV; - writel(VAL0|STINTEN,mmio+INTEN0); - writel((u32)DLY_INT_B_T0|( event_count<< 16 )|timeout, - mmio+DLY_INT_B); + 
writel(VAL0 | STINTEN, mmio + INTEN0); + writel((u32)DLY_INT_B_T0 | (event_count << 16) | + timeout, mmio + DLY_INT_B); break; case DISABLE_COAL: - writel(0,mmio+STVAL); - writel(STINTEN, mmio+INTEN0); - writel(0, mmio +DLY_INT_B); - writel(0, mmio+DLY_INT_A); + writel(0, mmio + STVAL); + writel(STINTEN, mmio + INTEN0); + writel(0, mmio + DLY_INT_B); + writel(0, mmio + DLY_INT_A); break; case ENABLE_COAL: /* Start the timer */ - writel((u32)SOFT_TIMER_FREQ, mmio+STVAL); /* 0.5 sec */ - writel(VAL0|STINTEN, mmio+INTEN0); + writel((u32)SOFT_TIMER_FREQ, mmio + STVAL); /* 0.5 sec */ + writel(VAL0 | STINTEN, mmio + INTEN0); break; default: break; @@ -423,67 +423,67 @@ static int amd8111e_restart(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); void __iomem *mmio = lp->mmio; - int i,reg_val; + int i, reg_val; /* stop the chip */ writel(RUN, mmio + CMD0); - if(amd8111e_init_ring(dev)) + if (amd8111e_init_ring(dev)) return -ENOMEM; /* enable the port manager and set auto negotiation always */ - writel((u32) VAL1|EN_PMGR, mmio + CMD3 ); - writel((u32)XPHYANE|XPHYRST , mmio + CTRL2); + writel((u32)VAL1 | EN_PMGR, mmio + CMD3); + writel((u32)XPHYANE | XPHYRST, mmio + CTRL2); amd8111e_set_ext_phy(dev); /* set control registers */ reg_val = readl(mmio + CTRL1); reg_val &= ~XMTSP_MASK; - writel( reg_val| XMTSP_128 | CACHE_ALIGN, mmio + CTRL1 ); + writel(reg_val | XMTSP_128 | CACHE_ALIGN, mmio + CTRL1); /* enable interrupt */ - writel( APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN | + writel(APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN | APINT0EN | MIIPDTINTEN | MCCIINTEN | MCCINTEN | MREINTEN | SPNDINTEN | MPINTEN | SINTEN | STINTEN, mmio + INTEN0); writel(VAL3 | LCINTEN | VAL1 | TINTEN0 | VAL0 | RINTEN0, mmio + INTEN0); /* initialize tx and rx ring base addresses */ - writel((u32)lp->tx_ring_dma_addr,mmio + XMT_RING_BASE_ADDR0); - writel((u32)lp->rx_ring_dma_addr,mmio+ RCV_RING_BASE_ADDR0); + writel((u32)lp->tx_ring_dma_addr, mmio + XMT_RING_BASE_ADDR0); + writel((u32)lp->rx_ring_dma_addr, mmio + RCV_RING_BASE_ADDR0); writew((u32)NUM_TX_RING_DR, mmio + XMT_RING_LEN0); writew((u16)NUM_RX_RING_DR, mmio + RCV_RING_LEN0); /* set default IPG to 96 */ - writew((u32)DEFAULT_IPG,mmio+IPG); + writew((u32)DEFAULT_IPG, mmio + IPG); writew((u32)(DEFAULT_IPG-IFS1_DELTA), mmio + IFS1); - if(lp->options & OPTION_JUMBO_ENABLE){ + if (lp->options & OPTION_JUMBO_ENABLE) { writel((u32)VAL2|JUMBO, mmio + CMD3); /* Reset REX_UFLO */ - writel( REX_UFLO, mmio + CMD2); + writel(REX_UFLO, mmio + CMD2); /* Should not set REX_UFLO for jumbo frames */ - writel( VAL0 | APAD_XMT|REX_RTRY , mmio + CMD2); - }else{ - writel( VAL0 | APAD_XMT | REX_RTRY|REX_UFLO, mmio + CMD2); + writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2); + } else { + writel(VAL0 | APAD_XMT | REX_RTRY | REX_UFLO, mmio + CMD2); writel((u32)JUMBO, mmio + CMD3); } #if AMD8111E_VLAN_TAG_USED - writel((u32) VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3); + writel((u32)VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3); #endif - writel( VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2 ); + writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2); /* Setting the MAC address to the device */ for (i = 0; i < ETH_ALEN; i++) - writeb( dev->dev_addr[i], mmio + PADR + i ); + writeb(dev->dev_addr[i], mmio + PADR + i); /* Enable interrupt coalesce */ - if(lp->options & OPTION_INTR_COAL_ENABLE){ + if (lp->options & OPTION_INTR_COAL_ENABLE) { netdev_info(dev, "Interrupt Coalescing Enabled.\n"); - amd8111e_set_coalesce(dev,ENABLE_COAL); + amd8111e_set_coalesce(dev, ENABLE_COAL); } /* 
set RUN bit to start the chip */ @@ -499,11 +499,11 @@ static int amd8111e_restart(struct net_device *dev) static void amd8111e_init_hw_default(struct amd8111e_priv *lp) { unsigned int reg_val; - unsigned int logic_filter[2] ={0,}; + unsigned int logic_filter[2] = {0,}; void __iomem *mmio = lp->mmio; - /* stop the chip */ + /* stop the chip */ writel(RUN, mmio + CMD0); /* AUTOPOLL0 Register *//*TBD default value is 8100 in FPS */ @@ -519,13 +519,13 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) writel(0, mmio + XMT_RING_BASE_ADDR3); /* Clear CMD0 */ - writel(CMD0_CLEAR,mmio + CMD0); + writel(CMD0_CLEAR, mmio + CMD0); /* Clear CMD2 */ - writel(CMD2_CLEAR, mmio +CMD2); + writel(CMD2_CLEAR, mmio + CMD2); /* Clear CMD7 */ - writel(CMD7_CLEAR , mmio + CMD7); + writel(CMD7_CLEAR, mmio + CMD7); /* Clear DLY_INT_A and DLY_INT_B */ writel(0x0, mmio + DLY_INT_A); @@ -542,16 +542,16 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) writel(0x0, mmio + STVAL); /* Clear INTEN0 */ - writel( INTEN0_CLEAR, mmio + INTEN0); + writel(INTEN0_CLEAR, mmio + INTEN0); /* Clear LADRF */ - writel(0x0 , mmio + LADRF); + writel(0x0, mmio + LADRF); /* Set SRAM_SIZE & SRAM_BOUNDARY registers */ - writel( 0x80010,mmio + SRAM_SIZE); + writel(0x80010, mmio + SRAM_SIZE); /* Clear RCV_RING0_LEN */ - writel(0x0, mmio + RCV_RING_LEN0); + writel(0x0, mmio + RCV_RING_LEN0); /* Clear XMT_RING0/1/2/3_LEN */ writel(0x0, mmio + XMT_RING_LEN0); @@ -571,10 +571,10 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) /* SRAM_SIZE register */ reg_val = readl(mmio + SRAM_SIZE); - if(lp->options & OPTION_JUMBO_ENABLE) - writel( VAL2|JUMBO, mmio + CMD3); + if (lp->options & OPTION_JUMBO_ENABLE) + writel(VAL2 | JUMBO, mmio + CMD3); #if AMD8111E_VLAN_TAG_USED - writel(VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3 ); + writel(VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3); #endif /* Set default value to CTRL1 Register */ writel(CTRL1_DEFAULT, mmio + CTRL1); @@ -616,14 +616,14 @@ static void amd8111e_stop_chip(struct amd8111e_priv *lp) static void amd8111e_free_ring(struct amd8111e_priv *lp) { /* Free transmit and receive descriptor rings */ - if(lp->rx_ring){ + if (lp->rx_ring) { dma_free_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_rx_dr) * NUM_RX_RING_DR, lp->rx_ring, lp->rx_ring_dma_addr); lp->rx_ring = NULL; } - if(lp->tx_ring){ + if (lp->tx_ring) { dma_free_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR, lp->tx_ring, lp->tx_ring_dma_addr); @@ -643,11 +643,11 @@ static int amd8111e_tx(struct net_device *dev) int tx_index; int status; /* Complete all the transmit packet */ - while (lp->tx_complete_idx != lp->tx_idx){ + while (lp->tx_complete_idx != lp->tx_idx) { tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK; status = le16_to_cpu(lp->tx_ring[tx_index].tx_flags); - if(status & OWN_BIT) + if (status & OWN_BIT) break; /* It still hasn't been Txed */ lp->tx_ring[tx_index].buff_phy_addr = 0; @@ -669,10 +669,10 @@ static int amd8111e_tx(struct net_device *dev) le16_to_cpu(lp->tx_ring[tx_index].buff_count); if (netif_queue_stopped(dev) && - lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS +2){ + lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS + 2) { /* The ring is no longer full, clear tbusy. 
*/ /* lp->tx_full = 0; */ - netif_wake_queue (dev); + netif_wake_queue(dev); } } return 0; @@ -685,7 +685,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) struct net_device *dev = lp->amd8111e_net_dev; int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK; void __iomem *mmio = lp->mmio; - struct sk_buff *skb,*new_skb; + struct sk_buff *skb, *new_skb; int min_pkt_len, status; int num_rx_pkt = 0; short pkt_len; @@ -710,7 +710,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) goto err_next_pkt; } /* check for STP and ENP */ - if (!((status & STP_BIT) && (status & ENP_BIT))){ + if (!((status & STP_BIT) && (status & ENP_BIT))) { /* resetting flags */ lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; goto err_next_pkt; @@ -755,7 +755,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) skb->protocol = eth_type_trans(skb, dev); #if AMD8111E_VLAN_TAG_USED - if (vtag == TT_VLAN_TAGGED){ + if (vtag == TT_VLAN_TAGGED) { u16 vlan_tag = le16_to_cpu(lp->rx_ring[rx_index].tag_ctrl_info); __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } @@ -793,25 +793,25 @@ err_next_pkt: static int amd8111e_link_change(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - int status0,speed; + int status0, speed; /* read the link change */ - status0 = readl(lp->mmio + STAT0); + status0 = readl(lp->mmio + STAT0); - if(status0 & LINK_STATS){ - if(status0 & AUTONEG_COMPLETE) + if (status0 & LINK_STATS) { + if (status0 & AUTONEG_COMPLETE) lp->link_config.autoneg = AUTONEG_ENABLE; else lp->link_config.autoneg = AUTONEG_DISABLE; - if(status0 & FULL_DPLX) + if (status0 & FULL_DPLX) lp->link_config.duplex = DUPLEX_FULL; else lp->link_config.duplex = DUPLEX_HALF; speed = (status0 & SPEED_MASK) >> 7; - if(speed == PHY_SPEED_10) + if (speed == PHY_SPEED_10) lp->link_config.speed = SPEED_10; - else if(speed == PHY_SPEED_100) + else if (speed == PHY_SPEED_100) lp->link_config.speed = SPEED_100; netdev_info(dev, "Link is Up. 
Speed is %s Mbps %s Duplex\n", @@ -821,8 +821,7 @@ static int amd8111e_link_change(struct net_device *dev) "Full" : "Half"); netif_carrier_on(dev); - } - else{ + } else { lp->link_config.speed = SPEED_INVALID; lp->link_config.duplex = DUPLEX_INVALID; lp->link_config.autoneg = AUTONEG_INVALID; @@ -840,7 +839,7 @@ static int amd8111e_read_mib(void __iomem *mmio, u8 MIB_COUNTER) unsigned int data; unsigned int repeat = REPEAT_CNT; - writew( MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR); + writew(MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR); do { status = readw(mmio + MIB_ADDR); udelay(2); /* controller takes MAX 2 us to get mib data */ @@ -863,7 +862,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev) if (!lp->opened) return new_stats; - spin_lock_irqsave (&lp->lock, flags); + spin_lock_irqsave(&lp->lock, flags); /* stats.rx_packets */ new_stats->rx_packets = amd8111e_read_mib(mmio, rcv_broadcast_pkts)+ @@ -943,7 +942,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev) /* Reset the mibs for collecting new statistics */ /* writew(MIB_CLEAR, mmio + MIB_ADDR);*/ - spin_unlock_irqrestore (&lp->lock, flags); + spin_unlock_irqrestore(&lp->lock, flags); return new_stats; } @@ -974,96 +973,90 @@ static int amd8111e_calc_coalesce(struct net_device *dev) rx_data_rate = coal_conf->rx_bytes - coal_conf->rx_prev_bytes; coal_conf->rx_prev_bytes = coal_conf->rx_bytes; - if(rx_pkt_rate < 800){ - if(coal_conf->rx_coal_type != NO_COALESCE){ + if (rx_pkt_rate < 800) { + if (coal_conf->rx_coal_type != NO_COALESCE) { coal_conf->rx_timeout = 0x0; coal_conf->rx_event_count = 0; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = NO_COALESCE; } - } - else{ + } else { rx_pkt_size = rx_data_rate/rx_pkt_rate; - if (rx_pkt_size < 128){ - if(coal_conf->rx_coal_type != NO_COALESCE){ + if (rx_pkt_size < 128) { + if (coal_conf->rx_coal_type != NO_COALESCE) { coal_conf->rx_timeout = 0; coal_conf->rx_event_count = 0; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = NO_COALESCE; } - } - else if ( (rx_pkt_size >= 128) && (rx_pkt_size < 512) ){ + } else if ((rx_pkt_size >= 128) && (rx_pkt_size < 512)) { - if(coal_conf->rx_coal_type != LOW_COALESCE){ + if (coal_conf->rx_coal_type != LOW_COALESCE) { coal_conf->rx_timeout = 1; coal_conf->rx_event_count = 4; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = LOW_COALESCE; } - } - else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)){ + } else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)) { - if(coal_conf->rx_coal_type != MEDIUM_COALESCE){ + if (coal_conf->rx_coal_type != MEDIUM_COALESCE) { coal_conf->rx_timeout = 1; coal_conf->rx_event_count = 4; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = MEDIUM_COALESCE; } - } - else if(rx_pkt_size >= 1024){ - if(coal_conf->rx_coal_type != HIGH_COALESCE){ + } else if (rx_pkt_size >= 1024) { + + if (coal_conf->rx_coal_type != HIGH_COALESCE) { coal_conf->rx_timeout = 2; coal_conf->rx_event_count = 3; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = HIGH_COALESCE; } } } - /* NOW FOR TX INTR COALESC */ - if(tx_pkt_rate < 800){ - if(coal_conf->tx_coal_type != NO_COALESCE){ + /* NOW FOR TX INTR COALESC */ + if (tx_pkt_rate < 800) { + if (coal_conf->tx_coal_type != NO_COALESCE) { 
coal_conf->tx_timeout = 0x0; coal_conf->tx_event_count = 0; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = NO_COALESCE; } - } - else{ + } else { tx_pkt_size = tx_data_rate/tx_pkt_rate; - if (tx_pkt_size < 128){ + if (tx_pkt_size < 128) { - if(coal_conf->tx_coal_type != NO_COALESCE){ + if (coal_conf->tx_coal_type != NO_COALESCE) { coal_conf->tx_timeout = 0; coal_conf->tx_event_count = 0; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = NO_COALESCE; } - } - else if ( (tx_pkt_size >= 128) && (tx_pkt_size < 512) ){ + } else if ((tx_pkt_size >= 128) && (tx_pkt_size < 512)) { - if(coal_conf->tx_coal_type != LOW_COALESCE){ + if (coal_conf->tx_coal_type != LOW_COALESCE) { coal_conf->tx_timeout = 1; coal_conf->tx_event_count = 2; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = LOW_COALESCE; } - } - else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)){ + } else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)) { - if(coal_conf->tx_coal_type != MEDIUM_COALESCE){ + if (coal_conf->tx_coal_type != MEDIUM_COALESCE) { coal_conf->tx_timeout = 2; coal_conf->tx_event_count = 5; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = MEDIUM_COALESCE; } } else if (tx_pkt_size >= 1024) { @@ -1091,7 +1084,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) unsigned int intr0, intren0; unsigned int handled = 1; - if(unlikely(dev == NULL)) + if (unlikely(dev == NULL)) return IRQ_NONE; spin_lock(&lp->lock); @@ -1105,7 +1098,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) /* Process all the INT event until INTR bit is clear. */ - if (!(intr0 & INTR)){ + if (!(intr0 & INTR)) { handled = 0; goto err_no_interrupt; } @@ -1140,7 +1133,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) amd8111e_calc_coalesce(dev); err_no_interrupt: - writel( VAL0 | INTREN,mmio + CMD0); + writel(VAL0 | INTREN, mmio + CMD0); spin_unlock(&lp->lock); @@ -1180,7 +1173,7 @@ static int amd8111e_close(struct net_device *dev) netif_carrier_off(lp->amd8111e_net_dev); /* Delete ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) del_timer_sync(&lp->ipg_data.ipg_timer); spin_unlock_irq(&lp->lock); @@ -1200,8 +1193,8 @@ static int amd8111e_open(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - if(dev->irq ==0 || request_irq(dev->irq, amd8111e_interrupt, IRQF_SHARED, - dev->name, dev)) + if (dev->irq == 0 || request_irq(dev->irq, amd8111e_interrupt, + IRQF_SHARED, dev->name, dev)) return -EAGAIN; napi_enable(&lp->napi); @@ -1210,7 +1203,7 @@ static int amd8111e_open(struct net_device *dev) amd8111e_init_hw_default(lp); - if(amd8111e_restart(dev)){ + if (amd8111e_restart(dev)) { spin_unlock_irq(&lp->lock); napi_disable(&lp->napi); if (dev->irq) @@ -1218,7 +1211,7 @@ static int amd8111e_open(struct net_device *dev) return -ENOMEM; } /* Start ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE){ + if (lp->options & OPTION_DYN_IPG_ENABLE) { add_timer(&lp->ipg_data.ipg_timer); netdev_info(dev, "Dynamic IPG Enabled\n"); } @@ -1289,10 +1282,10 @@ static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, lp->tx_idx++; /* Trigger an immediate send poll. 
*/ - writel( VAL1 | TDMD0, lp->mmio + CMD0); - writel( VAL2 | RDMD0,lp->mmio + CMD0); + writel(VAL1 | TDMD0, lp->mmio + CMD0); + writel(VAL2 | RDMD0, lp->mmio + CMD0); - if(amd8111e_tx_queue_avail(lp) < 0){ + if (amd8111e_tx_queue_avail(lp) < 0) { netif_stop_queue(dev); } spin_unlock_irqrestore(&lp->lock, flags); @@ -1326,15 +1319,15 @@ static void amd8111e_set_multicast_list(struct net_device *dev) { struct netdev_hw_addr *ha; struct amd8111e_priv *lp = netdev_priv(dev); - u32 mc_filter[2] ; + u32 mc_filter[2]; int bit_num; - if(dev->flags & IFF_PROMISC){ - writel( VAL2 | PROM, lp->mmio + CMD2); + if (dev->flags & IFF_PROMISC) { + writel(VAL2 | PROM, lp->mmio + CMD2); return; } else - writel( PROM, lp->mmio + CMD2); + writel(PROM, lp->mmio + CMD2); if (dev->flags & IFF_ALLMULTI || netdev_mc_count(dev) > MAX_FILTER_SIZE) { /* get all multicast packet */ @@ -1439,7 +1432,7 @@ static int amd8111e_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol_ if (wol_info->wolopts & WAKE_MAGIC) lp->options |= (OPTION_WOL_ENABLE | OPTION_WAKE_MAGIC_ENABLE); - else if(wol_info->wolopts & WAKE_PHY) + else if (wol_info->wolopts & WAKE_PHY) lp->options |= (OPTION_WOL_ENABLE | OPTION_WAKE_PHY_ENABLE); else @@ -1464,14 +1457,14 @@ static const struct ethtool_ops ops = { * gets/sets driver speed, gets memory mapped register values, forces * auto negotiation, sets/gets WOL options for ethtool application. */ -static int amd8111e_ioctl(struct net_device *dev , struct ifreq *ifr, int cmd) +static int amd8111e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *data = if_mii(ifr); struct amd8111e_priv *lp = netdev_priv(dev); int err; u32 mii_regval; - switch(cmd) { + switch (cmd) { case SIOCGMIIPHY: data->phy_id = lp->ext_phy_addr; @@ -1511,7 +1504,7 @@ static int amd8111e_set_mac_address(struct net_device *dev, void *p) spin_lock_irq(&lp->lock); /* Setting the MAC address to the device */ for (i = 0; i < ETH_ALEN; i++) - writeb( dev->dev_addr[i], lp->mmio + PADR + i ); + writeb(dev->dev_addr[i], lp->mmio + PADR + i); spin_unlock_irq(&lp->lock); @@ -1536,22 +1529,22 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu) spin_lock_irq(&lp->lock); - /* stop the chip */ + /* stop the chip */ writel(RUN, lp->mmio + CMD0); dev->mtu = new_mtu; err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); - if(!err) + if (!err) netif_start_queue(dev); return err; } static int amd8111e_enable_magicpkt(struct amd8111e_priv *lp) { - writel( VAL1|MPPLBA, lp->mmio + CMD3); - writel( VAL0|MPEN_SW, lp->mmio + CMD7); + writel(VAL1 | MPPLBA, lp->mmio + CMD3); + writel(VAL0 | MPEN_SW, lp->mmio + CMD7); /* To eliminate PCI posting bug */ readl(lp->mmio + CMD7); @@ -1562,7 +1555,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) { /* Adapter is already stoped/suspended/interrupt-disabled */ - writel(VAL0|LCMODE_SW,lp->mmio + CMD7); + writel(VAL0 | LCMODE_SW, lp->mmio + CMD7); /* To eliminate PCI posting bug */ readl(lp->mmio + CMD7); @@ -1584,7 +1577,7 @@ static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock_irq(&lp->lock); err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); - if(!err) + if (!err) netif_wake_queue(dev); } @@ -1605,22 +1598,21 @@ static int __maybe_unused amd8111e_suspend(struct device *dev_d) /* stop chip */ spin_lock_irq(&lp->lock); - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) del_timer_sync(&lp->ipg_data.ipg_timer); amd8111e_stop_chip(lp); spin_unlock_irq(&lp->lock); - 
if(lp->options & OPTION_WOL_ENABLE){ + if (lp->options & OPTION_WOL_ENABLE) { /* enable wol */ - if(lp->options & OPTION_WAKE_MAGIC_ENABLE) + if (lp->options & OPTION_WAKE_MAGIC_ENABLE) amd8111e_enable_magicpkt(lp); - if(lp->options & OPTION_WAKE_PHY_ENABLE) + if (lp->options & OPTION_WAKE_PHY_ENABLE) amd8111e_enable_link_change(lp); device_set_wakeup_enable(dev_d, 1); - } - else{ + } else { device_set_wakeup_enable(dev_d, 0); } @@ -1640,7 +1632,7 @@ static int __maybe_unused amd8111e_resume(struct device *dev_d) spin_lock_irq(&lp->lock); amd8111e_restart(dev); /* Restart ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES); spin_unlock_irq(&lp->lock); @@ -1657,14 +1649,14 @@ static void amd8111e_config_ipg(struct timer_list *t) unsigned int total_col_cnt; unsigned int tmp_ipg; - if(lp->link_config.duplex == DUPLEX_FULL){ + if (lp->link_config.duplex == DUPLEX_FULL) { ipg_data->ipg = DEFAULT_IPG; return; } - if(ipg_data->ipg_state == SSTATE){ + if (ipg_data->ipg_state == SSTATE) { - if(ipg_data->timer_tick == IPG_STABLE_TIME){ + if (ipg_data->timer_tick == IPG_STABLE_TIME) { ipg_data->timer_tick = 0; ipg_data->ipg = MIN_IPG - IPG_STEP; @@ -1676,7 +1668,7 @@ static void amd8111e_config_ipg(struct timer_list *t) ipg_data->timer_tick++; } - if(ipg_data->ipg_state == CSTATE){ + if (ipg_data->ipg_state == CSTATE) { /* Get the current collision count */ @@ -1684,10 +1676,10 @@ static void amd8111e_config_ipg(struct timer_list *t) amd8111e_read_mib(mmio, xmt_collisions); if ((total_col_cnt - prev_col_cnt) < - (ipg_data->diff_col_cnt)){ + (ipg_data->diff_col_cnt)) { ipg_data->diff_col_cnt = - total_col_cnt - prev_col_cnt ; + total_col_cnt - prev_col_cnt; ipg_data->ipg = ipg_data->current_ipg; } @@ -1696,7 +1688,7 @@ static void amd8111e_config_ipg(struct timer_list *t) if (ipg_data->current_ipg <= MAX_IPG) tmp_ipg = ipg_data->current_ipg; - else{ + else { tmp_ipg = ipg_data->ipg; ipg_data->ipg_state = SSTATE; } @@ -1748,24 +1740,24 @@ static int amd8111e_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int err, i; - unsigned long reg_addr,reg_len; + unsigned long reg_addr, reg_len; struct amd8111e_priv *lp; struct net_device *dev; err = pci_enable_device(pdev); - if(err){ + if (err) { dev_err(&pdev->dev, "Cannot enable new PCI device\n"); return err; } - if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)){ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Cannot find PCI base address\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, MODULE_NAME); - if(err){ + if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); goto err_disable_pdev; } @@ -1798,7 +1790,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX ; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif lp = netdev_priv(dev); @@ -1821,16 +1813,16 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Setting user defined parametrs */ lp->ext_phy_option = speed_duplex[card_idx]; - if(coalesce[card_idx]) + if (coalesce[card_idx]) lp->options |= OPTION_INTR_COAL_ENABLE; - if(dynamic_ipg[card_idx++]) + if (dynamic_ipg[card_idx++]) lp->options |= OPTION_DYN_IPG_ENABLE; /* Initialize driver entry points */ dev->netdev_ops = &amd8111e_netdev_ops; dev->ethtool_ops = &ops; - dev->irq =pdev->irq; + 
dev->irq = pdev->irq; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; dev->min_mtu = AMD8111E_MIN_MTU; dev->max_mtu = AMD8111E_MAX_MTU; @@ -1861,7 +1853,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); /* Initialize software ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE){ + if (lp->options & OPTION_DYN_IPG_ENABLE) { timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; @@ -1870,7 +1862,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, } /* display driver and device information */ - chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000)>>28; + chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000) >> 28; dev_info(&pdev->dev, "[ Rev %x ] PCI 10/100BaseT Ethernet %pM\n", chip_version, dev->dev_addr); if (lp->ext_phy_id) @@ -1879,7 +1871,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, else dev_info(&pdev->dev, "Couldn't detect MII PHY, assuming address 0x01\n"); - return 0; + return 0; err_free_dev: free_netdev(dev); @@ -1919,7 +1911,7 @@ MODULE_DEVICE_TABLE(pci, amd8111e_pci_tbl); static SIMPLE_DEV_PM_OPS(amd8111e_pm_ops, amd8111e_suspend, amd8111e_resume); static struct pci_driver amd8111e_driver = { - .name = MODULE_NAME, + .name = MODULE_NAME, .id_table = amd8111e_pci_tbl, .probe = amd8111e_probe_one, .remove = amd8111e_remove_one, diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c index e10aceb2b767..6784f8748638 100644 --- a/drivers/net/ethernet/amd/hplance.c +++ b/drivers/net/ethernet/amd/hplance.c @@ -170,6 +170,7 @@ static void hplance_init(struct net_device *dev, struct dio_dev *d) static void hplance_writerap(void *priv, unsigned short value) { struct lance_private *lp = (struct lance_private *)priv; + do { out_be16(lp->base + HPLANCE_REGOFF + LANCE_RAP, value); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); @@ -178,6 +179,7 @@ static void hplance_writerap(void *priv, unsigned short value) static void hplance_writerdp(void *priv, unsigned short value) { struct lance_private *lp = (struct lance_private *)priv; + do { out_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP, value); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); @@ -187,6 +189,7 @@ static unsigned short hplance_readrdp(void *priv) { struct lance_private *lp = (struct lance_private *)priv; __u16 value; + do { value = in_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1c86eddb1b51..facde824bcaa 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -18,7 +18,6 @@ #include <linux/delay.h> #include <linux/pm.h> #include <linux/clk.h> -#include <linux/version.h> #include <linux/platform_device.h> #include <net/arp.h> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a9b30a72ddad..80efc8116963 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -318,17 +318,34 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv, return 0; } +static enum dpsw_stp_state br_stp_state_to_dpsw(u8 state) +{ + switch (state) { + case BR_STATE_DISABLED: + return DPSW_STP_STATE_DISABLED; + case BR_STATE_LISTENING: + return DPSW_STP_STATE_LISTENING; + case 
BR_STATE_LEARNING: + return DPSW_STP_STATE_LEARNING; + case BR_STATE_FORWARDING: + return DPSW_STP_STATE_FORWARDING; + case BR_STATE_BLOCKING: + return DPSW_STP_STATE_BLOCKING; + default: + return DPSW_STP_STATE_DISABLED; + } +} + static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state) { - struct dpsw_stp_cfg stp_cfg = { - .state = state, - }; + struct dpsw_stp_cfg stp_cfg = {0}; int err; u16 vid; if (!netif_running(port_priv->netdev) || state == port_priv->stp_state) return 0; /* Nothing to do */ + stp_cfg.state = br_stp_state_to_dpsw(state); for (vid = 0; vid <= VLAN_VID_MASK; vid++) { if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) { stp_cfg.vlan_id = vid; @@ -1233,14 +1250,6 @@ static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev) fsl_mc_free_irqs(sw_dev); } -static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev, - u8 state) -{ - struct ethsw_port_priv *port_priv = netdev_priv(netdev); - - return dpaa2_switch_port_set_stp_state(port_priv, state); -} - static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, bool enable) { struct ethsw_core *ethsw = port_priv->ethsw_data; @@ -1263,6 +1272,32 @@ static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, boo return err; } +static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev, + u8 state) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + err = dpaa2_switch_port_set_stp_state(port_priv, state); + if (err) + return err; + + switch (state) { + case BR_STATE_DISABLED: + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + err = dpaa2_switch_port_set_learning(port_priv, false); + break; + case BR_STATE_LEARNING: + case BR_STATE_FORWARDING: + err = dpaa2_switch_port_set_learning(port_priv, + port_priv->learn_ena); + break; + } + + return err; +} + static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv, struct switchdev_brport_flags flags) { @@ -1312,6 +1347,7 @@ static int dpaa2_switch_port_bridge_flags(struct net_device *netdev, err = dpaa2_switch_port_set_learning(port_priv, learn_ena); if (err) return err; + port_priv->learn_ena = learn_ena; } if (flags.mask & (BR_BCAST_FLOOD | BR_FLOOD | BR_MCAST_FLOOD)) { @@ -1620,6 +1656,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, /* Inherit the initial bridge port learning state */ learn_ena = br_port_flag_is_set(netdev, BR_LEARNING); err = dpaa2_switch_port_set_learning(port_priv, learn_ena); + port_priv->learn_ena = learn_ena; /* Setup the egress flood policy (broadcast, unknown unicast) */ err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); @@ -1702,6 +1739,7 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) err = dpaa2_switch_port_set_learning(port_priv, false); if (err) return err; + port_priv->learn_ena = false; /* Add the VLAN 1 as PVID when not under a bridge. 
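
The two hunks above translate bridge STP states into the switch's own enum and, further down, tie hardware address learning to that state. Below is a minimal standalone C model of both policies; the enum values, helper names and the learn_ena flag are local stand-ins for the driver's, written under the assumption that learning is only wanted in the LEARNING and FORWARDING states:

    #include <stdbool.h>
    #include <stdio.h>

    /* local stand-ins for BR_STATE_* and DPSW_STP_STATE_* */
    enum br_state { BR_DISABLED, BR_LISTENING, BR_LEARNING,
                    BR_FORWARDING, BR_BLOCKING };
    enum hw_state { HW_DISABLED, HW_LISTENING, HW_LEARNING,
                    HW_FORWARDING, HW_BLOCKING };

    /* explicit mapping with a safe default, as in br_stp_state_to_dpsw() */
    static enum hw_state stp_to_hw(enum br_state s)
    {
            switch (s) {
            case BR_LISTENING:  return HW_LISTENING;
            case BR_LEARNING:   return HW_LEARNING;
            case BR_FORWARDING: return HW_FORWARDING;
            case BR_BLOCKING:   return HW_BLOCKING;
            case BR_DISABLED:
            default:            return HW_DISABLED;
            }
    }

    /* learning stays off unless the port may learn or forward, and even
     * then only if the user asked for it (the learn_ena flag) */
    static bool learning_on(enum br_state s, bool learn_ena)
    {
            return (s == BR_LEARNING || s == BR_FORWARDING) && learn_ena;
    }

    int main(void)
    {
            for (int s = BR_DISABLED; s <= BR_BLOCKING; s++)
                    printf("br %d -> hw %d, learning %s\n",
                           s, stp_to_hw((enum br_state)s),
                           learning_on((enum br_state)s, true) ? "on" : "off");
            return 0;
    }
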
We need this since * the dpaa2 switch interfaces are not capable to be VLAN unaware @@ -2632,8 +2670,72 @@ err_close: return err; } +/* Add an ACL to redirect frames with specific destination MAC address to + * control interface + */ +static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv, + const char *mac) +{ + struct net_device *netdev = port_priv->netdev; + struct dpsw_acl_entry_cfg acl_entry_cfg; + struct dpsw_acl_fields *acl_h; + struct dpsw_acl_fields *acl_m; + struct dpsw_acl_key acl_key; + struct device *dev; + u8 *cmd_buff; + int err; + + dev = port_priv->netdev->dev.parent; + acl_h = &acl_key.match; + acl_m = &acl_key.mask; + + if (port_priv->acl_num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) { + netdev_err(netdev, "ACL full\n"); + return -ENOMEM; + } + + memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg)); + memset(&acl_key, 0, sizeof(acl_key)); + + /* Match on the destination MAC address */ + ether_addr_copy(acl_h->l2_dest_mac, mac); + eth_broadcast_addr(acl_m->l2_dest_mac); + + cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL); + if (!cmd_buff) + return -ENOMEM; + dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff); + + memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg)); + acl_entry_cfg.precedence = port_priv->acl_num_rules; + acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; + acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, acl_entry_cfg.key_iova))) { + netdev_err(netdev, "DMA mapping failed\n"); + return -EFAULT; + } + + err = dpsw_acl_add_entry(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->acl_tbl, &acl_entry_cfg); + + dma_unmap_single(dev, acl_entry_cfg.key_iova, sizeof(cmd_buff), + DMA_TO_DEVICE); + if (err) { + netdev_err(netdev, "dpsw_acl_add_entry() failed %d\n", err); + return err; + } + + port_priv->acl_num_rules++; + + return 0; +} + static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) { + const char stpa[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; struct switchdev_obj_port_vlan vlan = { .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid = DEFAULT_VLAN_ID, @@ -2642,8 +2744,10 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) struct net_device *netdev = port_priv->netdev; struct ethsw_core *ethsw = port_priv->ethsw_data; struct dpsw_fdb_cfg fdb_cfg = {0}; - struct dpaa2_switch_fdb *fdb; + struct dpsw_acl_if_cfg acl_if_cfg; struct dpsw_if_attr dpsw_if_attr; + struct dpaa2_switch_fdb *fdb; + struct dpsw_acl_cfg acl_cfg; u16 fdb_id; int err; @@ -2685,6 +2789,29 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) if (err) return err; + /* Create an ACL table to be used by this switch port */ + acl_cfg.max_entries = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES; + err = dpsw_acl_add(ethsw->mc_io, 0, ethsw->dpsw_handle, + &port_priv->acl_tbl, &acl_cfg); + if (err) { + netdev_err(netdev, "dpsw_acl_add err %d\n", err); + return err; + } + + acl_if_cfg.if_id[0] = port_priv->idx; + acl_if_cfg.num_ifs = 1; + err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->acl_tbl, &acl_if_cfg); + if (err) { + netdev_err(netdev, "dpsw_acl_add_if err %d\n", err); + dpsw_acl_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->acl_tbl); + } + + err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa); + if (err) + return err; + return err; } @@ -2801,6 +2928,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core 
*ethsw, err = dpaa2_switch_port_set_learning(port_priv, false); if (err) goto err_port_probe; + port_priv->learn_ena = false; return 0; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index 549218994243..0ae1d27c811e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -79,6 +79,9 @@ #define DPAA2_SWITCH_NEEDED_HEADROOM \ (DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN) +#define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES 16 +#define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE 256 + extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops; struct ethsw_core; @@ -113,6 +116,10 @@ struct ethsw_port_priv { struct dpaa2_switch_fdb *fdb; bool bcast_flood; bool ucast_flood; + bool learn_ena; + + u16 acl_tbl; + u8 acl_num_rules; }; /* Switch data */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h index 24b17d6e09af..1747cee19a72 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h @@ -74,6 +74,12 @@ #define DPSW_CMDID_FDB_REMOVE_MULTICAST DPSW_CMD_ID(0x087) #define DPSW_CMDID_FDB_DUMP DPSW_CMD_ID(0x08A) +#define DPSW_CMDID_ACL_ADD DPSW_CMD_ID(0x090) +#define DPSW_CMDID_ACL_REMOVE DPSW_CMD_ID(0x091) +#define DPSW_CMDID_ACL_ADD_ENTRY DPSW_CMD_ID(0x092) +#define DPSW_CMDID_ACL_ADD_IF DPSW_CMD_ID(0x094) +#define DPSW_CMDID_ACL_REMOVE_IF DPSW_CMD_ID(0x095) + #define DPSW_CMDID_IF_GET_PORT_MAC_ADDR DPSW_CMD_ID(0x0A7) #define DPSW_CMDID_CTRL_IF_GET_ATTR DPSW_CMD_ID(0x0A0) @@ -457,5 +463,74 @@ struct dpsw_cmd_if_set_learning_mode { /* only the first 4 bits from LSB */ u8 mode; }; + +struct dpsw_cmd_acl_add { + __le16 pad; + __le16 max_entries; +}; + +struct dpsw_rsp_acl_add { + __le16 acl_id; +}; + +struct dpsw_cmd_acl_remove { + __le16 acl_id; +}; + +struct dpsw_cmd_acl_if { + __le16 acl_id; + __le16 num_ifs; + __le32 pad; + __le64 if_id; +}; + +struct dpsw_prep_acl_entry { + u8 match_l2_dest_mac[6]; + __le16 match_l2_tpid; + + u8 match_l2_source_mac[6]; + __le16 match_l2_vlan_id; + + __le32 match_l3_dest_ip; + __le32 match_l3_source_ip; + + __le16 match_l4_dest_port; + __le16 match_l4_source_port; + __le16 match_l2_ether_type; + u8 match_l2_pcp_dei; + u8 match_l3_dscp; + + u8 mask_l2_dest_mac[6]; + __le16 mask_l2_tpid; + + u8 mask_l2_source_mac[6]; + __le16 mask_l2_vlan_id; + + __le32 mask_l3_dest_ip; + __le32 mask_l3_source_ip; + + __le16 mask_l4_dest_port; + __le16 mask_l4_source_port; + __le16 mask_l2_ether_type; + u8 mask_l2_pcp_dei; + u8 mask_l3_dscp; + + u8 match_l3_protocol; + u8 mask_l3_protocol; +}; + +#define DPSW_RESULT_ACTION_SHIFT 0 +#define DPSW_RESULT_ACTION_SIZE 4 + +struct dpsw_cmd_acl_entry { + __le16 acl_id; + __le16 result_if_id; + __le32 precedence; + /* from LSB only the first 4 bits */ + u8 result_action; + u8 pad[7]; + __le64 pad2[4]; + __le64 key_iova; +}; #pragma pack(pop) #endif /* __FSL_DPSW_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c index 6c787d4b85f9..6704efe89bc1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c @@ -1354,3 +1354,193 @@ int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, return mc_send_command(mc_io, &cmd); } + +/** + * dpsw_acl_add() - Create an ACL table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 
'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: Returned ACL ID, for future references + * @cfg: ACL configuration + * + * Create Access Control List table. Multiple ACLs can be created and + * co-exist in L2 switch + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id, + const struct dpsw_acl_cfg *cfg) +{ + struct dpsw_cmd_acl_add *cmd_params; + struct dpsw_rsp_acl_add *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD, cmd_flags, token); + cmd_params = (struct dpsw_cmd_acl_add *)cmd.params; + cmd_params->max_entries = cpu_to_le16(cfg->max_entries); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_acl_add *)cmd.params; + *acl_id = le16_to_cpu(rsp_params->acl_id); + + return 0; +} + +/** + * dpsw_acl_remove() - Remove an ACL table from L2 switch. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id) +{ + struct dpsw_cmd_acl_remove *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_remove *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_add_if() - Associate interface/interfaces with an ACL table. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Interfaces list + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg) +{ + struct dpsw_cmd_acl_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_IF, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_if *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_remove_if() - De-associate interface/interfaces from an ACL table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Interfaces list + * + * Return: '0' on Success; Error code otherwise. 
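
dpsw_acl_add() above shows the usual MC command pattern: a packed little-endian parameter struct is overlaid on the raw command buffer and each multi-byte field is converted with cpu_to_le16()/le16_to_cpu(). A small userspace model of that marshalling, with glibc's htole16()/le16toh() standing in for the kernel helpers and a made-up local layout:

    #include <endian.h>   /* htole16()/le16toh(), glibc-style */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* 64-byte command: 8-byte header plus 56 bytes of parameters; this
     * layout is a local stand-in, not the real MC portal ABI */
    struct mc_cmd {
            uint64_t header;
            uint8_t  params[56];
    };

    struct acl_add_params {           /* models struct dpsw_cmd_acl_add */
            uint16_t pad;
            uint16_t max_entries;     /* little-endian on the wire */
    } __attribute__((packed));

    int main(void)
    {
            struct mc_cmd cmd;
            struct acl_add_params *p = (struct acl_add_params *)cmd.params;

            memset(&cmd, 0, sizeof(cmd));
            cmd.header = 0x090;             /* command id; normally built
                                             * by mc_encode_cmd_header() */
            p->max_entries = htole16(16);   /* cpu_to_le16() in-kernel */

            /* a response overlays the same buffer; reads swap back */
            printf("max_entries = %u\n", le16toh(p->max_entries));
            return 0;
    }
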
+ */ +int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg) +{ + struct dpsw_cmd_acl_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_IF, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_if *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_prepare_entry_cfg() - Setup an ACL entry + * @key: Key + * @entry_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA + * + * This function has to be called before adding or removing acl_entry + * + */ +void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, + u8 *entry_cfg_buf) +{ + struct dpsw_prep_acl_entry *ext_params; + int i; + + ext_params = (struct dpsw_prep_acl_entry *)entry_cfg_buf; + + for (i = 0; i < 6; i++) { + ext_params->match_l2_dest_mac[i] = key->match.l2_dest_mac[5 - i]; + ext_params->match_l2_source_mac[i] = key->match.l2_source_mac[5 - i]; + ext_params->mask_l2_dest_mac[i] = key->mask.l2_dest_mac[5 - i]; + ext_params->mask_l2_source_mac[i] = key->mask.l2_source_mac[5 - i]; + } + + ext_params->match_l2_tpid = cpu_to_le16(key->match.l2_tpid); + ext_params->match_l2_vlan_id = cpu_to_le16(key->match.l2_vlan_id); + ext_params->match_l3_dest_ip = cpu_to_le32(key->match.l3_dest_ip); + ext_params->match_l3_source_ip = cpu_to_le32(key->match.l3_source_ip); + ext_params->match_l4_dest_port = cpu_to_le16(key->match.l4_dest_port); + ext_params->match_l4_source_port = cpu_to_le16(key->match.l4_source_port); + ext_params->match_l2_ether_type = cpu_to_le16(key->match.l2_ether_type); + ext_params->match_l2_pcp_dei = key->match.l2_pcp_dei; + ext_params->match_l3_dscp = key->match.l3_dscp; + + ext_params->mask_l2_tpid = cpu_to_le16(key->mask.l2_tpid); + ext_params->mask_l2_vlan_id = cpu_to_le16(key->mask.l2_vlan_id); + ext_params->mask_l3_dest_ip = cpu_to_le32(key->mask.l3_dest_ip); + ext_params->mask_l3_source_ip = cpu_to_le32(key->mask.l3_source_ip); + ext_params->mask_l4_dest_port = cpu_to_le16(key->mask.l4_dest_port); + ext_params->mask_l4_source_port = cpu_to_le16(key->mask.l4_source_port); + ext_params->mask_l2_ether_type = cpu_to_le16(key->mask.l2_ether_type); + ext_params->mask_l2_pcp_dei = key->mask.l2_pcp_dei; + ext_params->mask_l3_dscp = key->mask.l3_dscp; + ext_params->match_l3_protocol = key->match.l3_protocol; + ext_params->mask_l3_protocol = key->mask.l3_protocol; +} + +/** + * dpsw_acl_add_entry() - Add a rule to the ACL table. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Entry configuration + * + * warning: This function has to be called after dpsw_acl_prepare_entry_cfg() + * + * Return: '0' on Success; Error code otherwise. 
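
The 5 - i copies in dpsw_acl_prepare_entry_cfg() above reverse the byte order of both MAC addresses, presumably because the firmware consumes them least-significant byte first. A tiny standalone check of that transform; the helper name is ours, and the input is the STP destination address the trap rule installs:

    #include <stdio.h>

    /* hypothetical helper mirroring the copy loop in the patch */
    static void mac_to_fw_order(const unsigned char in[6], unsigned char out[6])
    {
            for (int i = 0; i < 6; i++)
                    out[i] = in[5 - i];
    }

    int main(void)
    {
            /* the STP destination MAC the trap rule installs */
            const unsigned char stp_da[6] = { 0x01, 0x80, 0xc2, 0, 0, 0 };
            unsigned char fw[6];

            mac_to_fw_order(stp_da, fw);   /* -> 00:00:00:c2:80:01 */
            for (int i = 0; i < 6; i++)
                    printf("%02x%c", fw[i], i < 5 ? ':' : '\n');
            return 0;
    }
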
+ */ +int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg) +{ + struct dpsw_cmd_acl_entry *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_ENTRY, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id); + cmd_params->precedence = cpu_to_le32(cfg->precedence); + cmd_params->key_iova = cpu_to_le64(cfg->key_iova); + dpsw_set_field(cmd_params->result_action, + RESULT_ACTION, + cfg->result.action); + + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h index 96837b10cc94..08e37c475ae8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h @@ -628,4 +628,125 @@ int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, enum dpsw_learning_mode mode); +/** + * struct dpsw_acl_cfg - ACL Configuration + * @max_entries: Number of ACL rules + */ +struct dpsw_acl_cfg { + u16 max_entries; +}; + +int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id, + const struct dpsw_acl_cfg *cfg); + +int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id); + +/** + * struct dpsw_acl_if_cfg - List of interfaces to associate with an ACL table + * @num_ifs: Number of interfaces + * @if_id: List of interfaces + */ +struct dpsw_acl_if_cfg { + u16 num_ifs; + u16 if_id[DPSW_MAX_IF]; +}; + +int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg); + +int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg); + +/** + * struct dpsw_acl_fields - ACL fields. + * @l2_dest_mac: Destination MAC address: BPDU, Multicast, Broadcast, Unicast, + * slow protocols, MVRP, STP + * @l2_source_mac: Source MAC address + * @l2_tpid: Layer 2 (Ethernet) protocol type, used to identify the following + * protocols: MPLS, PTP, PFC, ARP, Jumbo frames, LLDP, IEEE802.1ae, + * Q-in-Q, IPv4, IPv6, PPPoE + * @l2_pcp_dei: indicate which protocol is encapsulated in the payload + * @l2_vlan_id: layer 2 VLAN ID + * @l2_ether_type: layer 2 Ethernet type + * @l3_dscp: Layer 3 differentiated services code point + * @l3_protocol: Tells the Network layer at the destination host, to which + * Protocol this packet belongs to. 
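
The dpsw_acl_key documentation above defines the mask semantics as b'1 valid, b'0 don't care. A self-contained sketch of that matching rule, building the same kind of key as the STP trap: match one destination MAC and ignore every other field (struct layout and names are simplified stand-ins):

    #include <stdio.h>
    #include <string.h>

    /* simplified key: only the fields this example needs */
    struct acl_fields {
            unsigned char  l2_dest_mac[6];
            unsigned short l2_ether_type;
    };

    struct acl_key {
            struct acl_fields match;
            struct acl_fields mask;
    };

    /* a frame field participates only where its mask bits are set */
    static int key_matches(const struct acl_key *k, const struct acl_fields *f)
    {
            for (int i = 0; i < 6; i++)
                    if ((f->l2_dest_mac[i] ^ k->match.l2_dest_mac[i]) &
                        k->mask.l2_dest_mac[i])
                            return 0;
            if ((f->l2_ether_type ^ k->match.l2_ether_type) &
                k->mask.l2_ether_type)
                    return 0;
            return 1;
    }

    int main(void)
    {
            const unsigned char stp_da[6] = { 0x01, 0x80, 0xc2, 0, 0, 0 };
            struct acl_key key;
            struct acl_fields frame;

            memset(&key, 0, sizeof(key));
            memset(&frame, 0, sizeof(frame));

            memcpy(key.match.l2_dest_mac, stp_da, 6);
            memset(key.mask.l2_dest_mac, 0xff, 6);  /* eth_broadcast_addr() */

            memcpy(frame.l2_dest_mac, stp_da, 6);
            frame.l2_ether_type = 0x88cc;           /* ignored: mask is 0 */
            printf("match: %d\n", key_matches(&key, &frame));
            return 0;
    }
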
The following protocol are + * supported: ICMP, IGMP, IPv4 (encapsulation), TCP, IPv6 + * (encapsulation), GRE, PTP + * @l3_source_ip: Source IPv4 IP + * @l3_dest_ip: Destination IPv4 IP + * @l4_source_port: Source TCP/UDP Port + * @l4_dest_port: Destination TCP/UDP Port + */ +struct dpsw_acl_fields { + u8 l2_dest_mac[6]; + u8 l2_source_mac[6]; + u16 l2_tpid; + u8 l2_pcp_dei; + u16 l2_vlan_id; + u16 l2_ether_type; + u8 l3_dscp; + u8 l3_protocol; + u32 l3_source_ip; + u32 l3_dest_ip; + u16 l4_source_port; + u16 l4_dest_port; +}; + +/** + * struct dpsw_acl_key - ACL key + * @match: Match fields + * @mask: Mask: b'1 - valid, b'0 don't care + */ +struct dpsw_acl_key { + struct dpsw_acl_fields match; + struct dpsw_acl_fields mask; +}; + +/** + * enum dpsw_acl_action - action to be run on the ACL rule match + * @DPSW_ACL_ACTION_DROP: Drop frame + * @DPSW_ACL_ACTION_REDIRECT: Redirect to certain port + * @DPSW_ACL_ACTION_ACCEPT: Accept frame + * @DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF: Redirect to control interface + */ +enum dpsw_acl_action { + DPSW_ACL_ACTION_DROP, + DPSW_ACL_ACTION_REDIRECT, + DPSW_ACL_ACTION_ACCEPT, + DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF +}; + +/** + * struct dpsw_acl_result - ACL action + * @action: Action should be taken when ACL entry hit + * @if_id: Interface IDs to redirect frame. Valid only if redirect selected for + * action + */ +struct dpsw_acl_result { + enum dpsw_acl_action action; + u16 if_id; +}; + +/** + * struct dpsw_acl_entry_cfg - ACL entry + * @key_iova: I/O virtual address of DMA-able memory filled with key after call + * to dpsw_acl_prepare_entry_cfg() + * @result: Required action when entry hit occurs + * @precedence: Precedence inside ACL 0 is lowest; This priority can not change + * during the lifetime of a Policy. It is user responsibility to + * space the priorities according to consequent rule additions. + */ +struct dpsw_acl_entry_cfg { + u64 key_iova; + struct dpsw_acl_result result; + int precedence; +}; + +void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, + u8 *entry_cfg_buf); + +int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); #endif /* __FSL_DPSW_H */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 5a54976e6a28..57049ae97201 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2,41 +2,71 @@ /* Copyright 2017-2019 NXP */ #include "enetc.h" +#include <linux/bpf_trace.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/vmalloc.h> #include <net/pkt_sched.h> -/* ENETC overhead: optional extension BD + 1 BD gap */ -#define ENETC_TXBDS_NEEDED(val) ((val) + 2) -/* max # of chained Tx BDs is 15, including head and extension BD */ -#define ENETC_MAX_SKB_FRAGS 13 -#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) +static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect) + return NULL; + + return tx_swbd->skb; +} + +static struct xdp_frame * +enetc_tx_swbd_get_xdp_frame(struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->is_xdp_redirect) + return tx_swbd->xdp_frame; + + return NULL; +} static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring, struct enetc_tx_swbd *tx_swbd) { + /* For XDP_TX, pages come from RX, whereas for the other contexts where + * we have is_dma_page_set, those come from skb_frag_dma_map. 
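
The comment above is about telling apart the ways a TX buffer can be produced. A standalone model of the two accessors added at the top of this file: exactly one of skb/xdp_frame is meaningful for a given buffer, so completion and error paths can fetch both once and branch on which is non-NULL (the types here are stand-ins, not the driver's):

    #include <stdbool.h>
    #include <stdio.h>

    struct tx_swbd {
            void *skb;          /* stands in for struct sk_buff * */
            void *xdp_frame;    /* stands in for struct xdp_frame * */
            bool is_xdp_tx;
            bool is_xdp_redirect;
    };

    /* XDP buffers never carry an skb */
    static void *swbd_get_skb(const struct tx_swbd *b)
    {
            return (b->is_xdp_tx || b->is_xdp_redirect) ? NULL : b->skb;
    }

    /* only XDP_REDIRECT buffers own an xdp_frame */
    static void *swbd_get_xdp_frame(const struct tx_swbd *b)
    {
            return b->is_xdp_redirect ? b->xdp_frame : NULL;
    }

    int main(void)
    {
            struct tx_swbd skb_buf = { .skb = "skb" };
            struct tx_swbd xdp_buf = { .xdp_frame = "frame",
                                       .is_xdp_redirect = true };

            printf("skb_buf: skb=%p frame=%p\n",
                   swbd_get_skb(&skb_buf), swbd_get_xdp_frame(&skb_buf));
            printf("xdp_buf: skb=%p frame=%p\n",
                   swbd_get_skb(&xdp_buf), swbd_get_xdp_frame(&xdp_buf));
            return 0;
    }
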
We need + * to match the DMA mapping length, so we need to differentiate those. + */ if (tx_swbd->is_dma_page) dma_unmap_page(tx_ring->dev, tx_swbd->dma, - tx_swbd->len, DMA_TO_DEVICE); + tx_swbd->is_xdp_tx ? PAGE_SIZE : tx_swbd->len, + tx_swbd->dir); else dma_unmap_single(tx_ring->dev, tx_swbd->dma, - tx_swbd->len, DMA_TO_DEVICE); + tx_swbd->len, tx_swbd->dir); tx_swbd->dma = 0; } -static void enetc_free_tx_skb(struct enetc_bdr *tx_ring, - struct enetc_tx_swbd *tx_swbd) +static void enetc_free_tx_frame(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) { + struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd); + struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd); + if (tx_swbd->dma) enetc_unmap_tx_buff(tx_ring, tx_swbd); - if (tx_swbd->skb) { - dev_kfree_skb_any(tx_swbd->skb); + if (xdp_frame) { + xdp_return_frame(tx_swbd->xdp_frame); + tx_swbd->xdp_frame = NULL; + } else if (skb) { + dev_kfree_skb_any(skb); tx_swbd->skb = NULL; } } +/* Let H/W know BD ring has been updated */ +static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring) +{ + /* includes wmb() */ + enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use); +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, int active_offloads) { @@ -67,6 +97,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, tx_swbd->dma = dma; tx_swbd->len = len; tx_swbd->is_dma_page = 0; + tx_swbd->dir = DMA_TO_DEVICE; count++; do_vlan = skb_vlan_tag_present(skb); @@ -149,6 +180,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, tx_swbd->dma = dma; tx_swbd->len = len; tx_swbd->is_dma_page = 1; + tx_swbd->dir = DMA_TO_DEVICE; count++; } @@ -157,6 +189,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, temp_bd.flags = flags; *txbd = temp_bd; + tx_ring->tx_swbd[i].is_eof = true; tx_ring->tx_swbd[i].skb = skb; enetc_bdr_idx_inc(tx_ring, &i); @@ -164,8 +197,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, skb_tx_timestamp(skb); - /* let H/W know BD ring has been updated */ - enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */ + enetc_update_tx_ring_tail(tx_ring); return count; @@ -174,7 +206,7 @@ dma_err: do { tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_skb(tx_ring, tx_swbd); + enetc_free_tx_frame(tx_ring, tx_swbd); if (i == 0) i = tx_ring->bd_count; i--; @@ -274,6 +306,25 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) return pi >= ci ? 
pi - ci : tx_ring->bd_count - ci + pi; } +static bool enetc_page_reusable(struct page *page) +{ + return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1); +} + +static void enetc_reuse_page(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *old) +{ + struct enetc_rx_swbd *new; + + new = &rx_ring->rx_swbd[rx_ring->next_to_alloc]; + + /* next buf that may reuse a page */ + enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc); + + /* copy page reference */ + *new = *old; +} + static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, u64 *tstamp) { @@ -299,6 +350,43 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) } } +static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) +{ + struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); + struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index]; + struct enetc_rx_swbd rx_swbd = { + .dma = tx_swbd->dma, + .page = tx_swbd->page, + .page_offset = tx_swbd->page_offset, + .dir = tx_swbd->dir, + .len = tx_swbd->len, + }; + + if (likely(enetc_swbd_unused(rx_ring))) { + enetc_reuse_page(rx_ring, &rx_swbd); + + /* sync for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd.dma, + rx_swbd.page_offset, + ENETC_RXB_DMA_SIZE_XDP, + rx_swbd.dir); + + rx_ring->stats.recycles++; + } else { + /* RX ring is already full, we need to unmap and free the + * page, since there's nothing useful we can do with it. + */ + rx_ring->stats.recycle_failures++; + + dma_unmap_page(rx_ring->dev, rx_swbd.dma, PAGE_SIZE, + rx_swbd.dir); + __free_page(rx_swbd.page); + } + + rx_ring->xdp.xdp_tx_in_flight--; +} + static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) { struct net_device *ndev = tx_ring->ndev; @@ -316,7 +404,8 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) do_tstamp = false; while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { - bool is_eof = !!tx_swbd->skb; + struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd); + struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd); if (unlikely(tx_swbd->check_wb)) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -332,19 +421,28 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) } } - if (likely(tx_swbd->dma)) + if (tx_swbd->is_xdp_tx) + enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd); + else if (likely(tx_swbd->dma)) enetc_unmap_tx_buff(tx_ring, tx_swbd); - if (is_eof) { + if (xdp_frame) { + xdp_return_frame(xdp_frame); + tx_swbd->xdp_frame = NULL; + } else if (skb) { if (unlikely(do_tstamp)) { - enetc_tstamp_tx(tx_swbd->skb, tstamp); + enetc_tstamp_tx(skb, tstamp); do_tstamp = false; } - napi_consume_skb(tx_swbd->skb, napi_budget); + napi_consume_skb(skb, napi_budget); tx_swbd->skb = NULL; } tx_byte_cnt += tx_swbd->len; + /* Scrub the swbd here so we don't have to do that + * when we reuse it during xmit + */ + memset(tx_swbd, 0, sizeof(*tx_swbd)); bds_to_clean--; tx_swbd++; @@ -355,7 +453,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) } /* BD iteration loop end */ - if (is_eof) { + if (tx_swbd->is_eof) { tx_frm_cnt++; /* re-arm interrupt source */ enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) | @@ -382,6 +480,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) static bool enetc_new_page(struct enetc_bdr *rx_ring, struct enetc_rx_swbd *rx_swbd) { + bool xdp = !!(rx_ring->xdp.prog); struct page *page; dma_addr_t addr; @@ -389,7 +488,10 @@ static 
bool enetc_new_page(struct enetc_bdr *rx_ring, if (unlikely(!page)) return false; - addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + /* For XDP_TX, we forgo dma_unmap -> dma_map */ + rx_swbd->dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, rx_swbd->dir); if (unlikely(dma_mapping_error(rx_ring->dev, addr))) { __free_page(page); @@ -398,7 +500,7 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring, rx_swbd->dma = addr; rx_swbd->page = page; - rx_swbd->page_offset = ENETC_RXB_PAD; + rx_swbd->page_offset = rx_ring->buffer_offset; return true; } @@ -513,32 +615,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, #endif } -static void enetc_process_skb(struct enetc_bdr *rx_ring, - struct sk_buff *skb) -{ - skb_record_rx_queue(skb, rx_ring->index); - skb->protocol = eth_type_trans(skb, rx_ring->ndev); -} - -static bool enetc_page_reusable(struct page *page) -{ - return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1); -} - -static void enetc_reuse_page(struct enetc_bdr *rx_ring, - struct enetc_rx_swbd *old) -{ - struct enetc_rx_swbd *new; - - new = &rx_ring->rx_swbd[rx_ring->next_to_alloc]; - - /* next buf that may reuse a page */ - enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc); - - /* copy page reference */ - *new = *old; -} - +/* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS, + * so it needs to work with both DMA_FROM_DEVICE as well as DMA_BIDIRECTIONAL + * mapped buffers. + */ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, int i, u16 size) { @@ -546,7 +626,7 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma, rx_swbd->page_offset, - size, DMA_FROM_DEVICE); + size, rx_swbd->dir); return rx_swbd; } @@ -554,6 +634,8 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring, struct enetc_rx_swbd *rx_swbd) { if (likely(enetc_page_reusable(rx_swbd->page))) { + size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset; + rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE; page_ref_inc(rx_swbd->page); @@ -562,11 +644,10 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring, /* sync for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, rx_swbd->page_offset, - ENETC_RXB_DMA_SIZE, - DMA_FROM_DEVICE); + buffer_size, rx_swbd->dir); } else { - dma_unmap_page(rx_ring->dev, rx_swbd->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); } rx_swbd->page = NULL; @@ -580,13 +661,13 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, void *ba; ba = page_address(rx_swbd->page) + rx_swbd->page_offset; - skb = build_skb(ba - ENETC_RXB_PAD, ENETC_RXB_TRUESIZE); + skb = build_skb(ba - rx_ring->buffer_offset, ENETC_RXB_TRUESIZE); if (unlikely(!skb)) { rx_ring->stats.rx_alloc_errs++; return NULL; } - skb_reserve(skb, ENETC_RXB_PAD); + skb_reserve(skb, rx_ring->buffer_offset); __skb_put(skb, size); enetc_put_rx_buff(rx_ring, rx_swbd); @@ -605,6 +686,69 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i, enetc_put_rx_buff(rx_ring, rx_swbd); } +static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring, + u32 bd_status, + union enetc_rx_bd **rxbd, int *i) +{ + if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK)))) + return false; + + enetc_rxbd_next(rx_ring, rxbd, i); + + while (!(bd_status & 
ENETC_RXBD_LSTATUS_F)) { + dma_rmb(); + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + + enetc_rxbd_next(rx_ring, rxbd, i); + } + + rx_ring->ndev->stats.rx_dropped++; + rx_ring->ndev->stats.rx_errors++; + + return true; +} + +static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring, + u32 bd_status, union enetc_rx_bd **rxbd, + int *i, int *cleaned_cnt, int buffer_size) +{ + struct sk_buff *skb; + u16 size; + + size = le16_to_cpu((*rxbd)->r.buf_len); + skb = enetc_map_rx_buff_to_skb(rx_ring, *i, size); + if (!skb) + return NULL; + + enetc_get_offloads(rx_ring, *rxbd, skb); + + (*cleaned_cnt)++; + + enetc_rxbd_next(rx_ring, rxbd, i); + + /* not last BD in frame? */ + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + size = buffer_size; + + if (bd_status & ENETC_RXBD_LSTATUS_F) { + dma_rmb(); + size = le16_to_cpu((*rxbd)->r.buf_len); + } + + enetc_add_rx_buff_to_skb(rx_ring, *i, size, skb); + + (*cleaned_cnt)++; + + enetc_rxbd_next(rx_ring, rxbd, i); + } + + skb_record_rx_queue(skb, rx_ring->index); + skb->protocol = eth_type_trans(skb, rx_ring->ndev); + + return skb; +} + #define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, @@ -621,7 +765,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd; struct sk_buff *skb; u32 bd_status; - u16 size; if (cleaned_cnt >= ENETC_RXBD_BUNDLE) cleaned_cnt -= enetc_refill_rx_ring(rx_ring, @@ -634,55 +777,446 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); dma_rmb(); /* for reading other rxbd fields */ - size = le16_to_cpu(rxbd->r.buf_len); - skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); + + if (enetc_check_bd_errors_and_consume(rx_ring, bd_status, + &rxbd, &i)) + break; + + skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i, + &cleaned_cnt, ENETC_RXB_DMA_SIZE); if (!skb) break; - enetc_get_offloads(rx_ring, rxbd, skb); + rx_byte_cnt += skb->len; + rx_frm_cnt++; - cleaned_cnt++; + napi_gro_receive(napi, skb); + } - enetc_rxbd_next(rx_ring, &rxbd, &i); + rx_ring->next_to_clean = i; - if (unlikely(bd_status & - ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { - dev_kfree_skb(skb); - while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { - dma_rmb(); - bd_status = le32_to_cpu(rxbd->r.lstatus); + rx_ring->stats.packets += rx_frm_cnt; + rx_ring->stats.bytes += rx_byte_cnt; - enetc_rxbd_next(rx_ring, &rxbd, &i); - } + return rx_frm_cnt; +} + +static void enetc_xdp_map_tx_buff(struct enetc_bdr *tx_ring, int i, + struct enetc_tx_swbd *tx_swbd, + int frm_len) +{ + union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i); + + prefetchw(txbd); + + enetc_clear_tx_bd(txbd); + txbd->addr = cpu_to_le64(tx_swbd->dma + tx_swbd->page_offset); + txbd->buf_len = cpu_to_le16(tx_swbd->len); + txbd->frm_len = cpu_to_le16(frm_len); + + memcpy(&tx_ring->tx_swbd[i], tx_swbd, sizeof(*tx_swbd)); +} + +/* Puts in the TX ring one XDP frame, mapped as an array of TX software buffer + * descriptors. 
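For reference, the multi-BD walk that enetc_build_skb() factors out can be modeled in a few lines of standalone C. This is an illustrative sketch, not driver code: the ring, flag, and size names are stand-ins, middle BDs are assumed to be completely filled, and only the BD carrying the final bit reports the residual length.

#include <stdio.h>

#define BD_COUNT 8          /* illustrative ring size */
#define BUF_SIZE 2048       /* stand-in for the per-BD buffer size */
#define BD_FINAL 0x1        /* stand-in for ENETC_RXBD_LSTATUS_F */

struct model_rxbd {
        unsigned int lstatus;
        unsigned int buf_len;
};

static void idx_inc(int *i)
{
        *i = (*i + 1) % BD_COUNT;
}

/* Walk one frame starting at *i, return its total length. */
static unsigned int walk_frame(const struct model_rxbd *bd, int *i)
{
        unsigned int status = bd[*i].lstatus;
        unsigned int len = bd[*i].buf_len;   /* head BD: real length */

        idx_inc(i);

        /* Middle BDs are full buffers; only the BD with the final
         * bit carries the true residual length.
         */
        while (!(status & BD_FINAL)) {
                unsigned int size = BUF_SIZE;

                status = bd[*i].lstatus;
                if (status & BD_FINAL)
                        size = bd[*i].buf_len;
                len += size;
                idx_inc(i);
        }

        return len;
}

int main(void)
{
        /* A 3-BD frame: 2048 + 2048 + 300 bytes */
        struct model_rxbd ring[BD_COUNT] = {
                { 0, 2048 }, { 0, 0 }, { BD_FINAL, 300 },
        };
        int i = 0;

        printf("frame len = %u\n", walk_frame(ring, &i)); /* 4396 */
        return 0;
}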
+ */ +static bool enetc_xdp_tx(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *xdp_tx_arr, int num_tx_swbd) +{ + struct enetc_tx_swbd *tmp_tx_swbd = xdp_tx_arr; + int i, k, frm_len = tmp_tx_swbd->len; + + if (unlikely(enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd))) + return false; + + while (unlikely(!tmp_tx_swbd->is_eof)) { + tmp_tx_swbd++; + frm_len += tmp_tx_swbd->len; + } + + i = tx_ring->next_to_use; + + for (k = 0; k < num_tx_swbd; k++) { + struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[k]; + + enetc_xdp_map_tx_buff(tx_ring, i, xdp_tx_swbd, frm_len); + + /* last BD needs 'F' bit set */ + if (xdp_tx_swbd->is_eof) { + union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i); + + txbd->flags = ENETC_TXBD_FLAGS_F; + } + + enetc_bdr_idx_inc(tx_ring, &i); + } + + tx_ring->next_to_use = i; + + return true; +} + +static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *xdp_tx_arr, + struct xdp_frame *xdp_frame) +{ + struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[0]; + struct skb_shared_info *shinfo; + void *data = xdp_frame->data; + int len = xdp_frame->len; + skb_frag_t *frag; + dma_addr_t dma; + unsigned int f; + int n = 0; + + dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) { + netdev_err(tx_ring->ndev, "DMA map error\n"); + return -1; + } + + xdp_tx_swbd->dma = dma; + xdp_tx_swbd->dir = DMA_TO_DEVICE; + xdp_tx_swbd->len = len; + xdp_tx_swbd->is_xdp_redirect = true; + xdp_tx_swbd->is_eof = false; + xdp_tx_swbd->xdp_frame = NULL; + + n++; + xdp_tx_swbd = &xdp_tx_arr[n]; + + shinfo = xdp_get_shared_info_from_frame(xdp_frame); + + for (f = 0, frag = &shinfo->frags[0]; f < shinfo->nr_frags; + f++, frag++) { + data = skb_frag_address(frag); + len = skb_frag_size(frag); + + dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) { + /* Undo the DMA mapping for all fragments */ + while (n-- >= 0) + enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]); + + netdev_err(tx_ring->ndev, "DMA map error\n"); + return -1; + } + + xdp_tx_swbd->dma = dma; + xdp_tx_swbd->dir = DMA_TO_DEVICE; + xdp_tx_swbd->len = len; + xdp_tx_swbd->is_xdp_redirect = true; + xdp_tx_swbd->is_eof = false; + xdp_tx_swbd->xdp_frame = NULL; - rx_ring->ndev->stats.rx_dropped++; - rx_ring->ndev->stats.rx_errors++; + n++; + xdp_tx_swbd = &xdp_tx_arr[n]; + } + + xdp_tx_arr[n - 1].is_eof = true; + xdp_tx_arr[n - 1].xdp_frame = xdp_frame; + + return n; +} + +int enetc_xdp_xmit(struct net_device *ndev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct enetc_tx_swbd xdp_redirect_arr[ENETC_MAX_SKB_FRAGS] = {0}; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_bdr *tx_ring; + int xdp_tx_bd_cnt, i, k; + int xdp_tx_frm_cnt = 0; + + tx_ring = priv->tx_ring[smp_processor_id()]; + + prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use)); + for (k = 0; k < num_frames; k++) { + xdp_tx_bd_cnt = enetc_xdp_frame_to_xdp_tx_swbd(tx_ring, + xdp_redirect_arr, + frames[k]); + if (unlikely(xdp_tx_bd_cnt < 0)) + break; + + if (unlikely(!enetc_xdp_tx(tx_ring, xdp_redirect_arr, + xdp_tx_bd_cnt))) { + for (i = 0; i < xdp_tx_bd_cnt; i++) + enetc_unmap_tx_buff(tx_ring, + &xdp_redirect_arr[i]); + tx_ring->stats.xdp_tx_drops++; break; } - /* not last BD in frame? 
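The .ndo_xdp_xmit path above queues a whole batch of frames and writes the tail register once, rather than once per frame. Below is a minimal userspace model of that batching, with illustrative names and sizes; the real doorbell write goes through enetc_wr_reg_hot() and includes a write barrier.

#include <stdio.h>

#define TX_BD_COUNT 64

struct model_txr {
        int next_to_use;    /* producer index (software copy) */
        int tail_reg;       /* models the TPIR doorbell register */
        int doorbells;      /* how many MMIO writes were issued */
};

/* Models enetc_update_tx_ring_tail(): one register write publishes
 * everything queued so far.
 */
static void update_tail(struct model_txr *tx)
{
        tx->tail_reg = tx->next_to_use;
        tx->doorbells++;
}

static void enqueue_frame(struct model_txr *tx, int nr_bds)
{
        tx->next_to_use = (tx->next_to_use + nr_bds) % TX_BD_COUNT;
}

int main(void)
{
        struct model_txr tx = { 0 };
        int k;

        /* Batch 8 single-BD frames, ring the doorbell once at the
         * end, as enetc_xdp_xmit() does on XDP_XMIT_FLUSH.
         */
        for (k = 0; k < 8; k++)
                enqueue_frame(&tx, 1);
        update_tail(&tx);

        printf("tail=%d doorbells=%d\n", tx.tail_reg, tx.doorbells);
        return 0;
}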
*/ - while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { - bd_status = le32_to_cpu(rxbd->r.lstatus); - size = ENETC_RXB_DMA_SIZE; + xdp_tx_frm_cnt++; + } - if (bd_status & ENETC_RXBD_LSTATUS_F) { - dma_rmb(); - size = le16_to_cpu(rxbd->r.buf_len); - } + if (unlikely((flags & XDP_XMIT_FLUSH) || k != xdp_tx_frm_cnt)) + enetc_update_tx_ring_tail(tx_ring); + + tx_ring->stats.xdp_tx += xdp_tx_frm_cnt; + + return xdp_tx_frm_cnt; +} + +static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, + struct xdp_buff *xdp_buff, u16 size) +{ + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset; + struct skb_shared_info *shinfo; - enetc_add_rx_buff_to_skb(rx_ring, i, size, skb); + /* To be used for XDP_TX */ + rx_swbd->len = size; - cleaned_cnt++; + xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset, + rx_ring->buffer_offset, size, false); - enetc_rxbd_next(rx_ring, &rxbd, &i); + shinfo = xdp_get_shared_info_from_buff(xdp_buff); + shinfo->nr_frags = 0; +} + +static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, + u16 size, struct xdp_buff *xdp_buff) +{ + struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp_buff); + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags]; + + /* To be used for XDP_TX */ + rx_swbd->len = size; + + skb_frag_off_set(frag, rx_swbd->page_offset); + skb_frag_size_set(frag, size); + __skb_frag_set_page(frag, rx_swbd->page); + + shinfo->nr_frags++; +} + +static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status, + union enetc_rx_bd **rxbd, int *i, + int *cleaned_cnt, struct xdp_buff *xdp_buff) +{ + u16 size = le16_to_cpu((*rxbd)->r.buf_len); + + xdp_init_buff(xdp_buff, ENETC_RXB_TRUESIZE, &rx_ring->xdp.rxq); + + enetc_map_rx_buff_to_xdp(rx_ring, *i, xdp_buff, size); + (*cleaned_cnt)++; + enetc_rxbd_next(rx_ring, rxbd, i); + + /* not last BD in frame? */ + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + size = ENETC_RXB_DMA_SIZE_XDP; + + if (bd_status & ENETC_RXBD_LSTATUS_F) { + dma_rmb(); + size = le16_to_cpu((*rxbd)->r.buf_len); } - rx_byte_cnt += skb->len; + enetc_add_rx_buff_to_xdp(rx_ring, *i, size, xdp_buff); + (*cleaned_cnt)++; + enetc_rxbd_next(rx_ring, rxbd, i); + } +} - enetc_process_skb(rx_ring, skb); +/* Reuse the current page without performing half-page buffer flipping */ +static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *rx_swbd) +{ + enetc_reuse_page(rx_ring, rx_swbd); - napi_gro_receive(napi, skb); + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, + rx_swbd->page_offset, + ENETC_RXB_DMA_SIZE_XDP, + rx_swbd->dir); + + rx_swbd->page = NULL; +} + +/* Convert RX buffer descriptors to TX buffer descriptors. These will be + * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the + * RX software BDs because the ownership of the buffer no longer belongs to the + * RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page. 
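The buffer recycling above builds on the driver's half-page scheme: each page is split into two buffers, and on ordinary reuse the offset is XOR-flipped to the other half while the stack still holds the first one. enetc_put_xdp_buff() is the exception that keeps the same half, since an XDP_TX buffer comes back unread by the stack. A simplified model of the flip and the reuse test follows; the pfmemalloc check and the DMA syncs are omitted, and the names are illustrative.

#include <stdio.h>

#define TRUESIZE 2048  /* stand-in for ENETC_RXB_TRUESIZE (half a 4K page) */

struct model_rx_swbd {
        unsigned int page_offset;
        int page_refs;      /* stand-in for page_ref_count() */
};

/* A page can be flipped only when the stack has dropped its
 * reference (the driver additionally rejects pfmemalloc pages).
 */
static int page_reusable(const struct model_rx_swbd *b)
{
        return b->page_refs == 1;
}

static void put_rx_buff(struct model_rx_swbd *b)
{
        if (page_reusable(b)) {
                /* flip to the other half and keep the page mapped */
                b->page_offset ^= TRUESIZE;
                b->page_refs++;       /* page_ref_inc() in the driver */
        } else {
                b->page_refs--;       /* unmap + free in the driver */
        }
}

int main(void)
{
        struct model_rx_swbd b = { .page_offset = 0, .page_refs = 1 };

        put_rx_buff(&b);
        printf("offset=%u\n", b.page_offset);  /* 2048: flipped */
        put_rx_buff(&b);  /* refs==2 now: not reusable, released */
        printf("refs=%d\n", b.page_refs);      /* 1 */
        return 0;
}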
+ */ +static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr, + struct enetc_bdr *rx_ring, + int rx_ring_first, int rx_ring_last) +{ + int n = 0; + + for (; rx_ring_first != rx_ring_last; + n++, enetc_bdr_idx_inc(rx_ring, &rx_ring_first)) { + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first]; + struct enetc_tx_swbd *tx_swbd = &xdp_tx_arr[n]; + + /* No need to dma_map, we already have DMA_BIDIRECTIONAL */ + tx_swbd->dma = rx_swbd->dma; + tx_swbd->dir = rx_swbd->dir; + tx_swbd->page = rx_swbd->page; + tx_swbd->page_offset = rx_swbd->page_offset; + tx_swbd->len = rx_swbd->len; + tx_swbd->is_dma_page = true; + tx_swbd->is_xdp_tx = true; + tx_swbd->is_eof = false; + memset(rx_swbd, 0, sizeof(*rx_swbd)); + } + + /* We rely on caller providing an rx_ring_last > rx_ring_first */ + xdp_tx_arr[n - 1].is_eof = true; + + return n; +} + +static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_put_xdp_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } + rx_ring->stats.xdp_drops++; +} + +static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first]; + + if (rx_swbd->page) { + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); + __free_page(rx_swbd->page); + rx_swbd->page = NULL; + } + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } + rx_ring->stats.xdp_redirect_failures++; +} + +static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit, + struct bpf_prog *prog) +{ + int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0; + struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0}; + struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); + struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index]; + int rx_frm_cnt = 0, rx_byte_cnt = 0; + int cleaned_cnt, i; + u32 xdp_act; + + cleaned_cnt = enetc_bd_unused(rx_ring); + /* next descriptor to process */ + i = rx_ring->next_to_clean; + + while (likely(rx_frm_cnt < work_limit)) { + union enetc_rx_bd *rxbd, *orig_rxbd; + int orig_i, orig_cleaned_cnt; + struct xdp_buff xdp_buff; + struct sk_buff *skb; + int tmp_orig_i, err; + u32 bd_status; + + rxbd = enetc_rxbd(rx_ring, i); + bd_status = le32_to_cpu(rxbd->r.lstatus); + if (!bd_status) + break; + + enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); + dma_rmb(); /* for reading other rxbd fields */ + + if (enetc_check_bd_errors_and_consume(rx_ring, bd_status, + &rxbd, &i)) + break; + + orig_rxbd = rxbd; + orig_cleaned_cnt = cleaned_cnt; + orig_i = i; + + enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, + &cleaned_cnt, &xdp_buff); + + xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); + + switch (xdp_act) { + case XDP_ABORTED: + trace_xdp_exception(rx_ring->ndev, prog, xdp_act); + fallthrough; + case XDP_DROP: + enetc_xdp_drop(rx_ring, orig_i, i); + break; + case XDP_PASS: + rxbd = orig_rxbd; + cleaned_cnt = orig_cleaned_cnt; + i = orig_i; + + skb = enetc_build_skb(rx_ring, bd_status, &rxbd, + &i, &cleaned_cnt, + ENETC_RXB_DMA_SIZE_XDP); + if (unlikely(!skb)) + /* Exit the switch/case, not the loop */ + break; + + napi_gro_receive(napi, skb); + break; + case XDP_TX: + xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr, + rx_ring, + orig_i, i); + + if (!enetc_xdp_tx(tx_ring, xdp_tx_arr, xdp_tx_bd_cnt)) { + 
enetc_xdp_drop(rx_ring, orig_i, i); + tx_ring->stats.xdp_tx_drops++; + } else { + tx_ring->stats.xdp_tx += xdp_tx_bd_cnt; + rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt; + xdp_tx_frm_cnt++; + } + break; + case XDP_REDIRECT: + /* xdp_return_frame does not support S/G in the sense + * that it leaks the fragments (__xdp_return should not + * call page_frag_free only for the initial buffer). + * Until XDP_REDIRECT gains support for S/G let's keep + * the code structure in place, but dead. We drop the + * S/G frames ourselves to avoid memory leaks which + * would otherwise leave the kernel OOM. + */ + if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) { + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_redirect_sg++; + break; + } + + tmp_orig_i = orig_i; + + while (orig_i != i) { + enetc_put_rx_buff(rx_ring, + &rx_ring->rx_swbd[orig_i]); + enetc_bdr_idx_inc(rx_ring, &orig_i); + } + + err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); + if (unlikely(err)) { + enetc_xdp_free(rx_ring, tmp_orig_i, i); + } else { + xdp_redirect_frm_cnt++; + rx_ring->stats.xdp_redirect++; + } + + if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) { + xdp_do_flush_map(); + xdp_redirect_frm_cnt = 0; + } + + break; + default: + bpf_warn_invalid_xdp_action(xdp_act); + } rx_frm_cnt++; } @@ -692,6 +1226,16 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; + if (xdp_redirect_frm_cnt) + xdp_do_flush_map(); + + if (xdp_tx_frm_cnt) + enetc_update_tx_ring_tail(tx_ring); + + if (cleaned_cnt > rx_ring->xdp.xdp_tx_in_flight) + enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) - + rx_ring->xdp.xdp_tx_in_flight); + return rx_frm_cnt; } @@ -699,6 +1243,8 @@ static int enetc_poll(struct napi_struct *napi, int budget) { struct enetc_int_vector *v = container_of(napi, struct enetc_int_vector, napi); + struct enetc_bdr *rx_ring = &v->rx_ring; + struct bpf_prog *prog; bool complete = true; int work_done; int i; @@ -709,7 +1255,11 @@ static int enetc_poll(struct napi_struct *napi, int budget) if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) complete = false; - work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget); + prog = rx_ring->xdp.prog; + if (prog) + work_done = enetc_clean_rx_ring_xdp(rx_ring, napi, budget, prog); + else + work_done = enetc_clean_rx_ring(rx_ring, napi, budget); if (work_done == budget) complete = false; if (work_done) @@ -813,7 +1363,7 @@ static void enetc_free_txbdr(struct enetc_bdr *txr) int size, i; for (i = 0; i < txr->bd_count; i++) - enetc_free_tx_skb(txr, &txr->tx_swbd[i]); + enetc_free_tx_frame(txr, &txr->tx_swbd[i]); size = txr->bd_count * sizeof(union enetc_tx_bd); @@ -930,7 +1480,7 @@ static void enetc_free_tx_ring(struct enetc_bdr *tx_ring) for (i = 0; i < tx_ring->bd_count; i++) { struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_skb(tx_ring, tx_swbd); + enetc_free_tx_frame(tx_ring, tx_swbd); } tx_ring->next_to_clean = 0; @@ -950,8 +1500,8 @@ static void enetc_free_rx_ring(struct enetc_bdr *rx_ring) if (!rx_swbd->page) continue; - dma_unmap_page(rx_ring->dev, rx_swbd->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); __free_page(rx_swbd->page); rx_swbd->page = NULL; } @@ -1099,7 +1649,10 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) enetc_rxbdr_wr(hw, idx, ENETC_RBLENR, ENETC_RTBLENR_LEN(rx_ring->bd_count)); - enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); + if 
(rx_ring->xdp.prog) + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE_XDP); + else + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); @@ -1490,6 +2043,54 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + struct bpf_prog *old_prog; + bool is_up; + int i; + + /* The buffer layout is changing, so we need to drain the old + * RX buffers and seed new ones. + */ + is_up = netif_running(dev); + if (is_up) + dev_close(dev); + + old_prog = xchg(&priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + for (i = 0; i < priv->num_rx_rings; i++) { + struct enetc_bdr *rx_ring = priv->rx_ring[i]; + + rx_ring->xdp.prog = prog; + + if (prog) + rx_ring->buffer_offset = XDP_PACKET_HEADROOM; + else + rx_ring->buffer_offset = ENETC_RXB_PAD; + } + + if (is_up) + return dev_open(dev, extack); + + return 0; +} + +int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return enetc_setup_xdp_prog(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } + + return 0; +} + struct net_device_stats *enetc_get_stats(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1706,6 +2307,28 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) priv->int_vector[i] = v; + bdr = &v->rx_ring; + bdr->index = i; + bdr->ndev = priv->ndev; + bdr->dev = priv->dev; + bdr->bd_count = priv->rx_bd_count; + bdr->buffer_offset = ENETC_RXB_PAD; + priv->rx_ring[i] = bdr; + + err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); + if (err) { + kfree(v); + goto fail; + } + + err = xdp_rxq_info_reg_mem_model(&bdr->xdp.rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err) { + xdp_rxq_info_unreg(&bdr->xdp.rxq); + kfree(v); + goto fail; + } + /* init defaults for adaptive IC */ if (priv->ic_mode & ENETC_IC_RX_ADAPTIVE) { v->rx_ictt = 0x1; @@ -1733,22 +2356,20 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) bdr->bd_count = priv->tx_bd_count; priv->tx_ring[idx] = bdr; } - - bdr = &v->rx_ring; - bdr->index = i; - bdr->ndev = priv->ndev; - bdr->dev = priv->dev; - bdr->bd_count = priv->rx_bd_count; - priv->rx_ring[i] = bdr; } return 0; fail: while (i--) { - netif_napi_del(&priv->int_vector[i]->napi); - cancel_work_sync(&priv->int_vector[i]->rx_dim.work); - kfree(priv->int_vector[i]); + struct enetc_int_vector *v = priv->int_vector[i]; + struct enetc_bdr *rx_ring = &v->rx_ring; + + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq); + xdp_rxq_info_unreg(&rx_ring->xdp.rxq); + netif_napi_del(&v->napi); + cancel_work_sync(&v->rx_dim.work); + kfree(v); } pci_free_irq_vectors(pdev); @@ -1762,7 +2383,10 @@ void enetc_free_msix(struct enetc_ndev_priv *priv) for (i = 0; i < priv->bdr_int_num; i++) { struct enetc_int_vector *v = priv->int_vector[i]; + struct enetc_bdr *rx_ring = &v->rx_ring; + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq); + xdp_rxq_info_unreg(&rx_ring->xdp.rxq); netif_napi_del(&v->napi); cancel_work_sync(&v->rx_dim.work); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 773e412b9f4e..05474f46b0d9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -19,12 +19,21 @@ (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) struct enetc_tx_swbd { - struct sk_buff *skb; + union { + 
struct sk_buff *skb; + struct xdp_frame *xdp_frame; + }; dma_addr_t dma; + struct page *page; /* valid only if is_xdp_tx */ + u16 page_offset; /* valid only if is_xdp_tx */ u16 len; + enum dma_data_direction dir; u8 is_dma_page:1; u8 check_wb:1; u8 do_tstamp:1; + u8 is_eof:1; + u8 is_xdp_tx:1; + u8 is_xdp_redirect:1; }; #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE @@ -32,20 +41,44 @@ struct enetc_tx_swbd { #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ #define ENETC_RXB_DMA_SIZE \ (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) +#define ENETC_RXB_DMA_SIZE_XDP \ + (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM) struct enetc_rx_swbd { dma_addr_t dma; struct page *page; u16 page_offset; + enum dma_data_direction dir; + u16 len; }; +/* ENETC overhead: optional extension BD + 1 BD gap */ +#define ENETC_TXBDS_NEEDED(val) ((val) + 2) +/* max # of chained Tx BDs is 15, including head and extension BD */ +#define ENETC_MAX_SKB_FRAGS 13 +#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) + struct enetc_ring_stats { unsigned int packets; unsigned int bytes; unsigned int rx_alloc_errs; + unsigned int xdp_drops; + unsigned int xdp_tx; + unsigned int xdp_tx_drops; + unsigned int xdp_redirect; + unsigned int xdp_redirect_failures; + unsigned int xdp_redirect_sg; + unsigned int recycles; + unsigned int recycle_failures; +}; + +struct enetc_xdp_data { + struct xdp_rxq_info rxq; + struct bpf_prog *prog; + int xdp_tx_in_flight; }; -#define ENETC_RX_RING_DEFAULT_SIZE 512 +#define ENETC_RX_RING_DEFAULT_SIZE 2048 #define ENETC_TX_RING_DEFAULT_SIZE 256 #define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2) @@ -71,6 +104,9 @@ struct enetc_bdr { }; void __iomem *idr; /* Interrupt Detect Register pointer */ + int buffer_offset; + struct enetc_xdp_data xdp; + struct enetc_ring_stats stats; dma_addr_t bd_dma_base; @@ -92,6 +128,14 @@ static inline int enetc_bd_unused(struct enetc_bdr *bdr) return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1; } +static inline int enetc_swbd_unused(struct enetc_bdr *bdr) +{ + if (bdr->next_to_clean > bdr->next_to_alloc) + return bdr->next_to_clean - bdr->next_to_alloc - 1; + + return bdr->bd_count + bdr->next_to_clean - bdr->next_to_alloc - 1; +} + /* Control BD ring */ #define ENETC_CBDR_DEFAULT_SIZE 64 struct enetc_cbdr { @@ -275,6 +319,8 @@ struct enetc_ndev_priv { struct phylink *phylink; int ic_mode; u32 tx_ictt; + + struct bpf_prog *xdp_prog; }; /* Messaging */ @@ -314,6 +360,9 @@ int enetc_set_features(struct net_device *ndev, int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data); +int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp); +int enetc_xdp_xmit(struct net_device *ndev, int num_frames, + struct xdp_frame **frames, u32 flags); /* ethtool */ void enetc_set_ethtool_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 89e558135432..7cc81b453bd7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -192,10 +192,18 @@ static const struct { static const char rx_ring_stats[][ETH_GSTRING_LEN] = { "Rx ring %2d frames", "Rx ring %2d alloc errors", + "Rx ring %2d XDP drops", + "Rx ring %2d recycles", + "Rx ring %2d recycle failures", + "Rx ring %2d redirects", + "Rx ring %2d redirect failures", + "Rx 
ring %2d redirect S/G", }; static const char tx_ring_stats[][ETH_GSTRING_LEN] = { "Tx ring %2d frames", + "Tx ring %2d XDP frames", + "Tx ring %2d XDP drops", }; static int enetc_get_sset_count(struct net_device *ndev, int sset) @@ -267,12 +275,21 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg); - for (i = 0; i < priv->num_tx_rings; i++) + for (i = 0; i < priv->num_tx_rings; i++) { data[o++] = priv->tx_ring[i]->stats.packets; + data[o++] = priv->tx_ring[i]->stats.xdp_tx; + data[o++] = priv->tx_ring[i]->stats.xdp_tx_drops; + } for (i = 0; i < priv->num_rx_rings; i++) { data[o++] = priv->rx_ring[i]->stats.packets; data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs; + data[o++] = priv->rx_ring[i]->stats.xdp_drops; + data[o++] = priv->rx_ring[i]->stats.recycles; + data[o++] = priv->rx_ring[i]->stats.recycle_failures; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg; } if (!enetc_si_is_pf(priv->si)) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 5e95afd61c87..f61fedf462e5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -707,6 +707,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_features = enetc_pf_set_features, .ndo_do_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, + .ndo_bpf = enetc_setup_bpf, + .ndo_xdp_xmit = enetc_xdp_xmit, }; static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 0901fa6853ca..5fb05cf36b49 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -14,9 +14,9 @@ static void gve_get_drvinfo(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - strlcpy(info->driver, "gve", sizeof(info->driver)); - strlcpy(info->version, gve_version_str, sizeof(info->version)); - strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); + strscpy(info->driver, "gve", sizeof(info->driver)); + strscpy(info->version, gve_version_str, sizeof(info->version)); + strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); } static void gve_set_msglevel(struct net_device *netdev, u32 value) @@ -388,7 +388,7 @@ static int gve_set_channels(struct net_device *netdev, gve_get_channels(netdev, &old_settings); - /* Changing combined is not allowed allowed */ + /* Changing combined is not allowed */ if (cmd->combined_count != old_settings.combined_count) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 7a1a0a90a1a3..5e349c0bdecc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1235,7 +1235,7 @@ static int hns_nic_init_affinity_mask(int q_num, int ring_idx, { int cpu; - /* Diffrent irq banlance between 16core and 32core. + /* Different irq balance between 16core and 32core. * The cpu mask set by ring index according to the ring flag * which indicate the ring is tx or rx. */ @@ -1592,7 +1592,7 @@ static void hns_disable_serdes_lb(struct net_device *ndev) * which buffer size is 4096. * 2. 
we set the chip serdes loopback and set rss indirection to the ring. * 3. construct 64-bytes ip broadcast packages, wait the associated rx ring - * recieving all packages and it will fetch new descriptions. + * receiving all packages and it will fetch new descriptions. * 4. recover to the original state. * *@ndev: net device @@ -1621,7 +1621,7 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev) if (!org_indir) return -ENOMEM; - /* store the orginal indirection */ + /* store the original indirection */ ops->get_rss(h, org_indir, NULL, NULL); cur_indir = kzalloc(indir_size, GFP_KERNEL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 01d6bfc0917c..a234116ba0e5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -579,7 +579,7 @@ struct hnae3_ae_ops { int vector_num, struct hnae3_ring_chain_node *vr_chain); - int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id); + int (*reset_queue)(struct hnae3_handle *handle); u32 (*get_fw_version)(struct hnae3_handle *handle); void (*get_mdix_mode)(struct hnae3_handle *handle, u8 *tp_mdix_ctrl, u8 *tp_mdix); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 44b775efd5b9..f59cd719b5b4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -210,7 +210,6 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, * Rl defines rate of interrupts i.e. number of interrupts-per-second * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing */ - if (rl_reg > 0 && !tqp_vector->tx_group.coal.adapt_enable && !tqp_vector->rx_group.coal.adapt_enable) /* According to the hardware, the range of rl_reg is @@ -695,7 +694,7 @@ void hns3_enable_vlan_filter(struct net_device *netdev, bool enable) } static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, - u16 *mss, u32 *type_cs_vlan_tso) + u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes) { u32 l4_offset, hdr_len; union l3_hdr_info l3; @@ -751,6 +750,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, (__force __wsum)htonl(l4_paylen)); } + *send_bytes = (skb_shinfo(skb)->gso_segs - 1) * hdr_len + skb->len; + /* find the txbd field values */ *paylen_fdop_ol4cs = skb->len - hdr_len; hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_TSO_B, 1); @@ -883,7 +884,6 @@ static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto, hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S, HNS3_OL3T_IPV4_NO_CSUM); - } else if (skb->protocol == htons(ETH_P_IPV6)) { hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6); @@ -1078,7 +1078,8 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) } static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, - struct sk_buff *skb, struct hns3_desc *desc) + struct sk_buff *skb, struct hns3_desc *desc, + struct hns3_desc_cb *desc_cb) { u32 ol_type_vlan_len_msec = 0; u32 paylen_ol4cs = skb->len; @@ -1107,6 +1108,8 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, 1); } + desc_cb->send_bytes = skb->len; + if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 ol4_proto, il4_proto; @@ -1142,7 +1145,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, } ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum, - &type_cs_vlan_tso); + &type_cs_vlan_tso, &desc_cb->send_bytes); if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); 
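The new send_bytes field feeds byte-queue-limit accounting with the bytes that actually hit the wire: for TSO, every segment repeats the headers, and skb->len already counts one copy of them, hence the (gso_segs - 1) factor. A small worked check of the formula, with made-up frame sizes:

#include <stdio.h>

/* Mirrors the calculation added to hns3_set_tso(); the values in
 * main() are purely for illustration.
 */
static unsigned int tso_send_bytes(unsigned int skb_len,
                                   unsigned int hdr_len,
                                   unsigned int gso_segs)
{
        return (gso_segs - 1) * hdr_len + skb_len;
}

int main(void)
{
        /* 64KB payload + one 54-byte Eth/IP/TCP header, MSS 1448:
         * 46 segments, i.e. 45 extra header copies on the wire.
         */
        unsigned int skb_len = 65536 + 54, hdr_len = 54, segs = 46;

        printf("send_bytes = %u\n",
               tso_send_bytes(skb_len, hdr_len, segs));
        /* 65590 + 45 * 54 = 68020 */
        return 0;
}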
ring->stats.tx_tso_err++; @@ -1277,31 +1280,29 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, } static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size, - u8 max_non_tso_bd_num) + u8 max_non_tso_bd_num, unsigned int bd_num, + unsigned int recursion_level) { +#define HNS3_MAX_RECURSION_LEVEL 24 + struct sk_buff *frag_skb; - unsigned int bd_num = 0; /* If the total len is within the max bd limit */ - if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) && + if (likely(skb->len <= HNS3_MAX_BD_SIZE && !recursion_level && + !skb_has_frag_list(skb) && skb_shinfo(skb)->nr_frags < max_non_tso_bd_num)) return skb_shinfo(skb)->nr_frags + 1U; - /* The below case will always be linearized, return - * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized. - */ - if (unlikely(skb->len > HNS3_MAX_TSO_SIZE || - (!skb_is_gso(skb) && skb->len > - HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num)))) - return HNS3_MAX_TSO_BD_NUM + 1U; + if (unlikely(recursion_level >= HNS3_MAX_RECURSION_LEVEL)) + return UINT_MAX; bd_num = hns3_skb_bd_num(skb, bd_size, bd_num); - if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM) return bd_num; skb_walk_frags(skb, frag_skb) { - bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num); + bd_num = hns3_tx_bd_num(frag_skb, bd_size, max_non_tso_bd_num, + bd_num, recursion_level + 1); if (bd_num > HNS3_MAX_TSO_BD_NUM) return bd_num; } @@ -1361,6 +1362,43 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) size[i] = skb_frag_size(&shinfo->frags[i]); } +static int hns3_skb_linearize(struct hns3_enet_ring *ring, + struct sk_buff *skb, + u8 max_non_tso_bd_num, + unsigned int bd_num) +{ + /* 'bd_num == UINT_MAX' means the skb' fraglist has a + * recursion level of over HNS3_MAX_RECURSION_LEVEL. + */ + if (bd_num == UINT_MAX) { + u64_stats_update_begin(&ring->syncp); + ring->stats.over_max_recursion++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + /* The skb->len has exceeded the hw limitation, linearization + * will not help. 
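The reworked hns3_tx_bd_num() now walks nested frag lists recursively and bails out with UINT_MAX once the depth passes HNS3_MAX_RECURSION_LEVEL, which hns3_skb_linearize() then maps to the over_max_recursion drop counter. A toy model of that bounded walk; the skb layout here is a stand-in, not struct sk_buff.

#include <limits.h>
#include <stdio.h>

#define MAX_RECURSION_LEVEL 24  /* mirrors HNS3_MAX_RECURSION_LEVEL */

/* Toy skb: a linear part plus an optional frag list of more skbs. */
struct toy_skb {
        unsigned int nr_bds;        /* BDs for this skb's own data */
        struct toy_skb *frag_list;  /* first skb on the frag list */
        struct toy_skb *next;       /* next sibling on a frag list */
};

/* Count BDs over the whole fraglist tree, giving up with UINT_MAX
 * once the nesting depth exceeds the cap - the caller then treats
 * the skb as "linearize or drop".
 */
static unsigned int count_bds(const struct toy_skb *skb,
                              unsigned int bd_num, unsigned int level)
{
        const struct toy_skb *f;

        if (level >= MAX_RECURSION_LEVEL)
                return UINT_MAX;

        bd_num += skb->nr_bds;

        for (f = skb->frag_list; f; f = f->next) {
                bd_num = count_bds(f, bd_num, level + 1);
                if (bd_num == UINT_MAX)
                        return bd_num;
        }

        return bd_num;
}

int main(void)
{
        struct toy_skb leaf = { .nr_bds = 2 };
        struct toy_skb head = { .nr_bds = 3, .frag_list = &leaf };

        printf("bd_num = %u\n", count_bds(&head, 0, 0));  /* 5 */
        return 0;
}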
+ */ + if (skb->len > HNS3_MAX_TSO_SIZE || + (!skb_is_gso(skb) && skb->len > + HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) { + u64_stats_update_begin(&ring->syncp); + ring->stats.hw_limitation++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + if (__skb_linearize(skb)) { + u64_stats_update_begin(&ring->syncp); + ring->stats.sw_err_cnt++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + return 0; +} + static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct net_device *netdev, struct sk_buff *skb) @@ -1370,7 +1408,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U]; unsigned int bd_num; - bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num); + bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num, 0, 0); if (unlikely(bd_num > max_non_tso_bd_num)) { if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && !hns3_skb_need_linearized(skb, bd_size, bd_num, @@ -1379,16 +1417,11 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, goto out; } - if (__skb_linearize(skb)) + if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num, + bd_num)) return -ENOMEM; bd_num = hns3_tx_bd_count(skb->len); - if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || - (!skb_is_gso(skb) && - bd_num > max_non_tso_bd_num)) { - trace_hns3_over_max_bd(skb); - return -ENOMEM; - } u64_stats_update_begin(&ring->syncp); ring->stats.tx_copy++; @@ -1412,6 +1445,10 @@ out: return bd_num; } + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_busy++; + u64_stats_update_end(&ring->syncp); + return -EBUSY; } @@ -1459,6 +1496,7 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, struct sk_buff *skb, enum hns_desc_type type) { unsigned int size = skb_headlen(skb); + struct sk_buff *frag_skb; int i, ret, bd_num = 0; if (size) { @@ -1483,6 +1521,15 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, bd_num += ret; } + skb_walk_frags(skb, frag_skb) { + ret = hns3_fill_skb_to_desc(ring, frag_skb, + DESC_TYPE_FRAGLIST_SKB); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + return bd_num; } @@ -1511,16 +1558,20 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping]; + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct netdev_queue *dev_queue; int pre_ntu, next_to_use_head; - struct sk_buff *frag_skb; - int bd_num = 0; bool doorbell; int ret; /* Hardware can only handle short frames above 32 bytes */ if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) { hns3_tx_doorbell(ring, 0, !netdev_xmit_more()); + + u64_stats_update_begin(&ring->syncp); + ring->stats.sw_err_cnt++; + u64_stats_update_end(&ring->syncp); + return NETDEV_TX_OK; } @@ -1530,15 +1581,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) ret = hns3_nic_maybe_stop_tx(ring, netdev, skb); if (unlikely(ret <= 0)) { if (ret == -EBUSY) { - u64_stats_update_begin(&ring->syncp); - ring->stats.tx_busy++; - u64_stats_update_end(&ring->syncp); hns3_tx_doorbell(ring, 0, true); return NETDEV_TX_BUSY; - } else if (ret == -ENOMEM) { - u64_stats_update_begin(&ring->syncp); - ring->stats.sw_err_cnt++; - u64_stats_update_end(&ring->syncp); } hns3_rl_err(netdev, "xmit error: %d!\n", ret); @@ -1547,25 +1591,19 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) next_to_use_head = ring->next_to_use; - ret = 
hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use]); + ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use], + desc_cb); if (unlikely(ret < 0)) goto fill_err; + /* 'ret < 0' means filling error, 'ret == 0' means skb->len is + * zero, which is unlikely, and 'ret > 0' means how many tx desc + * need to be notified to the hw. + */ ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); - if (unlikely(ret < 0)) + if (unlikely(ret <= 0)) goto fill_err; - bd_num += ret; - - skb_walk_frags(skb, frag_skb) { - ret = hns3_fill_skb_to_desc(ring, frag_skb, - DESC_TYPE_FRAGLIST_SKB); - if (unlikely(ret < 0)) - goto fill_err; - - bd_num += ret; - } - pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) : (ring->desc_num - 1); ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= @@ -1574,9 +1612,9 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); - doorbell = __netdev_tx_sent_queue(dev_queue, skb->len, + doorbell = __netdev_tx_sent_queue(dev_queue, desc_cb->send_bytes, netdev_xmit_more()); - hns3_tx_doorbell(ring, bd_num, doorbell); + hns3_tx_doorbell(ring, ret, doorbell); return NETDEV_TX_OK; @@ -1748,11 +1786,15 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_drop += ring->stats.tx_l4_proto_err; tx_drop += ring->stats.tx_l2l3l4_err; tx_drop += ring->stats.tx_tso_err; + tx_drop += ring->stats.over_max_recursion; + tx_drop += ring->stats.hw_limitation; tx_errors += ring->stats.sw_err_cnt; tx_errors += ring->stats.tx_vlan_err; tx_errors += ring->stats.tx_l4_proto_err; tx_errors += ring->stats.tx_l2l3l4_err; tx_errors += ring->stats.tx_tso_err; + tx_errors += ring->stats.over_max_recursion; + tx_errors += ring->stats.hw_limitation; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); /* fetch the rx stats */ @@ -2691,8 +2733,12 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, break; desc_cb = &ring->desc_cb[ntc]; - (*pkts) += (desc_cb->type == DESC_TYPE_SKB); - (*bytes) += desc_cb->length; + + if (desc_cb->type == DESC_TYPE_SKB) { + (*pkts)++; + (*bytes) += desc_cb->send_bytes; + } + /* desc_cb will be cleaned, after hnae3_free_buffer_detach */ hns3_free_buffer_detach(ring, ntc, budget); @@ -2965,7 +3011,6 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, HNS3_RXD_L3ID_S); l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S); - /* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */ if ((l3_type == HNS3_L3_TYPE_IPV4 || l3_type == HNS3_L3_TYPE_IPV6) && @@ -3295,7 +3340,6 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (!skb) { bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - /* Check valid BD */ if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) return -ENXIO; @@ -3557,7 +3601,6 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) hns3_for_each_ring(ring, tqp_vector->rx_group) { int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget, hns3_rx_skb); - if (rx_cleaned >= rx_budget) clean_complete = false; @@ -4024,7 +4067,6 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) hns3_buf_size2type(ring->buf_size)); hns3_write_dev(q, HNS3_RING_RX_RING_BD_NUM_REG, ring->desc_num / 8 - 1); - } else { hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_L_REG, (u32)dma); @@ -4462,11 +4504,11 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) int i, j; int ret; - for (i = 0; i < h->kinfo.num_tqps; i++) { - ret = h->ae_algo->ops->reset_queue(h, 
i); - if (ret) - return ret; + ret = h->ae_algo->ops->reset_queue(h); + if (ret) + return ret; + for (i = 0; i < h->kinfo.num_tqps; i++) { hns3_init_ring_hw(&priv->ring[i]); /* We need to clear tx ring here because self test will diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index d069b04ee587..daa04aeb0942 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -298,7 +298,12 @@ struct hns3_desc_cb { /* priv data for the desc, e.g. skb when use with ip stack */ void *priv; - u32 page_offset; + + union { + u32 page_offset; /* for rx */ + u32 send_bytes; /* for tx */ + }; + u32 length; /* length of the buffer */ u16 reuse_flag; @@ -376,6 +381,8 @@ struct ring_stats { u64 tx_l4_proto_err; u64 tx_l2l3l4_err; u64 tx_tso_err; + u64 over_max_recursion; + u64 hw_limitation; }; struct { u64 rx_pkts; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index a1d69c56d119..b48faf769b1c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -44,6 +44,8 @@ static const struct hns3_stats hns3_txq_stats[] = { HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err), HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err), HNS3_TQP_STAT("tso_err", tx_tso_err), + HNS3_TQP_STAT("over_max_recursion", over_max_recursion), + HNS3_TQP_STAT("hw_limitation", hw_limitation), }; #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats) @@ -307,7 +309,7 @@ out: } /** - * hns3_nic_self_test - self test + * hns3_self_test - self test * @ndev: net device * @eth_test: test cmd * @data: test result diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 3284a2cb52e6..76a482456f1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -366,7 +366,6 @@ static void hclge_parse_capability(struct hclge_dev *hdev, u32 caps; caps = __le32_to_cpu(cmd->caps[0]); - if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B)) set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B)) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 565c5aa54f88..c6fc22e29581 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -506,8 +506,6 @@ struct hclge_pf_res_cmd { #define HCLGE_CFG_RD_LEN_BYTES 16 #define HCLGE_CFG_RD_LEN_UNIT 4 -#define HCLGE_CFG_VMDQ_S 0 -#define HCLGE_CFG_VMDQ_M GENMASK(7, 0) #define HCLGE_CFG_TC_NUM_S 8 #define HCLGE_CFG_TC_NUM_M GENMASK(15, 8) #define HCLGE_CFG_TQP_DESC_N_S 16 @@ -950,10 +948,16 @@ struct hclge_reset_tqp_queue_cmd { #define HCLGE_CFG_RESET_MAC_B 3 #define HCLGE_CFG_RESET_FUNC_B 7 +#define HCLGE_CFG_RESET_RCB_B 1 struct hclge_reset_cmd { u8 mac_func_reset; u8 fun_reset_vfid; - u8 rsv[22]; + u8 fun_reset_rcb; + u8 rsv; + __le16 fun_reset_rcb_vqid_start; + __le16 fun_reset_rcb_vqid_num; + u8 fun_reset_rcb_return_status; + u8 rsv1[15]; }; #define HCLGE_PF_RESET_DONE_BIT BIT(0) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 1c699131e8df..85d306459e36 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1541,8 +1541,7 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, } } -static void hclge_dbg_dump_loopback(struct hclge_dev *hdev, - const char *cmd_buf) +static void hclge_dbg_dump_loopback(struct hclge_dev *hdev) { struct phy_device *phydev = hdev->hw.mac.phydev; struct hclge_config_mac_mode_cmd *req_app; @@ -1778,7 +1777,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_mac_tnl_status(hdev); } else if (strncmp(cmd_buf, DUMP_LOOPBACK, strlen(DUMP_LOOPBACK)) == 0) { - hclge_dbg_dump_loopback(hdev, &cmd_buf[sizeof(DUMP_LOOPBACK)]); + hclge_dbg_dump_loopback(hdev); } else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) { hclge_dbg_dump_qs_shaper(hdev, &cmd_buf[sizeof("dump qs shaper")]); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 0ca7f1b984bf..d25291916b31 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -865,13 +865,7 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) } /* configure TM QCN hw errors */ - ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0); - if (ret) { - dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret); - return ret; - } - - hclge_cmd_reuse_desc(&desc, false); + hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); if (en) desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); @@ -1497,7 +1491,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) } status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { if (status & HCLGE_ROCEE_RERR_INT_MASK) dev_err(dev, "ROCEE RAS AXI rresp error\n"); @@ -1647,7 +1640,6 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) } status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); - if (status & HCLGE_RAS_REG_NFE_MASK || status & HCLGE_RAS_REG_ROCEE_ERR_MASK) ae_dev->hw_err_reset_req = 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 058317ce579c..bc805d5fb16e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -553,7 +553,6 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) int ret; ret = hclge_mac_query_reg_num(hdev, &desc_num); - /* The firmware supports the new statistics acquisition method */ if (!ret) ret = hclge_mac_update_stats_complete(hdev, desc_num); @@ -784,7 +783,6 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } - } else if (stringset == ETH_SS_STATS) { count = ARRAY_SIZE(g_mac_stats_string) + hclge_tqps_get_sset_count(handle, stringset); @@ -1292,9 +1290,6 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) req = (struct hclge_cfg_param_cmd *)desc[0].data; /* get the configuration */ - cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]), - HCLGE_CFG_VMDQ_M, - HCLGE_CFG_VMDQ_S); cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S); cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), @@ -1511,7 +1506,7 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) "Running kdump kernel. 
Using minimal resources\n"); /* minimal queue pairs equals to the number of vports */ - hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + hdev->num_tqps = hdev->num_req_vfs + 1; hdev->num_tx_desc = HCLGE_MIN_TX_DESC; hdev->num_rx_desc = HCLGE_MIN_RX_DESC; } @@ -1526,7 +1521,6 @@ static int hclge_configure(struct hclge_dev *hdev) if (ret) return ret; - hdev->num_vmdq_vport = cfg.vmdq_vport_num; hdev->base_tqp_pid = 0; hdev->vf_rss_size_max = cfg.vf_rss_size_max; hdev->pf_rss_size_max = cfg.pf_rss_size_max; @@ -1777,7 +1771,7 @@ static int hclge_map_tqp(struct hclge_dev *hdev) struct hclge_vport *vport = hdev->vport; u16 i, num_vport; - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; for (i = 0; i < num_vport; i++) { int ret; @@ -1819,7 +1813,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) int ret; /* We need to alloc a vport for main NIC of PF */ - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; if (hdev->num_tqps < num_vport) { dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)", @@ -2195,7 +2189,6 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, COMPENSATE_HALF_MPS_NUM * half_mps; min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); - if (rx_priv < min_rx_priv) return false; @@ -2224,7 +2217,7 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data - * @return: 0: calculate sucessful, negative: fail + * @return: 0: calculate successful, negative: fail */ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) @@ -2889,13 +2882,12 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status) static void hclge_update_link_status(struct hclge_dev *hdev) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; - struct hnae3_handle *rhandle; - struct hnae3_handle *handle; int state; int ret; - int i; if (!client) return; @@ -2910,15 +2902,11 @@ static void hclge_update_link_status(struct hclge_dev *hdev) } if (state != hdev->hw.mac.link) { - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - handle = &hdev->vport[i].nic; - client->ops->link_status_change(handle, state); - hclge_config_mac_tnl_int(hdev, state); - rhandle = &hdev->vport[i].roce; - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, - state); - } + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, state); + hdev->hw.mac.link = state; } @@ -3370,7 +3358,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) * caused this event. Therefore, we will do below for now: * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we * have defered type of reset to be used. - * 2. Schedule the reset serivce task. + * 2. Schedule the reset service task. * 3. When service task receives HNAE3_UNKNOWN_RESET type it * will fetch the correct type of reset. This would be done * by first decoding the types of errors. 
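The deferred-reset pattern described in the comment above - assert a pending level in a bitmask, then let the service task decode and clear the most severe one - can be sketched as follows. The bit ordering below is an assumption of this model, not the hclge enum; the real severity ranking lives in hclge_get_reset_level().

#include <stdio.h>

/* Illustrative reset levels (lower bit = more severe here). */
enum model_reset {
        MODEL_IMP_RESET,     /* most severe */
        MODEL_GLOBAL_RESET,
        MODEL_FUNC_RESET,
        MODEL_NONE_RESET,
};

static void request_reset(unsigned long *pending, enum model_reset lvl)
{
        *pending |= 1UL << lvl;
}

/* The service task handles the most severe pending request first,
 * clearing it from the mask - the pattern behind
 * hclge_get_reset_level() and hclge_update_reset_level().
 */
static enum model_reset pick_reset(unsigned long *pending)
{
        int lvl;

        for (lvl = MODEL_IMP_RESET; lvl < MODEL_NONE_RESET; lvl++) {
                if (*pending & (1UL << lvl)) {
                        *pending &= ~(1UL << lvl);
                        return lvl;
                }
        }
        return MODEL_NONE_RESET;
}

int main(void)
{
        unsigned long pending = 0;

        request_reset(&pending, MODEL_FUNC_RESET);
        request_reset(&pending, MODEL_GLOBAL_RESET);

        /* Global outranks func, so it is handled first */
        printf("first pick = %d\n", pick_reset(&pending));  /* 1 */
        printf("second pick = %d\n", pick_reset(&pending)); /* 2 */
        return 0;
}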
@@ -3498,8 +3486,9 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev) int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *client = hdev->nic_client; - u16 i; + int ret; if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) return 0; @@ -3507,27 +3496,20 @@ int hclge_notify_client(struct hclge_dev *hdev, if (!client->ops->reset_notify) return -EOPNOTSUPP; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].nic; - int ret; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify nic client failed %d(%d)\n", type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n", + type, ret); - return 0; + return ret; } static int hclge_notify_roce_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].roce; struct hnae3_client *client = hdev->roce_client; int ret; - u16 i; if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; @@ -3535,17 +3517,10 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, if (!client->ops->reset_notify) return -EOPNOTSUPP; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].roce; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify roce client failed %d(%d)", - type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", + type, ret); return ret; } @@ -3613,7 +3588,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) { int i; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) { + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; int ret; @@ -3694,14 +3669,12 @@ void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type) { struct hnae3_client *client = hdev->nic_client; - u16 i; if (!client || !client->ops->process_hw_error || !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) - client->ops->process_hw_error(&hdev->vport[i].nic, type); + client->ops->process_hw_error(&hdev->vport[0].nic, type); } static void hclge_handle_imp_error(struct hclge_dev *hdev) @@ -3967,6 +3940,21 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) return false; } +static void hclge_update_reset_level(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + + /* if default_reset_request has a higher level reset request, + * it should be handled as soon as possible. since some errors + * need this kind of reset to fix. 
+ */ + reset_level = hclge_get_reset_level(ae_dev, + &hdev->default_reset_request); + if (reset_level != HNAE3_NONE_RESET) + set_bit(reset_level, &hdev->reset_request); +} + static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; @@ -4054,8 +4042,6 @@ static int hclge_reset_prepare(struct hclge_dev *hdev) static int hclge_reset_rebuild(struct hclge_dev *hdev) { - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret; hdev->rst_stats.hw_reset_done_cnt++; @@ -4099,14 +4085,7 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev) hdev->rst_stats.reset_done_cnt++; clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); - /* if default_reset_request has a higher level reset request, - * it should be handled as soon as possible. since some errors - * need this kind of reset to fix. - */ - reset_level = hclge_get_reset_level(ae_dev, - &hdev->default_reset_request); - if (reset_level != HNAE3_NONE_RESET) - set_bit(reset_level, &hdev->reset_request); + hclge_update_reset_level(hdev); return 0; } @@ -4913,58 +4892,44 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) { - struct hclge_vport *vport = hdev->vport; - int i, j; + struct hclge_vport *vport = &hdev->vport[0]; + int i; - for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { - for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) - vport[j].rss_indirection_tbl[i] = - i % vport[j].alloc_rss_size; - } + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) + vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size; } static int hclge_rss_init_cfg(struct hclge_dev *hdev) { u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; - int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; - struct hclge_vport *vport = hdev->vport; + int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + struct hclge_vport *vport = &hdev->vport[0]; + u16 *rss_ind_tbl; if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - u16 *rss_ind_tbl; - - vport[i].rss_tuple_sets.ipv4_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_sctp_en = - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv4_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_sctp_en = - hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? - HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv6_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - - vport[i].rss_algo = rss_algo; - - rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, - sizeof(*rss_ind_tbl), GFP_KERNEL); - if (!rss_ind_tbl) - return -ENOMEM; + vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? 
+ HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + + vport->rss_algo = rss_algo; + + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM; - vport[i].rss_indirection_tbl = rss_ind_tbl; - memcpy(vport[i].rss_hash_key, hclge_hash_key, - HCLGE_RSS_KEY_SIZE); - } + vport->rss_indirection_tbl = rss_ind_tbl; + memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE); hclge_rss_indir_init_cfg(hdev); @@ -6474,8 +6439,9 @@ static int hclge_add_fd_entry_common(struct hclge_dev *hdev, if (ret) goto out; - hclge_update_fd_list(hdev, HCLGE_FD_ACTIVE, rule->location, rule); + rule->state = HCLGE_FD_ACTIVE; hdev->fd_active_type = rule->rule_type; + hclge_update_fd_list(hdev, rule->state, rule->location, rule); out: spin_unlock_bh(&hdev->fd_rule_lock); @@ -7037,6 +7003,7 @@ static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples, rule->action = 0; rule->vf_id = 0; rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + rule->state = HCLGE_FD_TO_ADD; if (tuples->ether_proto == ETH_P_IP) { if (tuples->ip_proto == IPPROTO_TCP) rule->flow_type = TCP_V4_FLOW; @@ -7099,8 +7066,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, rule->arfs.flow_id = flow_id; rule->queue_id = queue_id; hclge_fd_build_arfs_rule(&new_tuples, rule); - hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, rule->location, - rule); + hclge_update_fd_list(hdev, rule->state, rule->location, rule); hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE; } else if (rule->queue_id != queue_id) { rule->queue_id = queue_id; @@ -7168,8 +7134,8 @@ static int hclge_clear_arfs_rules(struct hclge_dev *hdev) } hclge_sync_fd_state(hdev); - return 0; #endif + return 0; } static void hclge_get_cls_key_basic(const struct flow_rule *flow, @@ -7831,13 +7797,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en) return ret; } -static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclge_desc desc; struct hclge_cfg_com_tqp_queue_cmd *req = (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; - int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = cpu_to_le16(tqp_id); @@ -7845,20 +7810,30 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, if (enable) req->enable |= 1U << HCLGE_TQP_ENABLE_B; - ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) - dev_err(&hdev->pdev->dev, - "Tqp enable fail, status =%d.\n", ret); - return ret; + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + +static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + return 0; } static int hclge_set_loopback(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hnae3_knic_private_info *kinfo; struct hclge_dev *hdev = vport->back; - int i, ret; + int ret; /* Loopback can be enabled in three places: SSU, MAC, and serdes. 
By * default, SSU loopback is enabled, so if the SMAC and the DMAC are @@ -7895,14 +7870,12 @@ static int hclge_set_loopback(struct hnae3_handle *handle, if (ret) return ret; - kinfo = &vport->nic.kinfo; - for (i = 0; i < kinfo->num_tqps; i++) { - ret = hclge_tqp_enable(hdev, i, 0, en); - if (ret) - return ret; - } + ret = hclge_tqp_enable(handle, en); + if (ret) + dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n", + en ? "enable" : "disable", ret); - return 0; + return ret; } static int hclge_set_default_loopback(struct hclge_dev *hdev) @@ -7989,7 +7962,6 @@ static void hclge_ae_stop(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int i; set_bit(HCLGE_STATE_DOWN, &hdev->state); spin_lock_bh(&hdev->fd_rule_lock); @@ -8006,8 +7978,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle) return; } - for (i = 0; i < handle->kinfo.num_tqps; i++) - hclge_reset_tqp(handle, i); + hclge_reset_tqp(handle); hclge_config_mac_tnl_int(hdev, false); @@ -8457,7 +8428,7 @@ int hclge_update_mac_list(struct hclge_vport *vport, /* if the mac addr is already in the mac list, no need to add a new * one into it, just check the mac addr state, convert it to a new - * new state, or just remove it, or do nothing. + * state, or just remove it, or do nothing. */ mac_node = hclge_find_mac_node(list, addr); if (mac_node) { @@ -8646,7 +8617,6 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, if (status) return status; status = hclge_add_mac_vlan_tbl(vport, &req, desc); - /* if already overflow, not to print each time */ if (status == -ENOSPC && !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) @@ -8695,7 +8665,6 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, else /* Not all the vfid is zero, update the vfid */ status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else if (status == -ENOENT) { status = 0; } @@ -9130,7 +9099,7 @@ static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, return true; vf_idx += HCLGE_VF_VPORT_START_NUM; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) if (i != vf_idx && ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) return true; @@ -9511,8 +9480,7 @@ static int hclge_check_vf_vlan_cmd_status(struct hclge_dev *hdev, u16 vfid, } static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, - bool is_kill, u16 vlan, - __be16 proto) + bool is_kill, u16 vlan) { struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_desc desc[2]; @@ -9578,8 +9546,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto, if (is_kill && !vlan_id) return 0; - ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id, - proto); + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id); if (ret) { dev_err(&hdev->pdev->dev, "Set %u vport vlan filter config fail, ret =%d.\n", @@ -10385,7 +10352,7 @@ out: return ret; } -static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, +static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id, bool enable) { struct hclge_reset_tqp_queue_cmd *req; @@ -10441,94 +10408,114 @@ u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id) return tqp->index; } -int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct 
hclge_dev *hdev = vport->back; - int reset_try_times = 0; + u16 reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + u16 i; - queue_gid = hclge_covert_handle_qid_global(handle, queue_id); - - ret = hclge_tqp_enable(hdev, queue_id, 0, false); - if (ret) { - dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); - return ret; - } + for (i = 0; i < handle->kinfo.num_tqps; i++) { + queue_gid = hclge_covert_handle_qid_global(handle, i); + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to send reset tqp cmd, ret = %d\n", + ret); + return ret; + } - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); - if (ret) { - dev_err(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return ret; - } + while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { + reset_status = hclge_get_reset_status(hdev, queue_gid); + if (reset_status) + break; - while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + /* Wait for tqp hw reset */ + usleep_range(1000, 1200); + } - /* Wait for tqp hw reset */ - usleep_range(1000, 1200); - } + if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { + dev_err(&hdev->pdev->dev, + "wait for tqp hw reset timeout\n"); + return -ETIME; + } - if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_err(&hdev->pdev->dev, "Reset TQP fail\n"); - return ret; + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to deassert soft reset, ret = %d\n", + ret); + return ret; + } + reset_try_times = 0; } - - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_err(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); - - return ret; + return 0; } -void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) +static int hclge_reset_rcb(struct hnae3_handle *handle) { - struct hnae3_handle *handle = &vport->nic; +#define HCLGE_RESET_RCB_NOT_SUPPORT 0U +#define HCLGE_RESET_RCB_SUCCESS 1U + + struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; - int reset_status; + struct hclge_reset_cmd *req; + struct hclge_desc desc; + u8 return_status; u16 queue_gid; int ret; - if (queue_id >= handle->kinfo.num_tqps) { - dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", - queue_id); - return; - } + queue_gid = hclge_covert_handle_qid_global(handle, 0); - queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); + req = (struct hclge_reset_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1); + req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid); + req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps); - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { - dev_warn(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, + "failed to send rcb reset cmd, ret = %d\n", ret); + return ret; } - while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + return_status = req->fun_reset_rcb_return_status; + if (return_status == HCLGE_RESET_RCB_SUCCESS) + return 0; - /* Wait for tqp hw reset */ - usleep_range(1000, 1200); + if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) 
{ + dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n", + return_status); + return -EIO; } - if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_warn(&hdev->pdev->dev, "Reset TQP fail\n"); - return; + /* if reset rcb cmd is unsupported, we need to send reset tqp cmd + * again to reset all tqps + */ + return hclge_reset_tqp_cmd(handle); +} + +int hclge_reset_tqp(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + /* only need to disable PF's tqp */ + if (!vport->vport_id) { + ret = hclge_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to disable tqp, ret = %d\n", ret); + return ret; + } } - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_warn(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); + return hclge_reset_rcb(handle); } static u32 hclge_get_fw_version(struct hnae3_handle *handle) @@ -10771,7 +10758,6 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc); dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc); dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport); - dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport); dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs); dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map); dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size); @@ -10886,39 +10872,35 @@ static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i, ret; - - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; + struct hclge_vport *vport = &hdev->vport[0]; + int ret; - switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - vport->nic.client = client; - ret = hclge_init_nic_client_instance(ae_dev, vport); - if (ret) - goto clear_nic; + switch (client->type) { + case HNAE3_CLIENT_KNIC: + hdev->nic_client = client; + vport->nic.client = client; + ret = hclge_init_nic_client_instance(ae_dev, vport); + if (ret) + goto clear_nic; - ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; - break; - case HNAE3_CLIENT_ROCE: - if (hnae3_dev_roce_supported(hdev)) { - hdev->roce_client = client; - vport->roce.client = client; - } + break; + case HNAE3_CLIENT_ROCE: + if (hnae3_dev_roce_supported(hdev)) { + hdev->roce_client = client; + vport->roce.client = client; + } - ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; - break; - default: - return -EINVAL; - } + break; + default: + return -EINVAL; } return 0; @@ -10937,32 +10919,27 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i; + struct hclge_vport *vport = &hdev->vport[0]; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; - if (hdev->roce_client) { - clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - hdev->roce_client->ops->uninit_instance(&vport->roce, - 0); - hdev->roce_client = 
NULL; - vport->roce.client = NULL; - } - if (client->type == HNAE3_CLIENT_ROCE) - return; - if (hdev->nic_client && client->ops->uninit_instance) { - clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - client->ops->uninit_instance(&vport->nic, 0); - hdev->nic_client = NULL; - vport->nic.client = NULL; - } + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } + if (client->type == HNAE3_CLIENT_ROCE) + return; + if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; } } @@ -11978,7 +11955,6 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) #define REG_SEPARATOR_LINE 1 #define REG_NUM_REMAIN_MASK 3 -#define BD_LIST_MAX_NUM 30 int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) { @@ -12072,15 +12048,19 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int data_len_per_desc, bd_num, i; - int bd_num_list[BD_LIST_MAX_NUM]; + int *bd_num_list; u32 data_len; int ret; + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; } data_len_per_desc = sizeof_field(struct hclge_desc, data); @@ -12091,6 +12071,8 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) *len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE; } +out: + kfree(bd_num_list); return ret; } @@ -12098,16 +12080,20 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int bd_num, bd_num_max, buf_len, i; - int bd_num_list[BD_LIST_MAX_NUM]; struct hclge_desc *desc_src; + int *bd_num_list; u32 *reg = data; int ret; + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; } bd_num_max = bd_num_list[0]; @@ -12116,8 +12102,10 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) buf_len = sizeof(*desc_src) * bd_num_max; desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) - return -ENOMEM; + if (!desc_src) { + ret = -ENOMEM; + goto out; + } for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@ -12133,6 +12121,8 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) } kfree(desc_src); +out: + kfree(bd_num_list); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 97e77e2f7539..c1aaf7c534c9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -348,7 +348,6 @@ struct hclge_tc_info { }; struct hclge_cfg { - u8 vmdq_vport_num; u8 tc_num; u16 tqp_desc_num; u16 rx_buf_len; @@ -811,7 +810,6 @@ struct hclge_dev { struct hclge_rst_stats rst_stats; struct semaphore reset_sem; /* protect reset process */ u32 fw_version; - u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ u16 num_req_vfs; /* Num VFs requested for this PF */ @@ -1055,8 +1053,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev); void hclge_rss_indir_init_cfg(struct hclge_dev *hdev); void hclge_mbx_handler(struct hclge_dev *hdev); -int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id); -void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id); +int hclge_reset_tqp(struct hnae3_handle *handle); int hclge_cfg_flowctrl(struct hclge_dev *hdev); int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); int hclge_vport_start(struct hclge_vport *vport); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 51a36e74f088..c88607bdda59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -550,14 +550,32 @@ static void hclge_get_link_mode(struct hclge_vport *vport, HCLGE_MBX_LINK_STAT_MODE, dest_vfid); } -static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static int hclge_mbx_reset_vf_queue(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { +#define HCLGE_RESET_ALL_QUEUE_DONE 1U + struct hnae3_handle *handle = &vport->nic; + struct hclge_dev *hdev = vport->back; u16 queue_id; + int ret; memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id)); + resp_msg->data[0] = HCLGE_RESET_ALL_QUEUE_DONE; + resp_msg->len = sizeof(u8); - hclge_reset_vf_queue(vport, queue_id); + /* pf will reset vf's all queues at a time. So it is unnecessary + * to reset queues if queue_id > 0, just return success. + */ + if (queue_id > 0) + return 0; + + ret = hclge_reset_tqp(handle); + if (ret) + dev_err(&hdev->pdev->dev, "failed to reset vf %u queue, ret = %d\n", + vport->vport_id - HCLGE_VF_VPORT_START_NUM, ret); + + return ret; } static int hclge_reset_vf(struct hclge_vport *vport) @@ -783,7 +801,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev) ret); break; case HCLGE_MBX_QUEUE_RESET: - hclge_mbx_reset_vf_queue(vport, req); + ret = hclge_mbx_reset_vf_queue(vport, req, &resp_msg); break; case HCLGE_MBX_RESET: ret = hclge_reset_vf(vport); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 151afd1f0688..ebb962bad451 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -631,13 +631,12 @@ static u16 hclge_vport_get_tqp_num(struct hclge_vport *vport) return sum; } -static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) +static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; u16 vport_max_rss_size; u16 max_rss_size; - u8 i; /* TC configuration is shared by PF/VF in one port, only allow * one tc for VF for simplicity. VF's vport_id is non zero. 
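The hclge_mbx.c hunk above reworks HCLGE_MBX_QUEUE_RESET: a new PF resets all of a VF's queues on the first request and reports that back through HCLGE_RESET_ALL_QUEUE_DONE, while requests for queue_id > 0 simply succeed; the matching VF-side fallback for old PF firmware appears further down in hclgevf_reset_tqp(). Reduced to a standalone sketch — the names and return conventions below are invented, not the driver's — the compatibility handshake is roughly:

#include <stdio.h>

#define RESET_ALL_QUEUE_DONE 1	/* PF: "I already reset every queue" */

static int pf_is_new;		/* flip to 0 to model old PF firmware */

/* PF side: new firmware resets all VF queues on the request for queue 0;
 * old firmware resets exactly the queue asked about and reports nothing.
 */
static int pf_handle_queue_reset(unsigned int queue_id, unsigned char *resp)
{
	if (pf_is_new) {
		*resp = RESET_ALL_QUEUE_DONE;
		if (queue_id > 0)	/* nothing left to do, just succeed */
			return 0;
		printf("PF: resetting all queues at once\n");
		return 0;
	}
	*resp = 0;
	printf("PF: resetting queue %u only\n", queue_id);
	return 0;
}

/* VF side: try the all-in-one path first, fall back to per-queue resets. */
static int vf_reset_tqps(unsigned int num_tqps)
{
	unsigned char resp = 0;
	unsigned int i;
	int ret;

	ret = pf_handle_queue_reset(0, &resp);
	if (ret || resp == RESET_ALL_QUEUE_DONE)
		return ret;	/* new PF: one message was enough */

	for (i = 1; i < num_tqps; i++) {	/* old PF: one message per queue */
		ret = pf_handle_queue_reset(i, &resp);
		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	pf_is_new = 1;
	vf_reset_tqps(4);
	pf_is_new = 0;
	vf_reset_tqps(4);
	return 0;
}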
@@ -665,19 +664,18 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { - /* if user not set rss, the rss_size should compare with the - * valid msi numbers to ensure one to one map between tqp and - * irq as default. - */ - if (!kinfo->req_rss_size) - max_rss_size = min_t(u16, max_rss_size, - (hdev->num_nic_msi - 1) / - kinfo->tc_info.num_tc); - /* Set to the maximum specification value (max_rss_size). */ kinfo->rss_size = max_rss_size; } +} + +static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) +{ + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + struct hclge_dev *hdev = vport->back; + u8 i; + hclge_tm_update_kinfo_rss_size(vport); kinfo->num_tqps = hclge_vport_get_tqp_num(vport); vport->dwrr = 100; /* 100 percent as init */ vport->alloc_rss_size = kinfo->rss_size; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 46700c427849..d8c5c5810b99 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -349,7 +349,6 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev, u32 caps; caps = __le32_to_cpu(cmd->caps[0]); - if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B)) set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B)) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 8a37a22a176b..c6dc11b32aa7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -223,11 +223,14 @@ struct hclgevf_rss_indirection_table_cmd { }; #define HCLGEVF_RSS_TC_OFFSET_S 0 -#define HCLGEVF_RSS_TC_OFFSET_M (0x3ff << HCLGEVF_RSS_TC_OFFSET_S) +#define HCLGEVF_RSS_TC_OFFSET_M GENMASK(10, 0) +#define HCLGEVF_RSS_TC_SIZE_MSB_B 11 #define HCLGEVF_RSS_TC_SIZE_S 12 -#define HCLGEVF_RSS_TC_SIZE_M (0x7 << HCLGEVF_RSS_TC_SIZE_S) +#define HCLGEVF_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGEVF_RSS_TC_VALID_B 15 #define HCLGEVF_MAX_TC_NUM 8 +#define HCLGEVF_RSS_TC_SIZE_MSB_OFFSET 3 + struct hclgevf_rss_tc_mode_cmd { __le16 rss_tc_mode[HCLGEVF_MAX_TC_NUM]; u8 rsv[8]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 700e068764c8..1682769112d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -497,7 +497,6 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) link_state = test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 
0 : link_state; - if (link_state != hdev->hw.mac.link) { client->ops->link_status_change(handle, !!link_state); if (rclient && rclient->ops->link_status_change) @@ -707,6 +706,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) (tc_valid[i] & 0x1)); hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M, HCLGEVF_RSS_TC_SIZE_S, tc_size[i]); + hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B, + tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET & + 0x1); hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M, HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]); @@ -1241,12 +1243,11 @@ static void hclgevf_sync_promisc_mode(struct hclgevf_dev *hdev) } } -static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclgevf_cfg_com_tqp_queue_cmd *req; struct hclgevf_desc desc; - int status; req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data; @@ -1257,12 +1258,22 @@ static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, if (enable) req->enable |= 1U << HCLGEVF_TQP_ENABLE_B; - status = hclgevf_cmd_send(&hdev->hw, &desc, 1); - if (status) - dev_err(&hdev->pdev->dev, - "TQP enable fail, status =%d.\n", status); + return hclgevf_cmd_send(&hdev->hw, &desc, 1); +} - return status; +static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + + return 0; } static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) @@ -1711,20 +1722,39 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } -static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclgevf_reset_tqp(struct hnae3_handle *handle) { +#define HCLGEVF_RESET_ALL_QUEUE_DONE 1U struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; + u8 return_status = 0; int ret; + u16 i; /* disable vf queue before send queue reset msg to PF */ - ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); - if (ret) + ret = hclgevf_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n", + ret); return ret; + } hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); - memcpy(send_msg.data, &queue_id, sizeof(queue_id)); - return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status, + sizeof(return_status)); + if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE) + return ret; + + for (i = 1; i < handle->kinfo.num_tqps; i++) { + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); + memcpy(send_msg.data, &i, sizeof(i)); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + if (ret) + return ret; + } + + return 0; } static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@ -2356,7 +2386,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, /* fetch the events from their corresponding regs */ cmdq_stat_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG); - if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) { rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); dev_info(&hdev->pdev->dev, @@ -2638,14 +2667,11 
@@ static int hclgevf_ae_start(struct hnae3_handle *handle) static void hclgevf_ae_stop(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - int i; set_bit(HCLGEVF_STATE_DOWN, &hdev->state); if (hdev->reset_type != HNAE3_VF_RESET) - for (i = 0; i < handle->kinfo.num_tqps; i++) - if (hclgevf_reset_tqp(handle, i)) - break; + hclgevf_reset_tqp(handle); hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 8c27ecd819af..ade6e7f5be5b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -192,7 +192,6 @@ struct hclgevf_tqp { }; struct hclgevf_cfg { - u8 vmdq_vport_num; u8 tc_num; u16 tqp_desc_num; u16 rx_buf_len; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0f84ed0143e4..1555d6009bf5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7339,7 +7339,7 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) qcount = min_t(int, vsi->alloc_queue_pairs, i40e_pf_get_max_q_per_tc(vsi->back)); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - /* For the TC that is not enabled set the offset to to default + /* For the TC that is not enabled set the offset to default * queue and allocate one queue for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; @@ -10625,7 +10625,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try - * try to recover minimal use by getting the basic PF VSI working. + * to recover minimal use by getting the basic PF VSI working. 
*/ if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 9bf346133cbd..02badaaf818c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -89,6 +89,7 @@ #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256 +#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ #define ICE_MAX_RESET_WAIT 20 #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@ -192,7 +193,7 @@ struct ice_sw { u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ }; -enum ice_state { +enum ice_pf_state { __ICE_TESTING, __ICE_DOWN, __ICE_NEEDS_RESTART, @@ -235,12 +236,14 @@ enum ice_state { __ICE_STATE_NBITS /* must be last */ }; -enum ice_vsi_flags { - ICE_VSI_FLAG_UMAC_FLTR_CHANGED, - ICE_VSI_FLAG_MMAC_FLTR_CHANGED, - ICE_VSI_FLAG_VLAN_FLTR_CHANGED, - ICE_VSI_FLAG_PROMISC_CHANGED, - ICE_VSI_FLAG_NBITS /* must be last */ +enum ice_vsi_state { + ICE_VSI_DOWN, + ICE_VSI_NEEDS_RESTART, + ICE_VSI_UMAC_FLTR_CHANGED, + ICE_VSI_MMAC_FLTR_CHANGED, + ICE_VSI_VLAN_FLTR_CHANGED, + ICE_VSI_PROMISC_CHANGED, + ICE_VSI_STATE_NBITS /* must be last */ }; /* struct that defines a VSI, associated with a dev */ @@ -256,8 +259,7 @@ struct ice_vsi { irqreturn_t (*irq_handler)(int irq, void *data); u64 tx_linearize; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); - DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); + DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS); unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; @@ -503,7 +505,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr << GLINT_DYN_CTL_ITR_INDX_S); if (vsi) - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; wr32(hw, GLINT_DYN_CTL(vector), val); } @@ -620,8 +622,10 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi); int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed); +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 80186589153b..b9491ef5f21c 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1407,8 +1407,7 @@ struct ice_aqc_nvm_comp_tbl { u8 cvs[]; /* Component Version String */ } __packed; -/* - * Send to PF command (indirect 0x0801) ID is only used by PF +/* Send to PF command (indirect 0x0801) ID is only used by PF * * Send to VF command (indirect 0x0802) ID is only used by PF * @@ -1790,6 +1789,7 @@ struct ice_pkg_ver { }; #define ICE_PKG_NAME_SIZE 32 +#define ICE_SEG_ID_SIZE 28 #define ICE_SEG_NAME_SIZE 28 struct ice_aqc_get_pkg_info { diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 6560acd76c94..88d98c9e5f91 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -581,8 +581,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) return; netdev = vsi->netdev; - if (!netdev || !netdev->rx_cpu_rmap || - netdev->reg_state != NETREG_REGISTERED) + if (!netdev || !netdev->rx_cpu_rmap) return; free_irq_cpu_rmap(netdev->rx_cpu_rmap); @@ -604,8 +603,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) pf = vsi->back; netdev = vsi->netdev; - if (!pf || !netdev || !vsi->num_q_vectors || - vsi->netdev->reg_state != NETREG_REGISTERED) + if (!pf || !netdev || !vsi->num_q_vectors) return -EINVAL; netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1148d768f8ed..be26775a7dfe 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -215,6 +215,26 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) } /** + * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring + * @ring: The Tx ring to configure + * + * This enables/disables XPS for a given Tx descriptor ring + * based on the TCs enabled for the VSI that ring belongs to. + */ +static void ice_cfg_xps_tx_ring(struct ice_ring *ring) +{ + if (!ring->q_vector || !ring->netdev) + return; + + /* We only initialize XPS once, so as not to overwrite user settings */ + if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state)) + return; + + netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask, + ring->q_index); +} + +/** * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance * @ring: The Tx ring to configure * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized @@ -664,6 +684,9 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 pf_q; u8 tc; + /* Configure XPS */ + ice_cfg_xps_tx_ring(ring); + pf_q = ring->reg_idx; ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 1898325e62b5..54df00ee912b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3186,7 +3186,7 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); cmd = &desc.params.read_write_sff_param; - desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); cmd->lport_num = (u8)(lport & 0xff); cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & @@ -3206,23 +3206,33 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_id: VSI FW index - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer - * @glob_lut_idx: global LUT index + * @params: RSS LUT parameters * @set: set true to set the table, false to get the table * * Internal function to get (0x0B05) or set (0x0B03) RSS look up table */ static enum ice_status -__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, - u16 lut_size, u8 glob_lut_idx, bool set) +__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set) { + u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle; struct 
ice_aqc_get_set_rss_lut *cmd_resp; struct ice_aq_desc desc; enum ice_status status; - u16 flags = 0; + u8 *lut; + + if (!params) + return ICE_ERR_PARAM; + + vsi_handle = params->vsi_handle; + lut = params->lut; + + if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) + return ICE_ERR_PARAM; + + lut_size = params->lut_size; + lut_type = params->lut_type; + glob_lut_idx = params->global_lut_id; + vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); cmd_resp = &desc.params.get_set_rss_lut; @@ -3296,43 +3306,27 @@ ice_aq_get_set_rss_lut_exit: /** * ice_aq_get_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @get_params: RSS LUT parameters used to specify which RSS LUT to get * * get the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, false); + return __ice_aq_get_set_rss_lut(hw, get_params, false); } /** * ice_aq_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @set_params: RSS LUT parameters used to specify how to set the RSS LUT * * set the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, true); + return __ice_aq_get_set_rss_lut(hw, set_params, true); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index baf4064fcbfe..81fd69cb1485 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -51,11 +51,9 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index); enum ice_status -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut, - u16 lut_size); +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params); enum ice_status -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut, - u16 lut_size); +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params); enum ice_status ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle, struct ice_aqc_get_set_rss_keys *keys); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index faaa08e8171b..7d0905f25ddc 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -14,8 +14,8 @@ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) #define ICE_CTL_Q_DESC_UNUSED(R) \ - (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1)) /* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 4f738425fb44..15152e63f204 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2886,7 +2886,7 @@ process_link: /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ice_down(vsi); if (tx_rings) { @@ -3140,7 +3140,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int ret = 0, i; + int err, i; u8 *lut; if (hfunc) @@ -3159,17 +3159,20 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) if (!lut) return -ENOMEM; - if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { - ret = -EIO; + err = ice_get_rss_key(vsi, key); + if (err) + goto out; + + err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) goto out; - } for (i = 0; i < vsi->rss_table_size; i++) indir[i] = (u32)(lut[i]); out: kfree(lut); - return ret; + return err; } /** @@ -3190,7 +3193,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct device *dev; - u8 *seed = NULL; + int err; dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) @@ -3211,7 +3214,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return -ENOMEM; } memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); - seed = vsi->rss_hkey_user; + + err = ice_set_rss_key(vsi, vsi->rss_hkey_user); + if (err) + return err; } if (!vsi->rss_lut_user) { @@ -3232,8 +3238,9 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, vsi->rss_size); } - if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) - return -EIO; + err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size); + if (err) + return err; return 0; } @@ -3329,10 +3336,9 @@ static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size) static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) { struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; struct ice_hw *hw; - int err = 0; + int err; u8 *lut; dev = ice_pf_to_dev(pf); @@ -3353,14 +3359,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) /* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) + dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err, ice_aq_str(hw->adminq.sq_last_status)); - err = -EIO; - } kfree(lut); return err; @@ -3926,14 +3928,14 @@ ice_get_module_eeprom(struct net_device *netdev, u8 value = 0; u8 page = 0; - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, - &value, 1, 0, NULL); - if (status) - return -EIO; - if (!ee || !ee->len || !data) return -EINVAL; + status = 
ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0, + NULL); + if (status) + return -EIO; + if (value == ICE_MODULE_TYPE_SFP) is_sfp = true; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index afe77f7a3199..4b83960876f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1063,32 +1063,36 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg) static enum ice_status ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) { - struct ice_global_metadata_seg *meta_seg; struct ice_generic_seg_hdr *seg_hdr; if (!pkg_hdr) return ICE_ERR_PARAM; - meta_seg = (struct ice_global_metadata_seg *) - ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr); - if (meta_seg) { - hw->pkg_ver = meta_seg->pkg_ver; - memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name)); + seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr); + if (seg_hdr) { + struct ice_meta_sect *meta; + struct ice_pkg_enum state; + + memset(&state, 0, sizeof(state)); + + /* Get package information from the Metadata Section */ + meta = ice_pkg_enum_section((struct ice_seg *)seg_hdr, &state, + ICE_SID_METADATA); + if (!meta) { + ice_debug(hw, ICE_DBG_INIT, "Did not find ice metadata section in package\n"); + return ICE_ERR_CFG; + } + + hw->pkg_ver = meta->ver; + memcpy(hw->pkg_name, meta->name, sizeof(meta->name)); ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n", - meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor, - meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft, - meta_seg->pkg_name); - } else { - ice_debug(hw, ICE_DBG_INIT, "Did not find metadata segment in driver package\n"); - return ICE_ERR_CFG; - } + meta->ver.major, meta->ver.minor, meta->ver.update, + meta->ver.draft, meta->name); - seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr); - if (seg_hdr) { - hw->ice_pkg_ver = seg_hdr->seg_format_ver; - memcpy(hw->ice_pkg_name, seg_hdr->seg_id, - sizeof(hw->ice_pkg_name)); + hw->ice_seg_fmt_ver = seg_hdr->seg_format_ver; + memcpy(hw->ice_seg_id, seg_hdr->seg_id, + sizeof(hw->ice_seg_id)); ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n", seg_hdr->seg_format_ver.major, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index abc156ce9d8c..7d8b517a63c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -109,6 +109,7 @@ struct ice_buf_hdr { (ent_sz)) /* ice package section IDs */ +#define ICE_SID_METADATA 1 #define ICE_SID_XLT0_SW 10 #define ICE_SID_XLT_KEY_BUILDER_SW 11 #define ICE_SID_XLT1_SW 12 @@ -117,6 +118,14 @@ struct ice_buf_hdr { #define ICE_SID_PROFID_REDIR_SW 15 #define ICE_SID_FLD_VEC_SW 16 #define ICE_SID_CDID_KEY_BUILDER_SW 17 + +struct ice_meta_sect { + struct ice_pkg_ver ver; +#define ICE_META_SECT_NAME_SIZE 28 + char name[ICE_META_SECT_NAME_SIZE]; + __le32 track_id; +}; + #define ICE_SID_CDID_REDIR_SW 18 #define ICE_SID_XLT0_ACL 20 @@ -488,8 +497,8 @@ struct ice_xlt1 { #define ICE_PF_NUM_S 13 #define ICE_PF_NUM_M (0x07 << ICE_PF_NUM_S) #define ICE_VSIG_VALUE(vsig, pf_id) \ - (u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \ - (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)) + ((u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \ + (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M))) #define ICE_DEFAULT_VSIG 0 /* XLT2 Table */ diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index 
8e8bfc6fa2b4..4d59eb96383b 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -238,7 +238,7 @@ static const u32 ice_ptypes_ipv6_il[] = { }; /* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */ -static const u32 ice_ipv4_ofos_no_l4[] = { +static const u32 ice_ptypes_ipv4_ofos_no_l4[] = { 0x10C00000, 0x04000800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -262,7 +262,7 @@ static const u32 ice_ptypes_arp_of[] = { }; /* Packet types for packets with an Innermost/Last IPv4 header - no L4 */ -static const u32 ice_ipv4_il_no_l4[] = { +static const u32 ice_ptypes_ipv4_il_no_l4[] = { 0x60000000, 0x18043008, 0x80000002, 0x6010c021, 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -274,7 +274,7 @@ static const u32 ice_ipv4_il_no_l4[] = { }; /* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */ -static const u32 ice_ipv6_ofos_no_l4[] = { +static const u32 ice_ptypes_ipv6_ofos_no_l4[] = { 0x00000000, 0x00000000, 0x43000000, 0x10002000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -286,7 +286,7 @@ static const u32 ice_ipv6_ofos_no_l4[] = { }; /* Packet types for packets with an Innermost/Last IPv6 header - no L4 */ -static const u32 ice_ipv6_il_no_l4[] = { +static const u32 ice_ptypes_ipv6_il_no_l4[] = { 0x00000000, 0x02180430, 0x0000010c, 0x086010c0, 0x00000430, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -749,8 +749,8 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) ICE_FLOW_PTYPE_MAX); } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) && !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) { - src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 : - (const unsigned long *)ice_ipv4_il_no_l4; + src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos_no_l4 : + (const unsigned long *)ice_ptypes_ipv4_il_no_l4; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { @@ -760,8 +760,8 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) ICE_FLOW_PTYPE_MAX); } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) && !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) { - src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 : - (const unsigned long *)ice_ipv6_il_no_l4; + src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos_no_l4 : + (const unsigned long *)ice_ptypes_ipv6_il_no_l4; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { @@ -2008,9 +2008,9 @@ ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled */ #define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \ - (u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ - (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ - ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)) + ((u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ + (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ + ((u8)((segs_cnt) - 1) ? 
ICE_FLOW_PROF_ENCAP_M : 0))) /** * ice_add_rss_cfg_sync - add an RSS configuration diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c345432fac72..6041ca2830de 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -422,7 +422,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) vsi->type = vsi_type; vsi->back = pf; - set_bit(__ICE_DOWN, vsi->state); + set_bit(ICE_VSI_DOWN, vsi->state); if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); @@ -742,11 +742,10 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0; + u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - u16 tx_numq_tc, rx_numq_tc; - u16 pow = 0, max_rss = 0; bool ena_tc0 = false; u8 netdev_tc = 0; int i; @@ -764,12 +763,15 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->tc_cfg.ena_tc |= 1; } - rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc; - if (!rx_numq_tc) - rx_numq_tc = 1; - tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc; - if (!tx_numq_tc) - tx_numq_tc = 1; + num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); + if (!num_rxq_per_tc) + num_rxq_per_tc = 1; + num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc; + if (!num_txq_per_tc) + num_txq_per_tc = 1; + + /* find the (rounded up) power-of-2 of qcount */ + pow = (u16)order_base_2(num_rxq_per_tc); /* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. 
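The queue-map encoding this comment describes (and which the next hunk simplifies) stores each TC's Rx queue count as a rounded-up power of two — pow = order_base_2(num_rxq_per_tc) — packed beside the queue offset, so hardware can expand 2^pow back out. A self-contained sketch of that encoding; the shift and mask values below are placeholders, not the real ICE_AQ_VSI_TC_Q_* definitions:

#include <stdio.h>

#define TC_Q_OFFSET_S 0
#define TC_Q_OFFSET_M (0x7ff << TC_Q_OFFSET_S)
#define TC_Q_NUM_S    11
#define TC_Q_NUM_M    (0xf << TC_Q_NUM_S)

/* smallest p with 2^p >= n, mimicking the kernel's order_base_2() */
static unsigned int order_base_2(unsigned int n)
{
	unsigned int p = 0;

	while ((1U << p) < n)
		p++;
	return p;
}

int main(void)
{
	unsigned int num_rxq_per_tc = 6;	/* e.g. 24 Rx queues / 4 TCs */
	unsigned int offset = 0, i;

	for (i = 0; i < 4; i++) {
		unsigned int pow = order_base_2(num_rxq_per_tc);
		unsigned int qmap = ((offset << TC_Q_OFFSET_S) & TC_Q_OFFSET_M) |
				    ((pow << TC_Q_NUM_S) & TC_Q_NUM_M);

		/* TC i advertises 2^pow queues starting at offset */
		printf("tc%u: offset=%u pow=%u qmap=0x%04x\n",
		       i, offset, pow, qmap);
		offset += num_rxq_per_tc;
	}
	return 0;
}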
@@ -782,26 +784,6 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) * * Setup number and offset of Rx queues for all TCs for the VSI */ - - qcount_rx = rx_numq_tc; - - /* qcount will change if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { - if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) { - if (vsi->type == ICE_VSI_PF) - max_rss = ICE_MAX_LG_RSS_QS; - else - max_rss = ICE_MAX_RSS_QS_PER_VF; - qcount_rx = min_t(u16, rx_numq_tc, max_rss); - if (!vsi->req_rxq) - qcount_rx = min_t(u16, qcount_rx, - vsi->rss_size); - } - } - - /* find the (rounded up) power-of-2 of qcount */ - pow = (u16)order_base_2(qcount_rx); - ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ @@ -815,16 +797,16 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) /* TC is enabled */ vsi->tc_cfg.tc_info[i].qoffset = offset; - vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; - vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc; + vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc; + vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc; vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & ICE_AQ_VSI_TC_Q_OFFSET_M) | ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); - offset += qcount_rx; - tx_count += tx_numq_tc; + offset += num_rxq_per_tc; + tx_count += num_txq_per_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } @@ -837,7 +819,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) if (offset) vsi->num_rxq = offset; else - vsi->num_rxq = qcount_rx; + vsi->num_rxq = num_rxq_per_tc; vsi->num_txq = tx_count; @@ -1344,7 +1326,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) vsi->rss_size); } - err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); return err; } @@ -1355,12 +1337,10 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) */ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { - struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; - int err = 0; - u8 *lut; + u8 *lut, *key; + int err; dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); @@ -1374,37 +1354,26 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) else ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - - if (status) { - dev_err(dev, "set_rss_lut failed, error %s\n", - ice_stat_str(status)); - err = -EIO; + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) { + dev_err(dev, "set_rss_lut failed, error %d\n", err); goto ice_vsi_cfg_rss_exit; } - key = kzalloc(sizeof(*key), GFP_KERNEL); + key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; } if (vsi->rss_hkey_user) - memcpy(key, - (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); else - netdev_rss_key_fill((void *)key, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); - status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); - - if (status) { - dev_err(dev, "set_rss_key failed, error %s\n", - ice_stat_str(status)); - err = -EIO; - } + err = 
ice_set_rss_key(vsi, key); + if (err) + dev_err(dev, "set_rss_key failed, error %d\n", err); kfree(key); ice_vsi_cfg_rss_exit: @@ -2624,7 +2593,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi) */ void ice_vsi_close(struct ice_vsi *vsi) { - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi); ice_vsi_free_irq(vsi); @@ -2641,10 +2610,10 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) { int err = 0; - if (!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state)) return 0; - clear_bit(__ICE_NEEDS_RESTART, vsi->state); + clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state); if (vsi->netdev && vsi->type == ICE_VSI_PF) { if (netif_running(vsi->netdev)) { @@ -2670,10 +2639,10 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; - set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); if (vsi->type == ICE_VSI_PF && vsi->netdev) { if (netif_running(vsi->netdev)) { @@ -2843,7 +2812,7 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_free_q_vectors(vsi); /* make sure unregister_netdev() was called by checking __ICE_DOWN */ - if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { + if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) { free_netdev(vsi->netdev); vsi->netdev = NULL; } @@ -2866,39 +2835,47 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** - * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector * @q_vector: pointer to q_vector which is being updated - * @coalesce: pointer to array of struct with stored coalesce + * @stored_intrl_setting: original INTRL setting * * Set coalesce param in q_vector and update these parameters in HW. */ static void -ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, - struct ice_coalesce_stored *coalesce) +ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector, + u16 stored_intrl_setting) { - struct ice_ring_container *rx_rc = &q_vector->rx; - struct ice_ring_container *tx_rc = &q_vector->tx; struct ice_hw *hw = &q_vector->vsi->back->hw; - tx_rc->itr_setting = coalesce->itr_tx; - rx_rc->itr_setting = coalesce->itr_rx; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) - wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(tx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) - wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - - q_vector->intrl = coalesce->intrl; + q_vector->intrl = stored_intrl_setting; wr32(hw, GLINT_RATE(q_vector->reg_idx), ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); } /** + * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @rc: pointer to ring container + * @stored_itr_setting: original ITR setting + * + * Set coalesce param in q_vector and update these parameters in HW. 
+ */ +static void +ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + u16 stored_itr_setting) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + rc->itr_setting = stored_itr_setting; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(rc->itr_setting)) + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S); +} + +/** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors * @coalesce: array of struct with stored coalesce @@ -2917,6 +2894,11 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, coalesce[i].itr_tx = q_vector->tx.itr_setting; coalesce[i].itr_rx = q_vector->rx.itr_setting; coalesce[i].intrl = q_vector->intrl; + + if (i < vsi->num_txq) + coalesce[i].tx_valid = true; + if (i < vsi->num_rxq) + coalesce[i].rx_valid = true; } return vsi->num_q_vectors; @@ -2941,17 +2923,59 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, if ((size && !coalesce) || !vsi) return; - for (i = 0; i < size && i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[i]); - - /* number of q_vectors increased, so assume coalesce settings were - * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use - * the previous settings from q_vector 0 for all of the new q_vectors + /* There are a couple of cases that have to be handled here: + * 1. The case where the number of queue vectors stays the same, but + * the number of Tx or Rx rings changes (the first for loop) + * 2. The case where the number of queue vectors increased (the + * second for loop) + */ + for (i = 0; i < size && i < vsi->num_q_vectors; i++) { + /* There are 2 cases to handle here and they are the same for + * both Tx and Rx: + * if the entry was valid previously (coalesce[i].[tr]x_valid + * and the loop variable is less than the number of rings + * allocated, then write the previous values + * + * if the entry was not valid previously, but the number of + * rings is less than are allocated (this means the number of + * rings increased from previously), then write out the + * values in the first element + */ + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[i].itr_rx); + else if (i < vsi->alloc_rxq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + + if (i < vsi->alloc_txq && coalesce[i].tx_valid) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[i].itr_tx); + else if (i < vsi->alloc_txq) + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[i].intrl); + } + + /* the number of queue vectors increased so write whatever is in + * the first element */ - for (; i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[0]); + for (; i < vsi->num_q_vectors; i++) { + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->tx, + coalesce[0].itr_tx); + ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i], + &vsi->q_vectors[i]->rx, + coalesce[0].itr_rx); + ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i], + coalesce[0].intrl); + } } /** @@ -2980,9 +3004,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) coalesce = 
kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (coalesce) - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, - coalesce); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f318d7f607e4..b3c1cadecf21 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -84,7 +84,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) break; } - if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state)) return; if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) @@ -140,21 +140,10 @@ static int ice_init_mac_fltr(struct ice_pf *pf) perm_addr = vsi->port_info->mac.perm_addr; status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI); - if (!status) - return 0; - - /* We aren't useful with no MAC filters, so unregister if we - * had an error - */ - if (vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n", - ice_stat_str(status)); - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } + if (status) + return -EIO; - return -EIO; + return 0; } /** @@ -209,9 +198,9 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) */ static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) { - return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); } /** @@ -278,9 +267,9 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) INIT_LIST_HEAD(&vsi->tmp_unsync_list); if (ice_vsi_fltr_changed(vsi)) { - clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); /* grab the netdev's addr_list_lock */ netif_addr_lock_bh(netdev); @@ -361,8 +350,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } if (((changed_flags & IFF_PROMISC) || promisc_forced_on) || - test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { - clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) { + clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { @@ -395,12 +384,12 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) goto exit; out_promisc: - set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); goto exit; out: /* if something went wrong then set the changed flag so we try again */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: clear_bit(__ICE_CFG_BUSY, vsi->state); return err; @@ -609,7 +598,7 @@ static 
void ice_print_topo_conflict(struct ice_vsi *vsi) case ICE_AQ_LINK_TOPO_UNREACH_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: - netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n"); break; case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); @@ -764,7 +753,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up) if (!vsi) return; - if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev) return; if (vsi->type == ICE_VSI_PF) { @@ -2020,7 +2009,7 @@ static void ice_check_media_subtask(struct ice_pf *pf) /* PHY settings are reset on media insertion, reconfigure * PHY to preserve settings. */ - if (test_bit(__ICE_DOWN, vsi->state) && + if (test_bit(ICE_VSI_DOWN, vsi->state) && test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) return; @@ -2531,7 +2520,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@ -2982,18 +2971,11 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - int err; - - err = ice_devlink_create_port(vsi); - if (err) - return err; netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, vsi->alloc_rxq); - if (!netdev) { - err = -ENOMEM; - goto err_destroy_devlink_port; - } + if (!netdev) + return -ENOMEM; vsi->netdev = netdev; np = netdev_priv(netdev); @@ -3021,25 +3003,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU; - err = register_netdev(vsi->netdev); - if (err) - goto err_free_netdev; - - devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); - - netif_carrier_off(vsi->netdev); - - /* make sure transmit queues start off as stopped */ - netif_tx_stop_all_queues(vsi->netdev); - return 0; - -err_free_netdev: - free_netdev(vsi->netdev); - vsi->netdev = NULL; -err_destroy_devlink_port: - ice_devlink_destroy_port(vsi); - return err; } /** @@ -3139,7 +3103,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, */ ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!ret) - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; } @@ -3178,7 +3142,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) ret = ice_cfg_vlan_pruning(vsi, false, false); - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; } @@ -3237,8 +3201,6 @@ unroll_napi_add: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { - if (vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); 
free_netdev(vsi->netdev); vsi->netdev = NULL; } @@ -3993,6 +3955,40 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** + * ice_register_netdev - register netdev and devlink port + * @pf: pointer to the PF struct + */ +static int ice_register_netdev(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + int err = 0; + + vsi = ice_get_main_vsi(pf); + if (!vsi || !vsi->netdev) + return -EIO; + + err = register_netdev(vsi->netdev); + if (err) + goto err_register_netdev; + + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + err = ice_devlink_create_port(vsi); + if (err) + goto err_devlink_create; + + devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); + + return 0; +err_devlink_create: + unregister_netdev(vsi->netdev); +err_register_netdev: + free_netdev(vsi->netdev); + vsi->netdev = NULL; + return err; +} + +/** * ice_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in ice_pci_tbl @@ -4272,10 +4268,16 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pcie_print_link_status(pf->pdev); probe_done: + err = ice_register_netdev(pf); + if (err) + goto err_netdev_reg; + /* ready to go, so clear down state bit */ clear_bit(__ICE_DOWN, pf->state); + return 0; +err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: @@ -4971,8 +4973,8 @@ static void ice_set_rx_mode(struct net_device *netdev) * ndo_set_rx_mode may be triggered even without a change in netdev * flags */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags); /* schedule our worker thread which will take care of @@ -5245,7 +5247,7 @@ static int ice_up_complete(struct ice_vsi *vsi) if (err) return err; - clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_napi_enable_all(vsi); ice_vsi_ena_irq(vsi); @@ -5388,7 +5390,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) struct ice_eth_stats *cur_es = &vsi->eth_stats; struct ice_pf *pf = vsi->back; - if (test_bit(__ICE_DOWN, vsi->state) || + if (test_bit(ICE_VSI_DOWN, vsi->state) || test_bit(__ICE_CFG_BUSY, pf->state)) return; @@ -5593,7 +5595,7 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) * But, only call the update routine and read the registers if VSI is * not down. 
*/ - if (!test_bit(__ICE_DOWN, vsi->state)) + if (!test_bit(ICE_VSI_DOWN, vsi->state)) ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; @@ -5793,7 +5795,7 @@ int ice_vsi_open_ctrl(struct ice_vsi *vsi) if (err) goto err_up_complete; - clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_vsi_ena_irq(vsi); return 0; @@ -6180,7 +6182,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu = (unsigned int)new_mtu; /* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { int err; err = ice_down(vsi); @@ -6315,89 +6317,118 @@ const char *ice_stat_str(enum ice_status stat_err) } /** - * ice_set_rss - Set RSS keys and lut + * ice_set_rss_lut - Set RSS LUT * @vsi: Pointer to VSI structure - * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure */ -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev; - dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL; - status = ice_aq_set_rss_key(hw, vsi->idx, buf); + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; - if (status) { - dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + status = ice_aq_set_rss_lut(hw, &params); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } - if (lut) { - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_set_rss_key - Set RSS key + * @vsi: Pointer to the VSI structure + * @seed: RSS hash seed + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } return 0; } /** - * ice_get_rss - Get RSS keys and lut + * ice_get_rss_lut - Get RSS LUT * @vsi: Pointer to VSI structure - * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct 
device *dev; - dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL; - status = ice_aq_get_rss_key(hw, vsi->idx, buf); - if (status) { - dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; + + status = ice_aq_get_rss_lut(hw, &params); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } - if (lut) { - status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_get_rss_key - Get RSS key + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the key in + * + * Returns 0 on success, negative on failure + */ +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } return 0; @@ -6620,7 +6651,7 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); set_bit(__ICE_DOWN, pf->state); - set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); set_bit(__ICE_SERVICE_DIS, pf->state); break; } @@ -6654,11 +6685,6 @@ int ice_open(struct net_device *netdev) return -EIO; } - if (test_bit(__ICE_DOWN, pf->state)) { - netdev_err(netdev, "device is not ready yet\n"); - return -EBUSY; - } - netif_carrier_off(netdev); pi = vsi->port_info; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 67c965a3f5d2..5e5683a3eb23 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -920,7 +920,7 @@ ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, struct ice_vsi_list_map_info *v_map; int i; - v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL); if (!v_map) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c71f2fbbb262..dfdf2c1fa9d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -309,7 +309,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->q_index) && - !test_bit(__ICE_DOWN, vsi->state)) { + !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->q_index); ++tx_ring->tx_stats.restart_q; @@ -569,7 +569,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct ice_ring *xdp_ring; int nxmit = 0, i; - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return -ENETDOWN; if 
(!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) @@ -1520,7 +1520,7 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, vsi->state)) + if (!test_bit(ICE_VSI_DOWN, vsi->state)) wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 5dab77504fa5..ffe0d271dec7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -195,6 +195,11 @@ struct ice_rxq_stats { u64 gro_dropped; /* GRO returned dropped */ }; +enum ice_ring_state_t { + ICE_TX_XPS_INIT_DONE, + ICE_TX_NBITS, +}; + /* this enum matches hardware bits and is meant to be used by DYN_CTLN * registers and QINT registers or more generally anywhere in the manual * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any @@ -292,6 +297,7 @@ struct ice_ring { }; struct rcu_head rcu; /* to avoid race on free */ + DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct bpf_prog *xdp_prog; struct xsk_buff_pool *xsk_pool; u16 rx_offset; @@ -351,6 +357,8 @@ struct ice_coalesce_stored { u16 itr_tx; u16 itr_rx; u8 intrl; + u8 tx_valid; + u8 rx_valid; }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 2893143d9e62..276ebcc309dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -720,13 +720,13 @@ struct ice_hw { enum ice_aq_err pkg_dwnld_status; - /* Driver's package ver - (from the Metadata seg) */ + /* Driver's package ver - (from the Ice Metadata section) */ struct ice_pkg_ver pkg_ver; u8 pkg_name[ICE_PKG_NAME_SIZE]; - /* Driver's Ice package version (from the Ice seg) */ - struct ice_pkg_ver ice_pkg_ver; - u8 ice_pkg_name[ICE_PKG_NAME_SIZE]; + /* Driver's Ice segment format version and ID (from the Ice seg) */ + struct ice_pkg_ver ice_seg_fmt_ver; + u8 ice_seg_id[ICE_SEG_ID_SIZE]; /* Pointer to the ice segment */ struct ice_seg *seg; @@ -827,6 +827,14 @@ struct ice_hw_port_stats { u64 fd_sb_match; }; +struct ice_aq_get_set_rss_lut_params { + u16 vsi_handle; /* software VSI handle */ + u16 lut_size; /* size of the LUT buffer */ + u8 lut_type; /* type of the LUT (i.e. 
VSI, PF, Global) */ + u8 *lut; /* input RSS LUT for set and output RSS LUT for get */ + u8 global_lut_id; /* only valid when lut_type is global */ +}; + /* Checksum and Shadow RAM pointers */ #define ICE_SR_NVM_CTRL_WORD 0x00 #define ICE_SR_BOOT_CFG_PTR 0x132 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 78679ece2e08..e68d52a6b11d 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2233,7 +2233,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) goto error_param; } - if (ice_set_rss(vsi, vrk->key, NULL, 0)) + if (ice_set_rss_key(vsi, vrk->key)) v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, @@ -2280,7 +2280,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) goto error_param; } - if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE)) + if (ice_set_rss_lut(vsi, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE)) v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 1c3051db9085..95d1e8c490a4 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_IGC) += igc.o igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ -igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o +igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 1b08a7dc7bc4..91493a73355d 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -111,6 +111,8 @@ struct igc_ring { struct sk_buff *skb; }; }; + + struct xdp_rxq_info xdp_rxq; } ____cacheline_internodealigned_in_smp; /* Board specific private data structure */ @@ -219,6 +221,8 @@ struct igc_adapter { ktime_t ptp_reset_start; /* Reset time in clock mono */ char fw_version[32]; + + struct bpf_prog *xdp_prog; }; void igc_up(struct igc_adapter *adapter); @@ -373,6 +377,8 @@ enum igc_tx_flags { /* olinfo flags */ IGC_TX_FLAGS_IPV4 = 0x10, IGC_TX_FLAGS_CSUM = 0x20, + + IGC_TX_FLAGS_XDP = 0x100, }; enum igc_boards { @@ -395,7 +401,10 @@ enum igc_boards { struct igc_tx_buffer { union igc_adv_tx_desc *next_to_watch; unsigned long time_stamp; - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; unsigned int bytecount; u16 gso_segs; __be16 protocol; @@ -504,6 +513,10 @@ enum igc_ring_flags_t { #define ring_uses_large_buffer(ring) \ test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) +#define set_ring_uses_large_buffer(ring) \ + set_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) +#define clear_ring_uses_large_buffer(ring) \ + clear_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) #define ring_uses_build_skb(ring) \ test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) @@ -547,8 +560,7 @@ void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, - struct sk_buff *skb); +ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); int igc_ptp_set_ts_config(struct net_device 
*netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index baa45a1f3a65..10765491e357 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -10,17 +10,24 @@ #include <linux/ip.h> #include <linux/pm_runtime.h> #include <net/pkt_sched.h> +#include <linux/bpf_trace.h> #include <net/ipv6.h> #include "igc.h" #include "igc_hw.h" #include "igc_tsn.h" +#include "igc_xdp.h" #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) +#define IGC_XDP_PASS 0 +#define IGC_XDP_CONSUMED BIT(0) +#define IGC_XDP_TX BIT(1) +#define IGC_XDP_REDIRECT BIT(2) + static int debug = -1; MODULE_AUTHOR("Intel Corporation, <[email protected]>"); @@ -176,8 +183,10 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) while (i != tx_ring->next_to_use) { union igc_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + xdp_return_frame(tx_buffer->xdpf); + else + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -375,6 +384,8 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring) i = 0; } + clear_ring_uses_large_buffer(rx_ring); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -403,6 +414,8 @@ void igc_free_rx_resources(struct igc_ring *rx_ring) { igc_clean_rx_ring(rx_ring); + igc_xdp_unregister_rxq_info(rx_ring); + vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -440,7 +453,11 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) { struct net_device *ndev = rx_ring->netdev; struct device *dev = rx_ring->dev; - int size, desc_len; + int size, desc_len, res; + + res = igc_xdp_register_rxq_info(rx_ring); + if (res < 0) + return res; size = sizeof(struct igc_rx_buffer) * rx_ring->count; rx_ring->rx_buffer_info = vzalloc(size); @@ -466,6 +483,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) return 0; err: + igc_xdp_unregister_rxq_info(rx_ring); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); @@ -497,6 +515,11 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter) return err; } +static bool igc_xdp_is_enabled(struct igc_adapter *adapter) +{ + return !!adapter->xdp_prog; +} + /** * igc_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure @@ -513,6 +536,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, u32 srrctl = 0, rxdctl = 0; u64 rdba = ring->dma; + if (igc_xdp_is_enabled(adapter)) + set_ring_uses_large_buffer(ring); + /* disable the queue */ wr32(IGC_RXDCTL(reg_idx), 0); @@ -1029,7 +1055,7 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) -static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +static u32 igc_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ u32 cmd_type = IGC_ADVTXD_DTYP_DATA | @@ -1078,7 +1104,7 @@ static int igc_tx_map(struct igc_ring *tx_ring, u16 i = tx_ring->next_to_use; unsigned int data_len, size; dma_addr_t 
dma; - u32 cmd_type = igc_tx_cmd_type(skb, tx_flags); + u32 cmd_type = igc_tx_cmd_type(tx_flags); tx_desc = IGC_TX_DESC(tx_ring, i); @@ -1480,11 +1506,18 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, } static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct igc_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -1499,6 +1532,32 @@ static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, return rx_buffer; } +static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, + unsigned int truesize) +{ +#if (PAGE_SIZE < 8192) + buffer->page_offset ^= truesize; +#else + buffer->page_offset += truesize; +#endif +} + +static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, + unsigned int size) +{ + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = igc_rx_pg_size(ring) / 2; +#else + truesize = ring_uses_build_skb(ring) ? + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif + return truesize; +} + /** * igc_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -1513,20 +1572,19 @@ static void igc_add_rx_frag(struct igc_ring *rx_ring, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; + unsigned int truesize; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - rx_buffer->page_offset ^= truesize; +#if (PAGE_SIZE < 8192) + truesize = igc_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? - SKB_DATA_ALIGN(IGC_SKB_PAD + size) : - SKB_DATA_ALIGN(size); + truesize = ring_uses_build_skb(rx_ring) ? 
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); - rx_buffer->page_offset += truesize; -#endif + + igc_rx_buffer_flip(rx_buffer, truesize); } static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, @@ -1535,12 +1593,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, unsigned int size) { void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IGC_SKB_PAD + size); -#endif + unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); struct sk_buff *skb; /* prefetch first cache line of first page */ @@ -1555,27 +1608,18 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, skb_reserve(skb, IGC_SKB_PAD); __skb_put(skb, size); - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - + igc_rx_buffer_flip(rx_buffer, truesize); return skb; } static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - union igc_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + ktime_t timestamp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif + unsigned int size = xdp->data_end - xdp->data; + unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); + void *va = xdp->data; unsigned int headlen; struct sk_buff *skb; @@ -1587,11 +1631,8 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; - if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) { - igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGC_TS_HDR_LEN; - size -= IGC_TS_HDR_LEN; - } + if (timestamp) + skb_hwtstamps(skb)->hwtstamp = timestamp; /* Determine available headroom for copy */ headlen = size; @@ -1607,11 +1648,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, skb_add_rx_frag(skb, 0, rx_buffer->page, (va + headlen) - page_address(rx_buffer->page), size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + igc_rx_buffer_flip(rx_buffer, truesize); } else { rx_buffer->pagecnt_bias++; } @@ -1648,7 +1685,8 @@ static void igc_reuse_rx_page(struct igc_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) +static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1659,7 +1697,7 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGC_LAST_OFFSET \ @@ -1673,8 +1711,8 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. 
*/ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -1726,6 +1764,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { struct net_device *netdev = rx_ring->netdev; @@ -1743,9 +1785,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, } static void igc_put_rx_buffer(struct igc_ring *rx_ring, - struct igc_rx_buffer *rx_buffer) + struct igc_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { - if (igc_can_reuse_rx_page(rx_buffer)) { + if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ igc_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -1765,7 +1808,14 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring, static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) { - return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0; + struct igc_adapter *adapter = rx_ring->q_vector->adapter; + + if (ring_uses_build_skb(rx_ring)) + return IGC_SKB_PAD; + if (igc_xdp_is_enabled(adapter)) + return XDP_PACKET_HEADROOM; + + return 0; } static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, @@ -1804,7 +1854,8 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = igc_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } @@ -1879,17 +1930,196 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) } } +static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, + struct xdp_frame *xdpf, + struct igc_ring *ring) +{ + dma_addr_t dma; + + dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) { + netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); + return -ENOMEM; + } + + buffer->xdpf = xdpf; + buffer->tx_flags = IGC_TX_FLAGS_XDP; + buffer->protocol = 0; + buffer->bytecount = xdpf->len; + buffer->gso_segs = 1; + buffer->time_stamp = jiffies; + dma_unmap_len_set(buffer, len, xdpf->len); + dma_unmap_addr_set(buffer, dma, dma); + return 0; +} + +/* This function requires __netif_tx_lock is held by the caller. 
*/ +static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, + struct xdp_frame *xdpf) +{ + struct igc_tx_buffer *buffer; + union igc_adv_tx_desc *desc; + u32 cmd_type, olinfo_status; + int err; + + if (!igc_desc_unused(ring)) + return -EBUSY; + + buffer = &ring->tx_buffer_info[ring->next_to_use]; + err = igc_xdp_init_tx_buffer(buffer, xdpf, ring); + if (err) + return err; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + buffer->bytecount; + olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; + + desc = IGC_TX_DESC(ring, ring->next_to_use); + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.olinfo_status = cpu_to_le32(olinfo_status); + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); + + netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); + + buffer->next_to_watch = desc; + + ring->next_to_use++; + if (ring->next_to_use == ring->count) + ring->next_to_use = 0; + + return 0; +} + +static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, + int cpu) +{ + int index = cpu; + + if (unlikely(index < 0)) + index = 0; + + while (index >= adapter->num_tx_queues) + index -= adapter->num_tx_queues; + + return adapter->tx_ring[index]; +} + +static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + int res; + + if (unlikely(!xdpf)) + return -EFAULT; + + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + res = igc_xdp_init_tx_descriptor(ring, xdpf); + __netif_tx_unlock(nq); + return res; +} + +static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, + struct xdp_buff *xdp) +{ + struct bpf_prog *prog; + int res; + u32 act; + + rcu_read_lock(); + + prog = READ_ONCE(adapter->xdp_prog); + if (!prog) { + res = IGC_XDP_PASS; + goto unlock; + } + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + res = IGC_XDP_PASS; + break; + case XDP_TX: + if (igc_xdp_xmit_back(adapter, xdp) < 0) + res = IGC_XDP_CONSUMED; + else + res = IGC_XDP_TX; + break; + case XDP_REDIRECT: + if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) + res = IGC_XDP_CONSUMED; + else + res = IGC_XDP_REDIRECT; + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(adapter->netdev, prog, act); + fallthrough; + case XDP_DROP: + res = IGC_XDP_CONSUMED; + break; + } + +unlock: + rcu_read_unlock(); + return ERR_PTR(-res); +} + +/* This function assumes __netif_tx_lock is held by the caller. */ +static void igc_flush_tx_descriptors(struct igc_ring *ring) +{ + /* Once tail pointer is updated, hardware can fetch the descriptors + * any time so we issue a write membar here to ensure all memory + * writes are complete before the tail pointer is updated. 
+ */ + wmb(); + writel(ring->next_to_use, ring->tail); +} + +static void igc_finalize_xdp(struct igc_adapter *adapter, int status) +{ + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + + if (status & IGC_XDP_TX) { + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + igc_flush_tx_descriptors(ring); + __netif_tx_unlock(nq); + } + + if (status & IGC_XDP_REDIRECT) + xdp_do_flush(); +} + static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) { unsigned int total_bytes = 0, total_packets = 0; + struct igc_adapter *adapter = q_vector->adapter; struct igc_ring *rx_ring = q_vector->rx.ring; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = igc_desc_unused(rx_ring); + int xdp_status = 0, rx_buffer_pgcnt; while (likely(total_packets < budget)) { union igc_adv_rx_desc *rx_desc; struct igc_rx_buffer *rx_buffer; - unsigned int size; + unsigned int size, truesize; + ktime_t timestamp = 0; + struct xdp_buff xdp; + int pkt_offset = 0; + void *pktbuf; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGC_RX_BUFFER_WRITE) { @@ -1908,16 +2138,52 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) */ dma_rmb(); - rx_buffer = igc_get_rx_buffer(rx_ring, size); + rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); + truesize = igc_get_rx_frame_truesize(rx_ring, size); + + pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; - /* retrieve a buffer from the ring */ - if (skb) + if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { + timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, + pktbuf); + pkt_offset = IGC_TS_HDR_LEN; + size -= IGC_TS_HDR_LEN; + } + + if (!skb) { + xdp.data = pktbuf + pkt_offset; + xdp.data_end = xdp.data + size; + xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring); + xdp_set_data_meta_invalid(&xdp); + xdp.frame_sz = truesize; + xdp.rxq = &rx_ring->xdp_rxq; + + skb = igc_xdp_run_prog(adapter, &xdp); + } + + if (IS_ERR(skb)) { + unsigned int xdp_res = -PTR_ERR(skb); + + switch (xdp_res) { + case IGC_XDP_CONSUMED: + rx_buffer->pagecnt_bias++; + break; + case IGC_XDP_TX: + case IGC_XDP_REDIRECT: + igc_rx_buffer_flip(rx_buffer, truesize); + xdp_status |= xdp_res; + break; + } + + total_packets++; + total_bytes += size; + } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); else - skb = igc_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, + timestamp); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1926,7 +2192,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) break; } - igc_put_rx_buffer(rx_ring, rx_buffer); + igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1954,6 +2220,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) total_packets++; } + if (xdp_status) + igc_finalize_xdp(adapter, xdp_status); + /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; @@ -2015,8 +2284,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + 
xdp_return_frame(tx_buffer->xdpf); + else + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -3858,6 +4129,11 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct igc_adapter *adapter = netdev_priv(netdev); + if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { + netdev_dbg(netdev, "Jumbo frames not supported with XDP"); + return -EINVAL; + } + /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; @@ -4844,6 +5120,58 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + struct igc_adapter *adapter = netdev_priv(dev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); + default: + return -EOPNOTSUPP; + } +} + +static int igc_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct igc_adapter *adapter = netdev_priv(dev); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + int i, drops; + + if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + + drops = 0; + for (i = 0; i < num_frames; i++) { + int err; + struct xdp_frame *xdpf = frames[i]; + + err = igc_xdp_init_tx_descriptor(ring, xdpf); + if (err) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (flags & XDP_XMIT_FLUSH) + igc_flush_tx_descriptors(ring); + + __netif_tx_unlock(nq); + + return num_frames - drops; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -4857,6 +5185,8 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_features_check = igc_features_check, .ndo_do_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, + .ndo_bpf = igc_bpf, + .ndo_xdp_xmit = igc_xdp_xmit, }; /* PCIe configuration access */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 545f4d0e67cf..dfa3b247fcd8 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -153,20 +153,20 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, /** * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer - * @q_vector: Pointer to interrupt specific structure - * @va: Pointer to address containing Rx buffer - * @skb: Buffer containing timestamp and packet + * @adapter: Pointer to adapter the packet buffer belongs to + * @buf: Pointer to packet buffer * * This function retrieves the timestamp saved in the beginning of packet * buffer. While two timestamps are available, one in timer0 reference and the * other in timer1 reference, this function considers only the timestamp in * timer0 reference. + * + * Returns timestamp value. 
*/ -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, - struct sk_buff *skb) +ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf) { - struct igc_adapter *adapter = q_vector->adapter; - u64 regval; + ktime_t timestamp; + u32 secs, nsecs; int adjust; /* Timestamps are saved in little endian at the beginning of the packet @@ -178,9 +178,10 @@ void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds * part of the timestamp. */ - regval = le32_to_cpu(va[2]); - regval |= (u64)le32_to_cpu(va[3]) << 32; - igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); + nsecs = le32_to_cpu(buf[2]); + secs = le32_to_cpu(buf[3]); + + timestamp = ktime_set(secs, nsecs); /* Adjust timestamp for the RX latency based on link speed */ switch (adapter->link_speed) { @@ -201,8 +202,8 @@ void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, netdev_warn_once(adapter->netdev, "Imprecise timestamp\n"); break; } - skb_hwtstamps(skb)->hwtstamp = - ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return ktime_sub_ns(timestamp, adjust); } static void igc_ptp_disable_rx_timestamp(struct igc_adapter *adapter) diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c new file mode 100644 index 000000000000..11133c4619bb --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. */ + +#include "igc.h" +#include "igc_xdp.h" + +int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = adapter->netdev; + bool if_running = netif_running(dev); + struct bpf_prog *old_prog; + + if (dev->mtu > ETH_DATA_LEN) { + /* For now, the driver doesn't support XDP functionality with + * jumbo frames so we return error. + */ + NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported"); + return -EOPNOTSUPP; + } + + if (if_running) + igc_close(dev); + + old_prog = xchg(&adapter->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (if_running) + igc_open(dev); + + return 0; +} + +int igc_xdp_register_rxq_info(struct igc_ring *ring) +{ + struct net_device *dev = ring->netdev; + int err; + + err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0); + if (err) { + netdev_err(dev, "Failed to register xdp rxq info\n"); + return err; + } + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + if (err) { + netdev_err(dev, "Failed to register xdp rxq mem model\n"); + xdp_rxq_info_unreg(&ring->xdp_rxq); + return err; + } + + return 0; +} + +void igc_xdp_unregister_rxq_info(struct igc_ring *ring) +{ + xdp_rxq_info_unreg(&ring->xdp_rxq); +} diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h new file mode 100644 index 000000000000..cfecb515b718 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_xdp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Intel Corporation. 
*/ + +#ifndef _IGC_XDP_H_ +#define _IGC_XDP_H_ + +int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack); + +int igc_xdp_register_rxq_info(struct igc_ring *ring); +void igc_xdp_unregister_rxq_info(struct igc_ring *ring); + +#endif /* _IGC_XDP_H_ */ diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 51ed8a54d380..0f8ef8f1232c 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -460,10 +460,8 @@ static int xrx200_probe(struct platform_device *pdev) } priv->pmac_reg = devm_ioremap_resource(dev, res); - if (IS_ERR(priv->pmac_reg)) { - dev_err(dev, "failed to request and remap io ranges\n"); + if (IS_ERR(priv->pmac_reg)) return PTR_ERR(priv->pmac_reg); - } priv->chan_rx.dma.irq = platform_get_irq_byname(pdev, "rx"); if (priv->chan_rx.dma.irq < 0) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 3bfb659b5c99..ca1681aa951a 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -700,7 +700,8 @@ static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb, ip_hdr(skb)->ihl << TX_IHL_SHIFT; /* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL - * it seems we don't need to pass the initial checksum. */ + * it seems we don't need to pass the initial checksum. + */ switch (ip_hdr(skb)->protocol) { case IPPROTO_UDP: cmd |= UDP_FRAME; @@ -790,7 +791,8 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length, WARN(1, "failed to prepare checksum!"); /* Should we set this? Can't use the value from skb_tx_csum() - * as it's not the correct initial L4 checksum to use. */ + * as it's not the correct initial L4 checksum to use. + */ desc->l4i_chk = 0; desc->byte_cnt = hdr_len; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 20307eec8988..f20dfd1d7a6b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1087,7 +1087,7 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize, return 0; } -static int mvneta_bm_port_mbus_init(struct mvneta_port *pp) +static int mvneta_bm_port_mbus_init(struct mvneta_port *pp) { u32 wsize; u8 target, attr; @@ -3993,7 +3993,8 @@ static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, /* Armada 370 documentation says we can only change the port mode * and in-band enable when the link is down, so force it down - * while making these changes. We also do this for GMAC_CTRL2 */ + * while making these changes. We also do this for GMAC_CTRL2 + */ if ((new_ctrl0 ^ gmac_ctrl0) & MVNETA_GMAC0_PORT_1000BASE_X || (new_ctrl2 ^ gmac_ctrl2) & MVNETA_GMAC2_INBAND_AN_ENABLE || (new_an ^ gmac_an) & MVNETA_GMAC_INBAND_AN_ENABLE) { @@ -4175,9 +4176,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); if (cpu == elected_cpu) - /* Map the default receive queue queue to the - * elected CPU - */ + /* Map the default receive queue to the elected CPU */ rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); /* We update the TX queue map only if we have one @@ -4907,7 +4906,8 @@ static int mvneta_ethtool_set_eee(struct net_device *dev, u32 lpi_ctl0; /* The Armada 37x documents do not give limits for this other than - * it being an 8-bit register. */ + * it being an 8-bit register. 
+ */ if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255) return -EINVAL; diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 8a9c0f490bfb..d4bb27ba1419 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -1617,7 +1617,7 @@ static void genesis_mac_init(struct skge_hw *hw, int port) xm_write16(hw, port, XM_TX_THR, 512); /* - * Enable the reception of all error frames. This is is + * Enable the reception of all error frames. This is * a necessary evil due to the design of the XMAC. The * XMAC's receive FIFO is only 8K in size, however jumbo * frames can be up to 9000 bytes in length. When bad @@ -2959,8 +2959,9 @@ static void genesis_set_multicast(struct net_device *dev) static void yukon_add_filter(u8 filter[8], const u8 *addr) { - u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f; - filter[bit/8] |= 1 << (bit%8); + u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f; + + filter[bit / 8] |= 1 << (bit % 8); } static void yukon_set_multicast(struct net_device *dev) @@ -3849,7 +3850,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, /* Only used for Genesis XMAC */ if (is_genesis(hw)) - timer_setup(&skge->link_timer, xm_link_timer, 0); + timer_setup(&skge->link_timer, xm_link_timer, 0); else { dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_RXCSUM; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 2a752fb6b758..68c154d715d6 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -55,7 +55,8 @@ #define RX_DEF_PENDING RX_MAX_PENDING /* This is the worst case number of transmit list elements for a single skb: - VLAN:GSO + CKSUM + Data + skb_frags * DMA */ + * VLAN:GSO + CKSUM + Data + skb_frags * DMA + */ #define MAX_SKB_TX_LE (2 + (sizeof(dma_addr_t)/sizeof(u32))*(MAX_SKB_FRAGS+1)) #define TX_MIN_PENDING (MAX_SKB_TX_LE+1) #define TX_MAX_PENDING 1024 @@ -1529,7 +1530,8 @@ static void sky2_rx_start(struct sky2_port *sky2) sky2_write32(hw, Q_ADDR(rxq, Q_WM), BMU_WM_PEX); /* These chips have no ram buffer? 
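[Editor's note] The yukon_add_filter() restyling above keeps the logic bit-for-bit: the low six bits of the Ethernet CRC select one of 64 positions in the 8-byte multicast hash filter. Suppose the CRC of an address ends in 0x4d; then:

    u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f; /* 0x4d & 0x3f = 13 */

    filter[bit / 8] |= 1 << (bit % 8);          /* filter[1] |= 0x20 */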
- * MAC Rx RAM Read is controlled by hardware */ + * MAC Rx RAM Read is controlled by hardware + */ if (hw->chip_id == CHIP_ID_YUKON_EC_U && hw->chip_rev > CHIP_REV_YU_EC_U_A0) sky2_write32(hw, Q_ADDR(rxq, Q_TEST), F_M_RX_RAM_DIS); @@ -4684,7 +4686,8 @@ static __exit void sky2_debug_cleanup(void) #endif /* Two copies of network device operations to handle special case of - not allowing netpoll on second port */ + * not allowing netpoll on second port + */ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_open, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0396f0db855f..810def064f11 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3202,6 +3202,7 @@ static const struct mtk_soc_data mt7623_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7623_CLKS_BITMAP, .required_pctl = true, + .offload_version = 2, }; static const struct mtk_soc_data mt7629_data = { diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c index 8ae9efab6d02..98b1d3577bcd 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c @@ -77,9 +77,9 @@ static int mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind) { struct mtk_ppe *ppe = m->private; - int i, count; + int i; - for (i = 0, count = 0; i < MTK_PPE_ENTRIES; i++) { + for (i = 0; i < MTK_PPE_ENTRIES; i++) { struct mtk_foe_entry *entry = &ppe->foe_table[i]; struct mtk_foe_mac_info *l2; struct mtk_flow_addr_info ai = {}; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index d0c46786571f..4975106fbc42 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -5,7 +5,6 @@ #include <linux/if_ether.h> #include <linux/rhashtable.h> -#include <linux/if_ether.h> #include <linux/ip.h> #include <net/flow_offload.h> #include <net/pkt_cls.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8cb2625472c3..9cf7de72df52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -27,7 +27,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o + en/qos.o en/trap.o en/fs_tt_redirect.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9ea3f3befe74..b425b4a539bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -269,6 +269,7 @@ struct mlx5e_params { struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; + bool ptp_rx; }; enum { @@ -707,11 +708,11 @@ struct mlx5e_channel { int cpu; }; -struct mlx5e_port_ptp; +struct mlx5e_ptp; struct mlx5e_channels { struct mlx5e_channel **c; - struct mlx5e_port_ptp *port_ptp; + struct mlx5e_ptp *ptp; unsigned int num; struct mlx5e_params params; }; @@ -726,10 +727,11 @@ struct mlx5e_channel_stats { struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; -struct mlx5e_port_ptp_stats { +struct mlx5e_ptp_stats { struct mlx5e_ch_stats ch; struct 
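/* [Editor's note] The MAX_SKB_TX_LE comment fixed in the sky2 hunk above can
 * be made concrete: assuming a 64-bit dma_addr_t (sizeof(dma_addr_t) /
 * sizeof(u32) == 2) and the conventional MAX_SKB_FRAGS of 17, the worst case
 * is 2 + 2 * (17 + 1) = 38 transmit list elements for one skb, which is why
 * TX_MIN_PENDING = MAX_SKB_TX_LE + 1 = 39.
 */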
mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; } ____cacheline_aligned_in_smp; enum { @@ -836,6 +838,7 @@ struct mlx5e_priv { struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir ptp_tir; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -855,10 +858,11 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_channel_stats trap_stats; - struct mlx5e_port_ptp_stats port_ptp_stats; + struct mlx5e_ptp_stats ptp_stats; u16 max_nch; u8 max_opened_tc; - bool port_ptp_opened; + bool tx_ptp_opened; + bool rx_ptp_opened; struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; @@ -914,13 +918,12 @@ struct mlx5e_profile { const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; + bool rx_ptp_support; }; void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@ -963,9 +966,9 @@ struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types struct mlx5e_xsk_param; struct mlx5e_rq_param; -int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@ -1020,18 +1023,11 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@ -1090,10 +1086,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -int mlx5e_create_direct_tirs(struct mlx5e_priv 
*priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); @@ -1177,8 +1173,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index a16297e7e2ac..d53fb1e31b05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -137,11 +137,13 @@ enum { MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, MLX5E_INNER_TTC_FT_LEVEL, + MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #ifdef CONFIG_MLX5_EN_TLS - MLX5E_ACCEL_FS_TCP_FT_LEVEL, + MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_ARFS - MLX5E_ARFS_FT_LEVEL, + MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_IPSEC MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, @@ -241,6 +243,10 @@ static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSU struct mlx5e_accel_fs_tcp; #endif +struct mlx5e_fs_udp; +struct mlx5e_fs_any; +struct mlx5e_ptp_fs; + struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; struct mlx5_flow_namespace *egress_ns; @@ -259,6 +265,9 @@ struct mlx5e_flow_steering { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_accel_fs_tcp *accel_tcp; #endif + struct mlx5e_fs_udp *udp; + struct mlx5e_fs_any *any; + struct mlx5e_ptp_fs *ptp_fs; }; struct ttc_params { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c new file mode 100644 index 000000000000..909faa6c89d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. 
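[Editor's note] The fs_tt_redirect helpers introduced below let a feature temporarily "steal" a traffic type from the TTC classifier: the TTC rule for that type is re-pointed at a private redirect table whose specific rules steer matching packets to a caller-supplied TIR, while a default catch-all rule falls through to the original destination. The PTP code in the en/ptp.c diff further below consumes it roughly like this (tirn is the consumer's TIR; PTP event messages use UDP port 319):

    err = mlx5e_fs_tt_redirect_udp_create(priv);
    if (err)
            return err;
    rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
                                             tirn, PTP_EV_PORT /* 319 */);
    if (IS_ERR(rule))
            goto err_destroy;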
*/ + +#include <linux/netdevice.h> +#include "en/fs_tt_redirect.h" +#include "fs_core.h" + +enum fs_udp_type { + FS_IPV4_UDP, + FS_IPV6_UDP, + FS_UDP_NUM_TYPES, +}; + +struct mlx5e_fs_udp { + struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES]; + int ref_cnt; +}; + +struct mlx5e_fs_any { + struct mlx5e_flow_table table; + struct mlx5_flow_handle *default_rule; + int ref_cnt; +}; + +static char *fs_udp_type2str(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return "UDP v4"; + default: /* FS_IPV6_UDP */ + return "UDP v6"; + } +} + +static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + default: /* FS_IPV6_UDP */ + return MLX5E_TT_IPV6_UDP; + } +} + +static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i) +{ + switch (i) { + case MLX5E_TT_IPV4_UDP: + return FS_IPV4_UDP; + case MLX5E_TT_IPV6_UDP: + return FS_IPV6_UDP; + default: + return FS_UDP_NUM_TYPES; + } +} + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type, + u16 udp_dport) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + type == FS_IPV4_UDP ? 4 : 6); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, + enum mlx5e_traffic_types ttc_type, + u32 tir_num, u16 d_port) +{ + enum fs_udp_type type = tt2fs_udp(ttc_type); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_fs_udp *fs_udp; + int err; + + if (type == FS_UDP_NUM_TYPES) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + fs_udp = priv->fs.udp; + ft = fs_udp->tables[type].t; + + fs_udp_set_dport_flow(spec, type, d_port); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n", + __func__, fs_udp_type2str(type), err); + } + return rule; +} + +static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type) +{ + struct mlx5e_flow_table *fs_udp_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5e_fs_udp *fs_udp; + int err; + + fs_udp = priv->fs.udp; + fs_udp_t = &fs_udp->tables[type]; + + dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type)); + rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, + "%s: add default rule failed, fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_udp->default_rules[type] = rule; + return 0; +} + +#define 
MLX5E_FS_UDP_NUM_GROUPS (2) +#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\ + MLX5E_FS_UDP_GROUP2_SIZE) +static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case FS_IPV4_UDP: + case FS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + break; + default: + err = -EINVAL; + goto out; + } + /* Match on udp protocol, Ipv4/6 and dport */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type) +{ + struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type]; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n", + fs_udp_type2str(type), ft->t->id, ft->t->level); + + err = fs_udp_create_groups(ft, type); + if (err) + goto err; + + err = fs_udp_add_default_rule(priv, type); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i) +{ + if (IS_ERR_OR_NULL(fs_udp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_udp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_udp->tables[i]); + fs_udp->tables[i].t = NULL; +} + +static int fs_udp_disable(struct mlx5e_priv *priv) +{ + int err, i; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i)); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + + return 0; +} + +static int fs_udp_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; 
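/* [Editor's note] The table geometry defined above is the usual mlx5
 * two-group split: group 1 (MLX5E_FS_UDP_GROUP1_SIZE = BIT(16) entries)
 * carries the ip_protocol/ip_version/udp_dport match mask, and group 2
 * (BIT(0), a single entry) has an empty mask for the catch-all default
 * rule, for MLX5E_FS_UDP_TABLE_SIZE = 65536 + 1 = 65537 flow entries in
 * total. The split exists because all rules inside one flow group must
 * share the same match criteria mask.
 */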
i < FS_UDP_NUM_TYPES; i++) { + dest.ft = priv->fs.udp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + return 0; +} + +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_fs_udp *fs_udp = priv->fs.udp; + int i; + + if (!fs_udp) + return; + + if (--fs_udp->ref_cnt) + return; + + fs_udp_disable(priv); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) + fs_udp_destroy_table(fs_udp, i); + + kfree(fs_udp); + priv->fs.udp = NULL; +} + +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv) +{ + int i, err; + + if (priv->fs.udp) { + priv->fs.udp->ref_cnt++; + return 0; + } + + priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL); + if (!priv->fs.udp) + return -ENOMEM; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + err = fs_udp_create_table(priv, i); + if (err) + goto err_destroy_tables; + } + + err = fs_udp_enable(priv); + if (err) + goto err_destroy_tables; + + priv->fs.udp->ref_cnt = 1; + + return 0; + +err_destroy_tables: + while (--i >= 0) + fs_udp_destroy_table(priv->fs.udp, i); + + kfree(priv->fs.udp); + priv->fs.udp = NULL; + return err; +} + +static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, + u32 tir_num, u16 ether_type) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_fs_any *fs_any; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + fs_any = priv->fs.any; + ft = fs_any->table.t; + + fs_any_set_ethertype_flow(spec, ether_type); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n", + __func__, err); + } + return rule; +} + +static int fs_any_add_default_rule(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *fs_any_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5e_fs_any *fs_any; + int err; + + fs_any = priv->fs.any; + fs_any_t = &fs_any->table; + + dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY); + rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, + "%s: add default rule failed, fs type=ANY, err %d\n", + __func__, err); + return err; + } + + fs_any->default_rule = rule; + return 0; +} + +#define MLX5E_FS_ANY_NUM_GROUPS (2) +#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\ + MLX5E_FS_ANY_GROUP2_SIZE) + +static int fs_any_create_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g 
= kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + /* Match on ethertype */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int fs_any_create_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fs.any->table; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n", + ft->t->id, ft->t->level); + + err = fs_any_create_groups(ft); + if (err) + goto err; + + err = fs_any_add_default_rule(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int fs_any_disable(struct mlx5e_priv *priv) +{ + int err; + + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, MLX5E_TT_ANY, err); + return err; + } + return 0; +} + +static int fs_any_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + int err; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.any->table.t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, MLX5E_TT_ANY, err); + return err; + } + return 0; +} + +static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any) +{ + if (IS_ERR_OR_NULL(fs_any->table.t)) + return; + + mlx5_del_flow_rules(fs_any->default_rule); + mlx5e_destroy_flow_table(&fs_any->table); + fs_any->table.t = NULL; +} + +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_fs_any *fs_any = priv->fs.any; + + if (!fs_any) + return; + + if (--fs_any->ref_cnt) + return; + + fs_any_disable(priv); + + fs_any_destroy_table(fs_any); + + kfree(fs_any); + priv->fs.any = NULL; +} + +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv) +{ + int err; + + if (priv->fs.any) { + priv->fs.any->ref_cnt++; + return 0; + } + + priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL); + if (!priv->fs.any) + return -ENOMEM; + + err = 
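/* [Editor's note] Like the UDP tables above, the ANY table is a refcounted
 * singleton: the first mlx5e_fs_tt_redirect_any_create() call builds the
 * table and redirects the TTC rule, later calls only bump ref_cnt, and
 * mlx5e_fs_tt_redirect_any_destroy() tears everything down only when the
 * last reference goes away, so a hypothetical second consumer simply pairs
 * create/destroy calls without ever seeing the table vanish underneath it.
 */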
fs_any_create_table(priv); + if (err) + return err; + + err = fs_any_enable(priv); + if (err) + goto err_destroy_table; + + priv->fs.any->ref_cnt = 1; + + return 0; + +err_destroy_table: + fs_any_destroy_table(priv->fs.any); + + kfree(priv->fs.any); + priv->fs.any = NULL; + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h new file mode 100644 index 000000000000..8385df24eb99 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5E_FS_TT_REDIRECT_H__ +#define __MLX5E_FS_TT_REDIRECT_H__ + +#include "en.h" +#include "en/fs.h" + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); + +/* UDP traffic type redirect */ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, + enum mlx5e_traffic_types ttc_type, + u32 tir_num, u16 d_port); +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); + +/* ANY traffic type redirect*/ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, + u32 tir_num, u16 ether_type); +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv); +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 36381a2ed5a5..7b2b52e75222 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,10 +3,12 @@ #include "en/params.h" #include "en/txrx.h" -#include "en_accel/tls_rxtx.h" +#include "en/port.h" +#include "en_accel/en_accel.h" +#include "accel/ipsec.h" -static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return params->xdp_prog || xsk; } @@ -37,8 +39,8 @@ u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, return linear_rq_headroom + hw_mtu; } -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); @@ -172,17 +174,485 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par return stop_room; } -int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params) +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { size_t sq_size = 1 << params->log_sq_size; u16 stop_room; - stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); + stop_room = mlx5e_calc_sq_stop_room(mdev, params); if (stop_room >= sq_size) { - netdev_err(priv->netdev, "Stop room %u is bigger than the SQ size %zu\n", - stop_room, sq_size); + mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n", + stop_room, sq_size); return -EINVAL; } return 0; } + +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec 
= MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } +} + +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->rx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); + } +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_tx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_rx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev) +{ + u32 link_speed = 0; + u32 pci_bw = 0; + + mlx5e_port_max_linkspeed(mdev, &link_speed); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; +} + +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; +} + +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
+ BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_CYCLIC; +} + +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - Slow PCI heuristic. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. + */ + if (!slow_pci_heuristic(mdev) && + mlx5e_striding_rq_possible(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); +} + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +{ + *ccp = (struct mlx5e_create_cq_param) { + .napi = &c->napi, + .ch_stats = c->stats, + .node = cpu_to_node(c->cpu), + .ix = c->ix, + }; +} + +#define DEFAULT_FRAG_SIZE (2048) + +static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) +{ + u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); + int frag_size_max = DEFAULT_FRAG_SIZE; + u32 buf_size = 0; + int i; + + if (MLX5_IPSEC_DEV(mdev)) + byte_count += MLX5E_METADATA_ETHER_LEN; + + if (mlx5e_rx_is_linear_skb(params, xsk)) { + int frag_stride; + + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); + frag_stride = roundup_pow_of_two(frag_stride); + + info->arr[0].frag_size = byte_count; + info->arr[0].frag_stride = frag_stride; + info->num_frags = 1; + info->wqe_bulk = PAGE_SIZE / frag_stride; + goto out; + } + + if (byte_count > PAGE_SIZE + + (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + frag_size_max = PAGE_SIZE; + + i = 0; + while (buf_size < byte_count) { + int frag_size = byte_count - buf_size; + + if (i < MLX5E_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + info->arr[i].frag_size = frag_size; + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + + buf_size += frag_size; + i++; + } + info->num_frags = i; + /* number of different wqes sharing a page */ + info->wqe_bulk = 1 + (info->num_frags % 2); + +out: + info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); + info->log_num_frags = order_base_2(info->num_frags); +} + +static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + +static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); +} + +static void 
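/* [Editor's note] A worked example for mlx5e_build_rq_frags_info() above,
 * assuming 4 KiB pages, MLX5E_MAX_RX_FRAGS == 4 and a jumbo configuration
 * where byte_count comes to 9000: that is below PAGE_SIZE +
 * 3 * DEFAULT_FRAG_SIZE = 10240, so frag_size_max stays 2048 and the loop
 * yields fragments of 2048/2048/2048/2856 bytes with strides of
 * 2048/2048/2048/4096; num_frags ends up 4 and wqe_bulk is lifted to the
 * floor of 8 by the final max_t().
 */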
mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) +{ + bool hw_stridx = false; + void *cqc = param->cqc; + u8 log_cq_size; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + log_cq_size = params->log_rq_mtu_frames; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? + MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; +} + +void mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - + MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - + MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); + mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info); + ndsegs = param->frags_info.num_frags; + } + + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp); +} + +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; +} + +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + + allow_swp = mlx5_geneve_tx_allowed(mdev) || + !!MLX5_IPSEC_DEV(mdev); + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(mdev, params); + mlx5e_build_tx_cq_param(mdev, params, &param->cqp); +} + +static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(mdev, param); + + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +static u8 mlx5e_get_rq_log_wq_sz(void *rqc) +{ + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + return MLX5_GET(wq, wq, log_wq_sz); +} + +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, + struct mlx5e_rq_param *rqp) +{ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, + order_base_2(MLX5E_UMR_WQEBBS) + + mlx5e_get_rq_log_wq_sz(rqp->rqc)); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + } +} + +static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) +{ + if (mlx5_accel_is_ktls_rx(mdev)) + return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp); +} + +static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp); +} + +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + mlx5e_build_tx_cq_param(mdev, params, &param->cqp); +} + +void mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz, async_icosq_log_wq_sz; + + mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); + async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); + + mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); + mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); +}
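[Editor's note] The common thread of this params.c rework is that every RQ/SQ/CQ parameter builder now takes the bare mlx5_core_dev plus explicit mlx5e_params/q_counter arguments instead of the whole mlx5e_priv, so queue parameters can be computed without a live netdev context. A hypothetical caller needs no more than:

    struct mlx5e_channel_param *cparam;

    cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
    if (!cparam)
            return -ENOMEM;
    mlx5e_build_channel_param(mdev, &params, q_counter, cparam);

diff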
--git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index ea2cfb04b31a..602e41a2bddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -84,12 +84,21 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, /* Parameter calculations */ +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev); +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, @@ -112,32 +121,31 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, /* Build queue parameters */ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); -void mlx5e_build_rq_param(struct mlx5e_priv *priv, +void mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, + u16 q_counter, struct mlx5e_rq_param *param); -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param); -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_sq_param *param); -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct mlx5e_cq_param *param); -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param); -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param); -void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param); -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_sq_param *param); +void mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam); u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); #endif /* 
__MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index bb5d108f75d0..72e7dd6d78c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -1,8 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2020 Mellanox Technologies +#include <linux/ptp_classify.h> #include "en/ptp.h" #include "en/txrx.h" +#include "en/params.h" +#include "en/fs_tt_redirect.h" + +struct mlx5e_ptp_fs { + struct mlx5_flow_handle *l2_rule; + struct mlx5_flow_handle *udp_v4_rule; + struct mlx5_flow_handle *udp_v6_rule; + bool valid; +}; + +#define MLX5E_PTP_CHANNEL_IX 0 + +struct mlx5e_ptp_params { + struct mlx5e_params params; + struct mlx5e_sq_param txq_sq_param; + struct mlx5e_rq_param rq_param; +}; struct mlx5e_skb_cb_hwtstamp { ktime_t cqe_hwtstamp; @@ -116,9 +134,9 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) { - struct mlx5e_port_ptp *c = container_of(napi, struct mlx5e_port_ptp, - napi); + struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; bool busy = false; int work_done = 0; int i; @@ -127,9 +145,19 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) ch_stats->poll++; - for (i = 0; i < c->num_tc; i++) { - busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); - busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); + busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) { + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); } if (busy) { @@ -142,10 +170,14 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) ch_stats->arm++; - for (i = 0; i < c->num_tc; i++) { - mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); - mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); + mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + } } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_cq_arm(&rq->cq); out: rcu_read_unlock(); @@ -153,7 +185,7 @@ out: return work_done; } -static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix, +static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct mlx5e_txqsq *sq, int tc, @@ -172,20 +204,18 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix, sq->netdev = c->netdev; sq->priv = c->priv; sq->mdev = mdev; - sq->ch_ix = c->ix; + sq->ch_ix = MLX5E_PTP_CHANNEL_IX; sq->txq_ix = txq_ix; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->port_ptp_stats.sq[tc]; + sq->stats = &c->priv->ptp_stats.sq[tc]; sq->ptpsq = ptpsq; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? 
- mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); node = dev_to_node(mlx5_core_dma_dev(mdev)); @@ -243,7 +273,7 @@ static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo) kvfree(skb_fifo->fifo); } -static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, +static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, int txq_ix, struct mlx5e_ptp_params *cparams, int tc, struct mlx5e_ptpsq *ptpsq) { @@ -293,7 +323,7 @@ static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq) mlx5e_free_txqsq(sq); } -static int mlx5e_ptp_open_txqsqs(struct mlx5e_port_ptp *c, +static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c, struct mlx5e_ptp_params *cparams) { struct mlx5e_params *params = &cparams->params; @@ -321,7 +351,7 @@ close_txqsq: return err; } -static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c) +static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c) { int tc; @@ -329,8 +359,8 @@ static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c) mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); } -static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, - struct mlx5e_ptp_params *cparams) +static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) { struct mlx5e_params *params = &cparams->params; struct mlx5e_create_cq_param ccp = {}; @@ -342,7 +372,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); ccp.ch_stats = c->stats; ccp.napi = &c->napi; - ccp.ix = c->ix; + ccp.ix = MLX5E_PTP_CHANNEL_IX; cq_param = &cparams->txq_sq_param.cqp; @@ -362,7 +392,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, if (err) goto out_err_ts_cq; - ptpsq->cq_stats = &c->priv->port_ptp_stats.cq[tc]; + ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc]; } return 0; @@ -378,7 +408,25 @@ out_err_txqsq_cq: return err; } -static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c) +static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + struct mlx5e_cq *cq = &c->rq.cq; + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->rq_param.cqp; + + return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); +} + +static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c) { int tc; @@ -389,22 +437,36 @@ static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c) mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); } -static void mlx5e_ptp_build_sq_param(struct mlx5e_priv *priv, +static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq; - mlx5e_build_sq_param_common(priv, param); + mlx5e_build_sq_param_common(mdev, param); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - mlx5e_build_tx_cq_param(priv, params, &param->cqp); + mlx5e_build_tx_cq_param(mdev, params, &param->cqp); +}
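[Editor's note] The sq->ptp_cyc2time assignment a few hunks up replaces an open-coded ternary with the new mlx5_sq_ts_translator() helper; judging purely from the removed lines, the helper presumably reduces to:

    /* sketch, inferred from the lines removed above */
    sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
                                                    mlx5_timecounter_cyc2time;

i.e. devices whose hardware clock runs in real-time mode translate CQE timestamps directly, while the rest go through the free-running timecounter.

+ +static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, + struct net_device *netdev, + u16 q_counter, + struct mlx5e_ptp_params *ptp_params) +{ + struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; + struct mlx5e_params *params = &ptp_params->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev,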
params); + params->sw_mtu = netdev->max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params); } -static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c, +static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, struct mlx5e_ptp_params *cparams, struct mlx5e_params *orig) { @@ -417,52 +479,193 @@ static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c, params->num_tc = orig->num_tc; /* SQ */ - params->log_sq_size = orig->log_sq_size; - - mlx5e_ptp_build_sq_param(c->priv, params, &cparams->txq_sq_param); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + params->log_sq_size = orig->log_sq_size; + mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); } -static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c, - struct mlx5e_ptp_params *cparams) +static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5e_priv *priv = c->priv; int err; - err = mlx5e_ptp_open_cqs(c, cparams); + rq->wq_type = params->rq_wq_type; + rq->pdev = mdev->device; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &c->priv->ptp_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, false); if (err) return err; - err = mlx5e_ptp_open_txqsqs(c, cparams); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int node = dev_to_node(c->mdev->device); + int err; + + err = mlx5e_init_ptp_rq(c, params, &c->rq); if (err) - goto close_cqs; + return err; + + return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq); +} + +static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + int err; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + err = mlx5e_ptp_open_tx_cqs(c, cparams); + if (err) + return err; + + err = mlx5e_ptp_open_txqsqs(c, cparams); + if (err) + goto close_tx_cqs; + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + err = mlx5e_ptp_open_rx_cq(c, cparams); + if (err) + goto close_txqsq; + err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param); + if (err) + goto close_rx_cq; + } return 0; -close_cqs: - mlx5e_ptp_close_cqs(c); +close_rx_cq: + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_close_cq(&c->rq.cq); +close_txqsq: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_txqsqs(c); +close_tx_cqs: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_tx_cqs(c); return err; } -static void mlx5e_ptp_close_queues(struct mlx5e_port_ptp *c) +static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c) +{ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_close_rq(&c->rq); + mlx5e_close_cq(&c->rq.cq); + } + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + mlx5e_ptp_close_txqsqs(c); + mlx5e_ptp_close_tx_cqs(c); + } +} + +static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params) +{ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS)) + __set_bit(MLX5E_PTP_STATE_TX, c->state); + + if (params->ptp_rx) + __set_bit(MLX5E_PTP_STATE_RX, c->state); + + return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? 
-EINVAL : 0; +} + +static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + + if (!ptp_fs->valid) + return; + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule); + mlx5e_fs_tt_redirect_any_destroy(priv); + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); + mlx5e_fs_tt_redirect_udp_destroy(priv); + ptp_fs->valid = false; +} + +static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { - mlx5e_ptp_close_txqsqs(c); - mlx5e_ptp_close_cqs(c); + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + struct mlx5_flow_handle *rule; + u32 tirn = priv->ptp_tir.tirn; + int err; + + if (ptp_fs->valid) + return 0; + + err = mlx5e_fs_tt_redirect_udp_create(priv); + if (err) + goto out_free; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_udp; + } + ptp_fs->udp_v4_rule = rule; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_udp_v4_rule; + } + ptp_fs->udp_v6_rule = rule; + + err = mlx5e_fs_tt_redirect_any_create(priv); + if (err) + goto out_destroy_udp_v6_rule; + + rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_any; + } + ptp_fs->l2_rule = rule; + ptp_fs->valid = true; + + return 0; + +out_destroy_fs_any: + mlx5e_fs_tt_redirect_any_destroy(priv); +out_destroy_udp_v6_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); +out_destroy_udp_v4_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); +out_destroy_fs_udp: + mlx5e_fs_tt_redirect_udp_destroy(priv); +out_free: + return err; } -int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, - u8 lag_port, struct mlx5e_port_ptp **cp) +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_ptp_params *cparams; - struct mlx5e_port_ptp *c; - unsigned int irq; + struct mlx5e_ptp *c; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, 0, &eqn, &irq); - if (err) - return err; c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); @@ -472,14 +675,17 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->priv = priv; c->mdev = priv->mdev; c->tstamp = &priv->tstamp; - c->ix = 0; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; - c->stats = &priv->port_ptp_stats.ch; + c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; + err = mlx5e_ptp_set_state(c, params); + if (err) + goto err_free; + netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64); mlx5e_ptp_build_params(c, cparams, params); @@ -488,6 +694,9 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_napi_del; + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + priv->rx_ptp_opened = true; + *cp = c; kvfree(cparams); @@ -496,13 +705,13 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, err_napi_del: netif_napi_del(&c->napi); - +err_free: kvfree(cparams); kvfree(c); return err; } -void mlx5e_port_ptp_close(struct mlx5e_port_ptp 
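/*
 * RX PTP steering above is built in two layers: UDP redirect rules for
 * the PTP event port (PTP_EV_PORT, UDP port 319) over IPv4 and IPv6,
 * then an ethertype rule for L2 PTP (ETH_P_1588), all pointing at the
 * PTP TIR. The ptp_fs->valid flag makes set/unset idempotent, and each
 * error path unwinds strictly in reverse order of creation, the usual
 * goto-ladder shape:
 *
 *    rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
 *                                             tirn, PTP_EV_PORT);
 *    if (IS_ERR(rule)) {
 *        err = PTR_ERR(rule);
 *        goto out_destroy_udp_v4_rule;
 *    }
 */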
*c) +void mlx5e_ptp_close(struct mlx5e_ptp *c) { mlx5e_ptp_close_queues(c); netif_napi_del(&c->napi); @@ -510,22 +719,91 @@ void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c) kvfree(c); } -void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c) +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) { int tc; napi_enable(&c->napi); - for (tc = 0; tc < c->num_tc; tc++) - mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_ptp_rx_set_fs(c->priv); + mlx5e_activate_rq(&c->rq); + } } -void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c) +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) { int tc; - for (tc = 0; tc < c->num_tc; tc++) - mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_deactivate_rq(&c->rq); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + } napi_disable(&c->napi); } + +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) +{ + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return -EINVAL; + + *rqn = c->rq.rqn; + return 0; +} + +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs; + + if (!priv->profile->rx_ptp_support) + return 0; + + ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); + if (!ptp_fs) + return -ENOMEM; + + priv->fs.ptp_fs = ptp_fs; + return 0; +} + +void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + + if (!priv->profile->rx_ptp_support) + return; + + mlx5e_ptp_rx_unset_fs(priv); + kfree(ptp_fs); +} + +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) +{ + struct mlx5e_ptp *c = priv->channels.ptp; + + if (!priv->profile->rx_ptp_support) + return 0; + + if (set) { + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules"); + return -EINVAL; + } + return mlx5e_ptp_rx_set_fs(priv); + } + /* set == false */ + if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules"); + return -EINVAL; + } + mlx5e_ptp_rx_unset_fs(priv); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 90c98ea63b7f..ab935cce952b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -5,7 +5,6 @@ #define __MLX5_EN_PTP_H__ #include "en.h" -#include "en/params.h" #include "en_stats.h" struct mlx5e_ptpsq { @@ -17,9 +16,16 @@ struct mlx5e_ptpsq { struct mlx5e_ptp_cq_stats *cq_stats; }; -struct mlx5e_port_ptp { +enum { + MLX5E_PTP_STATE_TX, + MLX5E_PTP_STATE_RX, + MLX5E_PTP_STATE_NUM_STATES, +}; + +struct mlx5e_ptp { /* data path */ struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq rq; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -34,20 +40,18 @@ struct mlx5e_port_ptp { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; - DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); - int ix; -}; - -struct mlx5e_ptp_params { - struct mlx5e_params params; - struct mlx5e_sq_param txq_sq_param; + DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); }; -int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, - u8 lag_port, struct 
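/*
 * Activation order in the hunks above is deliberate: NAPI is enabled
 * first, then the TX SQs, then the RX steering rules and the RQ;
 * deactivation runs in the exact reverse so no queue is ever live
 * without its NAPI context. Re-running mlx5e_ptp_rx_set_fs() on every
 * activate is safe thanks to the ptp_fs->valid guard.
 */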
mlx5e_port_ptp **cp); -void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c); -void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c); -void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c); +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp); +void mlx5e_ptp_close(struct mlx5e_ptp *c); +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c); +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c); +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn); +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv); +void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); enum { MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 12d7ad061237..5efe3278b0f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -232,8 +232,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs memset(¶m_sq, 0, sizeof(param_sq)); memset(¶m_cq, 0, sizeof(param_cq)); - mlx5e_build_sq_param(priv, params, ¶m_sq); - mlx5e_build_tx_cq_param(priv, params, ¶m_cq); + mlx5e_build_sq_param(priv->mdev, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv->mdev, params, ¶m_cq); err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); if (err) goto err_free_sq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 34b3b316b688..f9fdf3606bbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -5,6 +5,7 @@ #include "params.h" #include "txrx.h" #include "devlink.h" +#include "ptp.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { @@ -230,8 +231,9 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, - struct devlink_fmsg *fmsg) +static int +mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) { u16 wqe_counter; int wqes_sz; @@ -247,14 +249,6 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, wq_head = mlx5e_rqwq_get_head(rq); wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); - err = devlink_fmsg_obj_nest_start(fmsg); - if (err) - return err; - - err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); - if (err) - return err; - err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); if (err) return err; @@ -300,61 +294,149 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, return err; } - err = devlink_fmsg_obj_nest_end(fmsg); + return 0; +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); if (err) return err; - return 0; + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq 
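/*
 * Besides the rename from mlx5e_port_ptp to mlx5e_ptp, the header
 * grows the RX-side API used elsewhere in this patch:
 * mlx5e_ptp_get_rqn() resolves the PTP RQ number for the RQT,
 * mlx5e_ptp_alloc_rx_fs()/mlx5e_ptp_free_rx_fs() manage the steering
 * context lifetime, and mlx5e_ptp_rx_manage_fs() adds or removes the
 * redirect rules when the channel configuration changes.
 */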
*rq, + struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_params *params = &priv->channels.params; - struct mlx5e_rq *generic_rq; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_params *params; u32 rq_stride, rq_sz; - int i, err = 0; - - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; + int err; - generic_rq = &priv->channels.c[0]->rq; - rq_sz = mlx5e_rqwq_get_size(generic_rq); + params = &priv->channels.params; + rq_sz = mlx5e_rqwq_get_size(rq); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); - err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); - if (err) - goto unlock; - err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) - goto unlock; + return err; err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); if (err) - goto unlock; + return err; err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); if (err) - goto unlock; + return err; err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); if (err) - goto unlock; + return err; - err = mlx5e_health_cq_common_diag_fmsg(&generic_rq->cq, fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); if (err) - goto unlock; + return err; - err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg); + if (err) + return err; + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; - err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + err = 
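/*
 * Factoring mlx5e_rx_reporter_build_diagnose_output_rq_common() out
 * lets regular RQs and the PTP RQ share one formatter: regular RQs are
 * keyed by the numeric "channel ix" pair, while the PTP RQ, which has
 * no channel index, is keyed by the string pair ("channel", "ptp")
 * further down.
 */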
mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); if (err) goto unlock; @@ -369,9 +451,12 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + if (err) + goto unlock; + } err = devlink_fmsg_arr_pair_nest_end(fmsg); - if (err) - goto unlock; unlock: mutex_unlock(&priv->state_lock); return err; @@ -503,6 +588,7 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5_rsc_key key = {}; int i, err; @@ -535,6 +621,12 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, return err; } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ"); + if (err) + return err; + } + return devlink_fmsg_arr_pair_nest_end(fmsg); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 63ee3b9416de..1a0505bd1e9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -304,6 +304,7 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5e_ptpsq *generic_ptpsq; int err; @@ -315,12 +316,11 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte if (err) return err; - generic_ptpsq = priv->channels.port_ptp ? 
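/*
 * Both health reporters now key off the per-direction state bits
 * instead of the mere existence of the PTP channel: the RX reporter
 * walks ptp_ch->rq only under MLX5E_PTP_STATE_RX, and the TX reporter
 * here touches ptpsq[] only under MLX5E_PTP_STATE_TX, always with the
 * same guard:
 *
 *    if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
 */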
- &priv->channels.port_ptp->ptpsq[0] : - NULL; - if (!generic_ptpsq) + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) goto out; + generic_ptpsq = &ptp_ch->ptpsq[0]; + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); if (err) return err; @@ -346,7 +346,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; int i, tc, err = 0; @@ -375,7 +375,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, } } - if (!ptp_ch) + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) goto close_sqs_nest; for (tc = 0; tc < priv->channels.params.num_tc; tc++) { @@ -460,7 +460,7 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5_rsc_key key = {}; int i, tc, err; @@ -497,7 +497,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, } } - if (ptp_ch) { + if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { for (tc = 0; tc < priv->channels.params.num_tc; tc++) { struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 41db93883fea..86ab4e864fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -30,172 +30,62 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) return work_done; } -static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *rqp, - struct mlx5e_rq_stats *stats, struct mlx5e_params *params, - struct mlx5e_ch_stats *ch_stats, +static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params, struct mlx5e_rq *rq) { - void *rqc_wq = MLX5_ADDR_OF(rqc, rqp->rqc, wq); - struct mlx5_core_dev *mdev = priv->mdev; - struct page_pool_params pp_params = {}; - int node = dev_to_node(mdev->device); - u32 pool_size; - int wq_sz; - int err; - int i; - - rqp->wq.db_numa_node = node; - - rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; - rq->netdev = priv->netdev; - rq->mdev = mdev; - rq->priv = priv; - rq->stats = stats; - rq->clock = &mdev->clock; - rq->tstamp = &priv->tstamp; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - + struct mlx5_core_dev *mdev = t->mdev; + struct mlx5e_priv *priv = t->priv; + + rq->wq_type = params->rq_wq_type; + rq->pdev = mdev->device; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &priv->trap_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); xdp_rxq_info_unused(&rq->xdp_rxq); - - rq->buff.map_dir = DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, NULL); - pool_size = 1 << params->log_rq_mtu_frames; - - err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); - if (err) - return err; - - rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; - - wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); - - rq->wqe.info = rqp->frags_info; - rq->buff.frame0_sz = 
rq->wqe.info.arr[0].frag_stride; - rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), - (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, node); - if (!rq->wqe.frags) { - err = -ENOMEM; - goto err_wq_cyc_destroy; - } - - err = mlx5e_init_di_list(rq, wq_sz, node); - if (err) - goto err_free_frags; - - rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); - mlx5e_rq_set_trap_handlers(rq, params); - - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = node; - pp_params.dev = mdev->device; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. - */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free_di_list; - } - for (i = 0; i < wq_sz; i++) { - struct mlx5e_rx_wqe_cyc *wqe = - mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); - int f; - - for (f = 0; f < rq->wqe.info.num_frags; f++) { - u32 frag_size = rq->wqe.info.arr[f].frag_size | - MLX5_HW_START_PADDING; - - wqe->data[f].byte_count = cpu_to_be32(frag_size); - wqe->data[f].lkey = rq->mkey_be; - } - /* check if num_frags is not a pow of two */ - if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { - wqe->data[f].byte_count = 0; - wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); - wqe->data[f].addr = 0; - } - } - return 0; - -err_free_di_list: - mlx5e_free_di_list(rq); -err_free_frags: - kvfree(rq->wqe.frags); -err_wq_cyc_destroy: - mlx5_wq_destroy(&rq->wq_ctrl); - - return err; } -static void mlx5e_free_trap_rq(struct mlx5e_rq *rq) -{ - page_pool_destroy(rq->page_pool); - mlx5e_free_di_list(rq); - kvfree(rq->wqe.frags); - mlx5_wq_destroy(&rq->wq_ctrl); -} - -static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct napi_struct *napi, - struct mlx5e_rq_stats *stats, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_param, - struct mlx5e_ch_stats *ch_stats, - struct mlx5e_rq *rq) +static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) { + struct mlx5e_rq_param *rq_param = &t->rq_param; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_create_cq_param ccp = {}; struct dim_cq_moder trap_moder = {}; - struct mlx5e_cq *cq = &rq->cq; + struct mlx5e_rq *rq = &t->rq; + int node; int err; - ccp.node = dev_to_node(mdev->device); - ccp.ch_stats = ch_stats; - ccp.napi = napi; + node = dev_to_node(mdev->device); + + ccp.node = node; + ccp.ch_stats = t->stats; + ccp.napi = &t->napi; ccp.ix = 0; - err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, cq); + err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq); if (err) return err; - err = mlx5e_alloc_trap_rq(priv, rq_param, stats, params, ch_stats, rq); + mlx5e_init_trap_rq(t, &t->params, rq); + err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq); if (err) goto err_destroy_cq; - err = mlx5e_create_rq(rq, rq_param); - if (err) - goto err_free_rq; - - err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); - if (err) - goto err_destroy_rq; - return 0; -err_destroy_rq: - mlx5e_destroy_rq(rq); - mlx5e_free_rx_descs(rq); -err_free_rq: - mlx5e_free_trap_rq(rq); err_destroy_cq: - mlx5e_close_cq(cq); + mlx5e_close_cq(&rq->cq); return err; } static void mlx5e_close_trap_rq(struct 
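/*
 * The trap channel loses its hand-rolled RQ allocator here: page_pool
 * creation, WQ setup and WQE initialization are all delegated to the
 * generic mlx5e_open_rq(), and only the plain mlx5e_rq field setup
 * stays local in mlx5e_init_trap_rq(). Open and close collapse to the
 * common pair:
 *
 *    mlx5e_init_trap_rq(t, &t->params, rq);
 *    err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
 *    ...
 *    mlx5e_close_rq(rq);
 *    mlx5e_close_cq(&rq->cq);
 */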
mlx5e_rq *rq) { - mlx5e_destroy_rq(rq); - mlx5e_free_rx_descs(rq); - mlx5e_free_trap_rq(rq); + mlx5e_close_rq(rq); mlx5e_close_cq(&rq->cq); } @@ -228,24 +118,16 @@ static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_destroy_tir(mdev, tir); } -static void mlx5e_activate_trap_rq(struct mlx5e_rq *rq) -{ - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); -} - -static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq) -{ - clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); -} - -static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *t) +static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, + int max_mtu, u16 q_counter, + struct mlx5e_trap *t) { struct mlx5e_params *params = &t->params; params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; - mlx5e_init_rq_type_params(priv->mdev, params); - params->sw_mtu = priv->netdev->max_mtu; - mlx5e_build_rq_param(priv, params, NULL, &t->rq_param); + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param); } static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) @@ -259,7 +141,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) if (!t) return ERR_PTR(-ENOMEM); - mlx5e_build_trap_params(priv, t); + mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t); t->priv = priv; t->mdev = priv->mdev; @@ -271,11 +153,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64); - err = mlx5e_open_trap_rq(priv, &t->napi, - &priv->trap_stats.rq, - &t->params, &t->rq_param, - &priv->trap_stats.ch, - &t->rq); + err = mlx5e_open_trap_rq(priv, t); if (unlikely(err)) goto err_napi_del; @@ -304,15 +182,14 @@ void mlx5e_close_trap(struct mlx5e_trap *trap) static void mlx5e_activate_trap(struct mlx5e_trap *trap) { napi_enable(&trap->napi); - mlx5e_activate_trap_rq(&trap->rq); - napi_schedule(&trap->napi); + mlx5e_activate_rq(&trap->rq); } void mlx5e_deactivate_trap(struct mlx5e_priv *priv) { struct mlx5e_trap *trap = priv->en_trap; - mlx5e_deactivate_trap_rq(&trap->rq); + mlx5e_deactivate_rq(&trap->rq); napi_disable(&trap->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index f4bce1365639..a8315f166696 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -35,13 +35,59 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, } } -static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, +static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, + u16 q_counter, struct mlx5e_channel_param *cparam) { - mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); +} + +static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int rq_xdp_ix; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + 
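/*
 * Same init/open split as for the regular, trap and PTP RQs: a cheap
 * init helper fills the mlx5e_rq fields (here including the XSK pool
 * and the per-channel xskrq stats slot) and registers the XDP rxq
 * info, after which the shared mlx5e_open_rq() performs the actual
 * hardware setup on the channel's NUMA node.
 */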
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->xsk_pool = pool; + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK; + err = mlx5e_rq_set_handlers(rq, params, xsk); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); +} + +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk) +{ + int err; + + err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, @@ -61,14 +107,14 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (!cparam) return -ENOMEM; - mlx5e_build_xsk_cparam(priv, params, xsk, cparam); + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, &c->xskrq.cq); if (unlikely(err)) goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq); + err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); if (unlikely(err)) goto err_close_rx_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f5f2a8fd0046..964558086ad6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -34,6 +34,7 @@ #include "en/port.h" #include "en/params.h" #include "en/xsk/pool.h" +#include "en/ptp.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -368,7 +369,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size; - err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if (err) goto unlock; @@ -1865,13 +1866,19 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + new_channels.params.ptp_rx = new_val; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; return 0; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + else + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); if (err) return err; @@ -1892,11 +1899,6 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { - netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); - return -EINVAL; - } - err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); if (err) return err; @@ -2032,7 +2034,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) mlx5e_num_channels_changed_ctx, NULL); out: if (!err) - 
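/*
 * The ethtool hunks above decouple RX CQE compression from PTP RX
 * instead of forbidding the combination: toggling compression now also
 * flips params.ptp_rx (when RX timestamping is active), and when the
 * value actually changes the channels are switched with
 * mlx5e_ptp_rx_manage_fs_ctx so the steering rules follow. With RX and
 * TX PTP now independent, port_ptp_opened becomes tx_ptp_opened.
 */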
priv->port_ptp_opened = true; + priv->tx_ptp_opened = true; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index cf1d3c9c88af..98f0b857947e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -38,6 +38,7 @@ #include "en.h" #include "en_rep.h" #include "lib/mpfs.h" +#include "en/ptp.h" static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type); @@ -1792,10 +1793,16 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_l2_table; } + err = mlx5e_ptp_alloc_rx_fs(priv); + if (err) + goto err_destroy_vlan_table; + mlx5e_ethtool_init_steering(priv); return 0; +err_destroy_vlan_table: + mlx5e_destroy_vlan_table(priv); err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: @@ -1810,6 +1817,7 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { + mlx5e_ptp_free_rx_fs(priv); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d40fc2672530..db2942b61fd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -87,51 +87,6 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) return true; } -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - params->log_rq_mtu_frames = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - - mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : - BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); -} - -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; - - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; - } - - return true; -} - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? 
- MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_CYCLIC; -} - void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -259,18 +214,17 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, - struct mlx5e_channel *c) +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, sizeof(*rq->mpwqe.info)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM; - mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); return 0; } @@ -419,58 +373,53 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) __free_page(rq->wqe_overflow.page); } -static int mlx5e_alloc_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, - struct mlx5e_rq *rq) + int node, struct mlx5e_rq *rq) { struct page_pool_params pp_params = { 0 }; - struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; int i; - rqp->wq.db_numa_node = cpu_to_node(c->cpu); - - rq->wq_type = params->rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->priv = c->priv; - rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; - rq->icosq = &c->icosq; - rq->ix = c->ix; - rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->xdpsq = &c->rq_xdpsq; - rq->xsk_pool = xsk_pool; - rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ? - mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; - - if (rq->xsk_pool) - rq->stats = &c->priv->channel_stats[c->ix].xskrq; - else - rq->stats = &c->priv->channel_stats[c->ix].rq; + rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); - rq_xdp_ix = rq->ix; - if (xsk) - rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); - if (err < 0) - goto err_rq_xdp_prog; - rq->buff.map_dir = params->xdp_prog ? 
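/*
 * The DMA direction below is the standard XDP rule: with an XDP
 * program attached the RX buffers must be mapped bidirectionally,
 * because XDP_TX transmits straight out of pages the device wrote
 * into; plain RX only ever needs device-to-CPU mappings.
 */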
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@ -480,7 +429,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog; err = mlx5e_alloc_mpwqe_rq_drop_page(rq); if (err) @@ -504,7 +453,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); - err = mlx5e_rq_alloc_mpwqe_info(rq, c); + err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) goto err_rq_mkey; break; @@ -512,7 +461,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; @@ -524,23 +473,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->wqe.frags) { err = -ENOMEM; goto err_rq_wq_destroy; } - err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu)); + err = mlx5e_init_di_list(rq, wq_sz, node); if (err) goto err_rq_frags; - rq->mkey_be = c->mkey_be; + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); } - err = mlx5e_rq_set_handlers(rq, params, xsk); - if (err) - goto err_free_by_rq_type; - if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@ -550,8 +495,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, pp_params.order = 0; pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; + pp_params.nid = node; + pp_params.dev = rq->pdev; pp_params.dma_dir = rq->buff.map_dir; /* page_pool can be used even when there is no rq->xdp_prog, @@ -635,8 +580,6 @@ err_rq_frags: } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); -err_rq_xdp: - xdp_rxq_info_unreg(&rq->xdp_rxq); err_rq_xdp_prog: if (params->xdp_prog) bpf_prog_put(params->xdp_prog); @@ -649,10 +592,12 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) struct bpf_prog *old_prog; int i; - old_prog = rcu_dereference_protected(rq->xdp_prog, - lockdep_is_held(&rq->priv->state_lock)); - if (old_prog) - bpf_prog_put(old_prog); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + } switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -888,13 +833,14 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = rq->mdev; int err; - err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); + err = mlx5e_alloc_rq(params, xsk, param, node, rq); if (err) return err; @@ -906,28 +852,28 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, if (err) goto err_destroy_rq; - if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be 
FPGA */ + if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */ - if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) - __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + __set_bit(MLX5E_RQ_STATE_AM, &rq->state); /* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render * skb->checksum incorrect. */ - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) - __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); /* For CQE compression on striding RQ, use stride index provided by * HW if capability is supported. */ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && - MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) - __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); return 0; @@ -942,7 +888,10 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_trigger_irq(rq->icosq); + if (rq->icosq) + mlx5e_trigger_irq(rq->icosq); + else + napi_schedule(rq->cq.napi); } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@ -954,7 +903,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - cancel_work_sync(&rq->icosq->recover_work); + if (rq->icosq) + cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1187,9 +1137,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? 
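/*
 * The open-coded real-time versus free-running-counter ternary being
 * removed here is folded into the mlx5_sq_ts_translator() and
 * mlx5_rq_ts_translator() helpers used throughout this patch. A sketch
 * of what such a helper amounts to, going by the lines it replaces
 * (not necessarily its actual body):
 *
 *    return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
 *                                        mlx5_timecounter_cyc2time;
 */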
- mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1860,14 +1808,16 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) { - *ccp = (struct mlx5e_create_cq_param) { - .napi = &c->napi, - .ch_stats = c->stats, - .node = cpu_to_node(c->cpu), - .ix = c->ix, - }; + int err; + + err = mlx5e_init_rxq_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); } static int mlx5e_open_queues(struct mlx5e_channel *c, @@ -1930,7 +1880,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, goto err_close_sqs; } - err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); if (err) goto err_close_xdp_sq; @@ -2111,296 +2061,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) kvfree(c); } -#define DEFAULT_FRAG_SIZE (2048) - -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) -{ - u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); - int frag_size_max = DEFAULT_FRAG_SIZE; - u32 buf_size = 0; - int i; - - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - - if (mlx5e_rx_is_linear_skb(params, xsk)) { - int frag_stride; - - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); - - info->arr[0].frag_size = byte_count; - info->arr[0].frag_stride = frag_stride; - info->num_frags = 1; - info->wqe_bulk = PAGE_SIZE / frag_stride; - goto out; - } - - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) - frag_size_max = PAGE_SIZE; - - i = 0; - while (buf_size < byte_count) { - int frag_size = byte_count - buf_size; - - if (i < MLX5E_MAX_RX_FRAGS - 1) - frag_size = min(frag_size, frag_size_max); - - info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - - buf_size += frag_size; - i++; - } - info->num_frags = i; - /* number of different wqes sharing a page */ - info->wqe_bulk = 1 + (info->num_frags % 2); - -out: - info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); - info->log_num_frags = order_base_2(info->num_frags); -} - -static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) -{ - int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; - - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - sz += sizeof(struct mlx5e_rx_wqe_ll); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - sz += sizeof(struct mlx5e_rx_wqe_cyc); - } - - return order_base_2(sz); -} - -static u8 mlx5e_get_rq_log_wq_sz(void *rqc) -{ - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - return MLX5_GET(wq, wq, log_wq_sz); -} - -void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - int ndsegs = 1; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - 
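/*
 * This whole block of queue parameter builders (rq, drop-rq, sq, cq,
 * icosq, xdpsq) is deleted from en_main.c; judging by the updated call
 * sites they are re-homed (presumably in en/params.c) behind mdev-based
 * signatures such as mlx5e_build_rq_param(mdev, params, xsk, q_counter,
 * param), so that PTP, trap and XSK code can build queue parameters
 * without a full mlx5e_priv; the q_counter is passed explicitly instead
 * of being read off priv.
 */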
mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); - MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); - ndsegs = param->frags_info.num_frags; - } - - MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); - MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); - MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); - MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(priv, params, xsk, ¶m->cqp); -} - -static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); -} - -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.hw_objs.pdn); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); -} - -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - bool allow_swp; - - allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || - !!MLX5_IPSEC_DEV(priv->mdev); - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, allow_swp); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); - param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); - if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) - MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); -} - -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_cq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - bool hw_stridx = false; - void *cqc = param->cqc; - u8 log_cq_size; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - log_cq_size = params->log_rq_mtu_frames; - } - - MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(params, 
MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? - MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); - MLX5_SET(cqc, cqc, cqe_comp_en, 1); - } - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); - - mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - mlx5e_build_ico_cq_param(priv, log_wq_size, ¶m->cqp); -} - -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, - struct mlx5e_rq_param *rqp) -{ - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, - order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc)); - default: /* MLX5_WQ_TYPE_CYCLIC */ - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - } -} - -static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev) -{ - if (netdev->hw_features & NETIF_F_HW_TLS_RX) - return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; -} - -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam) -{ - u8 icosq_log_wq_sz, async_icosq_log_wq_sz; - - mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); - - icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); - async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev); - - mlx5e_build_sq_param(priv, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_icosq_param(priv, async_icosq_log_wq_sz, &cparam->async_icosq); -} - int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { @@ -2415,7 +2075,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (!chs->c || !cparam) goto err_free; - mlx5e_build_channel_param(priv, &chs->params, cparam); + mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2427,9 +2087,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) { - err = 
mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port, - &chs->port_ptp); + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); if (err) goto err_close_channels; } @@ -2443,8 +2102,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, return 0; err_close_ptp: - if (chs->port_ptp) - mlx5e_port_ptp_close(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_close(chs->ptp); err_close_channels: for (i--; i >= 0; i--) @@ -2464,8 +2123,8 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs) for (i = 0; i < chs->num; i++) mlx5e_activate_channel(chs->c[i]); - if (chs->port_ptp) - mlx5e_ptp_activate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); } #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ @@ -2492,8 +2151,8 @@ static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) { int i; - if (chs->port_ptp) - mlx5e_ptp_deactivate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp); for (i = 0; i < chs->num; i++) mlx5e_deactivate_channel(chs->c[i]); @@ -2503,11 +2162,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; - if (chs->port_ptp) { - mlx5e_port_ptp_close(chs->port_ptp); - chs->port_ptp = NULL; + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; } - for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]); @@ -2563,12 +2221,12 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int err; int ix; - for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); if (unlikely(err)) goto err_destroy_rqts; @@ -2584,11 +2242,11 @@ err_destroy_rqts: return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i; - for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_rqt(priv, &tirs[i].rqt); } @@ -2671,7 +2329,8 @@ static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, } static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp) + struct mlx5e_redirect_rqt_param rrp, + struct mlx5e_redirect_rqt_param *ptp_rrp) { u32 rqtn; int ix; @@ -2697,11 +2356,17 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } + if (ptp_rrp) { + rqtn = priv->ptp_tir.rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); + } } static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { + bool rx_ptp_support = priv->profile->rx_ptp_support; + struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; struct mlx5e_redirect_rqt_param rrp = { .is_rss = true, { @@ -2711,12 +2376,22 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, } }, }; + struct mlx5e_redirect_rqt_param ptp_rrp; - mlx5e_redirect_rqts(priv, rrp); + if (rx_ptp_support) { + u32 ptp_rqn; + + ptp_rrp.is_rss = false; + ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? 
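/*
 * If the PTP channel exists but has no RX queue, mlx5e_ptp_get_rqn()
 * fails and the dedicated PTP RQT is pointed at the drop RQ instead,
 * keeping the steering destination valid while discarding traffic
 * rather than referencing a queue that was never opened.
 */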
+ priv->drop_rq.rqn : ptp_rqn; + ptp_rrp_p = &ptp_rrp; + } + mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); } static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) { + bool rx_ptp_support = priv->profile->rx_ptp_support; struct mlx5e_redirect_rqt_param drop_rrp = { .is_rss = false, { @@ -2724,7 +2399,7 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) }, }; - mlx5e_redirect_rqts(priv, drop_rrp); + mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL); } static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { @@ -3013,6 +2688,8 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; + if (priv->channels.params.ptp_rx) + num_rxqs++; mlx5e_netdev_set_tcs(netdev, nch, ntc); @@ -3098,11 +2775,14 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) } } - if (!priv->channels.port_ptp) + if (!priv->channels.ptp) + return; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) return; for (tc = 0; tc < num_tc; tc++) { - struct mlx5e_port_ptp *c = priv->channels.port_ptp; + struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; priv->txq2sq[sq->txq_ix] = sq; @@ -3376,7 +3056,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(priv, &rq_param); + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param); err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); if (err) @@ -3589,7 +3269,7 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { struct mlx5e_tir *tir; void *tirc; @@ -3603,7 +3283,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) if (!in) return -ENOMEM; - for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { memset(in, 0, inlen); tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); @@ -3641,11 +3321,11 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i; - for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_tir(priv->mdev, &tirs[i]); } @@ -3826,15 +3506,22 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) s->tx_dropped += sq_stats->dropped; } } - if (priv->port_ptp_opened) { + if (priv->tx_ptp_opened) { for (i = 0; i < priv->max_opened_tc; i++) { - struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i]; + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; s->tx_dropped += sq_stats->dropped; } } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } } void @@ -4219,7 +3906,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_channels.params); if 
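/*
 * mlx5e_create/destroy_direct_{rqts,tirs}() now take an explicit count
 * instead of iterating priv->max_nch internally, so the same helpers
 * can serve both the per-channel direct TIRs and the single extra PTP
 * TIR (priv->ptp_tir), which is presumably created with a count of one
 * elsewhere in this patch.
 */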
(err) goto out; @@ -4283,9 +3970,18 @@ static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); +} + int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { + struct mlx5e_channels new_channels = {}; struct hwtstamp_config config; + bool rx_cqe_compress_def; int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || @@ -4305,11 +4001,13 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) } mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + new_channels.params.ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -4326,15 +4024,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) - netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } + new_channels.params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@ -4342,6 +4032,20 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) return -ERANGE; } + if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx) + goto out; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx, + &new_channels.params.ptp_rx); + if (err) { + mutex_unlock(&priv->state_lock); + return err; + } +out: memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock); @@ -4884,93 +4588,6 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, indirection_rqt[i] = i % num_channels; } -static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) -{ - u32 link_speed = 0; - u32 pci_bw = 0; - - mlx5e_port_max_linkspeed(mdev, &link_speed); - pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); - mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - -#define MLX5E_SLOW_PCI_RATIO (2) - - return link_speed && pci_bw && - link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; -} - -static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - 
moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) -{ - return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - DIM_CQ_PERIOD_MODE_START_FROM_CQE : - DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->tx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); - } else { - params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); - } -} - -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->rx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); - } else { - params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); - } -} - -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_tx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, - params->tx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_rx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -4983,25 +4600,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - /* Prefer Striding RQ, unless any of the following holds: - * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. - * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. - * - * No XSK params: checking the availability of striding RQ in general. 
- */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || - !mlx5e_rx_is_linear_skb(params, NULL))) - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); - mlx5e_set_rq_type(mdev, params); - mlx5e_init_rq_type_params(mdev, params); -} - void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels) { @@ -5347,6 +4945,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_create_q_counters(priv); @@ -5361,7 +4960,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -5369,22 +4968,30 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; - err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_direct_tirs; - err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_xsk_rqts; + err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_xsk_tirs; + + err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_ptp_rqt; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_xsk_tirs; + goto err_destroy_ptp_direct_tir; } err = mlx5e_tc_nic_init(priv); @@ -5405,16 +5012,20 @@ err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_ptp_direct_tir: + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); +err_destroy_ptp_rqt: + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -5426,14 +5037,18 @@ err_destroy_q_counters: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, 
max_nch); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -5541,6 +5156,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, + .rx_ptp_support = true, }; /* mlx5e generic netdev management API (move to en_common.c) */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4cc902e0d71b..9ef8e4a671a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -40,6 +40,7 @@ #include "eswitch.h" #include "en.h" #include "en_rep.h" +#include "en/params.h" #include "en/txrx.h" #include "en_tc.h" #include "en/rep/tc.h" @@ -752,6 +753,7 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_init_l2_addr(priv); @@ -766,7 +768,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -774,7 +776,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; @@ -799,11 +801,11 @@ err_destroy_root_ft: err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -813,13 +815,15 @@ err_close_drop_rq: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1058,6 +1062,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, + .rx_ptp_support = false, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@ -1078,6 +1083,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, + .rx_ptp_support = false, 
}; /* e-Switch vport representors */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 92c5b81427b9..f67e51d8291a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -407,13 +407,21 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, { int i; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return; - mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch); - for (i = 0; i < priv->max_opened_tc; i++) { - mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]); + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq); /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ barrier(); @@ -1760,6 +1768,38 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, +}; + static const struct counter_desc qos_sq_stats_desc[] = { { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, { 
MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, @@ -1805,6 +1845,7 @@ static const struct counter_desc qos_sq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) #define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) @@ -1851,32 +1892,46 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { - return priv->port_ptp_opened ? - NUM_PTP_CH_STATS + - ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) : - 0; + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) { int i, tc; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx; for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, ptp_ch_stats_desc[i].format); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_sq_stats_desc[i].format, tc); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_cq_stats_desc[i].format, tc); + ptp_rq_stats_desc[i].format); + } return idx; } @@ -1884,26 +1939,33 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) { int i, tc; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx; for (i = 0; i < NUM_PTP_CH_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch, + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, ptp_ch_stats_desc, i); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc], - ptp_sq_stats_desc, i); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc], - ptp_cq_stats_desc, i); - + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 93c41312fb03..ca398eac09c1 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -54,6 +54,7 @@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4bd882a1018c..dbc06c71c170 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -447,11 +447,11 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) { - u32 *indirection_rqt, rqn; struct mlx5e_priv *priv = hp->func_priv; int i, ix, sz = MLX5E_INDIR_RQT_SIZE; + u32 *indirection_rqt, rqn; - indirection_rqt = kzalloc(sz, GFP_KERNEL); + indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL); if (!indirection_rqt) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d2efe2455955..8ba62671f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -133,6 +133,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + struct mlx5e_ptp *ptp_channel; + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); @@ -142,10 +144,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, return txq_ix; } - if (unlikely(priv->channels.port_ptp)) - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + ptp_channel = READ_ONCE(priv->channels.ptp); + if (unlikely(ptp_channel) && + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. 
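The mlx5e_select_queue() hunk above is the consumer side of the new PTP channel lifecycle: the channel pointer is sampled once with READ_ONCE() and the MLX5E_PTP_STATE_TX bit is checked before any timestamp-requesting skb is steered to a PTP send queue, so the channel may be created or torn down while transmit is running. Below is a minimal userspace sketch of that sample-once-then-gate pattern; the struct layout, bit helper, and C11 acquire load are stand-ins for the driver's real definitions, not mlx5e API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { PTP_STATE_TX };                         /* stand-in for MLX5E_PTP_STATE_TX */

struct ptp_channel {
	unsigned long state;                   /* bitmap: bit set => feature active */
	int txq_ix;                            /* queue index reserved for PTP TX */
};

struct channels {
	_Atomic(struct ptp_channel *) ptp;     /* published/cleared on channel switch */
};

static bool test_bit(int nr, const unsigned long *addr)
{
	return *addr & (1UL << nr);
}

/* Shaped like mlx5e_select_queue(): sample the pointer once, then gate
 * every PTP decision on that one sample plus the TX state bit. */
static int select_queue(struct channels *chs, bool want_hw_tstamp, int dflt)
{
	struct ptp_channel *ptp =
		atomic_load_explicit(&chs->ptp, memory_order_acquire);

	if (ptp && test_bit(PTP_STATE_TX, &ptp->state) && want_hw_tstamp)
		return ptp->txq_ix;
	return dflt;
}

int main(void)
{
	struct ptp_channel ch = { .state = 1UL << PTP_STATE_TX, .txq_ix = 8 };
	struct channels chs;

	atomic_store(&chs.ptp, NULL);
	printf("%d\n", select_queue(&chs, true, 3));   /* 3: no PTP channel yet */
	atomic_store(&chs.ptp, &ch);
	printf("%d\n", select_queue(&chs, true, 3));   /* 8: steered to the PTP SQ */
	return 0;
}

Sampling once matters because a concurrent channel switch can clear channels.ptp between two naive dereferences; every later use in the function works on the same snapshot.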
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f5517ea2f6be..dbd910656574 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -105,7 +105,7 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */ +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */ #define KERNEL_NIC_PRIO_NUM_LEVELS 7 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a0a851640804..9ff163c5bcde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -340,7 +340,7 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev) return -EIO; } - mlx5_core_info(dev, "health revovery succeded\n"); + mlx5_core_info(dev, "health recovery succeeded\n"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 48303286c133..b65b0cefc5b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -33,6 +33,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" #include "ipoib.h" #define IB_DEFAULT_Q_KEY 0xb1b @@ -372,6 +373,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_create_q_counters(priv); @@ -386,7 +388,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -394,7 +396,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; @@ -405,11 +407,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -421,10 +423,12 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -469,6 +473,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rq_groups = 
MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, + .rx_ptp_support = false, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 3d0a18a0bed4..18ee21b06a00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .rx_ptp_support = false, }; const struct mlx5e_profile *mlx5i_pkey_get_profile(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index a12c7da618a7..ceae6bc378e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -105,4 +105,15 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, } #endif +static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} + +static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index efc7acb4842c..bca0354482cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2668,6 +2668,11 @@ mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key, params); + if (trigger_node->trigger.local_port) { + NL_SET_ERR_MSG_MOD(extack, "Sampling already enabled on port"); + return -EINVAL; + } + if (trigger_node->params.psample_group != params->psample_group || trigger_node->params.truncate != params->truncate || trigger_node->params.rate != params->rate || diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c index ce58a795c6fc..07b371cd9818 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c @@ -238,6 +238,11 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, flower_prio_valid = true; } + if (protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG(f->common.extack, "matchall rules only supported with 'all' protocol"); + return -EOPNOTSUPP; + } + mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL); if (!mall_entry) return -ENOMEM; @@ -245,37 +250,34 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, mall_entry->priority = f->common.prio; mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block); + if (flower_prio_valid && mall_entry->ingress && + mall_entry->priority >= flower_min_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + if (flower_prio_valid && !mall_entry->ingress && + mall_entry->priority <= flower_max_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + act = &f->rule->action.entries[0]; - if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { - if 
(flower_prio_valid && mall_entry->ingress && - mall_entry->priority >= flower_min_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } - if (flower_prio_valid && !mall_entry->ingress && - mall_entry->priority <= flower_max_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } + switch (act->id) { + case FLOW_ACTION_MIRRED: mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR; mall_entry->mirror.to_dev = act->dev; - } else if (act->id == FLOW_ACTION_SAMPLE && - protocol == htons(ETH_P_ALL)) { - if (flower_prio_valid && - mall_entry->priority >= flower_min_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } + break; + case FLOW_ACTION_SAMPLE: mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE; mall_entry->sample.params.psample_group = act->sample.psample_group; mall_entry->sample.params.truncate = act->sample.truncate; mall_entry->sample.params.trunc_size = act->sample.trunc_size; mall_entry->sample.params.rate = act->sample.rate; - } else { + break; + default: err = -EOPNOTSUPP; goto errout; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6ccaa194733b..41259c0004d1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5068,8 +5068,9 @@ mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, /* No point in trying an atomic replacement if the idle timer interval * is smaller than the interval in which we query and clear activity. */ - force = info->nh_res_bucket->idle_timer_ms < - MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + if (!force && info->nh_res_bucket->idle_timer_ms < + MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL) + force = true; adj_index = nh->nhgi->adj_index + bucket_index; err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl); diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index c5de8f46cdd3..91a755efe2e6 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -730,8 +730,8 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev, static int lan743x_ethtool_set_eee(struct net_device *netdev, struct ethtool_eee *eee) { - struct lan743x_adapter *adapter = netdev_priv(netdev); - struct phy_device *phydev = NULL; + struct lan743x_adapter *adapter; + struct phy_device *phydev; u32 buf = 0; int ret = 0; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 1c3e204d727c..e7ab5f3f73fd 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3004,7 +3004,7 @@ static int lan743x_pm_suspend(struct device *dev) lan743x_pm_set_wol(adapter); /* Host sets PME_En, put D3hot */ - return pci_prepare_to_sleep(pdev);; + return pci_prepare_to_sleep(pdev); } static int lan743x_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 49fd843c4c8a..b85733942053 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -485,7 +485,6 @@ static int moxart_mac_probe(struct platform_device *pdev) ndev->base_addr = res->start; 
priv->base = devm_ioremap_resource(p_dev, res); if (IS_ERR(priv->base)) { - dev_err(p_dev, "devm_ioremap_resource failed\n"); ret = PTR_ERR(priv->base); goto init_fail; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1a36b416fd9b..8d06ffaf318a 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -484,7 +484,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port, DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG); /* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of - * reset */ + * reset + */ ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed), DEV_CLOCK_CFG); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 76d13af46a7a..3e9baff07100 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -18,7 +18,6 @@ struct netdev_bpf; struct netlink_ext_ack; struct pci_dev; struct sk_buff; -struct sk_buff; struct nfp_app; struct nfp_cpp; struct nfp_pf; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d3cbb4215f5c..e72fd33a214c 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1044,7 +1044,8 @@ static netdev_tx_t lpc_eth_hard_start_xmit(struct sk_buff *skb, if (pldat->num_used_tx_buffs >= (ENET_TX_DESC - 1)) { /* This function should never be called when there are no - buffers */ + * buffers + */ netif_stop_queue(ndev); spin_unlock_irq(&pldat->lock); WARN(1, "BUG! TX request when no free TX buffers!\n"); @@ -1318,7 +1319,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) pldat->dma_buff_size = PAGE_ALIGN(pldat->dma_buff_size); /* Allocate a chunk of memory for the DMA ethernet buffers - and descriptors */ + * and descriptors + */ pldat->dma_buff_base_v = dma_alloc_coherent(dev, pldat->dma_buff_size, &dma_handle, @@ -1365,7 +1367,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_mii_mngt_reset(pldat); /* Force default PHY interface setup in chip, this will probably be - changed by the PHY driver */ + * changed by the PHY driver + */ pldat->link = 0; pldat->speed = 100; pldat->duplex = DUPLEX_FULL; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 0532f7cf086d..0e8e88c69e1c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t) return; hb = ionic_heartbeat_check(ionic); + dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n", + __func__, hb, netif_running(lif->netdev), + test_bit(IONIC_LIF_F_UP, lif->state)); if (hb >= 0 && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) @@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic) return -EFAULT; } - idev->last_fw_status = 0xff; timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; + + /* set times to ensure the first check will proceed */ + atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); + idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; + /* init as ready, so no transition if the first check succeeds */ + idev->last_fw_hb = 0; + idev->fw_hb_ready = true; + idev->fw_status_ready = true; + mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); @@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic) int 
ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - unsigned long hb_time; + unsigned long check_time, last_check_time; + bool fw_status_ready, fw_hb_ready; u8 fw_status; - u32 hb; + u32 fw_hb; - /* wait a little more than one second before testing again */ - hb_time = jiffies; - if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) + /* wait at least one second before testing again */ + check_time = jiffies; + last_check_time = atomic_long_read(&idev->last_check_time); +do_check_time: + if (time_before(check_time, last_check_time + HZ)) return 0; + if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time, + &last_check_time, check_time)) { + /* if called concurrently, only the first should proceed. */ + dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__); + goto do_check_time; + } /* firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status != 0xff) - fw_status &= IONIC_FW_STS_F_RUNNING; /* use only the run bit */ + fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); /* is this a transition? */ - if (fw_status != idev->last_fw_status && - idev->last_fw_status != 0xff) { + if (fw_status_ready != idev->fw_status_ready) { struct ionic_lif *lif = ionic->lif; bool trigger = false; - if (!fw_status || fw_status == 0xff) { + idev->fw_status_ready = fw_status_ready; + + if (!fw_status_ready) { dev_info(ionic->dev, "FW stopped %u\n", fw_status); if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) trigger = true; @@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic) struct ionic_deferred_work *work; work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - dev_err(ionic->dev, "LIF reset trigger dropped\n"); - } else { + if (work) { work->type = IONIC_DW_TYPE_LIF_RESET; - if (fw_status & IONIC_FW_STS_F_RUNNING && - fw_status != 0xff) - work->fw_status = 1; + work->fw_status = fw_status_ready; ionic_lif_deferred_enqueue(&lif->deferred, work); } } } - idev->last_fw_status = fw_status; - if (!fw_status || fw_status == 0xff) + if (!fw_status_ready) return -ENXIO; - /* early FW has no heartbeat, else FW will return non-zero */ - hb = ioread32(&idev->dev_info_regs->fw_heartbeat); - if (!hb) + /* wait at least one watchdog period since the last heartbeat */ + last_check_time = idev->last_hb_time; + if (time_before(check_time, last_check_time + ionic->watchdog_period)) return 0; - /* are we stalled?
*/ - if (hb == idev->last_hb) { - /* only complain once for each stall seen */ - if (idev->last_hb_time != 1) { - dev_info(ionic->dev, "FW heartbeat stalled at %d\n", - idev->last_hb); - idev->last_hb_time = 1; - } + fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat); + fw_hb_ready = fw_hb != idev->last_fw_hb; - return -ENXIO; + /* early FW version had no heartbeat, so fake it */ + if (!fw_hb_ready && !fw_hb) + fw_hb_ready = true; + + dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n", + __func__, fw_hb, idev->last_fw_hb, fw_hb_ready); + + idev->last_fw_hb = fw_hb; + + /* log a transition */ + if (fw_hb_ready != idev->fw_hb_ready) { + idev->fw_hb_ready = fw_hb_ready; + if (!fw_hb_ready) + dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb); + else + dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb); } - if (idev->last_hb_time == 1) - dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb); + if (!fw_hb_ready) + return -ENXIO; - idev->last_hb = hb; - idev->last_hb_time = hb_time; + idev->last_hb_time = check_time; return 0; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index ca7e55455165..0c0533737b2b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -4,6 +4,7 @@ #ifndef _IONIC_DEV_H_ #define _IONIC_DEV_H_ +#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/workqueue.h> @@ -135,9 +136,11 @@ struct ionic_dev { union ionic_dev_info_regs __iomem *dev_info_regs; union ionic_dev_cmd_regs __iomem *dev_cmd_regs; + atomic_long_t last_check_time; unsigned long last_hb_time; - u32 last_hb; - u8 last_fw_status; + u32 last_fw_hb; + bool fw_hb_ready; + bool fw_status_ready; u64 __iomem *db_pages; dma_addr_t phy_db_pages; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 889d234e2ffa..a51be25723a5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -676,20 +676,20 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) err = -ENOMEM; lif->txqcqs = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, - sizeof(struct ionic_qcq *), GFP_KERNEL); + sizeof(*lif->txqcqs), GFP_KERNEL); if (!lif->txqcqs) goto err_out; lif->rxqcqs = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif, - sizeof(struct ionic_qcq *), GFP_KERNEL); + sizeof(*lif->rxqcqs), GFP_KERNEL); if (!lif->rxqcqs) goto err_out; lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, - sizeof(struct ionic_tx_stats), GFP_KERNEL); + sizeof(*lif->txqstats), GFP_KERNEL); if (!lif->txqstats) goto err_out; lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif, - sizeof(struct ionic_rx_stats), GFP_KERNEL); + sizeof(*lif->rxqstats), GFP_KERNEL); if (!lif->rxqstats) goto err_out; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index 308b4ac6c57b..ed9cf93d9acd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -177,31 +177,42 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = { #define MAX_Q(lif) ((lif)->netdev->real_num_tx_queues) +static void ionic_add_lif_txq_stats(struct ionic_lif *lif, int q_num, + struct ionic_lif_sw_stats *stats) +{ + struct ionic_tx_stats *txstats = &lif->txqstats[q_num]; + + stats->tx_packets += txstats->pkts; + stats->tx_bytes += txstats->bytes; + stats->tx_tso += 
txstats->tso; + stats->tx_tso_bytes += txstats->tso_bytes; + stats->tx_csum_none += txstats->csum_none; + stats->tx_csum += txstats->csum; +} + +static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num, + struct ionic_lif_sw_stats *stats) +{ + struct ionic_rx_stats *rxstats = &lif->rxqstats[q_num]; + + stats->rx_packets += rxstats->pkts; + stats->rx_bytes += rxstats->bytes; + stats->rx_csum_none += rxstats->csum_none; + stats->rx_csum_complete += rxstats->csum_complete; + stats->rx_csum_error += rxstats->csum_error; +} + static void ionic_get_lif_stats(struct ionic_lif *lif, struct ionic_lif_sw_stats *stats) { - struct ionic_tx_stats *txstats; - struct ionic_rx_stats *rxstats; struct rtnl_link_stats64 ns; int q_num; memset(stats, 0, sizeof(*stats)); for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txstats = &lif->txqstats[q_num]; - stats->tx_packets += txstats->pkts; - stats->tx_bytes += txstats->bytes; - stats->tx_tso += txstats->tso; - stats->tx_tso_bytes += txstats->tso_bytes; - stats->tx_csum_none += txstats->csum_none; - stats->tx_csum += txstats->csum; - - rxstats = &lif->rxqstats[q_num]; - stats->rx_packets += rxstats->pkts; - stats->rx_bytes += rxstats->bytes; - stats->rx_csum_none += rxstats->csum_none; - stats->rx_csum_complete += rxstats->csum_complete; - stats->rx_csum_error += rxstats->csum_error; + ionic_add_lif_txq_stats(lif, q_num, stats); + ionic_add_lif_rxq_stats(lif, q_num, stats); } ionic_get_stats64(lif->netdev, &ns); @@ -214,16 +225,12 @@ static void ionic_get_lif_stats(struct ionic_lif *lif, static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) { - u64 total = 0; + u64 total = 0, tx_queues = MAX_Q(lif), rx_queues = MAX_Q(lif); /* lif stats */ total += IONIC_NUM_LIF_STATS; - - /* tx stats */ - total += MAX_Q(lif) * IONIC_NUM_TX_STATS; - - /* rx stats */ - total += MAX_Q(lif) * IONIC_NUM_RX_STATS; + total += tx_queues * IONIC_NUM_TX_STATS; + total += rx_queues * IONIC_NUM_RX_STATS; /* port stats */ total += IONIC_NUM_PORT_STATS; @@ -231,13 +238,13 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) if (test_bit(IONIC_LIF_F_UP, lif->state) && test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { /* tx debug stats */ - total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + total += tx_queues * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_TX_Q_STATS + IONIC_NUM_DBG_INTR_STATS + IONIC_MAX_NUM_SG_CNTR); /* rx debug stats */ - total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + total += rx_queues * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_DBG_INTR_STATS + IONIC_NUM_DBG_NAPI_STATS + IONIC_MAX_NUM_NAPI_CNTR); @@ -315,13 +322,99 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) ionic_sw_stats_get_rx_strings(lif, buf, q_num); } +static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf, + int q_num) +{ + struct ionic_tx_stats *txstats; + struct ionic_qcq *txqcq; + int i; + + txstats = &lif->txqstats[q_num]; + + for (i = 0; i < IONIC_NUM_TX_STATS; i++) { + **buf = IONIC_READ_STAT64(txstats, &ionic_tx_stats_desc[i]); + (*buf)++; + } + + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + txqcq = lif->txqcqs[q_num]; + for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->q, + &ionic_txq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->intr, + 
&ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->napi_stats, + &ionic_dbg_napi_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + **buf = txqcq->napi_stats.work_done_cntr[i]; + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { + **buf = txstats->sg_cntr[i]; + (*buf)++; + } +} + +static void ionic_sw_stats_get_rxq_values(struct ionic_lif *lif, u64 **buf, + int q_num) +{ + struct ionic_rx_stats *rxstats; + struct ionic_qcq *rxqcq; + int i; + + rxstats = &lif->rxqstats[q_num]; + + for (i = 0; i < IONIC_NUM_RX_STATS; i++) { + **buf = IONIC_READ_STAT64(rxstats, &ionic_rx_stats_desc[i]); + (*buf)++; + } + + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + rxqcq = lif->rxqcqs[q_num]; + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->intr, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->napi_stats, + &ionic_dbg_napi_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + **buf = rxqcq->napi_stats.work_done_cntr[i]; + (*buf)++; + } +} + static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) { struct ionic_port_stats *port_stats; struct ionic_lif_sw_stats lif_stats; - struct ionic_qcq *txqcq, *rxqcq; - struct ionic_tx_stats *txstats; - struct ionic_rx_stats *rxstats; int i, q_num; ionic_get_lif_stats(lif, &lif_stats); @@ -338,73 +431,11 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) (*buf)++; } - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txstats = &lif->txqstats[q_num]; - - for (i = 0; i < IONIC_NUM_TX_STATS; i++) { - **buf = IONIC_READ_STAT64(txstats, - &ionic_tx_stats_desc[i]); - (*buf)++; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - txqcq = lif->txqcqs[q_num]; - for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->q, - &ionic_txq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->cq, - &ionic_dbg_cq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->intr, - &ionic_dbg_intr_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { - **buf = txstats->sg_cntr[i]; - (*buf)++; - } - } - } + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_txq_values(lif, buf, q_num); - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - rxstats = &lif->rxqstats[q_num]; - - for (i = 0; i < IONIC_NUM_RX_STATS; i++) { - **buf = IONIC_READ_STAT64(rxstats, - &ionic_rx_stats_desc[i]); - (*buf)++; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - rxqcq = lif->rxqcqs[q_num]; - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->cq, - &ionic_dbg_cq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->intr, - &ionic_dbg_intr_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->napi_stats, - &ionic_dbg_napi_stats_desc[i]); - 
(*buf)++; - } - for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { - **buf = rxqcq->napi_stats.work_done_cntr[i]; - (*buf)++; - } - } - } + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_rxq_values(lif, buf, q_num); } const struct ionic_stats_group_intf ionic_stats_groups[] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 5985f7c504a9..42d29cd2ca47 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -609,6 +609,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, struct ionic_desc_info *desc_info) { struct ionic_buf_info *buf_info = desc_info->bufs; + struct ionic_tx_stats *stats = q_to_tx_stats(q); struct device *dev = q->dev; dma_addr_t dma_addr; unsigned int nfrags; @@ -616,8 +617,10 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, int frag_idx; dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) + if (dma_mapping_error(dev, dma_addr)) { + stats->dma_map_err++; return -EIO; + } buf_info->dma_addr = dma_addr; buf_info->len = skb_headlen(skb); buf_info++; @@ -626,8 +629,10 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, nfrags = skb_shinfo(skb)->nr_frags; for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (dma_mapping_error(dev, dma_addr)) + if (dma_mapping_error(dev, dma_addr)) { + stats->dma_map_err++; goto dma_fail; + } buf_info->dma_addr = dma_addr; buf_info->len = skb_frag_size(frag); buf_info++; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4d952036ba82..01ac1e93d27a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -7,7 +7,6 @@ #include <linux/crash_dump.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/version.h> #include <linux/device.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7a8bb7e833f3..1cd5c6f6d44f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2736,11 +2736,6 @@ static void rtl_hw_start_8168c_2(struct rtl8169_private *tp) __rtl_hw_start_8168cp(tp); } -static void rtl_hw_start_8168c_3(struct rtl8169_private *tp) -{ - rtl_hw_start_8168c_2(tp); -} - static void rtl_hw_start_8168c_4(struct rtl8169_private *tp) { rtl_set_def_aspm_entry_latency(tp); @@ -3653,7 +3648,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1, [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1, [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2, - [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3, + [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_2, [RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4, [RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2, [RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3, diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 891b49281bc6..cbde83f620a0 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2204,7 +2204,7 @@ static int try_toggle_control_gpio(struct device *dev, const char *name, int index, int value, unsigned int nsdelay) { - struct gpio_desc *gpio = *desc; + struct gpio_desc *gpio; enum 
gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH; gpio = devm_gpiod_get_index_optional(dev, name, index, flags); diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d065b11b7b10..c54a56b732b3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -259,6 +259,9 @@ struct stmmac_safety_stats { #define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY iface */ #define DEFAULT_DMA_PBL 8 +/* MSI defines */ +#define STMMAC_MSI_VEC_MAX 32 + /* PCS status and mask defines */ #define PCS_ANE_IRQ BIT(2) /* PCS Auto-Negotiation */ #define PCS_LINK_IRQ BIT(1) /* PCS Link */ @@ -309,6 +312,24 @@ enum dma_irq_status { handle_tx = 0x8, }; +enum dma_irq_dir { + DMA_DIR_RX = 0x1, + DMA_DIR_TX = 0x2, + DMA_DIR_RXTX = 0x3, +}; + +enum request_irq_err { + REQ_IRQ_ERR_ALL, + REQ_IRQ_ERR_TX, + REQ_IRQ_ERR_RX, + REQ_IRQ_ERR_SFTY_UE, + REQ_IRQ_ERR_SFTY_CE, + REQ_IRQ_ERR_LPI, + REQ_IRQ_ERR_WOL, + REQ_IRQ_ERR_MAC, + REQ_IRQ_ERR_NO, +}; + /* EEE and LPI defines */ #define CORE_IRQ_TX_PATH_IN_LPI_MODE (1 << 0) #define CORE_IRQ_TX_PATH_EXIT_LPI_MODE (1 << 1) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 992294d25706..add95e20548d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -22,8 +22,13 @@ #define PCH_PTP_CLK_FREQ_19_2MHZ (GMAC_GPO0) #define PCH_PTP_CLK_FREQ_200MHZ (0) +/* Cross-timestamping defines */ +#define ART_CPUID_LEAF 0x15 +#define EHL_PSE_ART_MHZ 19200000 + struct intel_priv_data { int mdio_adhoc_addr; /* mdio address for serdes & etc */ + unsigned long crossts_adj; bool is_pse; }; @@ -340,9 +345,26 @@ static int intel_crosststamp(ktime_t *device, *system = convert_art_to_tsc(art_time); } + system->cycles *= intel_priv->crossts_adj; + return 0; } +static void intel_mgbe_pse_crossts_adj(struct intel_priv_data *intel_priv, + int base) +{ + if (boot_cpu_has(X86_FEATURE_ART)) { + unsigned int art_freq; + + /* On systems that support ART, ART frequency can be obtained + * from ECX register of CPUID leaf (0x15). 
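+ * (Editorial aside, not part of the original patch: per the Intel SDM,
+ * CPUID leaf 0x15 reports the TSC-to-crystal ratio in EBX:EAX and, when
+ * enumerated, the crystal (ART) frequency in Hz in ECX; cpuid_ecx()
+ * below reads that Hz value, and do_div() converts it into a ratio
+ * against the PSE reference clock passed in as 'base'.)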
+ */ + art_freq = cpuid_ecx(ART_CPUID_LEAF); + do_div(art_freq, base); + intel_priv->crossts_adj = art_freq; + } +} + static void common_default_data(struct plat_stmmacenet_data *plat) { plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ @@ -492,6 +514,14 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_crossts = true; plat->crosststamp = intel_crosststamp; + /* Setup MSI vector offset specific to Intel mGbE controller */ + plat->msi_mac_vec = 29; + plat->msi_lpi_vec = 28; + plat->msi_sfty_ce_vec = 27; + plat->msi_sfty_ue_vec = 26; + plat->msi_rx_base_vec = 0; + plat->msi_tx_base_vec = 1; + return 0; } @@ -543,6 +573,8 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, plat->bus_id = 2; plat->addr64 = 32; + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + return ehl_common_data(pdev, plat); } @@ -579,6 +611,8 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, plat->bus_id = 3; plat->addr64 = 32; + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + return ehl_common_data(pdev, plat); } @@ -776,6 +810,79 @@ static const struct stmmac_pci_info quark_info = { .setup = quark_default_data, }; +static int stmmac_config_single_msi(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + dev_info(&pdev->dev, "%s: Single IRQ enablement failed\n", + __func__); + return ret; + } + + res->irq = pci_irq_vector(pdev, 0); + res->wol_irq = res->irq; + plat->multi_msi_en = 0; + dev_info(&pdev->dev, "%s: Single IRQ enablement successful\n", + __func__); + + return 0; +} + +static int stmmac_config_multi_msi(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + int i; + + if (plat->msi_rx_base_vec >= STMMAC_MSI_VEC_MAX || + plat->msi_tx_base_vec >= STMMAC_MSI_VEC_MAX) { + dev_info(&pdev->dev, "%s: Invalid RX & TX vector defined\n", + __func__); + return -1; + } + + ret = pci_alloc_irq_vectors(pdev, 2, STMMAC_MSI_VEC_MAX, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + dev_info(&pdev->dev, "%s: multi MSI enablement failed\n", + __func__); + return ret; + } + + /* For RX MSI */ + for (i = 0; i < plat->rx_queues_to_use; i++) { + res->rx_irq[i] = pci_irq_vector(pdev, + plat->msi_rx_base_vec + i * 2); + } + + /* For TX MSI */ + for (i = 0; i < plat->tx_queues_to_use; i++) { + res->tx_irq[i] = pci_irq_vector(pdev, + plat->msi_tx_base_vec + i * 2); + } + + if (plat->msi_mac_vec < STMMAC_MSI_VEC_MAX) + res->irq = pci_irq_vector(pdev, plat->msi_mac_vec); + if (plat->msi_wol_vec < STMMAC_MSI_VEC_MAX) + res->wol_irq = pci_irq_vector(pdev, plat->msi_wol_vec); + if (plat->msi_lpi_vec < STMMAC_MSI_VEC_MAX) + res->lpi_irq = pci_irq_vector(pdev, plat->msi_lpi_vec); + if (plat->msi_sfty_ce_vec < STMMAC_MSI_VEC_MAX) + res->sfty_ce_irq = pci_irq_vector(pdev, plat->msi_sfty_ce_vec); + if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX) + res->sfty_ue_irq = pci_irq_vector(pdev, plat->msi_sfty_ue_vec); + + plat->multi_msi_en = 1; + dev_info(&pdev->dev, "%s: multi MSI enablement successful\n", __func__); + + return 0; +} + /** * intel_eth_pci_probe * @@ -817,7 +924,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -832,19 +939,26 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, 
plat->bsp_priv = intel_priv; intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR; + intel_priv->crossts_adj = 1; + + /* Initialize all MSI vectors to invalid so that it can be set + * according to platform data settings below. + * Note: MSI vector takes value from 0 upto 31 (STMMAC_MSI_VEC_MAX) + */ + plat->msi_mac_vec = STMMAC_MSI_VEC_MAX; + plat->msi_wol_vec = STMMAC_MSI_VEC_MAX; + plat->msi_lpi_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ce_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ue_vec = STMMAC_MSI_VEC_MAX; + plat->msi_rx_base_vec = STMMAC_MSI_VEC_MAX; + plat->msi_tx_base_vec = STMMAC_MSI_VEC_MAX; ret = info->setup(pdev, plat); if (ret) return ret; - ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); - if (ret < 0) - return ret; - memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[0]; - res.wol_irq = pci_irq_vector(pdev, 0); - res.irq = pci_irq_vector(pdev, 0); if (plat->eee_usecs_rate > 0) { u32 tx_lpi_usec; @@ -853,13 +967,28 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, writel(tx_lpi_usec, res.addr + GMAC_1US_TIC_COUNTER); } + ret = stmmac_config_multi_msi(pdev, plat, &res); + if (ret) { + ret = stmmac_config_single_msi(pdev, plat, &res); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable IRQ\n", + __func__); + goto err_alloc_irq; + } + } + ret = stmmac_dvr_probe(&pdev->dev, plat, &res); if (ret) { - pci_free_irq_vectors(pdev); - clk_disable_unprepare(plat->stmmac_clk); - clk_unregister_fixed_rate(plat->stmmac_clk); + goto err_dvr_probe; } + return 0; + +err_dvr_probe: + pci_free_irq_vectors(pdev); +err_alloc_irq: + clk_disable_unprepare(plat->stmmac_clk); + clk_unregister_fixed_rate(plat->stmmac_clk); return ret; } @@ -877,13 +1006,9 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); - pci_free_irq_vectors(pdev); - clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); - - pci_disable_device(pdev); } static int __maybe_unused intel_eth_pci_suspend(struct device *dev) @@ -899,7 +1024,6 @@ static int __maybe_unused intel_eth_pci_suspend(struct device *dev) if (ret) return ret; - pci_disable_device(pdev); pci_wake_from_d3(pdev, true); return 0; } @@ -912,7 +1036,7 @@ static int __maybe_unused intel_eth_pci_resume(struct device *dev) pci_restore_state(pdev); pci_set_power_state(pdev, PCI_D0); - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index bfc4a92f1d92..a674b7d6b49a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -477,7 +477,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii"); ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ethqos->rgmii_base)) { - dev_err(&pdev->dev, "Can't get rgmii base\n"); ret = PTR_ERR(ethqos->rgmii_base); goto err_mem; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e62efd166ec8..19e7ec30af4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -239,6 +239,22 @@ static const struct emac_variant emac_variant_h6 = { #define EMAC_RX_EARLY_INT BIT(13) #define EMAC_RGMII_STA_INT BIT(16) +#define EMAC_INT_MSK_COMMON EMAC_RGMII_STA_INT +#define 
EMAC_INT_MSK_TX (EMAC_TX_INT | \ + EMAC_TX_DMA_STOP_INT | \ + EMAC_TX_BUF_UA_INT | \ + EMAC_TX_TIMEOUT_INT | \ + EMAC_TX_UNDERFLOW_INT | \ + EMAC_TX_EARLY_INT |\ + EMAC_INT_MSK_COMMON) +#define EMAC_INT_MSK_RX (EMAC_RX_INT | \ + EMAC_RX_BUF_UA_INT | \ + EMAC_RX_DMA_STOP_INT | \ + EMAC_RX_TIMEOUT_INT | \ + EMAC_RX_OVERFLOW_INT | \ + EMAC_RX_EARLY_INT | \ + EMAC_INT_MSK_COMMON) + #define MAC_ADDR_TYPE_DST BIT(31) /* H3 specific bits for EPHY */ @@ -412,13 +428,19 @@ static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan) } static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, + u32 dir) { u32 v; int ret = 0; v = readl(ioaddr + EMAC_INT_STA); + if (dir == DMA_DIR_RX) + v &= EMAC_INT_MSK_RX; + else if (dir == DMA_DIR_TX) + v &= EMAC_INT_MSK_TX; + if (v & EMAC_TX_INT) { ret |= handle_tx; x->tx_normal_irq_n++; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 8954b85eb850..cb17f6c35e54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -161,6 +161,13 @@ static void dwmac4_dma_init(void __iomem *ioaddr, value |= DMA_SYS_BUS_EAME; writel(value, ioaddr + DMA_SYS_BUS_MODE); + + if (dma_cfg->multi_msi_en) { + value = readl(ioaddr + DMA_BUS_MODE); + value &= ~DMA_BUS_MODE_INTM_MASK; + value |= (DMA_BUS_MODE_INTM_MODE1 << DMA_BUS_MODE_INTM_SHIFT); + writel(value, ioaddr + DMA_BUS_MODE); + } } static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 8391ca63d943..05481eb13ba6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -25,6 +25,9 @@ #define DMA_TBS_CTRL 0x00001050 /* DMA Bus Mode bitmap */ +#define DMA_BUS_MODE_INTM_MASK GENMASK(17, 16) +#define DMA_BUS_MODE_INTM_SHIFT 16 +#define DMA_BUS_MODE_INTM_MODE1 0x1 #define DMA_BUS_MODE_SFT_RESET BIT(0) /* DMA SYS Bus Mode bitmap */ @@ -149,6 +152,25 @@ #define DMA_CHAN_STATUS_TPS BIT(1) #define DMA_CHAN_STATUS_TI BIT(0) +#define DMA_CHAN_STATUS_MSK_COMMON (DMA_CHAN_STATUS_NIS | \ + DMA_CHAN_STATUS_AIS | \ + DMA_CHAN_STATUS_CDE | \ + DMA_CHAN_STATUS_FBE) + +#define DMA_CHAN_STATUS_MSK_RX (DMA_CHAN_STATUS_REB | \ + DMA_CHAN_STATUS_ERI | \ + DMA_CHAN_STATUS_RWT | \ + DMA_CHAN_STATUS_RPS | \ + DMA_CHAN_STATUS_RBU | \ + DMA_CHAN_STATUS_RI | \ + DMA_CHAN_STATUS_MSK_COMMON) + +#define DMA_CHAN_STATUS_MSK_TX (DMA_CHAN_STATUS_ETI | \ + DMA_CHAN_STATUS_TBU | \ + DMA_CHAN_STATUS_TPS | \ + DMA_CHAN_STATUS_TI | \ + DMA_CHAN_STATUS_MSK_COMMON) + /* Interrupt enable bits per channel */ #define DMA_CHAN_INTR_ENA_NIE BIT(16) #define DMA_CHAN_INTR_ENA_AIE BIT(15) @@ -206,7 +228,7 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan); int dwmac4_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan); + struct stmmac_extra_stats *x, u32 chan, u32 dir); void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan); void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan); void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 
71e50751ef2d..e63270267578 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -135,12 +135,17 @@ void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) } int dwmac4_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, u32 dir) { u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan)); u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); int ret = 0; + if (dir == DMA_DIR_RX) + intr_status &= DMA_CHAN_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= DMA_CHAN_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) { if (unlikely(intr_status & DMA_CHAN_STATUS_RBU)) @@ -161,20 +166,19 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, } } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & DMA_CHAN_STATUS_NIS)) { + if (likely(intr_status & DMA_CHAN_STATUS_NIS)) x->normal_irq_n++; - if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - x->rx_normal_irq_n++; - ret |= handle_rx; - } - if (likely(intr_status & (DMA_CHAN_STATUS_TI | - DMA_CHAN_STATUS_TBU))) { - x->tx_normal_irq_n++; - ret |= handle_tx; - } - if (unlikely(intr_status & DMA_CHAN_STATUS_ERI)) - x->rx_early_irq++; + if (likely(intr_status & DMA_CHAN_STATUS_RI)) { + x->rx_normal_irq_n++; + ret |= handle_rx; + } + if (likely(intr_status & (DMA_CHAN_STATUS_TI | + DMA_CHAN_STATUS_TBU))) { + x->tx_normal_irq_n++; + ret |= handle_tx; } + if (unlikely(intr_status & DMA_CHAN_STATUS_ERI)) + x->rx_early_irq++; writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan)); return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 5b010ebfede9..d8c6ff725237 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -192,6 +192,7 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp) /* 1. 
Enable Safety Features */ value = readl(ioaddr + MTL_ECC_CONTROL); + value |= MEEAO; /* MTL ECC Error Addr Status Override */ value |= TSOEE; /* TSO ECC */ value |= MRXPEE; /* MTL RX Parser ECC */ value |= MESTEE; /* MTL EST ECC */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index ff555d8b0cdf..6b2fd37b29ad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -98,6 +98,7 @@ #define ADDR GENMASK(15, 0) #define MTL_RXP_IACC_DATA 0x00000cb4 #define MTL_ECC_CONTROL 0x00000cc0 +#define MEEAO BIT(8) #define TSOEE BIT(4) #define MRXPEE BIT(3) #define MESTEE BIT(2) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index e5dbd0bc257e..1914ad698cab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -128,6 +128,26 @@ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ #define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ +#define DMA_STATUS_MSK_COMMON (DMA_STATUS_NIS | \ + DMA_STATUS_AIS | \ + DMA_STATUS_FBI) + +#define DMA_STATUS_MSK_RX (DMA_STATUS_ERI | \ + DMA_STATUS_RWT | \ + DMA_STATUS_RPS | \ + DMA_STATUS_RU | \ + DMA_STATUS_RI | \ + DMA_STATUS_OVF | \ + DMA_STATUS_MSK_COMMON) + +#define DMA_STATUS_MSK_TX (DMA_STATUS_ETI | \ + DMA_STATUS_UNF | \ + DMA_STATUS_TJT | \ + DMA_STATUS_TU | \ + DMA_STATUS_TPS | \ + DMA_STATUS_TI | \ + DMA_STATUS_MSK_COMMON) + #define NUM_DWMAC100_DMA_REGS 9 #define NUM_DWMAC1000_DMA_REGS 23 @@ -139,7 +159,7 @@ void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan); int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x, - u32 chan); + u32 chan, u32 dir); int dwmac_dma_reset(void __iomem *ioaddr); #endif /* __DWMAC_DMA_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 57a53a600aa5..d1c31200bb91 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -155,7 +155,7 @@ static void show_rx_process_state(unsigned int status) #endif int dwmac_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, u32 dir) { int ret = 0; /* read the status register (CSR5) */ @@ -167,6 +167,12 @@ int dwmac_dma_interrupt(void __iomem *ioaddr, show_tx_process_state(intr_status); show_rx_process_state(intr_status); #endif + + if (dir == DMA_DIR_RX) + intr_status &= DMA_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= DMA_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & DMA_STATUS_AIS)) { if (unlikely(intr_status & DMA_STATUS_UNF)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 6c3b8a950f58..1913385df685 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -412,6 +412,12 @@ #define XGMAC_TI BIT(0) #define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4) +#define XGMAC_DMA_STATUS_MSK_COMMON (XGMAC_NIS | XGMAC_AIS | XGMAC_FBE) +#define XGMAC_DMA_STATUS_MSK_RX (XGMAC_RBU | XGMAC_RI | \ + XGMAC_DMA_STATUS_MSK_COMMON) +#define XGMAC_DMA_STATUS_MSK_TX (XGMAC_TBU | XGMAC_TPS | XGMAC_TI | \ + XGMAC_DMA_STATUS_MSK_COMMON) + /* Descriptors */ #define 
XGMAC_TDES0_LTV BIT(31) #define XGMAC_TDES0_LT GENMASK(7, 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index f2cab5b76732..906e985441a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -323,12 +323,18 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan) } static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, + u32 dir) { u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; + if (dir == DMA_DIR_RX) + intr_status &= XGMAC_DMA_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= XGMAC_DMA_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & XGMAC_AIS)) { if (unlikely(intr_status & XGMAC_RBU)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 45edac5f60db..2b5022ef1e52 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -201,7 +201,7 @@ struct stmmac_dma_ops { void (*start_rx)(void __iomem *ioaddr, u32 chan); void (*stop_rx)(void __iomem *ioaddr, u32 chan); int (*dma_interrupt) (void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan); + struct stmmac_extra_stats *x, u32 chan, u32 dir); /* If supported then get the optional core features */ void (*get_hw_feature)(void __iomem *ioaddr, struct dma_features *dma_cap); @@ -280,7 +280,6 @@ struct stmmac_dma_ops { struct mac_device_info; struct net_device; struct rgmii_adv; -struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 4faad331a4ca..9966f6f10905 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -30,6 +30,10 @@ struct stmmac_resources { int wol_irq; int lpi_irq; int irq; + int sfty_ce_irq; + int sfty_ue_irq; + int rx_irq[MTL_MAX_RX_QUEUES]; + int tx_irq[MTL_MAX_TX_QUEUES]; }; struct stmmac_tx_info { @@ -225,6 +229,18 @@ struct stmmac_priv { void __iomem *mmcaddr; void __iomem *ptpaddr; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + int sfty_ce_irq; + int sfty_ue_irq; + int rx_irq[MTL_MAX_RX_QUEUES]; + int tx_irq[MTL_MAX_TX_QUEUES]; + /*irq name */ + char int_name_mac[IFNAMSIZ + 9]; + char int_name_wol[IFNAMSIZ + 9]; + char int_name_lpi[IFNAMSIZ + 9]; + char int_name_sfty_ce[IFNAMSIZ + 10]; + char int_name_sfty_ue[IFNAMSIZ + 10]; + char int_name_rx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 14]; + char int_name_tx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 18]; #ifdef CONFIG_DEBUG_FS struct dentry *dbgfs_dir; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 170296820af0..d34388b1ffcc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -105,6 +105,11 @@ module_param(chain_mode, int, 0444); MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode"); static irqreturn_t stmmac_interrupt(int irq, void *dev_id); +/* For MSI interrupts handling */ +static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id); +static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id); +static irqreturn_t stmmac_msi_intr_tx(int irq, void 
*data); +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -997,7 +1002,8 @@ static void stmmac_mac_link_down(struct phylink_config *config, stmmac_eee_init(priv); stmmac_set_eee_pls(priv, priv->hw, false); - stmmac_fpe_link_state_handle(priv, false); + if (priv->dma_cap.fpesel) + stmmac_fpe_link_state_handle(priv, false); } static void stmmac_mac_link_up(struct phylink_config *config, @@ -1097,7 +1103,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, stmmac_set_eee_pls(priv, priv->hw, true); } - stmmac_fpe_link_state_handle(priv, true); + if (priv->dma_cap.fpesel) + stmmac_fpe_link_state_handle(priv, true); } static const struct phylink_mac_ops stmmac_phylink_mac_ops = { @@ -2337,10 +2344,10 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv) return false; } -static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir) { int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, - &priv->xstats, chan); + &priv->xstats, chan, dir); struct stmmac_channel *ch = &priv->channel[chan]; unsigned long flags; @@ -2386,7 +2393,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) channels_to_check = ARRAY_SIZE(status); for (chan = 0; chan < channels_to_check; chan++) - status[chan] = stmmac_napi_check(priv, chan); + status[chan] = stmmac_napi_check(priv, chan, + DMA_DIR_RXTX); for (chan = 0; chan < tx_channel_count; chan++) { if (unlikely(status[chan] & tx_hard_error_bump_tc)) { @@ -2985,6 +2993,260 @@ static void stmmac_hw_teardown(struct net_device *dev) clk_disable_unprepare(priv->plat->clk_ptp_ref); } +static void stmmac_free_irq(struct net_device *dev, + enum request_irq_err irq_err, int irq_idx) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int j; + + switch (irq_err) { + case REQ_IRQ_ERR_ALL: + irq_idx = priv->plat->tx_queues_to_use; + fallthrough; + case REQ_IRQ_ERR_TX: + for (j = irq_idx - 1; j >= 0; j--) { + if (priv->tx_irq[j] > 0) + free_irq(priv->tx_irq[j], &priv->tx_queue[j]); + } + irq_idx = priv->plat->rx_queues_to_use; + fallthrough; + case REQ_IRQ_ERR_RX: + for (j = irq_idx - 1; j >= 0; j--) { + if (priv->rx_irq[j] > 0) + free_irq(priv->rx_irq[j], &priv->rx_queue[j]); + } + + if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) + free_irq(priv->sfty_ue_irq, dev); + fallthrough; + case REQ_IRQ_ERR_SFTY_UE: + if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) + free_irq(priv->sfty_ce_irq, dev); + fallthrough; + case REQ_IRQ_ERR_SFTY_CE: + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) + free_irq(priv->lpi_irq, dev); + fallthrough; + case REQ_IRQ_ERR_LPI: + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) + free_irq(priv->wol_irq, dev); + fallthrough; + case REQ_IRQ_ERR_WOL: + free_irq(dev->irq, dev); + fallthrough; + case REQ_IRQ_ERR_MAC: + case REQ_IRQ_ERR_NO: + /* If MAC IRQ request error, no more IRQ to free */ + break; + } +} + +static int stmmac_request_irq_multi_msi(struct net_device *dev) +{ + enum request_irq_err irq_err = REQ_IRQ_ERR_NO; + struct stmmac_priv *priv = netdev_priv(dev); + int irq_idx = 0; + char *int_name; + int ret; + int i; + + /* For common interrupt */ + int_name = priv->int_name_mac; + sprintf(int_name, "%s:%s", dev->name, "mac"); + ret = request_irq(dev->irq, stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc mac MSI %d (error: %d)\n", + __func__, dev->irq, 
ret); + irq_err = REQ_IRQ_ERR_MAC; + goto irq_error; + } + + /* Request the Wake IRQ in case of another line + * is used for WoL + */ + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { + int_name = priv->int_name_wol; + sprintf(int_name, "%s:%s", dev->name, "wol"); + ret = request_irq(priv->wol_irq, + stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc wol MSI %d (error: %d)\n", + __func__, priv->wol_irq, ret); + irq_err = REQ_IRQ_ERR_WOL; + goto irq_error; + } + } + + /* Request the LPI IRQ in case of another line + * is used for LPI + */ + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { + int_name = priv->int_name_lpi; + sprintf(int_name, "%s:%s", dev->name, "lpi"); + ret = request_irq(priv->lpi_irq, + stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc lpi MSI %d (error: %d)\n", + __func__, priv->lpi_irq, ret); + irq_err = REQ_IRQ_ERR_LPI; + goto irq_error; + } + } + + /* Request the Safety Feature Correctible Error line in + * case of another line is used + */ + if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) { + int_name = priv->int_name_sfty_ce; + sprintf(int_name, "%s:%s", dev->name, "safety-ce"); + ret = request_irq(priv->sfty_ce_irq, + stmmac_safety_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc sfty ce MSI %d (error: %d)\n", + __func__, priv->sfty_ce_irq, ret); + irq_err = REQ_IRQ_ERR_SFTY_CE; + goto irq_error; + } + } + + /* Request the Safety Feature Uncorrectible Error line in + * case of another line is used + */ + if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) { + int_name = priv->int_name_sfty_ue; + sprintf(int_name, "%s:%s", dev->name, "safety-ue"); + ret = request_irq(priv->sfty_ue_irq, + stmmac_safety_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc sfty ue MSI %d (error: %d)\n", + __func__, priv->sfty_ue_irq, ret); + irq_err = REQ_IRQ_ERR_SFTY_UE; + goto irq_error; + } + } + + /* Request Rx MSI irq */ + for (i = 0; i < priv->plat->rx_queues_to_use; i++) { + if (priv->rx_irq[i] == 0) + continue; + + int_name = priv->int_name_rx_irq[i]; + sprintf(int_name, "%s:%s-%d", dev->name, "rx", i); + ret = request_irq(priv->rx_irq[i], + stmmac_msi_intr_rx, + 0, int_name, &priv->rx_queue[i]); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc rx-%d MSI %d (error: %d)\n", + __func__, i, priv->rx_irq[i], ret); + irq_err = REQ_IRQ_ERR_RX; + irq_idx = i; + goto irq_error; + } + } + + /* Request Tx MSI irq */ + for (i = 0; i < priv->plat->tx_queues_to_use; i++) { + if (priv->tx_irq[i] == 0) + continue; + + int_name = priv->int_name_tx_irq[i]; + sprintf(int_name, "%s:%s-%d", dev->name, "tx", i); + ret = request_irq(priv->tx_irq[i], + stmmac_msi_intr_tx, + 0, int_name, &priv->tx_queue[i]); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc tx-%d MSI %d (error: %d)\n", + __func__, i, priv->tx_irq[i], ret); + irq_err = REQ_IRQ_ERR_TX; + irq_idx = i; + goto irq_error; + } + } + + return 0; + +irq_error: + stmmac_free_irq(dev, irq_err, irq_idx); + return ret; +} + +static int stmmac_request_irq_single(struct net_device *dev) +{ + enum request_irq_err irq_err = REQ_IRQ_ERR_NO; + struct stmmac_priv *priv = netdev_priv(dev); + int ret; + + ret = request_irq(dev->irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the IRQ %d (error: %d)\n", + __func__, 
dev->irq, ret); + irq_err = REQ_IRQ_ERR_MAC; + return ret; + } + + /* Request the Wake IRQ in case of another line + * is used for WoL + */ + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { + ret = request_irq(priv->wol_irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the WoL IRQ %d (%d)\n", + __func__, priv->wol_irq, ret); + irq_err = REQ_IRQ_ERR_WOL; + return ret; + } + } + + /* Request the IRQ lines */ + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { + ret = request_irq(priv->lpi_irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the LPI IRQ %d (%d)\n", + __func__, priv->lpi_irq, ret); + irq_err = REQ_IRQ_ERR_LPI; + goto irq_error; + } + } + + return 0; + +irq_error: + stmmac_free_irq(dev, irq_err, 0); + return ret; +} + +static int stmmac_request_irq(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret; + + /* Request the IRQ lines */ + if (priv->plat->multi_msi_en) + ret = stmmac_request_irq_multi_msi(dev); + else + ret = stmmac_request_irq_single(dev); + + return ret; +} + /** * stmmac_open - open entry point of the driver * @dev : pointer to the device structure. @@ -3076,50 +3338,15 @@ static int stmmac_open(struct net_device *dev) /* We may have called phylink_speed_down before */ phylink_speed_up(priv->phylink); - /* Request the IRQ lines */ - ret = request_irq(dev->irq, stmmac_interrupt, - IRQF_SHARED, dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the IRQ %d (error: %d)\n", - __func__, dev->irq, ret); + ret = stmmac_request_irq(dev); + if (ret) goto irq_error; - } - - /* Request the Wake IRQ in case of another line is used for WoL */ - if (priv->wol_irq != dev->irq) { - ret = request_irq(priv->wol_irq, stmmac_interrupt, - IRQF_SHARED, dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the WoL IRQ %d (%d)\n", - __func__, priv->wol_irq, ret); - goto wolirq_error; - } - } - - /* Request the IRQ lines */ - if (priv->lpi_irq > 0) { - ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED, - dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the LPI IRQ %d (%d)\n", - __func__, priv->lpi_irq, ret); - goto lpiirq_error; - } - } stmmac_enable_all_queues(priv); netif_tx_start_all_queues(priv->dev); return 0; -lpiirq_error: - if (priv->wol_irq != dev->irq) - free_irq(priv->wol_irq, dev); -wolirq_error: - free_irq(dev->irq, dev); irq_error: phylink_stop(priv->phylink); @@ -3169,11 +3396,7 @@ static int stmmac_release(struct net_device *dev) hrtimer_cancel(&priv->tx_queue[chan].txtimer); /* Free the IRQ lines */ - free_irq(dev->irq, dev); - if (priv->wol_irq != dev->irq) - free_irq(priv->wol_irq, dev); - if (priv->lpi_irq > 0) - free_irq(priv->lpi_irq, dev); + stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0); if (priv->eee_enabled) { priv->tx_path_in_lpi_mode = false; @@ -4381,21 +4604,8 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) } } -/** - * stmmac_interrupt - main ISR - * @irq: interrupt number. - * @dev_id: to pass the net device pointer (must be valid). - * Description: this is the main driver interrupt service routine. - * It can call: - * o DMA service routine (to manage incoming frame reception and transmission - * status) - * o Core interrupts to manage: remote wake-up, management counter, LPI - * interrupts. 
- */ -static irqreturn_t stmmac_interrupt(int irq, void *dev_id) +static void stmmac_common_interrupt(struct stmmac_priv *priv) { - struct net_device *dev = (struct net_device *)dev_id; - struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; u32 tx_cnt = priv->plat->tx_queues_to_use; u32 queues_count; @@ -4408,13 +4618,6 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) if (priv->irq_wake) pm_wakeup_event(priv->device, 0); - /* Check if adapter is up */ - if (test_bit(STMMAC_DOWN, &priv->state)) - return IRQ_HANDLED; - /* Check if a fatal error happened */ - if (stmmac_safety_feat_interrupt(priv)) - return IRQ_HANDLED; - if (priv->dma_cap.estsel) stmmac_est_irq_status(priv, priv->ioaddr, priv->dev, &priv->xstats, tx_cnt); @@ -4456,11 +4659,39 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) /* PCS link status */ if (priv->hw->pcs) { if (priv->xstats.pcs_link) - netif_carrier_on(dev); + netif_carrier_on(priv->dev); else - netif_carrier_off(dev); + netif_carrier_off(priv->dev); } } +} + +/** + * stmmac_interrupt - main ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the main driver interrupt service routine. + * It can call: + * o DMA service routine (to manage incoming frame reception and transmission + * status) + * o Core interrupts to manage: remote wake-up, management counter, LPI + * interrupts. + */ +static irqreturn_t stmmac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* Check if a fatal error happened */ + if (stmmac_safety_feat_interrupt(priv)) + return IRQ_HANDLED; + + /* To handle Common interrupts */ + stmmac_common_interrupt(priv); /* To handle DMA interrupts */ stmmac_dma_interrupt(priv); @@ -4468,15 +4699,136 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + if (unlikely(!dev)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* To handle Common interrupts */ + stmmac_common_interrupt(priv); + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + if (unlikely(!dev)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* Check if a fatal error happened */ + stmmac_safety_feat_interrupt(priv); + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) +{ + struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)data; + int chan = tx_q->queue_index; + struct stmmac_priv *priv; + int status; + + priv = container_of(tx_q, struct stmmac_priv, tx_queue[chan]); + + if (unlikely(!data)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + status = stmmac_napi_check(priv, chan, 
DMA_DIR_TX); + + if (unlikely(status & tx_hard_error_bump_tc)) { + /* Try to bump up the dma threshold on this failure */ + if (unlikely(priv->xstats.threshold != SF_DMA_MODE) && + tc <= 256) { + tc += 64; + if (priv->plat->force_thresh_dma_mode) + stmmac_set_dma_operation_mode(priv, + tc, + tc, + chan); + else + stmmac_set_dma_operation_mode(priv, + tc, + SF_DMA_MODE, + chan); + priv->xstats.threshold = tc; + } + } else if (unlikely(status == tx_hard_error)) { + stmmac_tx_err(priv, chan); + } + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) +{ + struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)data; + int chan = rx_q->queue_index; + struct stmmac_priv *priv; + + priv = container_of(rx_q, struct stmmac_priv, rx_queue[chan]); + + if (unlikely(!data)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + stmmac_napi_check(priv, chan, DMA_DIR_RX); + + return IRQ_HANDLED; +} + #ifdef CONFIG_NET_POLL_CONTROLLER /* Polling receive - used by NETCONSOLE and other diagnostic tools * to allow network I/O with interrupts disabled. */ static void stmmac_poll_controller(struct net_device *dev) { - disable_irq(dev->irq); - stmmac_interrupt(dev->irq, dev); - enable_irq(dev->irq); + struct stmmac_priv *priv = netdev_priv(dev); + int i; + + /* If adapter is down, do nothing */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return; + + if (priv->plat->multi_msi_en) { + for (i = 0; i < priv->plat->rx_queues_to_use; i++) + stmmac_msi_intr_rx(0, &priv->rx_queue[i]); + + for (i = 0; i < priv->plat->tx_queues_to_use; i++) + stmmac_msi_intr_tx(0, &priv->tx_queue[i]); + } else { + disable_irq(dev->irq); + stmmac_interrupt(dev->irq, dev); + enable_irq(dev->irq); + } } #endif @@ -5270,10 +5622,17 @@ int stmmac_dvr_probe(struct device *device, priv->plat = plat_dat; priv->ioaddr = res->addr; priv->dev->base_addr = (unsigned long)res->addr; + priv->plat->dma_cfg->multi_msi_en = priv->plat->multi_msi_en; priv->dev->irq = res->irq; priv->wol_irq = res->wol_irq; priv->lpi_irq = res->lpi_irq; + priv->sfty_ce_irq = res->sfty_ce_irq; + priv->sfty_ue_irq = res->sfty_ue_irq; + for (i = 0; i < MTL_MAX_RX_QUEUES; i++) + priv->rx_irq[i] = res->rx_irq[i]; + for (i = 0; i < MTL_MAX_TX_QUEUES; i++) + priv->tx_irq[i] = res->tx_irq[i]; if (!IS_ERR_OR_NULL(res->mac)) memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); @@ -5605,8 +5964,10 @@ int stmmac_suspend(struct device *dev) /* Disable clock in case of PWM is off */ clk_disable_unprepare(priv->plat->clk_ptp_ref); ret = pm_runtime_force_suspend(dev); - if (ret) + if (ret) { + mutex_unlock(&priv->lock); return ret; + } } mutex_unlock(&priv->lock); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 272cb47af9f2..95e0e4d6f74d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -198,8 +198,6 @@ static int stmmac_pci_probe(struct pci_dev *pdev, if (ret) return ret; - pci_enable_msi(pdev); - memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[i]; res.wol_irq = pdev->irq; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1d84ee359808..4e70efc45458 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -254,6 +254,16 @@ static int 
tc_init(struct stmmac_priv *priv) priv->flow_entries_max); } + if (!priv->plat->fpe_cfg) { + priv->plat->fpe_cfg = devm_kzalloc(priv->device, + sizeof(*priv->plat->fpe_cfg), + GFP_KERNEL); + if (!priv->plat->fpe_cfg) + return -ENOMEM; + } else { + memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg)); + } + /* Fail silently as we can still use remaining features, e.g. CBS */ if (!dma_cap->frpsel) return 0; @@ -298,16 +308,6 @@ static int tc_init(struct stmmac_priv *priv) dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n", priv->tc_entries_max, priv->tc_off_max); - if (!priv->plat->fpe_cfg) { - priv->plat->fpe_cfg = devm_kzalloc(priv->device, - sizeof(*priv->plat->fpe_cfg), - GFP_KERNEL); - if (!priv->plat->fpe_cfg) - return -ENOMEM; - } else { - memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg)); - } - return 0; } diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index d5a75ef7e3ca..226a76633e65 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -146,7 +146,8 @@ spider_net_read_phy(struct net_device *netdev, int mii_id, int reg) /* we don't use semaphores to wait for an SPIDER_NET_GPROPCMPINT * interrupt, as we poll for the completion of the read operation - * in spider_net_read_phy. Should take about 50 us */ + * in spider_net_read_phy. Should take about 50 us + */ do { readvalue = spider_net_read_reg(card, SPIDER_NET_GPCROPCMD); } while (readvalue & SPIDER_NET_GPREXEC); @@ -387,7 +388,8 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, (~(SPIDER_NET_RXBUF_ALIGN - 1)); /* and we need to have it 128 byte aligned, therefore we allocate a - * bit more */ + * bit more + */ /* allocate an skb */ descr->skb = netdev_alloc_skb(card->netdev, bufsize + SPIDER_NET_RXBUF_ALIGN - 1); @@ -488,7 +490,8 @@ spider_net_refill_rx_chain(struct spider_net_card *card) /* one context doing the refill (and a second context seeing that * and omitting it) is ok. If called by NAPI, we'll be called again * as spider_net_decode_one_descr is called several times. If some - * interrupt calls us, the NAPI is about to clean up anyway. */ + * interrupt calls us, the NAPI is about to clean up anyway. + */ if (!spin_trylock_irqsave(&chain->lock, flags)) return; @@ -523,14 +526,16 @@ spider_net_alloc_rx_skbs(struct spider_net_card *card) /* Put at least one buffer into the chain. if this fails, * we've got a problem. If not, spider_net_refill_rx_chain - * will do the rest at the end of this function. */ + * will do the rest at the end of this function. + */ if (spider_net_prepare_rx_descr(card, chain->head)) goto error; else chain->head = chain->head->next; /* This will allocate the rest of the rx buffers; - * if not, it's business as usual later on. */ + * if not, it's business as usual later on. + */ spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); return 0; @@ -706,7 +711,8 @@ spider_net_set_low_watermark(struct spider_net_card *card) int i; /* Measure the length of the queue. Measurement does not - * need to be precise -- does not need a lock. */ + * need to be precise -- does not need a lock. 
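+ * (Editorial note: a momentarily stale count here only makes
+ * the low watermark an approximation, which is all this
+ * heuristic needs.)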
+ */ while (descr != card->tx_chain.head) { status = descr->hwdescr->dmac_cmd_status & SPIDER_NET_DESCR_NOT_IN_USE; if (status == SPIDER_NET_DESCR_NOT_IN_USE) @@ -786,7 +792,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) /* fallthrough, if we release the descriptors * brutally (then we don't care about - * SPIDER_NET_DESCR_CARDOWNED) */ + * SPIDER_NET_DESCR_CARDOWNED) + */ fallthrough; case SPIDER_NET_DESCR_RESPONSE_ERROR: @@ -948,7 +955,8 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, skb_put(skb, hwdescr->valid_size); /* the card seems to add 2 bytes of junk in front - * of the ethernet frame */ + * of the ethernet frame + */ #define SPIDER_MISALIGN 2 skb_pull(skb, SPIDER_MISALIGN); skb->protocol = eth_type_trans(skb, netdev); @@ -1382,7 +1390,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg, /* PHY read operation completed */ /* we don't use semaphores, as we poll for the completion * of the read operation in spider_net_read_phy. Should take - * about 50 us */ + * about 50 us + */ show_error = 0; break; case SPIDER_NET_GPWFFINT: @@ -1450,7 +1459,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg, { case SPIDER_NET_GTMFLLINT: /* TX RAM full may happen on a usual case. - * Logging is not needed. */ + * Logging is not needed. + */ show_error = 0; break; case SPIDER_NET_GRFDFLLINT: @@ -1694,7 +1704,8 @@ spider_net_enable_card(struct spider_net_card *card) { int i; /* the following array consists of (register),(value) pairs - * that are set in this function. A register of 0 ends the list */ + * that are set in this function. A register of 0 ends the list + */ u32 regs[][2] = { { SPIDER_NET_GRESUMINTNUM, 0 }, { SPIDER_NET_GREINTNUM, 0 }, @@ -1757,7 +1768,8 @@ spider_net_enable_card(struct spider_net_card *card) spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE); /* set chain tail address for RX chains and - * enable DMA */ + * enable DMA + */ spider_net_enable_rxchtails(card); spider_net_enable_rxdmac(card); @@ -1995,7 +2007,8 @@ static void spider_net_link_phy(struct timer_list *t) case BCM54XX_UNKNOWN: /* copper, fiber with and without failed, - * retry from beginning */ + * retry from beginning + */ spider_net_setup_aneg(card); card->medium = BCM54XX_COPPER; break; @@ -2263,7 +2276,8 @@ spider_net_setup_netdev(struct spider_net_card *card) netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - * NETIF_F_HW_VLAN_CTAG_FILTER */ + * NETIF_F_HW_VLAN_CTAG_FILTER + */ /* MTU range: 64 - 2294 */ netdev->min_mtu = SPIDER_NET_MIN_MTU; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 7a6e5ff8e5d4..fedb2bf69261 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1914,7 +1914,8 @@ tc35815_set_multicast_list(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* With some (all?) 100MHalf HUB, controller will hang - * if we enabled promiscuous mode before linkup... */ + * if we enabled promiscuous mode before linkup... 
+ */ struct tc35815_local *lp = netdev_priv(dev); if (!lp->link) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index b65767f9e499..fecc4d7b00b0 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2525,7 +2525,7 @@ static int velocity_close(struct net_device *dev) * @skb: buffer to transmit * @dev: network device * - * Called by the networ layer to request a packet is queued to + * Called by the network layer to request a packet is queued to * the velocity. Returns zero on success. */ static netdev_tx_t velocity_xmit(struct sk_buff *skb, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 1e966a39967e..708769349f76 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -376,6 +376,8 @@ struct axidma_bd { struct sk_buff *skb; } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT); +#define XAE_NUM_MISC_CLOCKS 3 + /** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. @@ -385,7 +387,8 @@ struct axidma_bd { * @phylink_config: phylink configuration settings * @pcs_phy: Reference to PCS/PMA PHY if used * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core - * @clk: Clock for AXI bus + * @axi_clk: AXI4-Lite bus clock + * @misc_clks: Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks) * @mii_bus: Pointer to MII bus structure * @mii_clk_div: MII bus clock divider value * @regs_start: Resource start for axienet device addresses @@ -434,7 +437,8 @@ struct axienet_local { bool switch_x_sgmii; - struct clk *clk; + struct clk *axi_clk; + struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS]; struct mii_bus *mii_bus; u8 mii_clk_div; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 5d677db0aee5..92cf9051d557 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1863,22 +1863,39 @@ static int axienet_probe(struct platform_device *pdev) lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT; - lp->clk = devm_clk_get_optional(&pdev->dev, NULL); - if (IS_ERR(lp->clk)) { - ret = PTR_ERR(lp->clk); + lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); + if (!lp->axi_clk) { + /* For backward compatibility, if named AXI clock is not present, + * treat the first clock specified as the AXI clock. 
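+ * e.g. a legacy device tree with only
+ *   clocks = <&axi_clk>;
+ * and no clock-names property still gets its first (unnamed)
+ * clock treated as the AXI4-Lite clock by the lookup below.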
+ */ + lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL); + } + if (IS_ERR(lp->axi_clk)) { + ret = PTR_ERR(lp->axi_clk); goto free_netdev; } - ret = clk_prepare_enable(lp->clk); + ret = clk_prepare_enable(lp->axi_clk); if (ret) { - dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); + dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret); goto free_netdev; } + lp->misc_clks[0].id = "axis_clk"; + lp->misc_clks[1].id = "ref_clk"; + lp->misc_clks[2].id = "mgt_clk"; + + ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + + ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + if (ret) + goto cleanup_clk; + /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (IS_ERR(lp->regs)) { - dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = PTR_ERR(lp->regs); goto cleanup_clk; } @@ -2109,7 +2126,8 @@ cleanup_mdio: of_node_put(lp->phy_node); cleanup_clk: - clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk); free_netdev: free_netdev(ndev); @@ -2132,7 +2150,8 @@ static int axienet_remove(struct platform_device *pdev) axienet_mdio_teardown(lp); - clk_disable_unprepare(lp->clk); + clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks); + clk_disable_unprepare(lp->axi_clk); of_node_put(lp->phy_node); lp->phy_node = NULL; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 9c014cee34b2..48f544f6c999 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -159,8 +159,8 @@ int axienet_mdio_enable(struct axienet_local *lp) lp->mii_clk_div = 0; - if (lp->clk) { - host_clock = clk_get_rate(lp->clk); + if (lp->axi_clk) { + host_clock = clk_get_rate(lp->axi_clk); } else { struct device_node *np1; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 4ac0373326ef..5d7a2b1469f4 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -461,6 +461,7 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, if (err < 0) return ERR_PTR(err); + udp_allow_gso(sock->sk); return sock; } diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 59ac04a610ad..442c520ab8f3 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -269,7 +269,7 @@ int rndis_filter_receive(struct net_device *ndev, int rndis_filter_set_device_mac(struct netvsc_device *ndev, const char *mac); -void netvsc_switch_datapath(struct net_device *nv_dev, bool vf); +int netvsc_switch_datapath(struct net_device *nv_dev, bool vf); #define NVSP_INVALID_PROTOCOL_VERSION ((u32)0xFFFFFFFF) @@ -1718,4 +1718,8 @@ struct rndis_message { #define TRANSPORT_INFO_IPV6_TCP 0x10 #define TRANSPORT_INFO_IPV6_UDP 0x20 +#define RETRY_US_LO 5000 +#define RETRY_US_HI 10000 +#define RETRY_MAX 2000 /* >10 sec */ + #endif /* _HYPERV_NET_H */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5bce24731502..9d07c9ce4be2 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -31,12 +31,13 @@ * Switch the data path from the synthetic interface to the VF * interface. 
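 * (Editorial summary: the switch message sent below can fail
 * transiently, so it is now retried on -EAGAIN up to RETRY_MAX
 * times with a short usleep_range() back-off, and the result is
 * returned so callers such as netvsc_vf_changed() can log and
 * bail out on failure.)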
*/ -void netvsc_switch_datapath(struct net_device *ndev, bool vf) +int netvsc_switch_datapath(struct net_device *ndev, bool vf) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; + int ret, retry = 0; /* Block sending traffic to VF if it's about to be gone */ if (!vf) @@ -51,15 +52,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) init_pkt->msg.v4_msg.active_dp.active_datapath = NVSP_DATAPATH_SYNTHETIC; +again: trace_nvsp_send(ndev, init_pkt); - vmbus_sendpacket(dev->channel, init_pkt, + ret = vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message), - (unsigned long)init_pkt, - VM_PKT_DATA_INBAND, + (unsigned long)init_pkt, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + + /* If failed to switch to/from VF, let data_path_is_vf stay false, + * so we use synthetic path to send data. + */ + if (ret) { + if (ret != -EAGAIN) { + netdev_err(ndev, + "Unable to send sw datapath msg, err: %d\n", + ret); + return ret; + } + + if (retry++ < RETRY_MAX) { + usleep_range(RETRY_US_LO, RETRY_US_HI); + goto again; + } else { + netdev_err( + ndev, + "Retry failed to send sw datapath msg, err: %d\n", + ret); + return ret; + } + } + wait_for_completion(&nv_dev->channel_init_wait); net_device_ctx->data_path_is_vf = vf; + + return 0; } /* Worker to setup sub channels on initial setup diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 97b5c9b60503..7349a70af083 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -38,9 +38,6 @@ #include "hyperv_net.h" #define RING_SIZE_MIN 64 -#define RETRY_US_LO 5000 -#define RETRY_US_HI 10000 -#define RETRY_MAX 2000 /* >10 sec */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) @@ -2402,6 +2399,7 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) struct netvsc_device *netvsc_dev; struct net_device *ndev; bool vf_is_up = false; + int ret; if (event != NETDEV_GOING_DOWN) vf_is_up = netif_running(vf_netdev); @@ -2418,9 +2416,17 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) if (net_device_ctx->data_path_is_vf == vf_is_up) return NOTIFY_OK; - netvsc_switch_datapath(ndev, vf_is_up); - netdev_info(ndev, "Data path switched %s VF: %s\n", - vf_is_up ? "to" : "from", vf_netdev->name); + ret = netvsc_switch_datapath(ndev, vf_is_up); + + if (ret) { + netdev_err(ndev, + "Data path failed to switch %s VF: %s, err: %d\n", + vf_is_up ? "to" : "from", vf_netdev->name, ret); + return NOTIFY_DONE; + } else { + netdev_info(ndev, "Data path switched %s VF: %s\n", + vf_is_up ? 
"to" : "from", vf_netdev->name); + } return NOTIFY_OK; } diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index c0bf7d78276e..da9135231c07 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -268,7 +268,7 @@ static int hwsim_get_radio(struct sk_buff *skb, struct hwsim_phy *phy, struct netlink_callback *cb, int flags) { void *hdr; - int res = -EMSGSIZE; + int res; hdr = genlmsg_put(skb, portid, seq, &hwsim_genl_family, flags, MAC802154_HWSIM_CMD_GET_RADIO); diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 90a90262e0d0..8f99cfa14680 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -12,8 +12,7 @@ config QCOM_IPA that is capable of generic hardware handling of IP packets, including routing, filtering, and NAT. Currently the IPA driver supports only basic transport of network traffic - between the AP and modem, on the Qualcomm SDM845 and SC7180 - SoCs. + between the AP and modem. Note that if selected, the selection type must match that of QCOM_Q6V5_COMMON (Y or M). diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile index afe5df1e6eee..6abd1db9fe33 100644 --- a/drivers/net/ipa/Makefile +++ b/drivers/net/ipa/Makefile @@ -7,6 +7,6 @@ ipa-y := ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \ ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \ ipa_gsi.o ipa_smp2p.o ipa_uc.o \ ipa_endpoint.o ipa_cmd.o ipa_modem.o \ - ipa_qmi.o ipa_qmi_msg.o + ipa_resource.o ipa_qmi.o ipa_qmi_msg.o -ipa-y += ipa_data-sdm845.o ipa_data-sc7180.o +ipa-y += ipa_data-v3.5.1.o ipa_data-v4.2.o diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 7f2a8fce5e0d..1c835b3e1a43 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -701,7 +701,7 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id) val |= u32_encode_bits(GSI_RING_ELEMENT_SIZE, EV_ELEMENT_SIZE_FMASK); iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_0_OFFSET(evt_ring_id)); - val = u32_encode_bits(size, EV_R_LENGTH_FMASK); + val = ev_r_length_encoded(gsi->version, size); iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_1_OFFSET(evt_ring_id)); /* The context 2 and 3 registers store the low-order and @@ -801,14 +801,14 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) channel->tre_ring.index = 0; /* We program all channels as GPI type/protocol */ - val = u32_encode_bits(GSI_CHANNEL_TYPE_GPI, CHTYPE_PROTOCOL_FMASK); + val = chtype_protocol_encoded(gsi->version, GSI_CHANNEL_TYPE_GPI); if (channel->toward_ipa) val |= CHTYPE_DIR_FMASK; val |= u32_encode_bits(channel->evt_ring_id, ERINDEX_FMASK); val |= u32_encode_bits(GSI_RING_ELEMENT_SIZE, ELEMENT_SIZE_FMASK); iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id)); - val = u32_encode_bits(size, R_LENGTH_FMASK); + val = r_length_encoded(gsi->version, size); iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_1_OFFSET(channel_id)); /* The context 2 and 3 registers store the low-order and @@ -842,6 +842,9 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) val |= u32_encode_bits(GSI_ESCAPE_BUF_ONLY, PREFETCH_MODE_FMASK); } + /* All channels set DB_IN_BYTES */ + if (gsi->version >= IPA_VERSION_4_9) + val |= DB_IN_BYTES; iowrite32(val, gsi->virt + GSI_CH_C_QOS_OFFSET(channel_id)); @@ -1441,18 +1444,13 @@ static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count) dma_addr_t addr; /* Hardware requires a 2^n ring size, with alignment equal to size. 
- * The size is a power of 2, so we can check alignment using just - * the bottom 32 bits for a DMA address of any size. + * The DMA address returned by dma_alloc_coherent() is guaranteed to + * be a power-of-2 number of pages, which satisfies the requirement. */ ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL); - if (ring->virt && lower_32_bits(addr) % size) { - dma_free_coherent(dev, size, ring->virt, addr); - dev_err(dev, "unable to alloc 0x%x-aligned ring buffer\n", - size); - return -EINVAL; /* Not a good error value, but distinct */ - } else if (!ring->virt) { + if (!ring->virt) return -ENOMEM; - } + ring->addr = addr; ring->count = count; diff --git a/drivers/net/ipa/gsi_private.h b/drivers/net/ipa/gsi_private.h index 1785c9d3344d..ea333a244cf5 100644 --- a/drivers/net/ipa/gsi_private.h +++ b/drivers/net/ipa/gsi_private.h @@ -14,7 +14,7 @@ struct gsi_trans; struct gsi_ring; struct gsi_channel; -#define GSI_RING_ELEMENT_SIZE 16 /* bytes */ +#define GSI_RING_ELEMENT_SIZE 16 /* bytes; must be a power of 2 */ /* Return the entry that follows one provided in a transaction pool */ void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element); @@ -100,7 +100,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel); /** * gsi_ring_virt() - Return virtual address for a ring entry * @ring: Ring whose address is to be translated - * @addr: Index (slot number) of entry + * @index: Index (slot number) of entry */ void *gsi_ring_virt(struct gsi_ring *ring, u32 index); diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h index 1622d8cf8dea..b4ac0258d6e1 100644 --- a/drivers/net/ipa/gsi_reg.h +++ b/drivers/net/ipa/gsi_reg.h @@ -64,6 +64,21 @@ (0x0000c01c + 0x1000 * (ee)) /* All other register offsets are relative to gsi->virt */ + +/** enum gsi_channel_type - CHTYPE_PROTOCOL field values in CH_C_CNTXT_0 */ +enum gsi_channel_type { + GSI_CHANNEL_TYPE_MHI = 0x0, + GSI_CHANNEL_TYPE_XHCI = 0x1, + GSI_CHANNEL_TYPE_GPI = 0x2, + GSI_CHANNEL_TYPE_XDCI = 0x3, + GSI_CHANNEL_TYPE_WDI2 = 0x4, + GSI_CHANNEL_TYPE_GCI = 0x5, + GSI_CHANNEL_TYPE_WDI3 = 0x6, + GSI_CHANNEL_TYPE_MHIP = 0x7, + GSI_CHANNEL_TYPE_AQC = 0x8, + GSI_CHANNEL_TYPE_11AD = 0x9, +}; + #define GSI_CH_C_CNTXT_0_OFFSET(ch) \ GSI_EE_N_CH_C_CNTXT_0_OFFSET((ch), GSI_EE_AP) #define GSI_EE_N_CH_C_CNTXT_0_OFFSET(ch, ee) \ @@ -78,19 +93,35 @@ #define CHSTATE_FMASK GENMASK(23, 20) #define ELEMENT_SIZE_FMASK GENMASK(31, 24) -/** enum gsi_channel_type - CHTYPE_PROTOCOL field values in CH_C_CNTXT_0 */ -enum gsi_channel_type { - GSI_CHANNEL_TYPE_MHI = 0x0, - GSI_CHANNEL_TYPE_XHCI = 0x1, - GSI_CHANNEL_TYPE_GPI = 0x2, - GSI_CHANNEL_TYPE_XDCI = 0x3, -}; +/* Encoded value for CH_C_CNTXT_0 register channel protocol fields */ +static inline u32 +chtype_protocol_encoded(enum ipa_version version, enum gsi_channel_type type) +{ + u32 val; + + val = u32_encode_bits(type, CHTYPE_PROTOCOL_FMASK); + if (version < IPA_VERSION_4_5) + return val; + + /* Encode upper bit(s) as well */ + type >>= hweight32(CHTYPE_PROTOCOL_FMASK); + val |= u32_encode_bits(type, CHTYPE_PROTOCOL_MSB_FMASK); + + return val; +} #define GSI_CH_C_CNTXT_1_OFFSET(ch) \ GSI_EE_N_CH_C_CNTXT_1_OFFSET((ch), GSI_EE_AP) #define GSI_EE_N_CH_C_CNTXT_1_OFFSET(ch, ee) \ (0x0001c004 + 0x4000 * (ee) + 0x80 * (ch)) -#define R_LENGTH_FMASK GENMASK(15, 0) + +/* Encoded value for CH_C_CNTXT_1 register R_LENGTH field */ +static inline u32 r_length_encoded(enum ipa_version version, u32 length) +{ + if (version < IPA_VERSION_4_9) + return u32_encode_bits(length, GENMASK(15, 0)); + 
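/* IPA v4.9 and later widen R_LENGTH from 16 to 20 bits */
+ 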
return u32_encode_bits(length, GENMASK(19, 0)); +} #define GSI_CH_C_CNTXT_2_OFFSET(ch) \ GSI_EE_N_CH_C_CNTXT_2_OFFSET((ch), GSI_EE_AP) @@ -114,6 +145,9 @@ enum gsi_channel_type { /* The next two fields are present for IPA v4.5 and above */ #define PREFETCH_MODE_FMASK GENMASK(13, 10) #define EMPTY_LVL_THRSHOLD_FMASK GENMASK(23, 16) +/* The next field is present for IPA v4.9 and above */ +#define DB_IN_BYTES GENMASK(24, 24) + /** enum gsi_prefetch_mode - PREFETCH_MODE field in CH_C_QOS */ enum gsi_prefetch_mode { GSI_USE_PREFETCH_BUFS = 0x0, @@ -146,19 +180,25 @@ enum gsi_prefetch_mode { GSI_EE_N_EV_CH_E_CNTXT_0_OFFSET((ev), GSI_EE_AP) #define GSI_EE_N_EV_CH_E_CNTXT_0_OFFSET(ev, ee) \ (0x0001d000 + 0x4000 * (ee) + 0x80 * (ev)) +/* enum gsi_channel_type defines EV_CHTYPE field values in EV_CH_E_CNTXT_0 */ #define EV_CHTYPE_FMASK GENMASK(3, 0) #define EV_EE_FMASK GENMASK(7, 4) #define EV_EVCHID_FMASK GENMASK(15, 8) #define EV_INTYPE_FMASK GENMASK(16, 16) #define EV_CHSTATE_FMASK GENMASK(23, 20) #define EV_ELEMENT_SIZE_FMASK GENMASK(31, 24) -/* enum gsi_channel_type defines EV_CHTYPE field values in EV_CH_E_CNTXT_0 */ #define GSI_EV_CH_E_CNTXT_1_OFFSET(ev) \ GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET((ev), GSI_EE_AP) #define GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET(ev, ee) \ (0x0001d004 + 0x4000 * (ee) + 0x80 * (ev)) -#define EV_R_LENGTH_FMASK GENMASK(15, 0) +/* Encoded value for EV_CH_C_CNTXT_1 register EV_R_LENGTH field */ +static inline u32 ev_r_length_encoded(enum ipa_version version, u32 length) +{ + if (version < IPA_VERSION_4_9) + return u32_encode_bits(length, GENMASK(15, 0)); + return u32_encode_bits(length, GENMASK(19, 0)); +} #define GSI_EV_CH_E_CNTXT_2_OFFSET(ev) \ GSI_EE_N_EV_CH_E_CNTXT_2_OFFSET((ev), GSI_EE_AP) @@ -248,6 +288,7 @@ enum gsi_ch_cmd_opcode { GSI_CH_STOP = 0x2, GSI_CH_RESET = 0x9, GSI_CH_DE_ALLOC = 0xa, + GSI_CH_DB_STOP = 0xb, }; #define GSI_EV_CH_CMD_OFFSET \ @@ -278,6 +319,7 @@ enum gsi_generic_cmd_opcode { GSI_GENERIC_ALLOCATE_CHANNEL = 0x2, }; +/* The next register is present for IPA v3.5.1 and above */ #define GSI_GSI_HW_PARAM_2_OFFSET \ GSI_EE_N_GSI_HW_PARAM_2_OFFSET(GSI_EE_AP) #define GSI_EE_N_GSI_HW_PARAM_2_OFFSET(ee) \ @@ -300,7 +342,7 @@ enum gsi_generic_cmd_opcode { enum gsi_iram_size { IRAM_SIZE_ONE_KB = 0x0, IRAM_SIZE_TWO_KB = 0x1, -/* The next two values are available for IPA v4.0 and above */ + /* The next two values are available for IPA v4.0 and above */ IRAM_SIZE_TWO_N_HALF_KB = 0x2, IRAM_SIZE_THREE_KB = 0x3, /* The next two values are available for IPA v4.5 and above */ @@ -424,6 +466,8 @@ enum gsi_general_id { GSI_EE_N_ERROR_LOG_OFFSET(GSI_EE_AP) #define GSI_EE_N_ERROR_LOG_OFFSET(ee) \ (0x0001f200 + 0x4000 * (ee)) + +/* Fields below are present for IPA v3.5.1 and above */ #define ERR_ARG3_FMASK GENMASK(3, 0) #define ERR_ARG2_FMASK GENMASK(7, 4) #define ERR_ARG1_FMASK GENMASK(11, 8) @@ -474,7 +518,4 @@ enum gsi_generic_ee_result { GENERIC_EE_NO_RESOURCES = 0x7, }; -#define USB_MAX_PACKET_FMASK GENMASK(15, 15) /* 0: HS; 1: SS */ -#define MHI_BASE_CHANNEL_FMASK GENMASK(31, 24) - #endif /* _GSI_REG_H_ */ diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 6c3ed5b17b80..70c2b585f98d 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -153,11 +153,10 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, size = __roundup_pow_of_two(size); total_size = (count + max_alloc - 1) * size; - /* The allocator will give us a power-of-2 number of pages. But we - * can't guarantee that, so request it. 
That way we won't waste any - * memory that would be available beyond the required space. - * - * Note that gsi_trans_pool_exit_dma() assumes the total allocated + /* The allocator will give us a power-of-2 number of pages + * sufficient to satisfy our request. Round up our requested + * size to avoid any unused space in the allocation. This way + * gsi_trans_pool_exit_dma() can assume the total allocated * size is exactly (count * size). */ total_size = get_order(total_size) << PAGE_SHIFT; diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h index 3a4ab8a94d82..17fd1822d8a9 100644 --- a/drivers/net/ipa/gsi_trans.h +++ b/drivers/net/ipa/gsi_trans.h @@ -71,7 +71,7 @@ struct gsi_trans { /** * gsi_trans_pool_init() - Initialize a pool of structures for transactions - * @gsi: GSI pointer + * @pool: GSI transaction pool pointer * @size: Size of elements in the pool * @count: Minimum number of elements in the pool * @max_alloc: Maximum number of elements allocated at a time from pool @@ -123,7 +123,8 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr); /** - * gsi_trans_pool_exit() - Inverse of gsi_trans_pool_init() + * gsi_trans_pool_exit_dma() - Inverse of gsi_trans_pool_init_dma() + * @dev: Device used for DMA + * @pool: Pool pointer */ void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool); diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 802077631371..e7ff376cb5b7 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -44,6 +44,8 @@ enum ipa_flag { * @version: IPA hardware version * @pdev: Platform device * @completion: Used to signal pipeline clear transfer complete + * @nb: Notifier block used for remoteproc SSR + * @notifier: Remoteproc SSR notifier * @smp2p: SMP2P information * @clock: IPA clocking information * @table_addr: DMA address of filter/route table content @@ -58,13 +60,12 @@ enum ipa_flag { * @mem_size: Total size (bytes) of memory at @mem_virt * @mem: Array of IPA-local memory region descriptors * @imem_iova: I/O virtual address of IPA region in IMEM - * @imem_size; Size of IMEM region + * @imem_size: Size of IMEM region * @smem_iova: I/O virtual address of IPA region in SMEM - * @smem_size; Size of SMEM region + * @smem_size: Size of SMEM region * @zero_addr: DMA address of preallocated zero-filled memory * @zero_virt: Virtual address of preallocated zero-filled memory * @zero_size: Size (bytes) of preallocated zero-filled memory - * @wakeup_source: Wakeup source information * @available: Bit mask indicating endpoints hardware supports * @filter_map: Bit mask indicating endpoints that support filtering * @initialized: Bit mask indicating endpoints initialized diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index 2ac6dd8413de..525cdf28d9ea 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -153,7 +153,7 @@ static void ipa_cmd_validate_build(void) * of entries, and IPv4 and IPv6 route tables have the same number * of entries. 
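 * (The BUILD_BUG_ON() checks below verify that this worst-case
 * TABLE_SIZE fits in both the hashed and non-hashed table size
 * fields of the filter/route table init command.)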
*/ -#define TABLE_SIZE (TABLE_COUNT_MAX * IPA_TABLE_ENTRY_SIZE) +#define TABLE_SIZE (TABLE_COUNT_MAX * sizeof(__le64)) #define TABLE_COUNT_MAX max_t(u32, IPA_ROUTE_COUNT_MAX, IPA_FILTER_COUNT_MAX) BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK)); BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK)); diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index 6dd3d35cf315..b99262281f41 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -20,11 +20,18 @@ struct gsi_channel; /** * enum ipa_cmd_opcode: IPA immediate commands * - * All immediate commands are issued using the AP command TX endpoint. - * The numeric values here are the opcodes for IPA v3.5.1 hardware. + * @IPA_CMD_IP_V4_FILTER_INIT: Initialize IPv4 filter table + * @IPA_CMD_IP_V6_FILTER_INIT: Initialize IPv6 filter table + * @IPA_CMD_IP_V4_ROUTING_INIT: Initialize IPv4 routing table + * @IPA_CMD_IP_V6_ROUTING_INIT: Initialize IPv6 routing table + * @IPA_CMD_HDR_INIT_LOCAL: Initialize IPA-local header memory + * @IPA_CMD_REGISTER_WRITE: Register write performed by IPA + * @IPA_CMD_IP_PACKET_INIT: Set up next packet's destination endpoint + * @IPA_CMD_DMA_SHARED_MEM: DMA command performed by IPA + * @IPA_CMD_IP_PACKET_TAG_STATUS: Have next packet generate tag * status + * @IPA_CMD_NONE: Special (invalid) "not a command" value * - * IPA_CMD_NONE is a special (invalid) value that's used to indicate - * a request is *not* an immediate command. + * All immediate commands are issued using the AP command TX endpoint. */ enum ipa_cmd_opcode { IPA_CMD_NONE = 0x0, @@ -96,7 +103,7 @@ static inline bool ipa_cmd_data_valid(struct ipa *ipa) * * Return: 0 if successful, or a negative error code */ -int ipa_cmd_pool_init(struct gsi_channel *gsi_channel, u32 tre_count); +int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_count); /** * ipa_cmd_pool_exit() - Inverse of ipa_cmd_pool_init() @@ -124,7 +131,7 @@ void ipa_cmd_table_init_add(struct gsi_trans *trans, enum ipa_cmd_opcode opcode, /** * ipa_cmd_hdr_init_local_add() - Add a header init command to a transaction - * @ipa: IPA structure + * @trans: GSI transaction * @offset: Offset of header memory in IPA local space * @size: Size of header memory * @addr: DMA address of buffer to be written from diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-v3.5.1.c index 6b5173f47444..57703e95a3f9 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-v3.5.1.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. - * Copyright (C) 2019-2020 Linaro Ltd. + * Copyright (C) 2019-2021 Linaro Ltd. */ #include <linux/log2.h> @@ -11,7 +11,37 @@ #include "ipa_endpoint.h" #include "ipa_mem.h" -/* QSB configuration for the SDM845 SoC. 
*/ +/** enum ipa_resource_type - IPA resource types for an SoC having IPA v3.5.1 */ +enum ipa_resource_type { + /* Source resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS = 0, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, + IPA_RESOURCE_TYPE_SRC_HPS_DMARS, + IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, + + /* Destination resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_DST_DATA_SECTORS = 0, + IPA_RESOURCE_TYPE_DST_DPS_DMARS, +}; + +/* Resource groups used for an SoC having IPA v3.5.1 */ +enum ipa_rsrc_group_id { + /* Source resource group identifiers */ + IPA_RSRC_GROUP_SRC_LWA_DL = 0, + IPA_RSRC_GROUP_SRC_UL_DL, + IPA_RSRC_GROUP_SRC_MHI_DMA, + IPA_RSRC_GROUP_SRC_UC_RX_Q, + IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ + + /* Destination resource group identifiers */ + IPA_RSRC_GROUP_DST_LWA_DL = 0, + IPA_RSRC_GROUP_DST_UL_DL_DPL, + IPA_RSRC_GROUP_DST_UNUSED_2, + IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ +}; + +/* QSB configuration data for an SoC having IPA v3.5.1 */ static const struct ipa_qsb_data ipa_qsb_data[] = { [IPA_QSB_MASTER_DDR] = { .max_writes = 8, @@ -23,7 +53,7 @@ static const struct ipa_qsb_data ipa_qsb_data[] = { }, }; -/* Endpoint configuration for the SDM845 SoC. */ +/* Endpoint configuration data for an SoC having IPA v3.5.1 */ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { [IPA_ENDPOINT_AP_COMMAND_TX] = { .ee_id = GSI_EE_AP, @@ -37,7 +67,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 1, + .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, .dma_mode = true, .dma_endpoint = IPA_ENDPOINT_AP_LAN_RX, .tx = { @@ -58,7 +88,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 1, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, .aggregation = true, .status_enable = true, .rx = { @@ -80,7 +110,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .filter_support = true, .config = { - .resource_group = 1, + .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, .checksum = true, .qmap = true, .status_enable = true, @@ -104,7 +134,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 1, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, .checksum = true, .qmap = true, .aggregation = true, @@ -114,12 +144,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, }, }, - [IPA_ENDPOINT_MODEM_COMMAND_TX] = { - .ee_id = GSI_EE_MODEM, - .channel_id = 1, - .endpoint_id = 4, - .toward_ipa = true, - }, [IPA_ENDPOINT_MODEM_LAN_TX] = { .ee_id = GSI_EE_MODEM, .channel_id = 0, @@ -129,12 +153,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .filter_support = true, }, }, - [IPA_ENDPOINT_MODEM_LAN_RX] = { - .ee_id = GSI_EE_MODEM, - .channel_id = 3, - .endpoint_id = 13, - .toward_ipa = false, - }, [IPA_ENDPOINT_MODEM_AP_TX] = { .ee_id = GSI_EE_MODEM, .channel_id = 4, @@ -152,102 +170,105 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, }; -/* For the SDM845, resource groups are allocated this way: - * group 0: LWA_DL - * group 1: UL_DL - */ -static const struct ipa_resource_src ipa_resource_src[] = { - { - .type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS, - .limits[0] = { - .min = 1, - .max = 255, +/* Source resource configuration data for an SoC having IPA v3.5.1 */ +static const 
struct ipa_resource ipa_resource_src[] = { + [IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = { + .limits[IPA_RSRC_GROUP_SRC_LWA_DL] = { + .min = 1, .max = 255, }, - .limits[1] = { - .min = 1, - .max = 255, + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 1, .max = 255, + }, + .limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = { + .min = 1, .max = 63, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, - .limits[0] = { - .min = 10, - .max = 10, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = { + .limits[IPA_RSRC_GROUP_SRC_LWA_DL] = { + .min = 10, .max = 10, }, - .limits[1] = { - .min = 10, - .max = 10, + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 10, .max = 10, + }, + .limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = { + .min = 8, .max = 8, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, - .limits[0] = { - .min = 12, - .max = 12, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = { + .limits[IPA_RSRC_GROUP_SRC_LWA_DL] = { + .min = 12, .max = 12, }, - .limits[1] = { - .min = 14, - .max = 14, + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 14, .max = 14, + }, + .limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = { + .min = 8, .max = 8, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS, - .limits[0] = { - .min = 0, - .max = 63, + [IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = { + .limits[IPA_RSRC_GROUP_SRC_LWA_DL] = { + .min = 0, .max = 63, }, - .limits[1] = { - .min = 0, - .max = 63, + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 0, .max = 63, + }, + .limits[IPA_RSRC_GROUP_SRC_MHI_DMA] = { + .min = 0, .max = 63, + }, + .limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = { + .min = 0, .max = 63, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, - .limits[0] = { - .min = 14, - .max = 14, + [IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = { + .limits[IPA_RSRC_GROUP_SRC_LWA_DL] = { + .min = 14, .max = 14, }, - .limits[1] = { - .min = 20, - .max = 20, + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 20, .max = 20, + }, + .limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = { + .min = 14, .max = 14, }, }, }; -static const struct ipa_resource_dst ipa_resource_dst[] = { - { - .type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS, - .limits[0] = { - .min = 4, - .max = 4, +/* Destination resource configuration data for an SoC having IPA v3.5.1 */ +static const struct ipa_resource ipa_resource_dst[] = { + [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { + .limits[IPA_RSRC_GROUP_DST_LWA_DL] = { + .min = 4, .max = 4, }, .limits[1] = { - .min = 4, - .max = 4, + .min = 4, .max = 4, }, + .limits[IPA_RSRC_GROUP_DST_UNUSED_2] = { + .min = 3, .max = 3, + } }, - { - .type = IPA_RESOURCE_TYPE_DST_DPS_DMARS, - .limits[0] = { - .min = 2, - .max = 63, + [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { + .limits[IPA_RSRC_GROUP_DST_LWA_DL] = { + .min = 2, .max = 63, }, - .limits[1] = { - .min = 1, - .max = 63, + .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .min = 1, .max = 63, }, + .limits[IPA_RSRC_GROUP_DST_UNUSED_2] = { + .min = 1, .max = 2, + } }, }; -/* Resource configuration for the SDM845 SoC. */ +/* Resource configuration data for an SoC having IPA v3.5.1 */ static const struct ipa_resource_data ipa_resource_data = { + .rsrc_group_src_count = IPA_RSRC_GROUP_SRC_COUNT, + .rsrc_group_dst_count = IPA_RSRC_GROUP_DST_COUNT, .resource_src_count = ARRAY_SIZE(ipa_resource_src), .resource_src = ipa_resource_src, .resource_dst_count = ARRAY_SIZE(ipa_resource_dst), .resource_dst = ipa_resource_dst, }; -/* IPA-resident memory region configuration for the SDM845 SoC. 
*/ +/* IPA-resident memory region data for an SoC having IPA v3.5.1 */ static const struct ipa_mem ipa_mem_local_data[] = { [IPA_MEM_UC_SHARED] = { .offset = 0x0000, @@ -326,6 +347,7 @@ static const struct ipa_mem ipa_mem_local_data[] = { }, }; +/* Memory configuration data for an SoC having IPA v3.5.1 */ static const struct ipa_mem_data ipa_mem_data = { .local_count = ARRAY_SIZE(ipa_mem_local_data), .local = ipa_mem_local_data, @@ -355,15 +377,21 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = { }, }; +/* Clock and interconnect configuration data for an SoC having IPA v3.5.1 */ static const struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 75 * 1000 * 1000, /* Hz */ .interconnect_count = ARRAY_SIZE(ipa_interconnect_data), .interconnect_data = ipa_interconnect_data, }; -/* Configuration data for the SDM845 SoC. */ -const struct ipa_data ipa_data_sdm845 = { +/* Configuration data for an SoC having IPA v3.5.1 */ +const struct ipa_data ipa_data_v3_5_1 = { .version = IPA_VERSION_3_5_1, + .backward_compat = BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK | + BCR_TX_NOT_USING_BRESP_FMASK | + BCR_SUSPEND_L2_IRQ_FMASK | + BCR_HOLB_DROP_L2_IRQ_FMASK | + BCR_DUAL_TX_FMASK, .qsb_count = ARRAY_SIZE(ipa_qsb_data), .qsb_data = ipa_qsb_data, .endpoint_count = ARRAY_SIZE(ipa_gsi_endpoint_data), diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-v4.2.c index 621ad15c9e67..8744f19c6401 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-v4.2.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2019-2020 Linaro Ltd. */ +/* Copyright (C) 2019-2021 Linaro Ltd. */ #include <linux/log2.h> @@ -9,7 +9,32 @@ #include "ipa_endpoint.h" #include "ipa_mem.h" -/* QSB configuration for the SC7180 SoC. */ +/** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.2 */ +enum ipa_resource_type { + /* Source resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS = 0, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, + IPA_RESOURCE_TYPE_SRC_HPS_DMARS, + IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, + + /* Destination resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_DST_DATA_SECTORS = 0, + IPA_RESOURCE_TYPE_DST_DPS_DMARS, +}; + +/* Resource groups used for an SoC having IPA v4.2 */ +enum ipa_rsrc_group_id { + /* Source resource group identifiers */ + IPA_RSRC_GROUP_SRC_UL_DL = 0, + IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ + + /* Destination resource group identifiers */ + IPA_RSRC_GROUP_DST_UL_DL_DPL = 0, + IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ +}; + +/* QSB configuration data for an SoC having IPA v4.2 */ static const struct ipa_qsb_data ipa_qsb_data[] = { [IPA_QSB_MASTER_DDR] = { .max_writes = 8, @@ -18,7 +43,7 @@ static const struct ipa_qsb_data ipa_qsb_data[] = { }, }; -/* Endpoint configuration for the SC7180 SoC. 
*/ +/* Endpoint configuration data for an SoC having IPA v4.2 */ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { [IPA_ENDPOINT_AP_COMMAND_TX] = { .ee_id = GSI_EE_AP, @@ -32,7 +57,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 0, + .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, .dma_mode = true, .dma_endpoint = IPA_ENDPOINT_AP_LAN_RX, .tx = { @@ -53,7 +78,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 0, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, .aggregation = true, .status_enable = true, .rx = { @@ -75,7 +100,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { .endpoint = { .filter_support = true, .config = { - .resource_group = 0, + .resource_group = IPA_RSRC_GROUP_SRC_UL_DL, .checksum = true, .qmap = true, .status_enable = true, @@ -100,7 +125,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, .endpoint = { .config = { - .resource_group = 0, + .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL, .checksum = true, .qmap = true, .aggregation = true, @@ -139,73 +164,60 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { }, }; -/* For the SC7180, resource groups are allocated this way: - * group 0: UL_DL - */ -static const struct ipa_resource_src ipa_resource_src[] = { - { - .type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS, - .limits[0] = { - .min = 3, - .max = 63, +/* Source resource configuration data for an SoC having IPA v4.2 */ +static const struct ipa_resource ipa_resource_src[] = { + [IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = { + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 3, .max = 63, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, - .limits[0] = { - .min = 3, - .max = 3, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = { + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 3, .max = 3, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, - .limits[0] = { - .min = 10, - .max = 10, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = { + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 10, .max = 10, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS, - .limits[0] = { - .min = 1, - .max = 1, + [IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = { + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 1, .max = 1, }, }, - { - .type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, - .limits[0] = { - .min = 5, - .max = 5, + [IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = { + .limits[IPA_RSRC_GROUP_SRC_UL_DL] = { + .min = 5, .max = 5, }, }, }; -static const struct ipa_resource_dst ipa_resource_dst[] = { - { - .type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS, - .limits[0] = { - .min = 3, - .max = 3, +/* Destination resource configuration data for an SoC having IPA v4.2 */ +static const struct ipa_resource ipa_resource_dst[] = { + [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .min = 3, .max = 3, }, }, - { - .type = IPA_RESOURCE_TYPE_DST_DPS_DMARS, - .limits[0] = { - .min = 1, - .max = 63, + [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { + .limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = { + .min = 1, .max = 63, }, }, }; -/* Resource configuration for the SC7180 SoC. 
*/ +/* Resource configuration data for an SoC having IPA v4.2 */ static const struct ipa_resource_data ipa_resource_data = { + .rsrc_group_src_count = IPA_RSRC_GROUP_SRC_COUNT, + .rsrc_group_dst_count = IPA_RSRC_GROUP_DST_COUNT, .resource_src_count = ARRAY_SIZE(ipa_resource_src), .resource_src = ipa_resource_src, .resource_dst_count = ARRAY_SIZE(ipa_resource_dst), .resource_dst = ipa_resource_dst, }; -/* IPA-resident memory region configuration for the SC7180 SoC. */ +/* IPA-resident memory region data for an SoC having IPA v4.2 */ static const struct ipa_mem ipa_mem_local_data[] = { [IPA_MEM_UC_SHARED] = { .offset = 0x0000, @@ -299,6 +311,7 @@ static const struct ipa_mem ipa_mem_local_data[] = { }, }; +/* Memory configuration data for an SoC having IPA v4.2 */ static const struct ipa_mem_data ipa_mem_data = { .local_count = ARRAY_SIZE(ipa_mem_local_data), .local = ipa_mem_local_data, @@ -308,7 +321,7 @@ static const struct ipa_mem_data ipa_mem_data = { .smem_size = 0x00002000, }; -/* Interconnect bandwidths are in 1000 byte/second units */ +/* Interconnect rates are in 1000 byte/second units */ static const struct ipa_interconnect_data ipa_interconnect_data[] = { { .name = "memory", @@ -328,15 +341,17 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = { }, }; +/* Clock and interconnect configuration data for an SoC having IPA v4.2 */ static const struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 100 * 1000 * 1000, /* Hz */ .interconnect_count = ARRAY_SIZE(ipa_interconnect_data), .interconnect_data = ipa_interconnect_data, }; -/* Configuration data for the SC7180 SoC. */ -const struct ipa_data ipa_data_sc7180 = { +/* Configuration data for an SoC having IPA v4.2 */ +const struct ipa_data ipa_data_v4_2 = { .version = IPA_VERSION_4_2, + /* backward_compat value is 0 */ .qsb_count = ARRAY_SIZE(ipa_qsb_data), .qsb_data = ipa_qsb_data, .endpoint_count = ARRAY_SIZE(ipa_gsi_endpoint_data), diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 7816583fc14a..769f68923527 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. - * Copyright (C) 2019-2020 Linaro Ltd. + * Copyright (C) 2019-2021 Linaro Ltd. */ #ifndef _IPA_DATA_H_ #define _IPA_DATA_H_ @@ -46,9 +46,8 @@ * the IPA endpoint. */ -/* The maximum value returned by ipa_resource_group_{src,dst}_count() */ -#define IPA_RESOURCE_GROUP_SRC_MAX 5 -#define IPA_RESOURCE_GROUP_DST_MAX 5 +/* The maximum possible number of source or destination resource groups */ +#define IPA_RESOURCE_GROUP_MAX 8 /** enum ipa_qsb_master_id - array index for IPA QSB configuration data */ enum ipa_qsb_master_id { @@ -91,8 +90,8 @@ struct ipa_qsb_data { * that can be included in a single transaction. 
*/ struct gsi_channel_data { - u16 tre_count; - u16 event_count; + u16 tre_count; /* must be a power of 2 */ + u16 event_count; /* must be a power of 2 */ u8 tlv_count; }; @@ -177,12 +176,12 @@ struct ipa_endpoint_data { /** * struct ipa_gsi_endpoint_data - GSI channel/IPA endpoint data - * ee: GSI execution environment ID - * channel_id: GSI channel ID - * endpoint_id: IPA endpoint ID - * toward_ipa: direction of data transfer - * gsi: GSI channel configuration data (see above) - * ipa: IPA endpoint configuration data (see above) + * @ee_id: GSI execution environment ID + * @channel_id: GSI channel ID + * @endpoint_id: IPA endpoint ID + * @toward_ipa: direction of data transfer + * @channel: GSI channel configuration data (see above) + * @endpoint: IPA endpoint configuration data (see above) */ struct ipa_gsi_endpoint_data { u8 ee_id; /* enum gsi_ee_id */ @@ -194,21 +193,6 @@ struct ipa_gsi_endpoint_data { struct ipa_endpoint_data endpoint; }; -/** enum ipa_resource_type_src - source resource types */ -enum ipa_resource_type_src { - IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS, - IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, - IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, - IPA_RESOURCE_TYPE_SRC_HPS_DMARS, - IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, -}; - -/** enum ipa_resource_type_dst - destination resource types */ -enum ipa_resource_type_dst { - IPA_RESOURCE_TYPE_DST_DATA_SECTORS, - IPA_RESOURCE_TYPE_DST_DPS_DMARS, -}; - /** * struct ipa_resource_limits - minimum and maximum resource counts * @min: minimum number of resources of a given type @@ -220,27 +204,17 @@ struct ipa_resource_limits { }; /** - * struct ipa_resource_src - source endpoint group resource usage - * @type: source group resource type - * @limits: array of limits to use for each resource group - */ -struct ipa_resource_src { - enum ipa_resource_type_src type; - struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_SRC_MAX]; -}; - -/** - * struct ipa_resource_dst - destination endpoint group resource usage - * @type: destination group resource type - * @limits: array of limits to use for each resource group + * struct ipa_resource - resource group source or destination resource usage + * @limits: array of resource limits, indexed by group */ -struct ipa_resource_dst { - enum ipa_resource_type_dst type; - struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_DST_MAX]; +struct ipa_resource { + struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_MAX]; }; /** * struct ipa_resource_data - IPA resource configuration data + * @rsrc_group_src_count: number of source resource groups supported + * @rsrc_group_dst_count: number of destination resource groups supported * @resource_src_count: number of entries in the resource_src array * @resource_src: source endpoint group resources * @resource_dst_count: number of entries in the resource_dst array @@ -252,10 +226,12 @@ struct ipa_resource_dst { * programming it at initialization time, so we specify it here. 
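 * (With the IPA v3.5.1 data earlier in this series, for example, these
 * counts are IPA_RSRC_GROUP_SRC_COUNT = 4 and IPA_RSRC_GROUP_DST_COUNT = 3.)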
*/ struct ipa_resource_data { + u32 rsrc_group_src_count; + u32 rsrc_group_dst_count; u32 resource_src_count; - const struct ipa_resource_src *resource_src; + const struct ipa_resource *resource_src; u32 resource_dst_count; - const struct ipa_resource_dst *resource_dst; + const struct ipa_resource *resource_dst; }; /** @@ -303,6 +279,7 @@ struct ipa_clock_data { /** * struct ipa_data - combined IPA/GSI configuration data * @version: IPA hardware version + * @backward_compat: BCR register value (prior to IPA v4.5 only) * @qsb_count: number of entries in the qsb_data array * @qsb_data: Qualcomm System Bus configuration data * @endpoint_count: number of entries in the endpoint_data array @@ -313,6 +290,7 @@ struct ipa_clock_data { */ struct ipa_data { enum ipa_version version; + u32 backward_compat; u32 qsb_count; /* number of entries in qsb_data[] */ const struct ipa_qsb_data *qsb_data; u32 endpoint_count; /* number of entries in endpoint_data[] */ @@ -322,7 +300,7 @@ struct ipa_data { const struct ipa_clock_data *clock_data; }; -extern const struct ipa_data ipa_data_sdm845; -extern const struct ipa_data ipa_data_sc7180; +extern const struct ipa_data ipa_data_v3_5_1; +extern const struct ipa_data ipa_data_v4_2; #endif /* _IPA_DATA_H_ */ diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 38e83cd467b5..dd24179383c1 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -809,7 +809,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds) * The best precision is achieved when the base value is as * large as possible. Find the highest set bit in the tick * count, and extract the number of bits in the base field - * such that that high bit is included. + * such that the high bit is included. */ high = fls(ticks); /* 1..32 */ width = HWEIGHT32(BASE_VALUE_FMASK); @@ -1448,7 +1448,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) if (ret) goto out_suspend_again; - /* Finally, reset and reconfigure the channel again (re-enabling the + /* Finally, reset and reconfigure the channel again (re-enabling * the doorbell engine if appropriate). Sleep for 1 millisecond to * complete the channel reset sequence. Finish by suspending the * channel again (if necessary). 
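The register changes in this series share one idiom: a fixed *_FMASK constant becomes a small inline helper that takes the IPA version and returns the fully encoded field value, so per-version bit layouts stay next to the register definition instead of at call sites. A minimal self-contained sketch of the pattern, using a hypothetical WIDGET register whose LEN field widened in a hypothetical second hardware revision (these names are illustrative only, not part of the driver):

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitfield.h>

enum hw_version {
	HW_VERSION_1 = 1,
	HW_VERSION_2,
};

/* Encoded value for the hypothetical WIDGET register LEN field */
static inline u32 widget_len_encoded(enum hw_version version, u32 len)
{
	/* The field occupies bits 7:0 before v2, bits 11:0 from v2 on */
	if (version < HW_VERSION_2)
		return u32_encode_bits(len, GENMASK(7, 0));

	return u32_encode_bits(len, GENMASK(11, 0));
}

A caller passes the version probed at initialization, e.g. widget_len_encoded(version, len); this is the same shape as r_length_encoded(), ev_r_length_encoded() and rsrc_grp_encoded() above.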
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index c6c55ea35394..f034a9e6ef21 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -25,15 +25,16 @@ struct ipa_gsi_endpoint_data; #define IPA_MTU ETH_DATA_LEN enum ipa_endpoint_name { - IPA_ENDPOINT_AP_MODEM_TX, - IPA_ENDPOINT_MODEM_LAN_TX, - IPA_ENDPOINT_MODEM_COMMAND_TX, IPA_ENDPOINT_AP_COMMAND_TX, - IPA_ENDPOINT_MODEM_AP_TX, IPA_ENDPOINT_AP_LAN_RX, + IPA_ENDPOINT_AP_MODEM_TX, IPA_ENDPOINT_AP_MODEM_RX, - IPA_ENDPOINT_MODEM_AP_RX, + IPA_ENDPOINT_MODEM_COMMAND_TX, + IPA_ENDPOINT_MODEM_LAN_TX, IPA_ENDPOINT_MODEM_LAN_RX, + IPA_ENDPOINT_MODEM_AP_TX, + IPA_ENDPOINT_MODEM_AP_RX, + IPA_ENDPOINT_MODEM_DL_NLO_TX, IPA_ENDPOINT_COUNT, /* Number of names (not an index) */ }; @@ -41,8 +42,20 @@ enum ipa_endpoint_name { /** * struct ipa_endpoint - IPA endpoint information - * @channel_id: EP's GSI channel - * @evt_ring_id: EP's GSI channel event ring + * @ipa: IPA pointer + * @ee_id: Execution environment the endpoint is associated with + * @channel_id: GSI channel used by the endpoint + * @endpoint_id: IPA endpoint number + * @toward_ipa: Endpoint direction (true = TX, false = RX) + * @data: Endpoint configuration data + * @trans_tre_max: Maximum number of TRE descriptors per transaction + * @evt_ring_id: GSI event ring used by the endpoint + * @netdev: Network device pointer, if endpoint uses one + * @replenish_enabled: Whether receive buffer replenishing is enabled + * @replenish_ready: Number of replenish transactions without doorbell + * @replenish_saved: Replenish requests held while disabled + * @replenish_backlog: Number of buffers needed to fill hardware queue + * @replenish_work: Work item used for repeated replenish failures */ struct ipa_endpoint { struct ipa *ipa; @@ -52,7 +65,7 @@ struct ipa_endpoint { bool toward_ipa; const struct ipa_endpoint_config_data *data; - u32 trans_tre_max; /* maximum descriptors per transaction */ + u32 trans_tre_max; u32 evt_ring_id; /* Net device this endpoint is associated with, if any */ diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 61dd7605bcb6..c46df0b7c4e5 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -54,12 +54,14 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id) bool uc_irq = ipa_interrupt_uc(interrupt, irq_id); struct ipa *ipa = interrupt->ipa; u32 mask = BIT(irq_id); + u32 offset; /* For microcontroller interrupts, clear the interrupt right away, * "to avoid clearing unhandled interrupts." */ + offset = ipa_reg_irq_clr_offset(ipa->version); if (uc_irq) - iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET); + iowrite32(mask, ipa->reg_virt + offset); if (irq_id < IPA_IRQ_COUNT && interrupt->handler[irq_id]) interrupt->handler[irq_id](interrupt->ipa, irq_id); @@ -69,7 +71,7 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id) * so defer clearing until after the handler has been called. */ if (!uc_irq) - iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET); + iowrite32(mask, ipa->reg_virt + offset); } /* Process all IPA interrupt types that have been signaled */ @@ -77,13 +79,15 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt) { struct ipa *ipa = interrupt->ipa; u32 enabled = interrupt->enabled; + u32 offset; u32 mask; /* The status register indicates which conditions are present, * including conditions whose interrupt is not enabled. Handle * only the enabled ones. 
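	 * (The loop below re-reads the status register after processing
	 * each batch, since new enabled conditions may have been signaled
	 * while earlier ones were being handled.)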
*/ - mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET); + offset = ipa_reg_irq_stts_offset(ipa->version); + mask = ioread32(ipa->reg_virt + offset); while ((mask &= enabled)) { do { u32 irq_id = __ffs(mask); @@ -92,7 +96,7 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt) ipa_interrupt_process(interrupt, irq_id); } while (mask); - mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET); + mask = ioread32(ipa->reg_virt + offset); } } @@ -115,14 +119,17 @@ static irqreturn_t ipa_isr(int irq, void *dev_id) { struct ipa_interrupt *interrupt = dev_id; struct ipa *ipa = interrupt->ipa; + u32 offset; u32 mask; - mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET); + offset = ipa_reg_irq_stts_offset(ipa->version); + mask = ioread32(ipa->reg_virt + offset); if (mask & interrupt->enabled) return IRQ_WAKE_THREAD; /* Nothing in the mask was supposed to cause an interrupt */ - iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET); + offset = ipa_reg_irq_clr_offset(ipa->version); + iowrite32(mask, ipa->reg_virt + offset); dev_err(&ipa->pdev->dev, "%s: unexpected interrupt, mask 0x%08x\n", __func__, mask); @@ -136,15 +143,22 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, { struct ipa *ipa = interrupt->ipa; u32 mask = BIT(endpoint_id); + u32 offset; u32 val; /* assert(mask & ipa->available); */ - val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET); + + /* IPA version 3.0 does not support TX_SUSPEND interrupt control */ + if (ipa->version == IPA_VERSION_3_0) + return; + + offset = ipa_reg_irq_suspend_en_offset(ipa->version); + val = ioread32(ipa->reg_virt + offset); if (enable) val |= mask; else val &= ~mask; - iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET); + iowrite32(val, ipa->reg_virt + offset); } /* Enable TX_SUSPEND for an endpoint */ @@ -165,10 +179,18 @@ ipa_interrupt_suspend_disable(struct ipa_interrupt *interrupt, u32 endpoint_id) void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) { struct ipa *ipa = interrupt->ipa; + u32 offset; u32 val; - val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_INFO_OFFSET); - iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_SUSPEND_CLR_OFFSET); + offset = ipa_reg_irq_suspend_info_offset(ipa->version); + val = ioread32(ipa->reg_virt + offset); + + /* SUSPEND interrupt status isn't cleared on IPA version 3.0 */ + if (ipa->version == IPA_VERSION_3_0) + return; + + offset = ipa_reg_irq_suspend_clr_offset(ipa->version); + iowrite32(val, ipa->reg_virt + offset); } /* Simulate arrival of an IPA TX_SUSPEND interrupt */ @@ -182,13 +204,15 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq, ipa_irq_handler_t handler) { struct ipa *ipa = interrupt->ipa; + u32 offset; /* assert(ipa_irq < IPA_IRQ_COUNT); */ interrupt->handler[ipa_irq] = handler; /* Update the IPA interrupt mask to enable it */ interrupt->enabled |= BIT(ipa_irq); - iowrite32(interrupt->enabled, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET); + offset = ipa_reg_irq_en_offset(ipa->version); + iowrite32(interrupt->enabled, ipa->reg_virt + offset); } /* Remove the handler for an IPA interrupt type */ @@ -196,11 +220,13 @@ void ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) { struct ipa *ipa = interrupt->ipa; + u32 offset; /* assert(ipa_irq < IPA_IRQ_COUNT); */ /* Update the IPA interrupt mask to disable it */ interrupt->enabled &= ~BIT(ipa_irq); - iowrite32(interrupt->enabled, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET); + offset = 
ipa_reg_irq_en_offset(ipa->version); + iowrite32(interrupt->enabled, ipa->reg_virt + offset); interrupt->handler[ipa_irq] = NULL; } @@ -211,6 +237,7 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; struct ipa_interrupt *interrupt; unsigned int irq; + u32 offset; int ret; ret = platform_get_irq_byname(ipa->pdev, "ipa"); @@ -228,7 +255,8 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa) interrupt->irq = irq; /* Start with all IPA interrupts disabled */ - iowrite32(0, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET); + offset = ipa_reg_irq_en_offset(ipa->version); + iowrite32(0, ipa->reg_virt + offset); ret = request_threaded_irq(irq, ipa_isr, ipa_isr_thread, IRQF_ONESHOT, "ipa", interrupt); diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index b5d63a0cd19e..d5c486a6800d 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -24,6 +24,7 @@ typedef void (*ipa_irq_handler_t)(struct ipa *ipa, enum ipa_irq_id irq_id); /** * ipa_interrupt_add() - Register a handler for an IPA interrupt type + * @interrupt: IPA interrupt structure * @irq_id: IPA interrupt type * @handler: Handler function for the interrupt * diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index ba1bfc30210a..a970d10e650e 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. - * Copyright (C) 2018-2020 Linaro Ltd. + * Copyright (C) 2018-2021 Linaro Ltd. */ #include <linux/types.h> @@ -22,6 +22,7 @@ #include "ipa_clock.h" #include "ipa_data.h" #include "ipa_endpoint.h" +#include "ipa_resource.h" #include "ipa_cmd.h" #include "ipa_reg.h" #include "ipa_mem.h" @@ -222,7 +223,7 @@ static void ipa_teardown(struct ipa *ipa) gsi_teardown(&ipa->gsi); } -/* Configure QMB Core Master Port selection */ +/* Configure bus access behavior for IPA components */ static void ipa_hardware_config_comp(struct ipa *ipa) { u32 val; @@ -396,9 +397,9 @@ static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data) u32 granularity; u32 val; - /* IPA v4.5 has no backward compatibility register */ + /* IPA v4.5+ has no backward compatibility register */ if (version < IPA_VERSION_4_5) { - val = ipa_reg_bcr_val(version); + val = data->backward_compat; iowrite32(val, ipa->reg_virt + IPA_REG_BCR_OFFSET); } @@ -452,151 +453,6 @@ static void ipa_hardware_deconfig(struct ipa *ipa) ipa_hardware_dcd_deconfig(ipa); } -#ifdef IPA_VALIDATION - -static bool ipa_resource_limits_valid(struct ipa *ipa, - const struct ipa_resource_data *data) -{ - u32 group_count; - u32 i; - u32 j; - - /* We program at most 6 source or destination resource group limits */ - BUILD_BUG_ON(IPA_RESOURCE_GROUP_SRC_MAX > 6); - - group_count = ipa_resource_group_src_count(ipa->version); - if (!group_count || group_count > IPA_RESOURCE_GROUP_SRC_MAX) - return false; - - /* Return an error if a non-zero resource limit is specified - * for a resource group not supported by hardware. 
- */ - for (i = 0; i < data->resource_src_count; i++) { - const struct ipa_resource_src *resource; - - resource = &data->resource_src[i]; - for (j = group_count; j < IPA_RESOURCE_GROUP_SRC_MAX; j++) - if (resource->limits[j].min || resource->limits[j].max) - return false; - } - - group_count = ipa_resource_group_dst_count(ipa->version); - if (!group_count || group_count > IPA_RESOURCE_GROUP_DST_MAX) - return false; - - for (i = 0; i < data->resource_dst_count; i++) { - const struct ipa_resource_dst *resource; - - resource = &data->resource_dst[i]; - for (j = group_count; j < IPA_RESOURCE_GROUP_DST_MAX; j++) - if (resource->limits[j].min || resource->limits[j].max) - return false; - } - - return true; -} - -#else /* !IPA_VALIDATION */ - -static bool ipa_resource_limits_valid(struct ipa *ipa, - const struct ipa_resource_data *data) -{ - return true; -} - -#endif /* !IPA_VALIDATION */ - -static void -ipa_resource_config_common(struct ipa *ipa, u32 offset, - const struct ipa_resource_limits *xlimits, - const struct ipa_resource_limits *ylimits) -{ - u32 val; - - val = u32_encode_bits(xlimits->min, X_MIN_LIM_FMASK); - val |= u32_encode_bits(xlimits->max, X_MAX_LIM_FMASK); - if (ylimits) { - val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK); - val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK); - } - - iowrite32(val, ipa->reg_virt + offset); -} - -static void ipa_resource_config_src(struct ipa *ipa, - const struct ipa_resource_src *resource) -{ - u32 group_count = ipa_resource_group_src_count(ipa->version); - const struct ipa_resource_limits *ylimits; - u32 offset; - - offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 1 ? NULL : &resource->limits[1]; - ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); - - if (group_count < 2) - return; - - offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 3 ? NULL : &resource->limits[3]; - ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); - - if (group_count < 4) - return; - - offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 5 ? NULL : &resource->limits[5]; - ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); -} - -static void ipa_resource_config_dst(struct ipa *ipa, - const struct ipa_resource_dst *resource) -{ - u32 group_count = ipa_resource_group_dst_count(ipa->version); - const struct ipa_resource_limits *ylimits; - u32 offset; - - offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 1 ? NULL : &resource->limits[1]; - ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); - - if (group_count < 2) - return; - - offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 3 ? NULL : &resource->limits[3]; - ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); - - if (group_count < 4) - return; - - offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type); - ylimits = group_count == 5 ? 
NULL : &resource->limits[5]; - ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); -} - -static int -ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data) -{ - u32 i; - - if (!ipa_resource_limits_valid(ipa, data)) - return -EINVAL; - - for (i = 0; i < data->resource_src_count; i++) - ipa_resource_config_src(ipa, &data->resource_src[i]); - - for (i = 0; i < data->resource_dst_count; i++) - ipa_resource_config_dst(ipa, &data->resource_dst[i]); - - return 0; -} - -static void ipa_resource_deconfig(struct ipa *ipa) -{ - /* Nothing to do */ -} - /** * ipa_config() - Configure IPA hardware * @ipa: IPA pointer @@ -722,11 +578,11 @@ out_release_firmware: static const struct of_device_id ipa_match[] = { { .compatible = "qcom,sdm845-ipa", - .data = &ipa_data_sdm845, + .data = &ipa_data_v3_5_1, }, { .compatible = "qcom,sc7180-ipa", - .data = &ipa_data_sc7180, + .data = &ipa_data_v4_2, }, { }, }; diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h index f82e8939622b..df61ef48df36 100644 --- a/drivers/net/ipa/ipa_mem.h +++ b/drivers/net/ipa/ipa_mem.h @@ -77,7 +77,7 @@ enum ipa_mem_id { * struct ipa_mem - IPA local memory region description * @offset: offset in IPA memory space to base of the region * @size: size in bytes base of the region - * @canary_count # 32-bit "canary" values that precede region + * @canary_count: Number of 32-bit "canary" values that precede region */ struct ipa_mem { u32 offset; diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index ccdb4a6a4c75..593665efbcf9 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -308,12 +308,12 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) mem = &ipa->mem[IPA_MEM_V4_ROUTE]; req.v4_route_tbl_info_valid = 1; req.v4_route_tbl_info.start = ipa->mem_offset + mem->offset; - req.v4_route_tbl_info.count = mem->size / IPA_TABLE_ENTRY_SIZE; + req.v4_route_tbl_info.count = mem->size / sizeof(__le64); mem = &ipa->mem[IPA_MEM_V6_ROUTE]; req.v6_route_tbl_info_valid = 1; req.v6_route_tbl_info.start = ipa->mem_offset + mem->offset; - req.v6_route_tbl_info.count = mem->size / IPA_TABLE_ENTRY_SIZE; + req.v6_route_tbl_info.count = mem->size / sizeof(__le64); mem = &ipa->mem[IPA_MEM_V4_FILTER]; req.v4_filter_tbl_start_valid = 1; @@ -352,8 +352,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) req.v4_hash_route_tbl_info_valid = 1; req.v4_hash_route_tbl_info.start = ipa->mem_offset + mem->offset; - req.v4_hash_route_tbl_info.count = - mem->size / IPA_TABLE_ENTRY_SIZE; + req.v4_hash_route_tbl_info.count = mem->size / sizeof(__le64); } mem = &ipa->mem[IPA_MEM_V6_ROUTE_HASHED]; @@ -361,8 +360,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) req.v6_hash_route_tbl_info_valid = 1; req.v6_hash_route_tbl_info.start = ipa->mem_offset + mem->offset; - req.v6_hash_route_tbl_info.count = - mem->size / IPA_TABLE_ENTRY_SIZE; + req.v6_hash_route_tbl_info.count = mem->size / sizeof(__le64); } mem = &ipa->mem[IPA_MEM_V4_FILTER_HASHED]; diff --git a/drivers/net/ipa/ipa_qmi.h b/drivers/net/ipa/ipa_qmi.h index 3993687593d0..b6f2055d35a6 100644 --- a/drivers/net/ipa/ipa_qmi.h +++ b/drivers/net/ipa/ipa_qmi.h @@ -13,11 +13,15 @@ struct ipa; /** * struct ipa_qmi - QMI state associated with an IPA - * @client_handle - used to send an QMI requests to the modem - * @server_handle - used to handle QMI requests from the modem - * @initialized - whether QMI initialization has completed - * @indication_register_received - tracks modem request receipt - * @init_driver_response_received - tracks 
modem response receipt + * @client_handle: Used to send QMI requests to the modem + * @server_handle: Used to handle QMI requests from the modem + * @modem_sq: QMAP socket address for the modem QMI server + * @init_driver_work: Work structure used for INIT_DRIVER message handling + * @initial_boot: True if first boot has not yet completed + * @uc_ready: True once DRIVER_INIT_COMPLETE request received + * @modem_ready: True when INIT_DRIVER response received + * @indication_requested: True when INDICATION_REGISTER request received + * @indication_sent: True when INIT_COMPLETE indication sent */ struct ipa_qmi { struct qmi_handle client_handle; diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index 86fe2978e810..286ea9634c49 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -66,14 +66,16 @@ struct ipa; */ #define IPA_REG_COMP_CFG_OFFSET 0x0000003c -/* The next field is not supported for IPA v4.1 */ +/* The next field is not supported for IPA v4.0+, not present for IPA v4.5+ */ #define ENABLE_FMASK GENMASK(0, 0) +/* The next field is present for IPA v4.7+ */ +#define RAM_ARB_PRI_CLIENT_SAMP_FIX_DIS_FMASK GENMASK(0, 0) #define GSI_SNOC_BYPASS_DIS_FMASK GENMASK(1, 1) #define GEN_QMB_0_SNOC_BYPASS_DIS_FMASK GENMASK(2, 2) #define GEN_QMB_1_SNOC_BYPASS_DIS_FMASK GENMASK(3, 3) -/* The next field is not present for IPA v4.5 */ +/* The next field is not present for IPA v4.5+ */ #define IPA_DCMP_FAST_CLK_EN_FMASK GENMASK(4, 4) -/* The remaining fields are not present for IPA v3.5.1 */ +/* The next twelve fields are present for IPA v4.0+ */ #define IPA_QMB_SELECT_CONS_EN_FMASK GENMASK(5, 5) #define IPA_QMB_SELECT_PROD_EN_FMASK GENMASK(6, 6) #define GSI_MULTI_INORDER_RD_DIS_FMASK GENMASK(7, 7) @@ -86,9 +88,41 @@ struct ipa; #define GSI_SNOC_CNOC_LOOP_PROT_DISABLE_FMASK GENMASK(14, 14) #define GSI_MULTI_AXI_MASTERS_DIS_FMASK GENMASK(15, 15) #define IPA_QMB_SELECT_GLOBAL_EN_FMASK GENMASK(16, 16) -#define IPA_ATOMIC_FETCHER_ARB_LOCK_DIS_FMASK GENMASK(20, 17) -/* The next field is present for IPA v4.5 */ -#define IPA_FULL_FLUSH_WAIT_RSC_CLOSE_EN_FMASK GENMASK(21, 21) +/* The next five fields are present for IPA v4.9+ */ +#define QMB_RAM_RD_CACHE_DISABLE_FMASK GENMASK(19, 19) +#define GENQMB_AOOOWR_FMASK GENMASK(20, 20) +#define IF_OUT_OF_BUF_STOP_RESET_MASK_EN_FMASK GENMASK(21, 21) +#define GEN_QMB_1_DYNAMIC_ASIZE_FMASK GENMASK(30, 30) +#define GEN_QMB_0_DYNAMIC_ASIZE_FMASK GENMASK(31, 31) + +/* Encoded value for COMP_CFG register ATOMIC_FETCHER_ARB_LOCK_DIS field */ +static inline u32 arbitration_lock_disable_encoded(enum ipa_version version, + u32 mask) +{ + /* assert(version >= IPA_VERSION_4_0); */ + + if (version < IPA_VERSION_4_9) + return u32_encode_bits(mask, GENMASK(20, 17)); + + if (version == IPA_VERSION_4_9) + return u32_encode_bits(mask, GENMASK(24, 22)); + + return u32_encode_bits(mask, GENMASK(23, 22)); +} + +/* Encoded value for COMP_CFG register FULL_FLUSH_WAIT_RS_CLOSURE_EN field */ +static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version, + bool enable) +{ + u32 val = enable ? 
1 : 0; + + /* assert(version >= IPA_VERSION_4_5); */ + + if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7) + return u32_encode_bits(val, GENMASK(21, 21)); + + return u32_encode_bits(val, GENMASK(17, 17)); +} #define IPA_REG_CLKON_CFG_OFFSET 0x00000044 #define RX_FMASK GENMASK(0, 0) @@ -108,13 +142,15 @@ struct ipa; #define ACK_MNGR_FMASK GENMASK(14, 14) #define D_DCPH_FMASK GENMASK(15, 15) #define H_DCPH_FMASK GENMASK(16, 16) -/* The next field is not present for IPA v4.5 */ +/* The next field is not present for IPA v4.5+ */ #define DCMP_FMASK GENMASK(17, 17) +/* The next three fields are present for IPA v3.5+ */ #define NTF_TX_CMDQS_FMASK GENMASK(18, 18) #define TX_0_FMASK GENMASK(19, 19) #define TX_1_FMASK GENMASK(20, 20) +/* The next field is present for IPA v3.5.1+ */ #define FNR_FMASK GENMASK(21, 21) -/* The remaining fields are not present for IPA v3.5.1 */ +/* The next eight fields are present for IPA v4.0+ */ #define QSB2AXI_CMDQ_L_FMASK GENMASK(22, 22) #define AGGR_WRAPPER_FMASK GENMASK(23, 23) #define RAM_SLAVEWAY_FMASK GENMASK(24, 24) @@ -123,8 +159,10 @@ struct ipa; #define GSI_IF_FMASK GENMASK(27, 27) #define GLOBAL_FMASK GENMASK(28, 28) #define GLOBAL_2X_CLK_FMASK GENMASK(29, 29) -/* The next field is present for IPA v4.5 */ +/* The next field is present for IPA v4.5+ */ #define DPL_FIFO_FMASK GENMASK(30, 30) +/* The next field is present for IPA v4.7+ */ +#define DRBIP_FMASK GENMASK(31, 31) #define IPA_REG_ROUTE_OFFSET 0x00000048 #define ROUTE_DIS_FMASK GENMASK(0, 0) @@ -145,13 +183,13 @@ struct ipa; #define IPA_REG_QSB_MAX_READS_OFFSET 0x00000078 #define GEN_QMB_0_MAX_READS_FMASK GENMASK(3, 0) #define GEN_QMB_1_MAX_READS_FMASK GENMASK(7, 4) -/* The next two fields are not present for IPA v3.5.1 */ +/* The next two fields are present for IPA v4.0+ */ #define GEN_QMB_0_MAX_READS_BEATS_FMASK GENMASK(23, 16) #define GEN_QMB_1_MAX_READS_BEATS_FMASK GENMASK(31, 24) static inline u32 ipa_reg_filt_rout_hash_en_offset(enum ipa_version version) { - if (version == IPA_VERSION_3_5_1) + if (version < IPA_VERSION_4_0) return 0x000008c; return 0x0000148; @@ -159,7 +197,7 @@ static inline u32 ipa_reg_filt_rout_hash_en_offset(enum ipa_version version) static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version) { - if (version == IPA_VERSION_3_5_1) + if (version < IPA_VERSION_4_0) return 0x0000090; return 0x000014c; @@ -174,49 +212,29 @@ static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version) /* ipa->available defines the valid bits in the STATE_AGGR_ACTIVE register */ static inline u32 ipa_reg_state_aggr_active_offset(enum ipa_version version) { - if (version == IPA_VERSION_3_5_1) + if (version < IPA_VERSION_4_0) return 0x0000010c; return 0x000000b4; } -/* The next register is not present for IPA v4.5 */ +/* The next register is not present for IPA v4.5+ */ #define IPA_REG_BCR_OFFSET 0x000001d0 -/* The next two fields are not present for IPA v4.2 */ +/* The next two fields are not present for IPA v4.2+ */ #define BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK GENMASK(0, 0) #define BCR_TX_NOT_USING_BRESP_FMASK GENMASK(1, 1) -/* The next field is invalid for IPA v4.1 */ +/* The next field is invalid for IPA v4.0+ */ #define BCR_TX_SUSPEND_IRQ_ASSERT_ONCE_FMASK GENMASK(2, 2) -/* The next two fields are not present for IPA v4.2 */ +/* The next two fields are not present for IPA v4.2+ */ #define BCR_SUSPEND_L2_IRQ_FMASK GENMASK(3, 3) #define BCR_HOLB_DROP_L2_IRQ_FMASK GENMASK(4, 4) +/* The next five fields are present for IPA v3.5+ */ #define 
BCR_DUAL_TX_FMASK GENMASK(5, 5) #define BCR_ENABLE_FILTER_DATA_CACHE_FMASK GENMASK(6, 6) #define BCR_NOTIF_PRIORITY_OVER_ZLT_FMASK GENMASK(7, 7) #define BCR_FILTER_PREFETCH_EN_FMASK GENMASK(8, 8) #define BCR_ROUTER_PREFETCH_EN_FMASK GENMASK(9, 9) -/* Backward compatibility register value to use for each version */ -static inline u32 ipa_reg_bcr_val(enum ipa_version version) -{ - if (version == IPA_VERSION_3_5_1) - return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK | - BCR_TX_NOT_USING_BRESP_FMASK | - BCR_SUSPEND_L2_IRQ_FMASK | - BCR_HOLB_DROP_L2_IRQ_FMASK | - BCR_DUAL_TX_FMASK; - - if (version == IPA_VERSION_4_0 || version == IPA_VERSION_4_1) - return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK | - BCR_SUSPEND_L2_IRQ_FMASK | - BCR_HOLB_DROP_L2_IRQ_FMASK | - BCR_DUAL_TX_FMASK; - - /* assert(version != IPA_VERSION_4_5); */ - - return 0x00000000; -} - /* The value of the next register must be a multiple of 8 (bottom 3 bits 0) */ #define IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET 0x000001e8 @@ -233,35 +251,40 @@ static inline u32 proc_cntxt_base_addr_encoded(enum ipa_version version, /* ipa->available defines the valid bits in the AGGR_FORCE_CLOSE register */ #define IPA_REG_AGGR_FORCE_CLOSE_OFFSET 0x000001ec -/* The next register is not present for IPA v4.5 */ +/* The next register is not present for IPA v4.5+ */ #define IPA_REG_COUNTER_CFG_OFFSET 0x000001f0 +/* The next field is not present for IPA v3.5+ */ +#define EOT_COAL_GRANULARITY GENMASK(3, 0) #define AGGR_GRANULARITY_FMASK GENMASK(8, 4) -/* The next register is not present for IPA v4.5 */ +/* The next register is present for IPA v3.5+ */ #define IPA_REG_TX_CFG_OFFSET 0x000001fc -/* The first three fields are present for IPA v3.5.1 only */ +/* The next three fields are not present for IPA v4.0+ */ #define TX0_PREFETCH_DISABLE_FMASK GENMASK(0, 0) #define TX1_PREFETCH_DISABLE_FMASK GENMASK(1, 1) #define PREFETCH_ALMOST_EMPTY_SIZE_FMASK GENMASK(4, 2) -/* The next six fields are present for IPA v4.0 and above */ +/* The next six fields are present for IPA v4.0+ */ #define PREFETCH_ALMOST_EMPTY_SIZE_TX0_FMASK GENMASK(5, 2) #define DMAW_SCND_OUTSD_PRED_THRESHOLD_FMASK GENMASK(9, 6) #define DMAW_SCND_OUTSD_PRED_EN_FMASK GENMASK(10, 10) #define DMAW_MAX_BEATS_256_DIS_FMASK GENMASK(11, 11) #define PA_MASK_EN_FMASK GENMASK(12, 12) #define PREFETCH_ALMOST_EMPTY_SIZE_TX1_FMASK GENMASK(16, 13) -/* The next field is present for IPA v4.5 */ +/* The next field is present for IPA v4.5+ */ #define DUAL_TX_ENABLE_FMASK GENMASK(17, 17) -/* The next two fields are present for IPA v4.2 only */ +/* The next field is present for IPA v4.2+, but not IPA v4.5 */ #define SSPND_PA_NO_START_STATE_FMASK GENMASK(18, 18) +/* The next field is present for IPA v4.2 only */ #define SSPND_PA_NO_BQ_STATE_FMASK GENMASK(19, 19) +/* The next register is present for IPA v3.5+ */ #define IPA_REG_FLAVOR_0_OFFSET 0x00000210 #define IPA_MAX_PIPES_FMASK GENMASK(3, 0) #define IPA_MAX_CONS_PIPES_FMASK GENMASK(12, 8) #define IPA_MAX_PROD_PIPES_FMASK GENMASK(20, 16) #define IPA_PROD_LOWEST_FMASK GENMASK(27, 24) +/* The next register is present for IPA v3.5+ */ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) { if (version >= IPA_VERSION_4_2) @@ -273,19 +296,19 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version) #define ENTER_IDLE_DEBOUNCE_THRESH_FMASK GENMASK(15, 0) #define CONST_NON_IDLE_ENABLE_FMASK GENMASK(16, 16) -/* The next register is present for IPA v4.5 */ +/* The next register is present for IPA v4.5+ */ #define IPA_REG_QTIME_TIMESTAMP_CFG_OFFSET 
0x0000024c #define DPL_TIMESTAMP_LSB_FMASK GENMASK(4, 0) #define DPL_TIMESTAMP_SEL_FMASK GENMASK(7, 7) #define TAG_TIMESTAMP_LSB_FMASK GENMASK(12, 8) #define NAT_TIMESTAMP_LSB_FMASK GENMASK(20, 16) -/* The next register is present for IPA v4.5 */ +/* The next register is present for IPA v4.5+ */ #define IPA_REG_TIMERS_XO_CLK_DIV_CFG_OFFSET 0x00000250 #define DIV_VALUE_FMASK GENMASK(8, 0) #define DIV_ENABLE_FMASK GENMASK(31, 31) -/* The next register is present for IPA v4.5 */ +/* The next register is present for IPA v4.5+ */ #define IPA_REG_TIMERS_PULSE_GRAN_CFG_OFFSET 0x00000254 #define GRAN_0_FMASK GENMASK(2, 0) #define GRAN_1_FMASK GENMASK(5, 3) @@ -302,63 +325,23 @@ enum ipa_pulse_gran { IPA_GRAN_655350_US = 0x7, }; -/* # IPA source resource groups available based on version */ -static inline u32 ipa_resource_group_src_count(enum ipa_version version) -{ - switch (version) { - case IPA_VERSION_3_5_1: - case IPA_VERSION_4_0: - case IPA_VERSION_4_1: - return 4; - - case IPA_VERSION_4_2: - return 1; - - case IPA_VERSION_4_5: - return 5; - - default: - return 0; - } -} - -/* # IPA destination resource groups available based on version */ -static inline u32 ipa_resource_group_dst_count(enum ipa_version version) -{ - switch (version) { - case IPA_VERSION_3_5_1: - return 3; - - case IPA_VERSION_4_0: - case IPA_VERSION_4_1: - return 4; - - case IPA_VERSION_4_2: - return 1; - - case IPA_VERSION_4_5: - return 5; - - default: - return 0; - } -} - -/* Not all of the following are valid (depends on the count, above) */ +/* Not all of the following are present (depends on IPA version) */ #define IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \ (0x00000400 + 0x0020 * (rt)) #define IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \ (0x00000404 + 0x0020 * (rt)) -/* The next register is only present for IPA v4.5 */ #define IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \ (0x00000408 + 0x0020 * (rt)) +#define IPA_REG_SRC_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(rt) \ + (0x0000040c + 0x0020 * (rt)) #define IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \ (0x00000500 + 0x0020 * (rt)) #define IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \ (0x00000504 + 0x0020 * (rt)) -/* The next register is only present for IPA v4.5 */ #define IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \ (0x00000508 + 0x0020 * (rt)) +#define IPA_REG_DST_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(rt) \ + (0x0000050c + 0x0020 * (rt)) /* The next four fields are used for all resource group registers */ #define X_MIN_LIM_FMASK GENMASK(5, 0) #define X_MAX_LIM_FMASK GENMASK(13, 8) @@ -368,8 +351,9 @@ static inline u32 ipa_resource_group_dst_count(enum ipa_version version) #define IPA_REG_ENDP_INIT_CTRL_N_OFFSET(ep) \ (0x00000800 + 0x0070 * (ep)) -/* The next field should only used for IPA v3.5.1 */ +/* Valid only for RX (IPA producer) endpoints (do not use for IPA v4.0+) */ #define ENDP_SUSPEND_FMASK GENMASK(0, 0) +/* Valid only for TX (IPA consumer) endpoints */ #define ENDP_DELAY_FMASK GENMASK(1, 1) #define IPA_REG_ENDP_INIT_CFG_N_OFFSET(ep) \ @@ -379,11 +363,11 @@ static inline u32 ipa_resource_group_dst_count(enum ipa_version version) #define CS_METADATA_HDR_OFFSET_FMASK GENMASK(6, 3) #define CS_GEN_QMB_MASTER_SEL_FMASK GENMASK(8, 8) -/** enum ipa_cs_offload_en - checksum offload field in ENDP_INIT_CFG_N */ +/** enum ipa_cs_offload_en - ENDP_INIT_CFG register CS_OFFLOAD_EN field value */ enum ipa_cs_offload_en { IPA_CS_OFFLOAD_NONE = 0x0, - IPA_CS_OFFLOAD_UL = 0x1, - IPA_CS_OFFLOAD_DL = 0x2, + IPA_CS_OFFLOAD_UL = 0x1, /* Before IPA v4.5 (TX) */ + 
IPA_CS_OFFLOAD_DL = 0x2, /* Before IPA v4.5 (RX) */ }; /* Valid only for TX (IPA consumer) endpoints */ @@ -406,11 +390,12 @@ enum ipa_nat_en { #define HDR_ADDITIONAL_CONST_LEN_FMASK GENMASK(18, 13) #define HDR_OFST_PKT_SIZE_VALID_FMASK GENMASK(19, 19) #define HDR_OFST_PKT_SIZE_FMASK GENMASK(25, 20) +/* The next field is not present for IPA v4.9+ */ #define HDR_A5_MUX_FMASK GENMASK(26, 26) #define HDR_LEN_INC_DEAGG_HDR_FMASK GENMASK(27, 27) -/* The next field is not present for IPA v4.5 */ +/* The next field is not present for IPA v4.5+ */ #define HDR_METADATA_REG_VALID_FMASK GENMASK(28, 28) -/* The next two fields are present for IPA v4.5 */ +/* The next two fields are present for IPA v4.5+ */ #define HDR_LEN_MSB_FMASK GENMASK(29, 28) #define HDR_OFST_METADATA_MSB_FMASK GENMASK(31, 30) @@ -462,7 +447,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, #define HDR_PAYLOAD_LEN_INC_PADDING_FMASK GENMASK(3, 3) #define HDR_TOTAL_LEN_OR_PAD_OFFSET_FMASK GENMASK(9, 4) #define HDR_PAD_TO_ALIGNMENT_FMASK GENMASK(13, 10) -/* The next three fields are present for IPA v4.5 */ +/* The next three fields are present for IPA v4.5+ */ #define HDR_TOTAL_LEN_OR_PAD_OFFSET_MSB_FMASK GENMASK(17, 16) #define HDR_OFST_PKT_SIZE_MSB_FMASK GENMASK(19, 18) #define HDR_ADDITIONAL_CONST_LEN_MSB_FMASK GENMASK(21, 20) @@ -475,16 +460,18 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, #define IPA_REG_ENDP_INIT_MODE_N_OFFSET(txep) \ (0x00000820 + 0x0070 * (txep)) #define MODE_FMASK GENMASK(2, 0) -/* The next field is present for IPA v4.5 */ +/* The next field is present for IPA v4.5+ */ #define DCPH_ENABLE_FMASK GENMASK(3, 3) #define DEST_PIPE_INDEX_FMASK GENMASK(8, 4) #define BYTE_THRESHOLD_FMASK GENMASK(27, 12) #define PIPE_REPLICATION_EN_FMASK GENMASK(28, 28) #define PAD_EN_FMASK GENMASK(29, 29) -/* The next register is not present for IPA v4.5 */ +/* The next field is not present for IPA v4.5+ */ #define HDR_FTCH_DISABLE_FMASK GENMASK(30, 30) +/* The next field is present for IPA v4.9+ */ +#define DRBIP_ACL_ENABLE GENMASK(30, 30) -/** enum ipa_mode - mode field in ENDP_INIT_MODE_N */ +/** enum ipa_mode - ENDP_INIT_MODE register MODE field value */ enum ipa_mode { IPA_BASIC = 0x0, IPA_ENABLE_FRAMING_HDLC = 0x1, @@ -496,47 +483,54 @@ enum ipa_mode { (0x00000824 + 0x0070 * (ep)) #define AGGR_EN_FMASK GENMASK(1, 0) #define AGGR_TYPE_FMASK GENMASK(4, 2) + +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_byte_limit_fmask(bool legacy) { return legacy ? GENMASK(9, 5) : GENMASK(10, 5); } +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_time_limit_fmask(bool legacy) { return legacy ? GENMASK(14, 10) : GENMASK(16, 12); } +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_pkt_limit_fmask(bool legacy) { return legacy ? GENMASK(20, 15) : GENMASK(22, 17); } +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_sw_eof_active_fmask(bool legacy) { return legacy ? GENMASK(21, 21) : GENMASK(23, 23); } +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_force_close_fmask(bool legacy) { return legacy ? GENMASK(22, 22) : GENMASK(24, 24); } +/* The legacy value is used for IPA hardware before IPA v4.5 */ static inline u32 aggr_hard_byte_limit_enable_fmask(bool legacy) { return legacy ? 
GENMASK(24, 24) : GENMASK(26, 26); } -/* The next field is present for IPA v4.5 */ +/* The next field is present for IPA v4.5+ */ #define AGGR_GRAN_SEL_FMASK GENMASK(27, 27) -/** enum ipa_aggr_en - aggregation enable field in ENDP_INIT_AGGR_N */ +/** enum ipa_aggr_en - ENDP_INIT_AGGR register AGGR_EN field value */ enum ipa_aggr_en { - IPA_BYPASS_AGGR = 0x0, - IPA_ENABLE_AGGR = 0x1, - IPA_ENABLE_DEAGGR = 0x2, + IPA_BYPASS_AGGR = 0x0, /* (TX, RX) */ + IPA_ENABLE_AGGR = 0x1, /* (RX) */ + IPA_ENABLE_DEAGGR = 0x2, /* (TX) */ }; -/** enum ipa_aggr_type - aggregation type field in ENDP_INIT_AGGR_N */ +/** enum ipa_aggr_type - ENDP_INIT_AGGR register AGGR_TYPE field value */ enum ipa_aggr_type { IPA_MBIM_16 = 0x0, IPA_HDLC = 0x1, @@ -577,14 +571,13 @@ enum ipa_aggr_type { /* Encoded value for ENDP_INIT_RSRC_GRP register RSRC_GRP field */ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp) { - switch (version) { - case IPA_VERSION_4_2: - return u32_encode_bits(rsrc_grp, GENMASK(0, 0)); - case IPA_VERSION_4_5: + if (version < IPA_VERSION_3_5 || version == IPA_VERSION_4_5) return u32_encode_bits(rsrc_grp, GENMASK(2, 0)); - default: - return u32_encode_bits(rsrc_grp, GENMASK(1, 0)); - } + + if (version == IPA_VERSION_4_2 || version == IPA_VERSION_4_7) + return u32_encode_bits(rsrc_grp, GENMASK(0, 0)); + + return u32_encode_bits(rsrc_grp, GENMASK(1, 0)); } /* Valid only for TX (IPA consumer) endpoints */ @@ -595,6 +588,13 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp) /** * enum ipa_seq_type - HPS and DPS sequencer type + * @IPA_SEQ_DMA: Perform DMA only + * @IPA_SEQ_1_PASS: One pass through the pipeline + * @IPA_SEQ_2_PASS_SKIP_LAST_UC: Two passes, skip the microprocessor + * @IPA_SEQ_1_PASS_SKIP_LAST_UC: One pass, skip the microprocessor + * @IPA_SEQ_2_PASS: Two passes through the pipeline + * @IPA_SEQ_3_PASS_SKIP_LAST_UC: Three passes, skip the microprocessor + * @IPA_SEQ_DECIPHER: Optional deciphering step (combined) * * The low-order byte of the sequencer type register defines the number of * passes a packet takes through the IPA pipeline. The last pass through can * @@ -604,7 +604,6 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp) * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are * supported (or meaningful). */ -#define IPA_SEQ_DECIPHER 0x11 enum ipa_seq_type { IPA_SEQ_DMA = 0x00, IPA_SEQ_1_PASS = 0x02, @@ -612,10 +611,13 @@ enum ipa_seq_type { IPA_SEQ_1_PASS_SKIP_LAST_UC = 0x06, IPA_SEQ_2_PASS = 0x0a, IPA_SEQ_3_PASS_SKIP_LAST_UC = 0x0c, + /* The next value can be ORed with the above */ + IPA_SEQ_DECIPHER = 0x11, }; /** * enum ipa_seq_rep_type - replicated packet sequencer type + * @IPA_SEQ_REP_DMA_PARSER: DMA parser for replicated packets * * This goes in the second byte of the endpoint sequencer type register.
* @@ -630,12 +632,12 @@ (0x00000840 + 0x0070 * (ep)) #define STATUS_EN_FMASK GENMASK(0, 0) #define STATUS_ENDP_FMASK GENMASK(5, 1) -/* The next field is not present for IPA v4.5 */ +/* The next field is not present for IPA v4.5+ */ #define STATUS_LOCATION_FMASK GENMASK(8, 8) -/* The next field is not present for IPA v3.5.1 */ +/* The next field is present for IPA v4.0+ */ #define STATUS_PKT_SUPPRESS_FMASK GENMASK(9, 9) -/* The next register is only present for IPA versions that support hashing */ +/* The next register is not present for IPA v4.2 (which has no hashing support) */ #define IPA_REG_ENDP_FILTER_ROUTER_HSH_CFG_N_OFFSET(er) \ (0x0000085c + 0x0070 * (er)) #define FILTER_HASH_MSK_SRC_ID_FMASK GENMASK(0, 0) @@ -656,31 +658,87 @@ enum ipa_seq_rep_type { #define ROUTER_HASH_MSK_METADATA_FMASK GENMASK(22, 22) #define IPA_REG_ENDP_ROUTER_HASH_MSK_ALL GENMASK(22, 16) -#define IPA_REG_IRQ_STTS_OFFSET \ - IPA_REG_IRQ_STTS_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_STTS_EE_N_OFFSET(ee) \ - (0x00003008 + 0x1000 * (ee)) +static inline u32 ipa_reg_irq_stts_ee_n_offset(enum ipa_version version, + u32 ee) +{ + if (version < IPA_VERSION_4_9) + return 0x00003008 + 0x1000 * ee; + + return 0x00004008 + 0x1000 * ee; +} + +static inline u32 ipa_reg_irq_stts_offset(enum ipa_version version) +{ + return ipa_reg_irq_stts_ee_n_offset(version, GSI_EE_AP); +} + +static inline u32 ipa_reg_irq_en_ee_n_offset(enum ipa_version version, u32 ee) +{ + if (version < IPA_VERSION_4_9) + return 0x0000300c + 0x1000 * ee; + + return 0x0000400c + 0x1000 * ee; +} + +static inline u32 ipa_reg_irq_en_offset(enum ipa_version version) +{ + return ipa_reg_irq_en_ee_n_offset(version, GSI_EE_AP); +} + +static inline u32 ipa_reg_irq_clr_ee_n_offset(enum ipa_version version, u32 ee) +{ + if (version < IPA_VERSION_4_9) + return 0x00003010 + 0x1000 * ee; + + return 0x00004010 + 0x1000 * ee; +} -#define IPA_REG_IRQ_EN_OFFSET \ - IPA_REG_IRQ_EN_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_EN_EE_N_OFFSET(ee) \ - (0x0000300c + 0x1000 * (ee)) +static inline u32 ipa_reg_irq_clr_offset(enum ipa_version version) +{ + return ipa_reg_irq_clr_ee_n_offset(version, GSI_EE_AP); +} -#define IPA_REG_IRQ_CLR_OFFSET \ - IPA_REG_IRQ_CLR_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_CLR_EE_N_OFFSET(ee) \ - (0x00003010 + 0x1000 * (ee)) /** * enum ipa_irq_id - Bit positions representing type of IPA IRQ * @IPA_IRQ_UC_0: Microcontroller event interrupt * @IPA_IRQ_UC_1: Microcontroller response interrupt * @IPA_IRQ_TX_SUSPEND: Data ready interrupt + * @IPA_IRQ_COUNT: Number of IRQ ids (must be last) * * IRQ types not described above are not currently used.
+ * + * @IPA_IRQ_BAD_SNOC_ACCESS: (Not currently used) + * @IPA_IRQ_EOT_COAL: (Not currently used) + * @IPA_IRQ_UC_2: (Not currently used) + * @IPA_IRQ_UC_3: (Not currently used) + * @IPA_IRQ_UC_IN_Q_NOT_EMPTY: (Not currently used) + * @IPA_IRQ_UC_RX_CMD_Q_NOT_FULL: (Not currently used) + * @IPA_IRQ_PROC_UC_ACK_Q_NOT_EMPTY: (Not currently used) + * @IPA_IRQ_RX_ERR: (Not currently used) + * @IPA_IRQ_DEAGGR_ERR: (Not currently used) + * @IPA_IRQ_TX_ERR: (Not currently used) + * @IPA_IRQ_STEP_MODE: (Not currently used) + * @IPA_IRQ_PROC_ERR: (Not currently used) + * @IPA_IRQ_TX_HOLB_DROP: (Not currently used) + * @IPA_IRQ_BAM_GSI_IDLE: (Not currently used) + * @IPA_IRQ_PIPE_YELLOW_BELOW: (Not currently used) + * @IPA_IRQ_PIPE_RED_BELOW: (Not currently used) + * @IPA_IRQ_PIPE_YELLOW_ABOVE: (Not currently used) + * @IPA_IRQ_PIPE_RED_ABOVE: (Not currently used) + * @IPA_IRQ_UCP: (Not currently used) + * @IPA_IRQ_DCMP: (Not currently used) + * @IPA_IRQ_GSI_EE: (Not currently used) + * @IPA_IRQ_GSI_IPA_IF_TLV_RCVD: (Not currently used) + * @IPA_IRQ_GSI_UC: (Not currently used) + * @IPA_IRQ_TLV_LEN_MIN_DSM: (Not currently used) + * @IPA_IRQ_DRBIP_PKT_EXCEED_MAX_SIZE_EN: (Not currently used) + * @IPA_IRQ_DRBIP_DATA_SCTR_CFG_ERROR_EN: (Not currently used) + * @IPA_IRQ_DRBIP_IMM_CMD_NO_FLSH_HZRD_EN: (Not currently used) */ enum ipa_irq_id { IPA_IRQ_BAD_SNOC_ACCESS = 0x0, - /* Type (bit) 0x1 is not defined */ + /* The next bit is not present for IPA v3.5+ */ + IPA_IRQ_EOT_COAL = 0x1, IPA_IRQ_UC_0 = 0x2, IPA_IRQ_UC_1 = 0x3, IPA_IRQ_UC_2 = 0x4, @@ -701,38 +759,89 @@ enum ipa_irq_id { IPA_IRQ_PIPE_YELLOW_ABOVE = 0x13, IPA_IRQ_PIPE_RED_ABOVE = 0x14, IPA_IRQ_UCP = 0x15, + /* The next bit is not present for IPA v4.5+ */ IPA_IRQ_DCMP = 0x16, IPA_IRQ_GSI_EE = 0x17, IPA_IRQ_GSI_IPA_IF_TLV_RCVD = 0x18, IPA_IRQ_GSI_UC = 0x19, - /* The next bit is present for IPA v4.5 */ + /* The next bit is present for IPA v4.5+ */ IPA_IRQ_TLV_LEN_MIN_DSM = 0x1a, + /* The next three bits are present for IPA v4.9+ */ + IPA_IRQ_DRBIP_PKT_EXCEED_MAX_SIZE_EN = 0x1b, + IPA_IRQ_DRBIP_DATA_SCTR_CFG_ERROR_EN = 0x1c, + IPA_IRQ_DRBIP_IMM_CMD_NO_FLSH_HZRD_EN = 0x1d, IPA_IRQ_COUNT, /* Last; not an id */ }; -#define IPA_REG_IRQ_UC_OFFSET \ - IPA_REG_IRQ_UC_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_UC_EE_N_OFFSET(ee) \ - (0x0000301c + 0x1000 * (ee)) +static inline u32 ipa_reg_irq_uc_ee_n_offset(enum ipa_version version, u32 ee) +{ + if (version < IPA_VERSION_4_9) + return 0x0000301c + 0x1000 * ee; + + return 0x0000401c + 0x1000 * ee; +} + +static inline u32 ipa_reg_irq_uc_offset(enum ipa_version version) +{ + return ipa_reg_irq_uc_ee_n_offset(version, GSI_EE_AP); +} + #define UC_INTR_FMASK GENMASK(0, 0) /* ipa->available defines the valid bits in the SUSPEND_INFO register */ -#define IPA_REG_IRQ_SUSPEND_INFO_OFFSET \ - IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(ee) \ - (0x00003030 + 0x1000 * (ee)) - -/* ipa->available defines the valid bits in the IRQ_SUSPEND_EN register */ -#define IPA_REG_IRQ_SUSPEND_EN_OFFSET \ - IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(ee) \ - (0x00003034 + 0x1000 * (ee)) - -/* ipa->available defines the valid bits in the IRQ_SUSPEND_CLR register */ -#define IPA_REG_IRQ_SUSPEND_CLR_OFFSET \ - IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(GSI_EE_AP) -#define IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(ee) \ - (0x00003038 + 0x1000 * (ee)) +static inline u32 +ipa_reg_irq_suspend_info_ee_n_offset(enum ipa_version version, u32 ee) +{ + if 
(version == IPA_VERSION_3_0) + return 0x00003098 + 0x1000 * ee; + + if (version < IPA_VERSION_4_9) + return 0x00003030 + 0x1000 * ee; + + return 0x00004030 + 0x1000 * ee; +} + +static inline u32 +ipa_reg_irq_suspend_info_offset(enum ipa_version version) +{ + return ipa_reg_irq_suspend_info_ee_n_offset(version, GSI_EE_AP); +} + +/* ipa->available defines the valid bits in the SUSPEND_EN register */ +static inline u32 +ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee) +{ + /* assert(version != IPA_VERSION_3_0); */ + + if (version < IPA_VERSION_4_9) + return 0x00003034 + 0x1000 * ee; + + return 0x00004034 + 0x1000 * ee; +} + +static inline u32 +ipa_reg_irq_suspend_en_offset(enum ipa_version version) +{ + return ipa_reg_irq_suspend_en_ee_n_offset(version, GSI_EE_AP); +} + +/* ipa->available defines the valid bits in the SUSPEND_CLR register */ +static inline u32 +ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee) +{ + /* assert(version != IPA_VERSION_3_0); */ + + if (version < IPA_VERSION_4_9) + return 0x00003038 + 0x1000 * ee; + + return 0x00004038 + 0x1000 * ee; +} + +static inline u32 +ipa_reg_irq_suspend_clr_offset(enum ipa_version version) +{ + return ipa_reg_irq_suspend_clr_ee_n_offset(version, GSI_EE_AP); +} int ipa_reg_init(struct ipa *ipa); void ipa_reg_exit(struct ipa *ipa); diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c new file mode 100644 index 000000000000..85f922d6f222 --- /dev/null +++ b/drivers/net/ipa/ipa_resource.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. + * Copyright (C) 2018-2021 Linaro Ltd. + */ + +#include <linux/types.h> +#include <linux/kernel.h> + +#include "ipa.h" +#include "ipa_data.h" +#include "ipa_reg.h" +#include "ipa_resource.h" + +/** + * DOC: IPA Resources + * + * The IPA manages a set of resources internally for various purposes. + * A given IPA version has a fixed number of resource types, and a fixed + * total number of resources of each type. "Source" resource types + * are separate from "destination" resource types. + * + * Each version of IPA also has some number of resource groups. Each + * endpoint is assigned to a resource group, and all endpoints in the + * same group share pools of each type of resource. A subset of the + * total resources of each type is assigned for use by each group. + */ + +static bool ipa_resource_limits_valid(struct ipa *ipa, + const struct ipa_resource_data *data) +{ +#ifdef IPA_VALIDATION + u32 group_count; + u32 i; + u32 j; + + /* We program at most 8 source or destination resource group limits */ + BUILD_BUG_ON(IPA_RESOURCE_GROUP_MAX > 8); + + group_count = data->rsrc_group_src_count; + if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX) + return false; + + /* Return an error if a non-zero resource limit is specified + * for a resource group not supported by hardware. 
+ */ + for (i = 0; i < data->resource_src_count; i++) { + const struct ipa_resource *resource; + + resource = &data->resource_src[i]; + for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++) + if (resource->limits[j].min || resource->limits[j].max) + return false; + } + + group_count = data->rsrc_group_dst_count; + if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX) + return false; + + for (i = 0; i < data->resource_dst_count; i++) { + const struct ipa_resource *resource; + + resource = &data->resource_dst[i]; + for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++) + if (resource->limits[j].min || resource->limits[j].max) + return false; + } +#endif /* !IPA_VALIDATION */ + return true; +} + +static void +ipa_resource_config_common(struct ipa *ipa, u32 offset, + const struct ipa_resource_limits *xlimits, + const struct ipa_resource_limits *ylimits) +{ + u32 val; + + val = u32_encode_bits(xlimits->min, X_MIN_LIM_FMASK); + val |= u32_encode_bits(xlimits->max, X_MAX_LIM_FMASK); + if (ylimits) { + val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK); + val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK); + } + + iowrite32(val, ipa->reg_virt + offset); +} + +static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type, + const struct ipa_resource_data *data) +{ + u32 group_count = data->rsrc_group_src_count; + const struct ipa_resource_limits *ylimits; + const struct ipa_resource *resource; + u32 offset; + + resource = &data->resource_src[resource_type]; + + offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 1 ? NULL : &resource->limits[1]; + ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); + + if (group_count < 3) + return; + + offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 3 ? NULL : &resource->limits[3]; + ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); + + if (group_count < 5) + return; + + offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 5 ? NULL : &resource->limits[5]; + ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); + + if (group_count < 7) + return; + + offset = IPA_REG_SRC_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 7 ? NULL : &resource->limits[7]; + ipa_resource_config_common(ipa, offset, &resource->limits[6], ylimits); +} + +static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type, + const struct ipa_resource_data *data) +{ + u32 group_count = data->rsrc_group_dst_count; + const struct ipa_resource_limits *ylimits; + const struct ipa_resource *resource; + u32 offset; + + resource = &data->resource_dst[resource_type]; + + offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 1 ? NULL : &resource->limits[1]; + ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits); + + if (group_count < 3) + return; + + offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 3 ? NULL : &resource->limits[3]; + ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits); + + if (group_count < 5) + return; + + offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 5 ?
NULL : &resource->limits[5]; + ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits); + + if (group_count < 7) + return; + + offset = IPA_REG_DST_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(resource_type); + ylimits = group_count == 7 ? NULL : &resource->limits[7]; + ipa_resource_config_common(ipa, offset, &resource->limits[6], ylimits); +} + +/* Configure resources */ +int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data) +{ + u32 i; + + if (!ipa_resource_limits_valid(ipa, data)) + return -EINVAL; + + for (i = 0; i < data->resource_src_count; i++) + ipa_resource_config_src(ipa, i, data); + + for (i = 0; i < data->resource_dst_count; i++) + ipa_resource_config_dst(ipa, i, data); + + return 0; +} + +/* Inverse of ipa_resource_config() */ +void ipa_resource_deconfig(struct ipa *ipa) +{ + /* Nothing to do */ +} diff --git a/drivers/net/ipa/ipa_resource.h b/drivers/net/ipa/ipa_resource.h new file mode 100644 index 000000000000..9f74036fb95c --- /dev/null +++ b/drivers/net/ipa/ipa_resource.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. + * Copyright (C) 2019-2021 Linaro Ltd. + */ +#ifndef _IPA_RESOURCE_H_ +#define _IPA_RESOURCE_H_ + +struct ipa; +struct ipa_resource_data; + +/** + * ipa_resource_config() - Configure resources + * @ipa: IPA pointer + * @data: IPA resource configuration data + * + * Return: 0 if successful, or a negative error code otherwise + */ +int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data); + +/** + * ipa_resource_deconfig() - Inverse of ipa_resource_config() + * @ipa: IPA pointer + */ +void ipa_resource_deconfig(struct ipa *ipa); + +#endif /* _IPA_RESOURCE_H_ */ diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h index bf0e4063cfd9..20319438a841 100644 --- a/drivers/net/ipa/ipa_smp2p.h +++ b/drivers/net/ipa/ipa_smp2p.h @@ -28,7 +28,7 @@ void ipa_smp2p_exit(struct ipa *ipa); /** * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling - * @IPA: IPA pointer + * @ipa: IPA pointer * * Prevent handling of the "setup ready" interrupt from the modem. * This is used before initiating shutdown of the driver. diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 4236a50ff03a..401b568df6a3 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -27,28 +27,38 @@ /** * DOC: IPA Filter and Route Tables * - * The IPA has tables defined in its local shared memory that define filter - * and routing rules. Each entry in these tables contains a 64-bit DMA - * address that refers to DRAM (system memory) containing a rule definition. + * The IPA has tables defined in its local (IPA-resident) memory that define + * filter and routing rules. An entry in either of these tables is a little + * endian 64-bit "slot" that holds the address of a rule definition. (The + * size of these slots is 64 bits regardless of the host DMA address size.) + * + * Separate tables (both filter and route) are used for IPv4 and IPv6. There + * is normally another set of "hashed" filter and route tables, which are + * used with a hash of message metadata. Hashed operation is not supported + * by all IPA hardware (IPA v4.2 doesn't support hashed tables). + * + * Rules can be in local memory or in DRAM (system memory). The offset of + * an object (such as a route or filter table) in IPA-resident memory must be
An object in system memory (such as a route or filter + * rule) must be at an 8-byte aligned address. We currently only place + * route or filter rules in system memory. + * * A rule consists of a contiguous block of 32-bit values terminated with * 32 zero bits. A special "zero entry" rule consisting of 64 zero bits * represents "no filtering" or "no routing," and is the reset value for - * filter or route table rules. Separate tables (both filter and route) - * used for IPv4 and IPv6. Additionally, there can be hashed filter or - * route tables, which are used when a hash of message metadata matches. - * Hashed operation is not supported by all IPA hardware. + * filter or route table rules. * * Each filter rule is associated with an AP or modem TX endpoint, though - * not all TX endpoints support filtering. The first 64-bit entry in a + * not all TX endpoints support filtering. The first 64-bit slot in a * filter table is a bitmap indicating which endpoints have entries in * the table. The low-order bit (bit 0) in this bitmap represents a * special global filter, which applies to all traffic. This is not * used in the current code. Bit 1, if set, indicates that there is an - * entry (i.e. a DMA address referring to a rule) for endpoint 0 in the - * table. Bit 2, if set, indicates there is an entry for endpoint 1, - * and so on. Space is set aside in IPA local memory to hold as many - * filter table entries as might be required, but typically they are not - * all used. + * entry (i.e. slot containing a system address referring to a rule) for + * endpoint 0 in the table. Bit 3, if set, indicates there is an entry + * for endpoint 2, and so on. Space is set aside in IPA local memory to + * hold as many filter table entries as might be required, but typically + * they are not all used. * * The AP initializes all entries in a filter table to refer to a "zero" * entry. Once initialized the modem and AP update the entries for @@ -96,9 +106,6 @@ * ---------------------- */ -/* IPA hardware constrains filter and route tables alignment */ -#define IPA_TABLE_ALIGN 128 /* Minimum table alignment */ - /* Assignment of route table entries to the modem and AP */ #define IPA_ROUTE_MODEM_MIN 0 #define IPA_ROUTE_MODEM_COUNT 8 @@ -125,8 +132,7 @@ static void ipa_table_validate_build(void) * code in ipa_table_init() uses a pointer to __le64 to * initialize tables. */ - BUILD_BUG_ON(sizeof(dma_addr_t) > IPA_TABLE_ENTRY_SIZE); - BUILD_BUG_ON(sizeof(__le64) != IPA_TABLE_ENTRY_SIZE); + BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(__le64)); /* A "zero rule" is used to represent no filtering or no routing. * It is a 64-bit block of zeroed memory. Code in ipa_table_init() @@ -157,7 +163,7 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed) else mem = hashed ? &ipa->mem[IPA_MEM_V4_ROUTE_HASHED] : &ipa->mem[IPA_MEM_V4_ROUTE]; - size = IPA_ROUTE_COUNT_MAX * IPA_TABLE_ENTRY_SIZE; + size = IPA_ROUTE_COUNT_MAX * sizeof(__le64); } else { if (ipv6) mem = hashed ? &ipa->mem[IPA_MEM_V6_FILTER_HASHED] @@ -165,7 +171,7 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed) else mem = hashed ? 
&ipa->mem[IPA_MEM_V4_FILTER_HASHED] : &ipa->mem[IPA_MEM_V4_FILTER]; - size = (1 + IPA_FILTER_COUNT_MAX) * IPA_TABLE_ENTRY_SIZE; + size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64); } if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed)) @@ -264,8 +270,8 @@ static void ipa_table_reset_add(struct gsi_trans *trans, bool filter, if (filter) first++; /* skip over bitmap */ - offset = mem->offset + first * IPA_TABLE_ENTRY_SIZE; - size = count * IPA_TABLE_ENTRY_SIZE; + offset = mem->offset + first * sizeof(__le64); + size = count * sizeof(__le64); addr = ipa_table_addr(ipa, false, count); ipa_cmd_dma_shared_mem_add(trans, offset, size, addr, true); @@ -447,11 +453,11 @@ static void ipa_table_init_add(struct gsi_trans *trans, bool filter, count = hweight32(ipa->filter_map); hash_count = hash_mem->size ? count : 0; } else { - count = mem->size / IPA_TABLE_ENTRY_SIZE; - hash_count = hash_mem->size / IPA_TABLE_ENTRY_SIZE; + count = mem->size / sizeof(__le64); + hash_count = hash_mem->size / sizeof(__le64); } - size = count * IPA_TABLE_ENTRY_SIZE; - hash_size = hash_count * IPA_TABLE_ENTRY_SIZE; + size = count * sizeof(__le64); + hash_size = hash_count * sizeof(__le64); addr = ipa_table_addr(ipa, filter, count); hash_addr = ipa_table_addr(ipa, filter, hash_count); @@ -652,26 +658,17 @@ int ipa_table_init(struct ipa *ipa) ipa_table_validate_build(); - size = IPA_ZERO_RULE_SIZE + (1 + count) * IPA_TABLE_ENTRY_SIZE; + /* The IPA hardware requires route and filter table rules to be + * aligned on a 128-byte boundary. We put the "zero rule" at the + * base of the table area allocated here. The buffer returned by + * dma_alloc_coherent() spans a power-of-2 number of pages, so its + * DMA address is at least page aligned, which satisfies the rule + * alignment requirement. + */ + size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64); virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL); if (!virt) return -ENOMEM; - /* We put the "zero rule" at the base of our table area. The IPA - * hardware requires route and filter table rules to be aligned - * on a 128-byte boundary. As long as the alignment constraint - * is a power of 2, we can check alignment using just the bottom - * 32 bits for a DMA address of any size.
- */ - BUILD_BUG_ON(!is_power_of_2(IPA_TABLE_ALIGN)); - if (lower_32_bits(addr) % IPA_TABLE_ALIGN) { - dev_err(dev, "table address %pad not %u-byte aligned\n", - &addr, IPA_TABLE_ALIGN); - dma_free_coherent(dev, size, virt, addr); - - return -ERANGE; - } - ipa->table_virt = virt; ipa->table_addr = addr; @@ -699,7 +696,7 @@ void ipa_table_exit(struct ipa *ipa) struct device *dev = &ipa->pdev->dev; size_t size; - size = IPA_ZERO_RULE_SIZE + (1 + count) * IPA_TABLE_ENTRY_SIZE; + size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64); dma_free_coherent(dev, size, ipa->table_virt, ipa->table_addr); ipa->table_addr = 0; diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h index 889c2e93b122..018045b95aad 100644 --- a/drivers/net/ipa/ipa_table.h +++ b/drivers/net/ipa/ipa_table.h @@ -10,9 +10,6 @@ struct ipa; -/* The size of a filter or route table entry */ -#define IPA_TABLE_ENTRY_SIZE sizeof(__le64) /* Holds a physical address */ - /* The maximum number of filter table entries (IPv4, IPv6; hashed or not) */ #define IPA_FILTER_COUNT_MAX 14 @@ -24,7 +21,7 @@ struct ipa; /** * ipa_table_valid() - Validate route and filter table memory regions * @ipa: IPA pointer - + * * Return: true if all regions are valid, false otherwise */ bool ipa_table_valid(struct ipa *ipa); @@ -32,6 +29,7 @@ bool ipa_table_valid(struct ipa *ipa); /** * ipa_filter_map_valid() - Validate a filter table endpoint bitmap * @ipa: IPA pointer + * @filter_mask: Filter table endpoint bitmap to check * * Return: true if all regions are valid, false otherwise */ diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index dee58a6596d4..2756363e6938 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -192,6 +192,7 @@ void ipa_uc_teardown(struct ipa *ipa) static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); + u32 offset; u32 val; /* Fill in the command data */ @@ -203,8 +204,8 @@ static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param) /* Use an interrupt to tell the microcontroller the command is ready */ val = u32_encode_bits(1, UC_INTR_FMASK); - - iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_UC_OFFSET); + offset = ipa_reg_irq_uc_offset(ipa->version); + iowrite32(val, ipa->reg_virt + offset); } /* Tell the microcontroller the AP is shutting down */ diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c index 5e72cc55afbd..e08c90ac0c6e 100644 --- a/drivers/net/mdio.c +++ b/drivers/net/mdio.c @@ -83,7 +83,7 @@ int mdio_set_flag(const struct mdio_if_info *mdio, EXPORT_SYMBOL(mdio_set_flag); /** - * mdio_link_ok - is link status up/OK + * mdio45_links_ok - is link status up/OK * @mdio: MDIO interface * @mmd_mask: Mask for MMDs to check * diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c index 641cfa41f492..03261e6b9ceb 100644 --- a/drivers/net/mdio/mdio-mux-bcm-iproc.c +++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c @@ -197,10 +197,8 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev) res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1; } md->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(md->base)) { - dev_err(&pdev->dev, "failed to ioremap register\n"); + if (IS_ERR(md->base)) return PTR_ERR(md->base); - } md->mii_bus = devm_mdiobus_alloc(&pdev->dev); if (!md->mii_bus) { diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h index 12e7407d712a..1d0c499d27a3 100644 --- a/drivers/net/mhi/mhi.h +++ b/drivers/net/mhi/mhi.h @@ -29,6 
+29,7 @@ struct mhi_net_dev { struct mhi_net_stats stats; u32 rx_queue_sz; int msg_enable; + unsigned int mru; }; struct mhi_net_proto { diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index f59960876083..0d8293a47a56 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -265,10 +265,12 @@ static void mhi_net_rx_refill_work(struct work_struct *work) rx_refill.work); struct net_device *ndev = mhi_netdev->ndev; struct mhi_device *mdev = mhi_netdev->mdev; - int size = READ_ONCE(ndev->mtu); struct sk_buff *skb; + unsigned int size; int err; + size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu); + while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { skb = netdev_alloc_skb(ndev, size); if (unlikely(!skb)) @@ -359,8 +361,7 @@ static void mhi_net_remove(struct mhi_device *mhi_dev) mhi_unprepare_from_transfer(mhi_netdev->mdev); - if (mhi_netdev->skbagg_head) - kfree_skb(mhi_netdev->skbagg_head); + kfree_skb(mhi_netdev->skbagg_head); free_netdev(mhi_netdev->ndev); } diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c index 75b5484c40d5..fc72b3f6ec9e 100644 --- a/drivers/net/mhi/proto_mbim.c +++ b/drivers/net/mhi/proto_mbim.c @@ -26,6 +26,15 @@ #define MBIM_NDP16_SIGN_MASK 0x00ffffff +/* Usual WWAN MTU */ +#define MHI_MBIM_DEFAULT_MTU 1500 + +/* 3500 makes it possible to optimize skb allocation: the skbs will basically + * fit in one 4K page. Large MBIM packets will simply be split over several MHI + * transfers and chained by the MHI net layer (zerocopy). + */ +#define MHI_MBIM_DEFAULT_MRU 3500 + struct mbim_context { u16 rx_seq; u16 tx_seq; @@ -91,20 +100,11 @@ static int mbim_rx_verify_nth16(struct sk_buff *skb) return le16_to_cpu(nth16->wNdpIndex); } -static int mbim_rx_verify_ndp16(struct sk_buff *skb, int ndpoffset) +static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) { struct mhi_net_dev *dev = netdev_priv(skb->dev); - struct usb_cdc_ncm_ndp16 *ndp16; int ret; - if (ndpoffset + sizeof(struct usb_cdc_ncm_ndp16) > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, "invalid NDP offset <%u>\n", - ndpoffset); - return -EINVAL; - } - - ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); - if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n", le16_to_cpu(ndp16->wLength)); @@ -130,9 +130,6 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) struct net_device *ndev = mhi_netdev->ndev; int ndpoffset; - if (skb_linearize(skb)) - goto error; - /* Check NTB header and retrieve first NDP offset */ ndpoffset = mbim_rx_verify_nth16(skb); if (ndpoffset < 0) { @@ -142,12 +139,19 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) /* Process each NDP */ while (1) { - struct usb_cdc_ncm_ndp16 *ndp16; - struct usb_cdc_ncm_dpe16 *dpe16; - int nframes, n; + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16; + int nframes, n, dpeoffset; + + if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { + net_err_ratelimited("%s: Incorrect NDP offset (%u)\n", + ndev->name, ndpoffset); + __mbim_length_errors_inc(mhi_netdev); + goto error; + } /* Check NDP header and retrieve number of datagrams */ - nframes = mbim_rx_verify_ndp16(skb, ndpoffset); + nframes = mbim_rx_verify_ndp16(skb, &ndp16); if (nframes < 0) { net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name); __mbim_length_errors_inc(mhi_netdev); @@ -155,8 +159,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) } /*
Only IP data type supported, no DSS in MHI context */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); - if ((ndp16->dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) + if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name); __mbim_errors_inc(mhi_netdev); @@ -164,19 +167,24 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) } /* Only primary IP session 0 (0x00) supported for now */ - if (ndp16->dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { + if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { net_err_ratelimited("%s: bad packet session\n", ndev->name); __mbim_errors_inc(mhi_netdev); goto next_ndp; } /* de-aggregate and deliver IP packets */ - dpe16 = ndp16->dpe16; - for (n = 0; n < nframes; n++, dpe16++) { - u16 dgram_offset = le16_to_cpu(dpe16->wDatagramIndex); - u16 dgram_len = le16_to_cpu(dpe16->wDatagramLength); + dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); + for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { + u16 dgram_offset, dgram_len; struct sk_buff *skbn; + if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) + break; + + dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); + dgram_len = le16_to_cpu(dpe16.wDatagramLength); + if (!dgram_offset || !dgram_len) break; /* null terminator */ @@ -185,7 +193,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) continue; skb_put(skbn, dgram_len); - memcpy(skbn->data, skb->data + dgram_offset, dgram_len); + skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); switch (skbn->data[0] & 0xf0) { case 0x40: @@ -206,7 +214,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) } next_ndp: /* Other NDP to process? 
*/ - ndpoffset = (int)le16_to_cpu(ndp16->wNextNdpIndex); + ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); if (!ndpoffset) break; } @@ -282,6 +290,8 @@ static int mbim_init(struct mhi_net_dev *mhi_netdev) return -ENOMEM; ndev->needed_headroom = sizeof(struct mbim_tx_hdr); + ndev->mtu = MHI_MBIM_DEFAULT_MTU; + mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; return 0; } diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 166f0d6cbcf7..c9ae52595a8f 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -77,6 +77,34 @@ static int nsim_set_ringparam(struct net_device *dev, return 0; } +static int +nsim_get_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (ns->ethtool.get_err) + return -ns->ethtool.get_err; + memcpy(fecparam, &ns->ethtool.fec, sizeof(ns->ethtool.fec)); + return 0; +} + +static int +nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam) +{ + struct netdevsim *ns = netdev_priv(dev); + u32 fec; + + if (ns->ethtool.set_err) + return -ns->ethtool.set_err; + memcpy(&ns->ethtool.fec, fecparam, sizeof(ns->ethtool.fec)); + fec = fecparam->fec; + if (fec == ETHTOOL_FEC_AUTO) + fec |= ETHTOOL_FEC_OFF; + fec |= ETHTOOL_FEC_NONE; + ns->ethtool.fec.active_fec = 1 << (fls(fec) - 1); + return 0; +} + static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, .get_pause_stats = nsim_get_pause_stats, @@ -86,6 +114,8 @@ static const struct ethtool_ops nsim_ethtool_ops = { .get_coalesce = nsim_get_coalesce, .get_ringparam = nsim_get_ringparam, .set_ringparam = nsim_set_ringparam, + .get_fecparam = nsim_get_fecparam, + .set_fecparam = nsim_set_fecparam, }; static void nsim_ethtool_ring_init(struct netdevsim *ns) @@ -104,8 +134,14 @@ void nsim_ethtool_init(struct netdevsim *ns) nsim_ethtool_ring_init(ns); + ns->ethtool.fec.fec = ETHTOOL_FEC_NONE; + ns->ethtool.fec.active_fec = ETHTOOL_FEC_NONE; + ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir); + debugfs_create_u32("get_err", 0600, ethtool, &ns->ethtool.get_err); + debugfs_create_u32("set_err", 0600, ethtool, &ns->ethtool.set_err); + dir = debugfs_create_dir("pause", ethtool); debugfs_create_bool("report_stats_rx", 0600, dir, &ns->ethtool.pauseparam.report_stats_rx); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d735c21def4b..7ff24e03577b 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -60,9 +60,12 @@ struct nsim_ethtool_pauseparam { }; struct nsim_ethtool { + u32 get_err; + u32 set_err; struct nsim_ethtool_pauseparam pauseparam; struct ethtool_coalesce coalesce; struct ethtool_ringparam ring; + struct ethtool_fecparam fec; }; struct netdevsim { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 82fe5f43f0e9..7bf3011b8e77 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -671,13 +671,13 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } -struct bcm53xx_phy_priv { +struct bcm54xx_phy_priv { u64 *stats; }; -static int bcm53xx_phy_probe(struct phy_device *phydev) +static int bcm54xx_phy_probe(struct phy_device *phydev) { - struct bcm53xx_phy_priv *priv; + struct bcm54xx_phy_priv *priv; priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -694,10 +694,10 @@ static int bcm53xx_phy_probe(struct phy_device *phydev) return 0; } -static 
void bcm53xx_phy_get_stats(struct phy_device *phydev, - struct ethtool_stats *stats, u64 *data) +static void bcm54xx_get_stats(struct phy_device *phydev, + struct ethtool_stats *stats, u64 *data) { - struct bcm53xx_phy_priv *priv = phydev->priv; + struct bcm54xx_phy_priv *priv = phydev->priv; bcm_phy_get_stats(phydev, priv->stats, stats, data); } @@ -708,6 +708,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5411", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -716,6 +720,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5421", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -724,6 +732,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54210E", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -732,6 +744,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5461", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -740,6 +756,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54612E", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -759,6 +779,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5464", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -769,6 +793,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5481", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .config_intr = bcm_phy_config_intr, @@ -778,6 +806,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54810", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = 
bcm54xx_config_init, .config_aneg = bcm5481_config_aneg, .config_intr = bcm_phy_config_intr, @@ -789,6 +821,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54811", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54811_config_init, .config_aneg = bcm5481_config_aneg, .config_intr = bcm_phy_config_intr, @@ -800,6 +836,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5482", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -808,6 +848,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -816,6 +860,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM50610M", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -824,6 +872,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM57780", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -851,8 +903,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .get_sset_count = bcm_phy_get_sset_count, .get_strings = bcm_phy_get_strings, - .get_stats = bcm53xx_phy_get_stats, - .probe = bcm53xx_phy_probe, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, }, { .phy_id = PHY_ID_BCM53125, .phy_id_mask = 0xfffffff0, @@ -861,8 +913,8 @@ static struct phy_driver broadcom_drivers[] = { /* PHY_GBIT_FEATURES */ .get_sset_count = bcm_phy_get_sset_count, .get_strings = bcm_phy_get_strings, - .get_stats = bcm53xx_phy_get_stats, - .probe = bcm53xx_phy_probe, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, @@ -871,6 +923,10 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM89610", /* PHY_GBIT_FEATURES */ + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm54xx_get_stats, + .probe = bcm54xx_phy_probe, .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 033df435f76c..2de679a68115 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -50,7 
+50,7 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus, EXPORT_SYMBOL(mdiobus_setup_mdiodev_from_board_info); /** - * mdio_register_board_info - register MDIO devices for a given board + * mdiobus_register_board_info - register MDIO devices for a given board * @info: array of devices descriptors * @n: number of descriptors provided * Context: can sleep diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 823518554079..dadf75ff3ab9 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -510,7 +510,7 @@ static int mdiobus_create_device(struct mii_bus *bus, * on a given bus, and attach them to the bus. Drivers should use * mdiobus_register() rather than __mdiobus_register() unless they * need to pass a specific owner module. MDIO devices which are not - * PHYs will not be brought up by this function. They are expected to + * PHYs will not be brought up by this function. They are expected * to be explicitly listed in DT and instantiated by of_mdiobus_register(). * * Returns 0 on success or < 0 on error. diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index ddb78fb4d6dc..d8cac02a79b9 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -185,10 +185,13 @@ static int lan87xx_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } -static int lan87xx_config_aneg_ext(struct phy_device *phydev) +static int lan95xx_config_aneg_ext(struct phy_device *phydev) { int rc; + if (phydev->phy_id != 0x0007c0f0) /* not (LAN9500A or LAN9505A) */ + return lan87xx_config_aneg(phydev); + /* Extend Manual AutoMDIX timer */ rc = phy_read(phydev, PHY_EDPD_CONFIG); if (rc < 0) @@ -441,7 +444,7 @@ static struct phy_driver smsc_phy_driver[] = { .read_status = lan87xx_read_status, .config_init = smsc_phy_config_init, .soft_reset = smsc_phy_reset, - .config_aneg = lan87xx_config_aneg_ext, + .config_aneg = lan95xx_config_aneg_ext, /* IRQ related */ .config_intr = smsc_phy_config_intr, diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index c457f849e553..e6d48e5c65a3 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -279,7 +279,6 @@ static void z_decomp_free(void *arg) struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg; if (state) { - zlib_inflateEnd(&state->strm); vfree(state->strm.workspace); kfree(state); } diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d650b39b6e5d..c1316718304d 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -296,12 +296,12 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, int ret; if (2 == size) { - u16 buf; + u16 buf = 0; ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); le16_to_cpus(&buf); *((u16 *)data) = buf; } else if (4 == size) { - u32 buf; + u32 buf = 0; ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); le32_to_cpus(&buf); *((u32 *)data) = buf; @@ -1296,6 +1296,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) { u8 mac[ETH_ALEN]; + memset(mac, 0, sizeof(mac)); + /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, mac)) { netif_dbg(dev, ifup, dev->net, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index e81c5699c952..6acc5e904518 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2015 Microchip Technology */ -#include <linux/version.h> 
#include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7665817f3cb6..39ee1300cdd9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3484,6 +3484,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (err < 0) return ERR_PTR(err); + udp_allow_gso(sock->sk); return sock; } diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 686a25d3b512..5de71e44fc5a 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -573,7 +573,7 @@ static DECLARE_TASKLET(fst_tx_task, fst_process_tx_work_q); static DECLARE_TASKLET(fst_int_task, fst_process_int_work_q); static struct fst_card_info *fst_card_array[FST_MAX_CARDS]; -static spinlock_t fst_work_q_lock; +static DEFINE_SPINLOCK(fst_work_q_lock); static u64 fst_work_txq; static u64 fst_work_intq; @@ -2648,7 +2648,6 @@ fst_init(void) for (i = 0; i < FST_MAX_CARDS; i++) fst_card_array[i] = NULL; - spin_lock_init(&fst_work_q_lock); return pci_register_driver(&fst_driver); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c index 9aa5183c7a56..be9a69fe1b38 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c @@ -40,10 +40,8 @@ static int mt7622_wmac_probe(struct platform_device *pdev) return irq; mem_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mem_base)) { - dev_err(&pdev->dev, "Failed to get memory resource\n"); + if (IS_ERR(mem_base)) return PTR_ERR(mem_base); - } return mt7615_mmio_probe(&pdev->dev, mem_base, irq, mt7615e_reg_map); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c index 77dcd71e49a5..7bef36feb9c7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c @@ -192,7 +192,7 @@ mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s) } static int -mt7915_tx_stats_read(struct seq_file *file, void *data) +mt7915_tx_stats_show(struct seq_file *file, void *data) { struct mt7915_dev *dev = file->private; int stat[8], i, n; @@ -222,19 +222,7 @@ mt7915_tx_stats_read(struct seq_file *file, void *data) return 0; } -static int -mt7915_tx_stats_open(struct inode *inode, struct file *f) -{ - return single_open(f, mt7915_tx_stats_read, inode->i_private); -} - -static const struct file_operations fops_tx_stats = { - .open = mt7915_tx_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(mt7915_tx_stats); static int mt7915_read_temperature(struct seq_file *s, void *data) { @@ -379,7 +367,7 @@ int mt7915_init_debugfs(struct mt7915_dev *dev) mt7915_queues_read); debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir, mt7915_queues_acq); - debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats); + debugfs_create_file("tx_stats", 0400, dir, dev, &mt7915_tx_stats_fops); debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug); debugfs_create_file("implicit_txbf", 0600, dir, dev, &fops_implicit_txbf); @@ -412,7 +400,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_fixed_rate, NULL, mt7915_sta_fixed_rate_set, "%llx\n"); static int -mt7915_sta_stats_read(struct seq_file *s, void *data) +mt7915_sta_stats_show(struct seq_file *s, void *data) { struct ieee80211_sta *sta = s->private; struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv; @@ 
-455,24 +443,12 @@ mt7915_sta_stats_read(struct seq_file *s, void *data) return 0; } -static int -mt7915_sta_stats_open(struct inode *inode, struct file *f) -{ - return single_open(f, mt7915_sta_stats_read, inode->i_private); -} - -static const struct file_operations fops_sta_stats = { - .open = mt7915_sta_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(mt7915_sta_stats); void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir) { debugfs_create_file("fixed_rate", 0600, dir, sta, &fops_fixed_rate); - debugfs_create_file("stats", 0400, dir, sta, &fops_sta_stats); + debugfs_create_file("stats", 0400, dir, sta, &mt7915_sta_stats_fops); } #endif diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c index 0dc8e25e18e4..c1a64ff6a197 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c @@ -62,7 +62,7 @@ mt7921_ampdu_stat_read_phy(struct mt7921_phy *phy, } static int -mt7921_tx_stats_read(struct seq_file *file, void *data) +mt7921_tx_stats_show(struct seq_file *file, void *data) { struct mt7921_dev *dev = file->private; int stat[8], i, n; @@ -88,19 +88,7 @@ mt7921_tx_stats_read(struct seq_file *file, void *data) return 0; } -static int -mt7921_tx_stats_open(struct inode *inode, struct file *f) -{ - return single_open(f, mt7921_tx_stats_read, inode->i_private); -} - -static const struct file_operations fops_tx_stats = { - .open = mt7921_tx_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(mt7921_tx_stats); static int mt7921_queues_acq(struct seq_file *s, void *data) @@ -239,7 +227,7 @@ int mt7921_init_debugfs(struct mt7921_dev *dev) mt7921_queues_read); debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir, mt7921_queues_acq); - debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats); + debugfs_create_file("tx_stats", 0400, dir, dev, &mt7921_tx_stats_fops); debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug); debugfs_create_file("runtime-pm", 0600, dir, dev, &fops_pm); debugfs_create_file("idle-timeout", 0600, dir, dev, diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 35b42275a06c..d308ff744a29 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1098,8 +1098,9 @@ walk_ipv6: tmp.disp_flag = QETH_DISP_ADDR_ADD; tmp.is_multicast = 1; - read_lock_bh(&in6_dev->lock); - for (im6 = in6_dev->mc_list; im6 != NULL; im6 = im6->next) { + for (im6 = rtnl_dereference(in6_dev->mc_list); + im6; + im6 = rtnl_dereference(im6->next)) { tmp.u.a6.addr = im6->mca_addr; ipm = qeth_l3_find_addr_by_ip(card, &tmp); @@ -1117,7 +1118,6 @@ walk_ipv6: qeth_l3_ipaddr_hash(ipm)); } - read_unlock_bh(&in6_dev->lock); out: return 0; diff --git a/drivers/scsi/aacraid/TODO b/drivers/scsi/aacraid/TODO deleted file mode 100644 index 78dc863eff4f..000000000000 --- a/drivers/scsi/aacraid/TODO +++ /dev/null @@ -1,3 +0,0 @@ -o Testing -o More testing -o I/O size increase diff --git a/fs/befs/TODO b/fs/befs/TODO deleted file mode 100644 index 3250921aa2e6..000000000000 --- a/fs/befs/TODO +++ /dev/null @@ -1,14 +0,0 @@ -TODO -========== - -* Convert comments to the Kernel-Doc format. - -* Befs_fs.h has gotten big and messy. No reason not to break it up into - smaller peices. 
- -* See if Alexander Viro's option parser made it into the kernel tree. - Use that if we can. (include/linux/parser.h) - -* See if we really need separate types for on-disk and in-memory - representations of the superblock and inode. - diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO deleted file mode 100644 index ca28964abd4b..000000000000 --- a/fs/jffs2/TODO +++ /dev/null @@ -1,37 +0,0 @@ - - - support asynchronous operation -- add a per-fs 'reserved_space' count, - let each outstanding write reserve the _maximum_ amount of physical - space it could take. Let GC flush the outstanding writes because the - reservations will necessarily be pessimistic. With this we could even - do shared writable mmap, if we can have a fs hook for do_wp_page() to - make the reservation. - - disable compression in commit_write()? - - fine-tune the allocation / GC thresholds - - chattr support - turning on/off and tuning compression per-inode - - checkpointing (do we need this? scan is quite fast) - - make the scan code populate real inodes so read_inode just after - mount doesn't have to read the flash twice for large files. - Make this a per-inode option, changeable with chattr, so you can - decide which inodes should be in-core immediately after mount. - - test, test, test - - - NAND flash support: - - almost done :) - - use bad block check instead of the hardwired byte check - - - Optimisations: - - Split writes so they go to two separate blocks rather than just c->nextblock. - By writing _new_ nodes to one block, and garbage-collected REF_PRISTINE - nodes to a different one, we can separate clean nodes from those which - are likely to become dirty, and end up with blocks which are each far - closer to 100% or 0% clean, hence speeding up later GC progress dramatically. - - Stop keeping name in-core with struct jffs2_full_dirent. If we keep the hash in - the full dirent, we only need to go to the flash in lookup() when we think we've - got a match, and in readdir(). - - Doubly-linked next_in_ino list to allow us to free obsoleted raw_node_refs immediately? - - Remove size from jffs2_raw_node_frag. - -dedekind: -1. __jffs2_flush_wbuf() has a strange 'pad' parameter. Eliminate. -2. get_sb()->build_fs()->scan() path... Why get_sb() removes scan()'s crap in - case of failure? scan() does not clean everything. Fix. 
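Aside: the mt7915 and mt7921 debugfs hunks above replace the hand-rolled single_open()/file_operations boilerplate with DEFINE_SHOW_ATTRIBUTE() from <linux/seq_file.h>: given a foo_show(struct seq_file *, void *) function, the macro emits foo_open() and a const struct file_operations named foo_fops (with .owner = THIS_MODULE already set). A minimal sketch of the pattern, with my_stats and struct my_dev as illustrative names not taken from this series:

	#include <linux/debugfs.h>
	#include <linux/seq_file.h>

	static int my_stats_show(struct seq_file *s, void *data)
	{
		/* s->private is the data pointer passed to debugfs_create_file() */
		struct my_dev *mdev = s->private;

		seq_printf(s, "tx done: %u\n", mdev->tx_done);
		return 0;
	}
	DEFINE_SHOW_ATTRIBUTE(my_stats);	/* generates my_stats_open() and my_stats_fops */

	/* registration in the driver's debugfs init path: */
	debugfs_create_file("my_stats", 0400, dir, mdev, &my_stats_fops);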
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 984e42f8cb11..7256b8962e3c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1108,6 +1108,11 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table) err |= sysctl_err(path, table, "array not allowed"); } + if (table->proc_handler == proc_dou8vec_minmax) { + if (table->maxlen != sizeof(u8)) + err |= sysctl_err(path, table, "array not allowed"); + } + return err; } @@ -1123,6 +1128,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) (table->proc_handler == proc_douintvec) || (table->proc_handler == proc_douintvec_minmax) || (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dou8vec_minmax) || (table->proc_handler == proc_dointvec_jiffies) || (table->proc_handler == proc_dointvec_userhz_jiffies) || (table->proc_handler == proc_dointvec_ms_jiffies) || diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 707575c668f4..ae7a3411167c 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde <[email protected]> + * Copyright (c) 2021 Vincent Mailhol <[email protected]> */ #ifndef _CAN_BITTIMING_H @@ -10,9 +11,83 @@ #define CAN_SYNC_SEG 1 + +/* Kilobits and Megabits per second */ +#define CAN_KBPS 1000UL +#define CAN_MBPS 1000000UL + +/* Megahertz */ +#define CAN_MHZ 1000000UL + +/* + * struct can_tdc - CAN FD Transmission Delay Compensation parameters + * + * At high bit rates, the propagation delay from the TX pin to the RX + * pin of the transceiver causes measurement errors: the sample point + * on the RX pin might occur on the previous bit. + * + * To solve this issue, ISO 11898-1 introduces in section 11.3.3 + * "Transmitter delay compensation" an SSP (Secondary Sample Point) + * equal to the distance, in time quanta, from the start of the bit + * time on the TX pin to the actual measurement on the RX pin. + * + * This structure contains the parameters to calculate that SSP. + * + * @tdcv: Transmitter Delay Compensation Value. Distance, in time + * quanta, from when the bit is sent on the TX pin to when it is + * received on the RX pin of the transmitter. Possible options: + * + * 0: automatic mode. The controller dynamically measures @tdcv + * for each transmitted CAN FD frame. + * + * Other values: manual mode. Use the fixed provided value. + * + * @tdco: Transmitter Delay Compensation Offset. Offset value, in time + * quanta, defining the distance between the start of the bit + * reception on the RX pin of the transceiver and the SSP + * position such that SSP = @tdcv + @tdco. + * + * If @tdco is zero, then TDC is disabled and both @tdcv and + * @tdcf should be ignored. + * + * @tdcf: Transmitter Delay Compensation Filter window. Defines the + * minimum value for the SSP position in time quanta. If SSP is + * less than @tdcf, then no delay compensation occurs and the + * normal sampling point is used instead. The feature is enabled + * if and only if @tdcv is set to zero (automatic mode) and @tdcf + * is configured to a value greater than @tdco. + */ +struct can_tdc { + u32 tdcv; + u32 tdco; + u32 tdcf; +}; + +/* + * struct can_tdc_const - CAN hardware-dependent constant for + * Transmission Delay Compensation + * + * @tdcv_max: Transmitter Delay Compensation Value maximum value. + * Should be set to zero if the controller does not support + * manual mode for tdcv.
+ * @tdco_max: Transmitter Delay Compensation Offset maximum value. + * Should not be zero. If the controller does not support TDC, + * then the pointer to this structure should be NULL. + * @tdcf_max: Transmitter Delay Compensation Filter window maximum + * value. Should be set to zero if the controller does not + * support this feature. + */ +struct can_tdc_const { + u32 tdcv_max; + u32 tdco_max; + u32 tdcf_max; +}; + #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc); + +void can_calc_tdco(struct net_device *dev); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, @@ -21,6 +96,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, netdev_err(dev, "bit-timing calculation not available\n"); return -EINVAL; } + +static inline void can_calc_tdco(struct net_device *dev) +{ +} #endif /* CONFIG_CAN_CALC_BITTIMING */ int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index ac4d83a1ab81..27b275e463da 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -39,19 +39,23 @@ struct can_priv { struct net_device *dev; struct can_device_stats can_stats; - struct can_bittiming bittiming, data_bittiming; const struct can_bittiming_const *bittiming_const, *data_bittiming_const; - const u16 *termination_const; - unsigned int termination_const_cnt; - u16 termination; - const u32 *bitrate_const; + struct can_bittiming bittiming, data_bittiming; + const struct can_tdc_const *tdc_const; + struct can_tdc tdc; + unsigned int bitrate_const_cnt; + const u32 *bitrate_const; const u32 *data_bitrate_const; unsigned int data_bitrate_const_cnt; u32 bitrate_max; struct can_clock clock; + unsigned int termination_const_cnt; + const u16 *termination_const; + u16 termination; + enum can_state state; /* CAN controller features - see include/uapi/linux/can/netlink.h */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index d438eb058069..d311bc369a39 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -23,7 +23,8 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, unsigned int *frame_len_ptr); -void can_free_echo_skb(struct net_device *dev, unsigned int idx); +void can_free_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, struct canfd_frame **cfd); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index febdb43d27e5..e338ef7abc00 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -96,6 +96,7 @@ struct stmmac_dma_cfg { int mixed_burst; bool aal; bool eame; + bool multi_msi_en; }; #define AXI_BLEN 7 @@ -237,5 +238,13 @@ struct plat_stmmacenet_data { struct pci_dev *pdev; bool has_crossts; int int_snapshot_num; + bool multi_msi_en; + int msi_mac_vec; + int msi_wol_vec; + int msi_lpi_vec; + int msi_sfty_ce_vec; + int msi_sfty_ue_vec; + int msi_rx_base_vec; + int msi_tx_base_vec; }; #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 51298a4f4623..d99ca99837de 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -53,6 +53,8 @@ int 
proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); diff --git a/include/linux/udp.h b/include/linux/udp.h index aa84597bdc33..ae66dadd8543 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -51,7 +51,9 @@ struct udp_sock { * different encapsulation layer set * this */ - gro_enabled:1; /* Can accept GRO packets */ + gro_enabled:1, /* Request GRO aggregation */ + accept_udp_l4:1, + accept_udp_fraglist:1; /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -131,8 +133,22 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { - return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) && - skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4; + if (!skb_is_gso(skb)) + return false; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + return true; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + return true; + + return false; +} + +static inline void udp_allow_gso(struct sock *sk) +{ + udp_sk(sk)->accept_udp_l4 = 1; + udp_sk(sk)->accept_udp_fraglist = 1; } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 8bf5906073bc..71bb4cc4d05d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -78,6 +78,7 @@ struct inet6_ifaddr { struct ip6_sf_socklist { unsigned int sl_max; unsigned int sl_count; + struct rcu_head rcu; struct in6_addr sl_addr[]; }; @@ -91,18 +92,18 @@ struct ipv6_mc_socklist { int ifindex; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ipv6_mc_socklist __rcu *next; - rwlock_t sflock; - struct ip6_sf_socklist *sflist; + struct ip6_sf_socklist __rcu *sflist; struct rcu_head rcu; }; struct ip6_sf_list { - struct ip6_sf_list *sf_next; + struct ip6_sf_list __rcu *sf_next; struct in6_addr sf_addr; unsigned long sf_count[2]; /* include/exclude counts */ unsigned char sf_gsresp; /* include in g & s response? */ unsigned char sf_oldin; /* change state */ unsigned char sf_crcount; /* retrans. 
left to send */ + struct rcu_head rcu; }; #define MAF_TIMER_RUNNING 0x01 @@ -114,19 +115,19 @@ struct ip6_sf_list { struct ifmcaddr6 { struct in6_addr mca_addr; struct inet6_dev *idev; - struct ifmcaddr6 *next; - struct ip6_sf_list *mca_sources; - struct ip6_sf_list *mca_tomb; + struct ifmcaddr6 __rcu *next; + struct ip6_sf_list __rcu *mca_sources; + struct ip6_sf_list __rcu *mca_tomb; unsigned int mca_sfmode; unsigned char mca_crcount; unsigned long mca_sfcount[2]; - struct timer_list mca_timer; + struct delayed_work mca_work; unsigned int mca_flags; int mca_users; refcount_t mca_refcnt; - spinlock_t mca_lock; unsigned long mca_cstamp; unsigned long mca_tstamp; + struct rcu_head rcu; }; /* Anycast stuff */ @@ -165,9 +166,8 @@ struct inet6_dev { struct list_head addr_list; - struct ifmcaddr6 *mc_list; - struct ifmcaddr6 *mc_tomb; - spinlock_t mc_lock; + struct ifmcaddr6 __rcu *mc_list; + struct ifmcaddr6 __rcu *mc_tomb; unsigned char mc_qrv; /* Query Robustness Variable */ unsigned char mc_gq_running; @@ -179,9 +179,18 @@ struct inet6_dev { unsigned long mc_qri; /* Query Response Interval */ unsigned long mc_maxdelay; - struct timer_list mc_gq_timer; /* general query timer */ - struct timer_list mc_ifc_timer; /* interface change timer */ - struct timer_list mc_dad_timer; /* dad complete mc timer */ + struct delayed_work mc_gq_work; /* general query work */ + struct delayed_work mc_ifc_work; /* interface change work */ + struct delayed_work mc_dad_work; /* dad complete mc work */ + struct delayed_work mc_query_work; /* mld query work */ + struct delayed_work mc_report_work; /* mld report work */ + + struct sk_buff_head mc_query_queue; /* mld query queue */ + struct sk_buff_head mc_report_queue; /* mld report queue */ + + spinlock_t mc_query_lock; /* mld query queue lock */ + spinlock_t mc_report_lock; /* mld report queue lock */ + struct mutex mc_lock; /* mld global lock */ struct ifacaddr6 *ac_list; rwlock_t lock; diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8fce558b5fea..afbce90c4480 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -66,6 +66,8 @@ struct ipv6_stub { int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); + struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, + struct net_device *dev); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/mld.h b/include/net/mld.h index 496bddb59942..c07359808493 100644 --- a/include/net/mld.h +++ b/include/net/mld.h @@ -92,6 +92,9 @@ struct mld2_query { #define MLD_EXP_MIN_LIMIT 32768UL #define MLDV1_MRD_MAX_COMPAT (MLD_EXP_MIN_LIMIT - 1) +#define MLD_MAX_QUEUE 8 +#define MLD_MAX_SKBS 32 + static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2) { /* RFC3810, 5.1.3.
Maximum Response Code */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index dcaee24a4d87..47457048ab86 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -407,7 +407,6 @@ int register_pernet_device(struct pernet_operations *); void unregister_pernet_device(struct pernet_operations *); struct ctl_table; -struct ctl_table_header; #ifdef CONFIG_SYSCTL int net_sysctl_init(void); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e3cb2722b80..87e1612497ea 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -32,14 +32,18 @@ struct inet_hashinfo; struct inet_timewait_death_row { atomic_t tw_count; + char tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)]; - struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; + struct inet_hashinfo *hashinfo; int sysctl_max_tw_buckets; }; struct tcp_fastopen_context; struct netns_ipv4 { + /* Please keep tcp_death_row as the first field in netns_ipv4 */ + struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp; + #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; @@ -53,17 +57,17 @@ struct netns_ipv4 { struct mutex ra_mutex; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; - bool fib_has_custom_rules; - unsigned int fib_rules_require_fldissect; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; + unsigned int fib_rules_require_fldissect; + bool fib_has_custom_rules; #endif bool fib_has_custom_local_routes; + bool fib_offload_disabled; #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; - bool fib_offload_disabled; struct sock *fibnl; struct sock * __percpu *icmp_sk; @@ -83,41 +87,42 @@ struct netns_ipv4 { struct xt_table *nat_table; #endif - int sysctl_icmp_echo_ignore_all; - int sysctl_icmp_echo_ignore_broadcasts; - int sysctl_icmp_ignore_bogus_error_responses; + u8 sysctl_icmp_echo_ignore_all; + u8 sysctl_icmp_echo_enable_probe; + u8 sysctl_icmp_echo_ignore_broadcasts; + u8 sysctl_icmp_ignore_bogus_error_responses; + u8 sysctl_icmp_errors_use_inbound_ifaddr; int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; - int sysctl_icmp_errors_use_inbound_ifaddr; struct local_ports ip_local_ports; - int sysctl_tcp_ecn; - int sysctl_tcp_ecn_fallback; + u8 sysctl_tcp_ecn; + u8 sysctl_tcp_ecn_fallback; - int sysctl_ip_default_ttl; - int sysctl_ip_no_pmtu_disc; - int sysctl_ip_fwd_use_pmtu; - int sysctl_ip_fwd_update_priority; - int sysctl_ip_nonlocal_bind; - int sysctl_ip_autobind_reuse; + u8 sysctl_ip_default_ttl; + u8 sysctl_ip_no_pmtu_disc; + u8 sysctl_ip_fwd_use_pmtu; + u8 sysctl_ip_fwd_update_priority; + u8 sysctl_ip_nonlocal_bind; + u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes?
*/ - int sysctl_ip_dynaddr; - int sysctl_ip_early_demux; + u8 sysctl_ip_dynaddr; + u8 sysctl_ip_early_demux; #ifdef CONFIG_NET_L3_MASTER_DEV - int sysctl_raw_l3mdev_accept; + u8 sysctl_raw_l3mdev_accept; #endif - int sysctl_tcp_early_demux; - int sysctl_udp_early_demux; + u8 sysctl_tcp_early_demux; + u8 sysctl_udp_early_demux; - int sysctl_nexthop_compat_mode; + u8 sysctl_nexthop_compat_mode; - int sysctl_fwmark_reflect; - int sysctl_tcp_fwmark_accept; + u8 sysctl_fwmark_reflect; + u8 sysctl_tcp_fwmark_accept; #ifdef CONFIG_NET_L3_MASTER_DEV - int sysctl_tcp_l3mdev_accept; + u8 sysctl_tcp_l3mdev_accept; #endif - int sysctl_tcp_mtu_probing; + u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; int sysctl_tcp_min_snd_mss; @@ -125,55 +130,55 @@ struct netns_ipv4 { u32 sysctl_tcp_probe_interval; int sysctl_tcp_keepalive_time; - int sysctl_tcp_keepalive_probes; int sysctl_tcp_keepalive_intvl; + u8 sysctl_tcp_keepalive_probes; - int sysctl_tcp_syn_retries; - int sysctl_tcp_synack_retries; - int sysctl_tcp_syncookies; + u8 sysctl_tcp_syn_retries; + u8 sysctl_tcp_synack_retries; + u8 sysctl_tcp_syncookies; int sysctl_tcp_reordering; - int sysctl_tcp_retries1; - int sysctl_tcp_retries2; - int sysctl_tcp_orphan_retries; + u8 sysctl_tcp_retries1; + u8 sysctl_tcp_retries2; + u8 sysctl_tcp_orphan_retries; + u8 sysctl_tcp_tw_reuse; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; - int sysctl_tcp_tw_reuse; - int sysctl_tcp_sack; - int sysctl_tcp_window_scaling; - int sysctl_tcp_timestamps; - int sysctl_tcp_early_retrans; - int sysctl_tcp_recovery; - int sysctl_tcp_thin_linear_timeouts; - int sysctl_tcp_slow_start_after_idle; - int sysctl_tcp_retrans_collapse; - int sysctl_tcp_stdurg; - int sysctl_tcp_rfc1337; - int sysctl_tcp_abort_on_overflow; - int sysctl_tcp_fack; + u8 sysctl_tcp_sack; + u8 sysctl_tcp_window_scaling; + u8 sysctl_tcp_timestamps; + u8 sysctl_tcp_early_retrans; + u8 sysctl_tcp_recovery; + u8 sysctl_tcp_thin_linear_timeouts; + u8 sysctl_tcp_slow_start_after_idle; + u8 sysctl_tcp_retrans_collapse; + u8 sysctl_tcp_stdurg; + u8 sysctl_tcp_rfc1337; + u8 sysctl_tcp_abort_on_overflow; + u8 sysctl_tcp_fack; /* obsolete */ int sysctl_tcp_max_reordering; - int sysctl_tcp_dsack; - int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; - int sysctl_tcp_frto; - int sysctl_tcp_nometrics_save; - int sysctl_tcp_no_ssthresh_metrics_save; - int sysctl_tcp_moderate_rcvbuf; - int sysctl_tcp_tso_win_divisor; - int sysctl_tcp_workaround_signed_windows; + u8 sysctl_tcp_dsack; + u8 sysctl_tcp_app_win; + u8 sysctl_tcp_frto; + u8 sysctl_tcp_nometrics_save; + u8 sysctl_tcp_no_ssthresh_metrics_save; + u8 sysctl_tcp_moderate_rcvbuf; + u8 sysctl_tcp_tso_win_divisor; + u8 sysctl_tcp_workaround_signed_windows; int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; - int sysctl_tcp_min_tso_segs; int sysctl_tcp_min_rtt_wlen; - int sysctl_tcp_autocorking; + u8 sysctl_tcp_min_tso_segs; + u8 sysctl_tcp_autocorking; + u8 sysctl_tcp_reflect_tos; + u8 sysctl_tcp_comp_sack_nr; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_wmem[3]; int sysctl_tcp_rmem[3]; - int sysctl_tcp_comp_sack_nr; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; - struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; const struct tcp_congestion_ops __rcu *tcp_congestion_control; @@ -182,20 +187,19 @@ struct netns_ipv4 { unsigned int 
sysctl_tcp_fastopen_blackhole_timeout; atomic_t tfo_active_disable_times; unsigned long tfo_active_disable_stamp; - int sysctl_tcp_reflect_tos; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; - int sysctl_fib_notify_on_flag_change; + u8 sysctl_fib_notify_on_flag_change; #ifdef CONFIG_NET_L3_MASTER_DEV - int sysctl_udp_l3mdev_accept; + u8 sysctl_udp_l3mdev_accept; #endif + u8 sysctl_igmp_llm_reports; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; - int sysctl_igmp_llm_reports; int sysctl_igmp_qrv; struct ping_group_range ping_group_range; @@ -216,8 +220,8 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH - int sysctl_fib_multipath_use_neigh; - int sysctl_fib_multipath_hash_policy; + u8 sysctl_fib_multipath_use_neigh; + u8 sysctl_fib_multipath_hash_policy; #endif struct fib_notifier_ops *notifier_ops; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 21c0debbd39e..808f0f79ea9c 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -20,7 +20,6 @@ struct netns_sysctl_ipv6 { struct ctl_table_header *frags_hdr; struct ctl_table_header *xfrm6_hdr; #endif - int bindv6only; int flush_delay; int ip6_rt_max_size; int ip6_rt_gc_min_interval; @@ -29,21 +28,22 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; - int multipath_hash_policy; - int flowlabel_consistency; - int auto_flowlabels; + u8 bindv6only; + u8 multipath_hash_policy; + u8 flowlabel_consistency; + u8 auto_flowlabels; int icmpv6_time; - int icmpv6_echo_ignore_all; - int icmpv6_echo_ignore_multicast; - int icmpv6_echo_ignore_anycast; + u8 icmpv6_echo_ignore_all; + u8 icmpv6_echo_ignore_multicast; + u8 icmpv6_echo_ignore_anycast; DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); unsigned long *icmpv6_ratemask_ptr; - int anycast_src_echo_reply; - int ip_nonlocal_bind; - int fwmark_reflect; + u8 anycast_src_echo_reply; + u8 ip_nonlocal_bind; + u8 fwmark_reflect; + u8 flowlabel_state_ranges; int idgen_retries; int idgen_delay; - int flowlabel_state_ranges; int flowlabel_reflect; int max_dst_opts_cnt; int max_hbh_opts_cnt; @@ -51,10 +51,13 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_len; int seg6_flowlabel; bool skip_notify_on_dev_down; - int fib_notify_on_flag_change; + u8 fib_notify_on_flag_change; }; struct netns_ipv6 { + /* Keep ip6_dst_ops at the beginning of netns_ipv6 */ + struct dst_ops ip6_dst_ops; + struct netns_sysctl_ipv6 sysctl; struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; @@ -76,7 +79,6 @@ struct netns_ipv6 { struct hlist_head *fib_table_hash; struct fib6_table *fib6_main_tbl; struct list_head fib6_walkers; - struct dst_ops ip6_dst_ops; rwlock_t fib6_walker_lock; spinlock_t fib6_gc_lock; unsigned int ip6_rt_gc_expire; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 28145f714801..10e1777877e6 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -102,7 +102,7 @@ struct nh_grp_entry { union { struct { atomic_t upper_bound; - } mpath; + } hthr; struct { /* Member on uw_nh_entries.
*/ struct list_head uw_nh_entry; @@ -120,7 +120,7 @@ struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; bool is_multipath; - bool mpath; + bool hash_threshold; bool resilient; bool fdb_nh; bool has_v4; diff --git a/include/net/udp.h b/include/net/udp.h index 347b62a753c3..f55aaeef7e91 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -517,6 +517,29 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; } +static inline void udp_post_segment_fix_csum(struct sk_buff *skb) +{ + /* UDP-lite can't land here - no GRO */ + WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); + + /* UDP packets generated with UDP_SEGMENT and traversing: + * + * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) + * + * can reach a UDP socket with CHECKSUM_NONE, because + * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE. + * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will + * have a valid checksum, as the GRO engine validates the UDP csum + * before the aggregation and nobody strips such info in between. + * Instead of adding another check in the tunnel fastpath, we can force + * a valid csum after the segmentation. + * Additionally fixup the UDP CB. + */ + UDP_SKB_CB(skb)->cscov = skb->len; + if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) + skb->csum_valid = 1; +} + #ifdef CONFIG_BPF_SYSCALL struct sk_psock; struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cde753bb2093..868b513d4f54 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1376,15 +1376,33 @@ struct ethtool_per_queue_op { }; /** - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM - * @active_fec: FEC mode which is active on porte - * @fec: Bitmask of supported/configured FEC modes - * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * @active_fec: FEC mode which is active on the port, single bit set, GET only. + * @fec: Bitmask of configured FEC modes. + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. * - * Drivers should reject a non-zero setting of @autoneg when - * autoneogotiation is disabled (or not supported) for the link. + * Note that @reserved was never validated on input and ethtool user space + * left it uninitialized when calling SET. Hence going forward it can only be + * used to return a value to userspace with GET. + * + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. + * FEC settings are configured by link autonegotiation whenever it's enabled. + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. + * + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. + * It is recommended that drivers only accept a single bit set in @fec. + * When multiple bits are set in @fec drivers may pick a mode in an implementation + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other + * FEC modes, because it's unclear whether in this case other modes constrain + * AUTO or are independent choices. + * Drivers must reject SET requests if they support none of the requested modes.
+ * + * If the device does not support FEC, drivers may use %ETHTOOL_FEC_NONE instead + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. * + * See enum ethtool_fec_config_bits for definition of valid bits for both + * @fec and @active_fec. */ struct ethtool_fecparam { __u32 cmd; @@ -1396,11 +1414,16 @@ struct ethtool_fecparam { /** * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver - * @ETHTOOL_FEC_OFF: No FEC Mode - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not + * be used together with other bits. GET only. + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually + * based on link mode and SFP parameters read from module's + * EEPROM. This bit does _not_ mean autonegotiation. + * @ETHTOOL_FEC_OFF_BIT: No FEC Mode + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet + * Consortium) */ enum ethtool_fec_config_bits { ETHTOOL_FEC_NONE_BIT, diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index a286635ac9b8..7f1bdb5b31ba 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -42,6 +42,8 @@ enum { ETHTOOL_MSG_CABLE_TEST_ACT, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_FEC_GET, + ETHTOOL_MSG_FEC_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -80,6 +82,8 @@ enum { ETHTOOL_MSG_CABLE_TEST_NTF, ETHTOOL_MSG_CABLE_TEST_TDR_NTF, ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, + ETHTOOL_MSG_FEC_GET_REPLY, + ETHTOOL_MSG_FEC_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -629,6 +633,19 @@ enum { ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; +/* FEC */ + +enum { + ETHTOOL_A_FEC_UNSPEC, + ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEC_MODES, /* bitset */ + ETHTOOL_A_FEC_AUTO, /* u8 */ + ETHTOOL_A_FEC_ACTIVE, /* u32 */ + + __ETHTOOL_A_FEC_CNT, + ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index fb169a50895e..222325d1d80e 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -20,6 +20,9 @@ #include <linux/types.h> #include <asm/byteorder.h> +#include <linux/in.h> +#include <linux/if.h> +#include <linux/in6.h> #define ICMP_ECHOREPLY 0 /* Echo Reply */ #define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ @@ -66,6 +69,23 @@ #define ICMP_EXC_TTL 0 /* TTL count exceeded */ #define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */ +/* Codes for EXT_ECHO (PROBE) */ +#define ICMP_EXT_ECHO 42 +#define ICMP_EXT_ECHOREPLY 43 +#define ICMP_EXT_MAL_QUERY 1 /* Malformed Query */ +#define ICMP_EXT_NO_IF 2 /* No such Interface */ +#define ICMP_EXT_NO_TABLE_ENT 3 /* No such Table Entry */ +#define ICMP_EXT_MULT_IFS 4 /* Multiple Interfaces Satisfy Query */ + +/* Constants for EXT_ECHO (PROBE) */ +#define EXT_ECHOREPLY_ACTIVE (1 << 2)/* active bit in reply message */ +#define EXT_ECHOREPLY_IPV4 (1 << 1)/* ipv4 bit in reply message */ +#define EXT_ECHOREPLY_IPV6 1 /* ipv6 bit in reply message */
+#define EXT_ECHO_CTYPE_NAME 1 +#define EXT_ECHO_CTYPE_INDEX 2 +#define EXT_ECHO_CTYPE_ADDR 3 +#define ICMP_AFI_IP 1 /* Address Family Identifier for ipv4 */ +#define ICMP_AFI_IP6 2 /* Address Family Identifier for ipv6 */ struct icmphdr { __u8 type; @@ -118,4 +138,26 @@ struct icmp_extobj_hdr { __u8 class_type; }; +/* RFC 8335: 2.1 Header for c-type 3 payload */ +struct icmp_ext_echo_ctype3_hdr { + __be16 afi; + __u8 addrlen; + __u8 reserved; +}; + +/* RFC 8335: 2.1 Interface Identification Object */ +struct icmp_ext_echo_iio { + struct icmp_extobj_hdr extobj_hdr; + union { + char name[IFNAMSIZ]; + __be32 ifindex; + struct { + struct icmp_ext_echo_ctype3_hdr ctype3_hdr; + union { + struct in_addr ipv4_addr; + struct in6_addr ipv6_addr; + } ip_addr; + } addr; + } ident; +}; #endif /* _UAPI_LINUX_ICMP_H */ diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 0564fd7ccde4..ecaece3af38d 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -140,6 +140,9 @@ struct icmp6hdr { #define ICMPV6_UNK_OPTION 2 #define ICMPV6_HDR_INCOMP 3 +/* Codes for EXT_ECHO (PROBE) */ +#define ICMPV6_EXT_ECHO_REQUEST 160 +#define ICMPV6_EXT_ECHO_REPLY 161 /* * constants for (set|get)sockopt */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 62fbd09b5dc1..90d2892ef6a3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1034,6 +1034,65 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, do_proc_douintvec_minmax_conv, &param); } +/** + * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes up to table->maxlen/sizeof(u8) unsigned char + * values from/to the user buffer, treated as an ASCII string. Negative + * strings are not allowed. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success or an error on write when the range check fails. + */ +int proc_dou8vec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp; + unsigned int min = 0, max = 255U, val; + u8 *data = table->data; + struct do_proc_douintvec_minmax_conv_param param = { + .min = &min, + .max = &max, + }; + int res; + + /* Do not support arrays yet.
*/ + if (table->maxlen != sizeof(u8)) + return -EINVAL; + + if (table->extra1) { + min = *(unsigned int *) table->extra1; + if (min > 255U) + return -EINVAL; + } + if (table->extra2) { + max = *(unsigned int *) table->extra2; + if (max > 255U) + return -EINVAL; + } + + tmp = *table; + + tmp.maxlen = sizeof(val); + tmp.data = &val; + val = *data; + res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, + do_proc_douintvec_minmax_conv, &param); + if (res) + return res; + if (write) + *data = val; + return 0; +} +EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); + static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, unsigned int *valp, int write, void *data) @@ -1582,6 +1641,12 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, return -ENOSYS; } +int proc_dou8vec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_dointvec_jiffies(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/9p/client.c b/net/9p/client.c index 0a9019da18f3..b7b958f61faf 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -122,7 +122,7 @@ static int get_protocol_version(char *s) } /** - * parse_options - parse mount options into client structure + * parse_opts - parse mount options into client structure * @opts: options string passed from mount * @clnt: existing v9fs client information * @@ -256,7 +256,7 @@ EXPORT_SYMBOL(p9_fcall_fini); static struct kmem_cache *p9_req_cache; /** - * p9_req_alloc - Allocate a new request. + * p9_tag_alloc - Allocate a new request. * @c: Client session. * @type: Transaction type. * @max_size: Maximum packet size for this request. diff --git a/net/9p/error.c b/net/9p/error.c index 231f355fa9c6..61c18daf3050 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -197,7 +197,7 @@ int p9_error_init(void) EXPORT_SYMBOL(p9_error_init); /** - * errstr2errno - convert error string to error number + * p9_errstr2errno - convert error string to error number * @errstr: error string * @len: length of error string * diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fa158397bb63..f4dd0456beaf 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -872,7 +872,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) } /** - * p9_mux_destroy - cancels all pending requests of mux + * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ diff --git a/net/ax25/TODO b/net/ax25/TODO deleted file mode 100644 index 69fb4e368d92..000000000000 --- a/net/ax25/TODO +++ /dev/null @@ -1,20 +0,0 @@ -Do the ax25_list_lock, ax25_dev_lock, linkfail_lockreally, ax25_frag_lock and -listen_lock have to be bh-safe? - -Do the netrom and rose locks have to be bh-safe? - -A device might be deleted after lookup in the SIOCADDRT ioctl but before it's -being used. - -Routes to a device being taken down might be deleted by ax25_rt_device_down -but added by somebody else before the device has been deleted fully. - -The ax25_rt_find_route synopsys is pervert but I somehow had to deal with -the race caused by the static variable in it's previous implementation. - -Implement proper socket locking in netrom and rose. - -Check socket locking when ax25_rcv is sending to raw sockets. In particular -ax25_send_to_raw() seems fishy. Heck - ax25_rcv is fishy. - -Handle XID and TEST frames properly.
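The proc_dou8vec_minmax() handler added above is what the netns sysctl hunks later in this section switch to when shrinking int knobs to u8: the table entry must describe exactly one u8 (arrays are rejected for now), while the optional extra1/extra2 bounds are still dereferenced as unsigned int and must stay within 0..255. A sketch of a table entry wired to it; example_flag and the 0..1 range are hypothetical, not part of this series:

	static u8 example_flag;			/* u8-backed boolean-style knob */
	static unsigned int example_min;	/* bounds are read as unsigned int */
	static unsigned int example_max = 1;

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_flag",
			.data		= &example_flag,
			.maxlen		= sizeof(u8),	/* anything else returns -EINVAL */
			.mode		= 0644,
			.proc_handler	= proc_dou8vec_minmax,
			.extra1		= &example_min,
			.extra2		= &example_max,
		},
		{ }
	};

The sysctl_check_table_array() change at the top of this section enforces the same maxlen == sizeof(u8) rule when the table is registered.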
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 28166402d30c..1d63c8cbbfe7 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -454,8 +454,9 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, return 0; } - read_lock_bh(&in6_dev->lock); - for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) { + for (pmc6 = rcu_dereference(in6_dev->mc_list); + pmc6; + pmc6 = rcu_dereference(pmc6->next)) { if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) continue; @@ -484,7 +485,6 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, hlist_add_head(&new->list, mcast_list); ret++; } - read_unlock_bh(&in6_dev->lock); rcu_read_unlock(); return ret; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 2b1dd252f231..c959320c4775 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1069,7 +1069,7 @@ again: /* * Do not return the error but go back to waiting. We - * have the inital workspace and the CRUSH computation + * have the initial workspace and the CRUSH computation * time is bounded so we will get it eventually. */ WARN_ON(atomic_read(&wsm->total_ws) < 1); diff --git a/net/core/dev.c b/net/core/dev.c index 48b529d59157..b4c67a5be606 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10460,7 +10460,7 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); - if (refcnt && + if (refcnt != 1 && time_after(jiffies, warning_time + netdev_unregister_timeout_secs * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index fa1c37ec40c9..45ae6eeb2964 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL(__hw_addr_unsync); * @sync: function to call if address should be added * @unsync: function to call if address should be removed * - * This funciton is intended to be called from the ndo_set_rx_mode + * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without @@ -723,7 +723,7 @@ void dev_uc_flush(struct net_device *dev) EXPORT_SYMBOL(dev_uc_flush); /** - * dev_uc_flush - Init unicast address list + * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. 
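The qeth and batman-adv hunks above show the reader side of the mc_list RCU conversion from the if_inet6.h changes earlier in this section: instead of read_lock_bh(&in6_dev->lock), the list is now walked under rcu_read_lock() (or under the RTNL with rtnl_dereference(), as qeth does). A minimal reader sketch; handle_group() is an illustrative placeholder:

	struct ifmcaddr6 *mc;

	rcu_read_lock();
	for (mc = rcu_dereference(in6_dev->mc_list);
	     mc;
	     mc = rcu_dereference(mc->next)) {
		/* mc stays valid for the duration of the read-side section */
		handle_group(&mc->mca_addr);
	}
	rcu_read_unlock();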
diff --git a/net/core/netevent.c b/net/core/netevent.c index d76ed7739c70..5bb615e963cc 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -32,7 +32,7 @@ int register_netevent_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(register_netevent_notifier); /** - * netevent_unregister_notifier - unregister a netevent notifier block + * unregister_netevent_notifier - unregister a netevent notifier block * @nb: notifier * * Unregister a notifier previously registered by diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d84c8a1b280e..c8496c1142c9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -577,7 +577,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = SYSCTL_ONE, .extra2 = &int_3600, }, { } diff --git a/net/decnet/TODO b/net/decnet/TODO deleted file mode 100644 index 358e9eb49016..000000000000 --- a/net/decnet/TODO +++ /dev/null @@ -1,40 +0,0 @@ -Steve's quick list of things that need finishing off: -[they are in no particular order and range from the trivial to the long winded] - - o Proper timeouts on each neighbour (in routing mode) rather than - just the 60 second On-Ethernet cache value. - - o Support for X.25 linklayer - - o Support for DDCMP link layer - - o The DDCMP device itself - - o PPP support (rfc1762) - - o Lots of testing with real applications - - o Verify errors etc. against POSIX 1003.1g (draft) - - o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g) - [maybe this should be done at socket level... the control data in the - send/recvmsg() calls should simply be a vector of set/getsockopt() - calls] - - o check MSG_CTRUNC is set where it should be. - - o Find all the commonality between DECnet and IPv4 routing code and extract - it into a small library of routines. [probably a project for 2.7.xx] - - o Add perfect socket hashing - an idea suggested by Paul Koning. Currently - we have a half-way house scheme which seems to work reasonably well, but - the full scheme is still worth implementing, its not not top of my list - right now. - - o Add session control message flow control - - o Add NSP message flow control - - o DECnet sendpages() function - - o AIO for DECnet diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index c97bdca5ec30..1a12912b88d6 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -520,7 +520,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) fcval = *ptr; /* - * Here we ignore erronous packets which should really + * Here we ignore erroneous packets which should really * should cause a connection abort. It is not critical * for now though. */ diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index e9176475bac8..cf8ac316f4c7 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -79,7 +79,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, /* The RTL4 header has its own custom Ethertype 0x8899 and that * starts right at the beginning of the packet, after the src - * ethernet addr. Apparantly skb->data always points 2 bytes in, + * ethernet addr. Apparently skb->data always points 2 bytes in, * behind the Ethertype. 
*/ tag = skb->data - 2; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 7a849ff22dad..c2dc9033a8f7 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o + tunnels.o fec.o diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c new file mode 100644 index 000000000000..31454b9188bd --- /dev/null +++ b/net/ethtool/fec.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct fec_req_info { + struct ethnl_req_info base; +}; + +struct fec_reply_data { + struct ethnl_reply_data base; + __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes); + u32 active_fec; + u8 fec_auto; +}; + +#define FEC_REPDATA(__reply_base) \ + container_of(__reply_base, struct fec_reply_data, base) + +#define ETHTOOL_FEC_MASK ((ETHTOOL_FEC_LLRS << 1) - 1) + +const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = { + [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static void +ethtool_fec_to_link_modes(u32 fec, unsigned long *link_modes, u8 *fec_auto) +{ + if (fec_auto) + *fec_auto = !!(fec & ETHTOOL_FEC_AUTO); + + if (fec & ETHTOOL_FEC_OFF) + __set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes); + if (fec & ETHTOOL_FEC_RS) + __set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes); + if (fec & ETHTOOL_FEC_BASER) + __set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes); + if (fec & ETHTOOL_FEC_LLRS) + __set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes); +} + +static int +ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec, + unsigned long *link_modes, u8 fec_auto) +{ + memset(fec, 0, sizeof(*fec)); + + if (fec_auto) + fec->fec |= ETHTOOL_FEC_AUTO; + + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_OFF; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_RS; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_BASER; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_LLRS; + + if (!bitmap_empty(link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; + + return 0; +} + +static int fec_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {}; + struct fec_reply_data *data = FEC_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + struct ethtool_fecparam fec = {}; + int ret; + + if (!dev->ethtool_ops->get_fecparam) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + ret = dev->ethtool_ops->get_fecparam(dev, &fec); + ethnl_ops_complete(dev); + if (ret) + return ret; + + WARN_ON_ONCE(fec.reserved); + + ethtool_fec_to_link_modes(fec.fec, data->fec_link_modes, + &data->fec_auto); + + ethtool_fec_to_link_modes(fec.active_fec, active_fec_modes, NULL); + data->active_fec = find_first_bit(active_fec_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS); + /* Don't report attr if no FEC mode set. Note that + * ethtool_fec_to_link_modes() ignores NONE and AUTO.
+ */ + if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS) + data->active_fec = 0; + + return 0; +} + +static int fec_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct fec_reply_data *data = FEC_REPDATA(reply_base); + int len = 0; + int ret; + + ret = ethnl_bitset_size(data->fec_link_modes, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); + if (ret < 0) + return ret; + len += ret; + + len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ + nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ + + return len; +} + +static int fec_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct fec_reply_data *data = FEC_REPDATA(reply_base); + int ret; + + ret = ethnl_put_bitset(skb, ETHTOOL_A_FEC_MODES, + data->fec_link_modes, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); + if (ret < 0) + return ret; + + if (nla_put_u8(skb, ETHTOOL_A_FEC_AUTO, data->fec_auto) || + (data->active_fec && + nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_fec_request_ops = { + .request_cmd = ETHTOOL_MSG_FEC_GET, + .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY, + .hdr_attr = ETHTOOL_A_FEC_HEADER, + .req_info_size = sizeof(struct fec_req_info), + .reply_data_size = sizeof(struct fec_reply_data), + + .prepare_data = fec_prepare_data, + .reply_size = fec_reply_size, + .fill_reply = fec_fill_reply, +}; + +/* FEC_SET */ + +const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = { + [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_FEC_MODES] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEC_AUTO] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {}; + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct ethtool_fecparam fec = {}; + const struct ethtool_ops *ops; + struct net_device *dev; + bool mod = false; + u8 fec_auto; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEC_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_fecparam || !ops->set_fecparam) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ret = ops->get_fecparam(dev, &fec); + if (ret < 0) + goto out_ops; + + ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto); + + ret = ethnl_update_bitset(fec_link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS, + tb[ETHTOOL_A_FEC_MODES], + link_mode_names, info->extack, &mod); + if (ret < 0) + goto out_ops; + ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod); + + ret = 0; + if (!mod) + goto out_ops; + + ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], + "invalid FEC modes requested"); + goto out_ops; + } + if (!fec.fec) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], + "no FEC modes set"); + goto out_ops; + } + + ret = dev->ethtool_ops->set_fecparam(dev, &fec); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_FEC_NTF, NULL); + +out_ops: + 
ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 0788cc3b3114..26b3e7086075 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2568,6 +2568,9 @@ static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) if (rc) return rc; + if (WARN_ON_ONCE(fecparam.reserved)) + fecparam.reserved = 0; + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; return 0; @@ -2583,6 +2586,12 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) return -EFAULT; + if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE) + return -EINVAL; + + fecparam.active_fec = 0; + fecparam.reserved = 0; + return dev->ethtool_ops->set_fecparam(dev, &fecparam); } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 50d3c8896f91..705a4b201564 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -244,6 +244,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_COALESCE_GET] = &ethnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_GET] = &ethnl_pause_request_ops, [ETHTOOL_MSG_EEE_GET] = &ethnl_eee_request_ops, + [ETHTOOL_MSG_FEC_GET] = &ethnl_fec_request_ops, [ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops, }; @@ -551,6 +552,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_COALESCE_NTF] = &ethnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_NTF] = &ethnl_pause_request_ops, [ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops, + [ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops, }; /* default notification handler */ @@ -643,6 +645,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_COALESCE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -912,6 +915,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_tunnel_info_get_policy, .maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_FEC_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_fec_get_policy, + .maxattr = ARRAY_SIZE(ethnl_fec_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_FEC_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_fec, + .policy = ethnl_fec_set_policy, + .maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 6eabd58d81bf..785f7ee45930 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -344,6 +344,7 @@ extern const struct ethnl_request_ops ethnl_coalesce_request_ops; extern const struct ethnl_request_ops ethnl_pause_request_ops; extern const struct ethnl_request_ops ethnl_eee_request_ops; extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; +extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -375,6 +376,8 @@ extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1]; extern
const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1]; extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; +extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; +extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -392,5 +395,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a3271ec3e162..1ae920b93f39 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -309,7 +309,7 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { @@ -854,7 +854,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) struct ip_esp_hdr *esph; /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 616e2dc1c8fa..76990e13a2f9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -971,7 +971,7 @@ static bool icmp_redirect(struct sk_buff *skb) } /* - * Handle ICMP_ECHO ("ping") requests. + * Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests. * * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo * requests. @@ -979,27 +979,125 @@ static bool icmp_redirect(struct sk_buff *skb) * included in the reply. * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring * echo requests, MUST have default=NOT. + * RFC 8335: 8 MUST have a config option to enable/disable ICMP + * Extended Echo Functionality, MUST be disabled by default * See also WRT handling of options once they are done and working. */ static bool icmp_echo(struct sk_buff *skb) { + struct icmp_ext_hdr *ext_hdr, _ext_hdr; + struct icmp_ext_echo_iio *iio, _iio; + struct icmp_bxm icmp_param; + struct net_device *dev; + char buff[IFNAMSIZ]; struct net *net; + u16 ident_len; + u8 status; net = dev_net(skb_dst(skb)->dev); - if (!net->ipv4.sysctl_icmp_echo_ignore_all) { - struct icmp_bxm icmp_param; + /* should there be an ICMP stat for ignored echos? */ + if (net->ipv4.sysctl_icmp_echo_ignore_all) + return true; + + icmp_param.data.icmph = *icmp_hdr(skb); + icmp_param.skb = skb; + icmp_param.offset = 0; + icmp_param.data_len = skb->len; + icmp_param.head_len = sizeof(struct icmphdr); - icmp_param.data.icmph = *icmp_hdr(skb); + if (icmp_param.data.icmph.type == ICMP_ECHO) { icmp_param.data.icmph.type = ICMP_ECHOREPLY; - icmp_param.skb = skb; - icmp_param.offset = 0; - icmp_param.data_len = skb->len; - icmp_param.head_len = sizeof(struct icmphdr); - icmp_reply(&icmp_param, skb); + goto send_reply; } - /* should there be an ICMP stat for ignored echos? 
*/ - return true; + if (!net->ipv4.sysctl_icmp_echo_enable_probe) + return true; + /* We currently only support probing interfaces on the proxy node + * Check to ensure L-bit is set + */ + if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1)) + return true; + /* Clear status bits in reply message */ + icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00); + icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY; + ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr); + /* Size of iio is class_type dependent. + * Only check header here and assign length based on ctype in the switch statement + */ + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio); + if (!ext_hdr || !iio) + goto send_mal_query; + if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr)) + goto send_mal_query; + ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr); + status = 0; + dev = NULL; + switch (iio->extobj_hdr.class_type) { + case EXT_ECHO_CTYPE_NAME: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len >= IFNAMSIZ) + goto send_mal_query; + memset(buff, 0, sizeof(buff)); + memcpy(buff, &iio->ident.name, ident_len); + dev = dev_get_by_name(net, buff); + break; + case EXT_ECHO_CTYPE_INDEX: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(iio->ident.ifindex), &_iio); + if (ident_len != sizeof(iio->ident.ifindex)) + goto send_mal_query; + dev = dev_get_by_index(net, ntohl(iio->ident.ifindex)); + break; + case EXT_ECHO_CTYPE_ADDR: + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + iio->ident.addr.ctype3_hdr.addrlen) + goto send_mal_query; + switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) { + case ICMP_AFI_IP: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(struct in_addr), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in_addr)) + goto send_mal_query; + dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr); + break; +#if IS_ENABLED(CONFIG_IPV6) + case ICMP_AFI_IP6: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in6_addr)) + goto send_mal_query; + dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); + if (dev) + dev_hold(dev); + break; +#endif + default: + goto send_mal_query; + } + break; + default: + goto send_mal_query; + } + if (!dev) { + icmp_param.data.icmph.code = ICMP_EXT_NO_IF; + goto send_reply; + } + /* Fill bits in reply message */ + if (dev->flags & IFF_UP) + status |= EXT_ECHOREPLY_ACTIVE; + if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + status |= EXT_ECHOREPLY_IPV4; + if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + status |= EXT_ECHOREPLY_IPV6; + dev_put(dev); + icmp_param.data.icmph.un.echo.sequence |= htons(status); +send_reply: + icmp_reply(&icmp_param, skb); + return true; +send_mal_query: + icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY; + goto send_reply; } /* @@ -1088,6 +1186,16 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); ICMPMSGIN_INC_STATS(net, icmph->type); + + /* Check for ICMP Extended Echo (PROBE) messages */ + if (icmph->type == ICMP_EXT_ECHO) { + /* We can't use icmp_pointers[].handler() because it is an array of + * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. + */ + success = icmp_echo(skb); + goto success_check; + } + /* * 18 is the highest 'known' ICMP type. 
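(Editor's note: PROBE, ICMP type 42, was dispatched to icmp_echo() above precisely because it lies outside the icmp_pointers[] array, so it is exempt from the NR_ICMP_TYPES bound below.)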
Anything else is a mystery * @@ -1097,7 +1205,6 @@ int icmp_rcv(struct sk_buff *skb) if (icmph->type > NR_ICMP_TYPES) goto error; - /* * Parse the ICMP message */ @@ -1123,7 +1230,7 @@ int icmp_rcv(struct sk_buff *skb) } success = icmp_pointers[icmph->type].handler(skb); - +success_check: if (success) { consume_skb(skb); return NET_RX_SUCCESS; @@ -1340,6 +1447,7 @@ static int __net_init icmp_sk_init(struct net *net) /* Control parameters for ECHO replies. */ net->ipv4.sysctl_icmp_echo_ignore_all = 0; + net->ipv4.sysctl_icmp_echo_enable_probe = 0; net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3aab53beb4ea..c3efc7d658f6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -34,7 +34,7 @@ * Andi Kleen : Replace ip_reply with ip_send_reply. * Andi Kleen : Split fast and slow ip_build_xmit path * for decreased register pressure on x86 - * and more readibility. + * and more readability. * Marc Boucher : When call_out_firewall returns FW_QUEUE, * silently drop skb instead of failing with -EPERM. * Detlev Wengorz : Copy protocol for fragments. @@ -262,7 +262,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * interface with a smaller MTU. * - Arriving GRO skb (or GSO skb in a virtualized environment) that is * bridged to a NETIF_F_TSO tunnel stacked over an interface with an - * insufficent MTU. + * insufficient MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f09fe3a5608f..5a2fc8798d20 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -116,8 +116,8 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info) kfree(info->nh); } -static int nh_notifier_mp_info_init(struct nh_notifier_info *info, - struct nh_group *nhg) +static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, + struct nh_group *nhg) { u16 num_nh = nhg->num_nh; int i; @@ -181,8 +181,8 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); - if (nhg->mpath) - return nh_notifier_mp_info_init(info, nhg); + if (nhg->hash_threshold) + return nh_notifier_mpath_info_init(info, nhg); else if (nhg->resilient) return nh_notifier_res_table_info_init(info, nhg); return -EINVAL; @@ -193,7 +193,7 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); - if (nhg->mpath) + if (nhg->hash_threshold) kfree(info->nh_grp); else if (nhg->resilient) vfree(info->nh_res_table); @@ -406,7 +406,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, * could potentially veto it in case of unsupported configuration. */ nhg = rtnl_dereference(nh->nh_grp); - err = nh_notifier_mp_info_init(&info, nhg); + err = nh_notifier_mpath_info_init(&info, nhg); if (err) { NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); return err; @@ -661,7 +661,7 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) u16 group_type = 0; int i; - if (nhg->mpath) + if (nhg->hash_threshold) group_type = NEXTHOP_GRP_TYPE_MPATH; else if (nhg->resilient) group_type = NEXTHOP_GRP_TYPE_RES; @@ -992,9 +992,9 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, struct nh_group *nhg = rtnl_dereference(nh->nh_grp); /* Nesting groups within groups is not supported. 
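 * (Editor's note: the surrounding nexthop.c hunks are a pure rename of
 *  nhg->mpath to nhg->hash_threshold; the invariant that exactly one
 *  group-type flag is set survives unchanged, as the updated
 *  WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1) further down
 *  shows.)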
*/ - if (nhg->mpath) { + if (nhg->hash_threshold) { NL_SET_ERR_MSG(extack, - "Multipath group can not be a nexthop within a group"); + "Hash-threshold group can not be a nexthop within a group"); return false; } if (nhg->resilient) { @@ -1151,7 +1151,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) +static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) { struct nexthop *rc = NULL; int i; @@ -1160,7 +1160,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; - if (hash > atomic_read(&nhge->mpath.upper_bound)) + if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; nhi = rcu_dereference(nhge->nh->nh_info); @@ -1212,8 +1212,8 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) return nh; nhg = rcu_dereference(nh->nh_grp); - if (nhg->mpath) - return nexthop_select_path_mp(nhg, hash); + if (nhg->hash_threshold) + return nexthop_select_path_hthr(nhg, hash); else if (nhg->resilient) return nexthop_select_path_res(nhg, hash); @@ -1710,7 +1710,7 @@ static void replace_nexthop_grp_res(struct nh_group *oldg, nh_res_table_upkeep(old_res_table, true, false); } -static void nh_mp_group_rebalance(struct nh_group *nhg) +static void nh_hthr_group_rebalance(struct nh_group *nhg) { int total = 0; int w = 0; @@ -1725,7 +1725,7 @@ static void nh_mp_group_rebalance(struct nh_group *nhg) w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; - atomic_set(&nhge->mpath.upper_bound, upper_bound); + atomic_set(&nhge->hthr.upper_bound, upper_bound); } } @@ -1752,7 +1752,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = false; newg->is_multipath = nhg->is_multipath; - newg->mpath = nhg->mpath; + newg->hash_threshold = nhg->hash_threshold; newg->resilient = nhg->resilient; newg->fdb_nh = nhg->fdb_nh; newg->num_nh = nhg->num_nh; @@ -1781,8 +1781,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, j++; } - if (newg->mpath) - nh_mp_group_rebalance(newg); + if (newg->hash_threshold) + nh_hthr_group_rebalance(newg); else if (newg->resilient) replace_nexthop_grp_res(nhg, newg); @@ -1794,7 +1794,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, /* Removal of a NH from a resilient group is notified through * bucket notifications. 
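 * (Editor's note: hash-threshold groups have no buckets, so a member
 *  removal is signalled as a whole-group NEXTHOP_EVENT_REPLACE below,
 *  while resilient groups are covered by the per-bucket notifications
 *  emitted from replace_nexthop_grp_res().)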
*/ - if (newg->mpath) { + if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); if (err) @@ -1928,12 +1928,12 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); - if (newg->mpath != oldg->mpath) { + if (newg->hash_threshold != oldg->hash_threshold) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); return -EINVAL; } - if (newg->mpath) { + if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); if (err) @@ -2063,7 +2063,7 @@ static int replace_nexthop_single_notify(struct net *net, struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); struct nh_res_table *res_table; - if (nhg->mpath) { + if (nhg->hash_threshold) { return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, group_nh, extack); } else if (nhg->resilient) { @@ -2328,8 +2328,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - /* The initial insertion is a full notification for mpath as well - * as resilient groups. + /* The initial insertion is a full notification for hash-threshold as + * well as resilient groups. */ rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); if (rc) @@ -2438,7 +2438,7 @@ static struct nexthop *nexthop_create_group(struct net *net, } if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { - nhg->mpath = 1; + nhg->hash_threshold = 1; nhg->is_multipath = true; } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { struct nh_res_table *res_table; @@ -2455,10 +2455,10 @@ static struct nexthop *nexthop_create_group(struct net *net, nhg->is_multipath = true; } - WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1); + WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1); - if (nhg->mpath) - nh_mp_group_rebalance(nhg); + if (nhg->hash_threshold) + nh_hthr_group_rebalance(nhg); if (cfg->nh_fdb) nhg->fdb_nh = 1; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8b943f85fff9..1c9f71a37258 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -453,7 +453,9 @@ EXPORT_SYMBOL_GPL(ping_bind); static inline int ping_supported(int family, int type, int code) { return (family == AF_INET && type == ICMP_ECHO && code == 0) || - (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); + (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) || + (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0); } /* diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f55095d3ed16..a09e466ce11d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -31,7 +31,6 @@ static int two = 2; static int four = 4; static int thousand = 1000; -static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -47,7 +46,6 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; -static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; @@ -209,7 +207,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write, net = 
container_of(table->data, struct net, ipv4.sysctl_ip_fwd_update_priority); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, net); @@ -389,7 +387,7 @@ static int proc_tcp_early_demux(struct ctl_table *table, int write, { int ret = 0; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && !ret) { int enabled = init_net.ipv4.sysctl_tcp_early_demux; @@ -405,7 +403,7 @@ static int proc_udp_early_demux(struct ctl_table *table, int write, { int ret = 0; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && !ret) { int enabled = init_net.ipv4.sysctl_udp_early_demux; @@ -457,7 +455,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ipv4.sysctl_fib_multipath_hash_policy); int ret; - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); @@ -595,30 +593,39 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "icmp_echo_ignore_all", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, + }, + { + .procname = "icmp_echo_enable_probe", + .data = &init_net.ipv4.sysctl_icmp_echo_enable_probe, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_ignore_broadcasts", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_ignore_bogus_error_responses", .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_errors_use_inbound_ifaddr", .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_ratelimit", @@ -645,9 +652,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "raw_l3mdev_accept", .data = &init_net.ipv4.sysctl_raw_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -655,60 +662,60 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_ecn", .data = &init_net.ipv4.sysctl_tcp_ecn, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_ecn_fallback", .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_dynaddr", .data = &init_net.ipv4.sysctl_ip_dynaddr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler 
= proc_dou8vec_minmax, }, { .procname = "ip_early_demux", .data = &init_net.ipv4.sysctl_ip_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "udp_early_demux", .data = &init_net.ipv4.sysctl_udp_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_udp_early_demux }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_tcp_early_demux }, { .procname = "nexthop_compat_mode", .data = &init_net.ipv4.sysctl_nexthop_compat_mode, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max, }, @@ -729,21 +736,21 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ip_no_pmtu_disc", .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_forward_use_pmtu", .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_forward_update_priority", .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = ipv4_fwd_update_priority, .extra1 = SYSCTL_ZERO, @@ -752,40 +759,40 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ip_nonlocal_bind", .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_autobind_reuse", .data = &init_net.ipv4.sysctl_ip_autobind_reuse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fwmark_accept", .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, #ifdef CONFIG_NET_L3_MASTER_DEV { .procname = "tcp_l3mdev_accept", .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -793,9 +800,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_base_mss", @@ -840,9 +847,9 @@ static struct ctl_table 
ipv4_net_table[] = { { .procname = "igmp_link_local_mcast_reports", .data = &init_net.ipv4.sysctl_igmp_llm_reports, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "igmp_max_memberships", @@ -897,9 +904,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_keepalive_probes", .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_keepalive_intvl", @@ -911,26 +918,26 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_syn_retries", .data = &init_net.ipv4.sysctl_tcp_syn_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = &tcp_syn_retries_min, .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", .data = &init_net.ipv4.sysctl_tcp_synack_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, #ifdef CONFIG_SYN_COOKIES { .procname = "tcp_syncookies", .data = &init_net.ipv4.sysctl_tcp_syncookies, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, #endif { @@ -943,24 +950,24 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_retries1", .data = &init_net.ipv4.sysctl_tcp_retries1, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra2 = &tcp_retr1_max }, { .procname = "tcp_retries2", .data = &init_net.ipv4.sysctl_tcp_retries2, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_orphan_retries", .data = &init_net.ipv4.sysctl_tcp_orphan_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fin_timeout", @@ -979,9 +986,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_tw_reuse", .data = &init_net.ipv4.sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, @@ -1030,16 +1037,16 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "fib_multipath_use_neigh", .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "fib_multipath_hash_policy", .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, @@ -1057,9 +1064,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "udp_l3mdev_accept", .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1067,88 +1074,88 @@ static struct ctl_table ipv4_net_table[] = { { 
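/* (Editor's note, illustrative sketch only: every proc_dou8vec_minmax
 * conversion in this table assumes the backing field in struct netns_ipv4
 * was shrunk to u8 in the same series, roughly:
 *
 *     struct netns_ipv4 {
 *             ...
 *             u8 sysctl_tcp_sack;            // was: int
 *             u8 sysctl_tcp_window_scaling;  // was: int
 *             ...
 *     };
 *
 * pairing .maxlen = sizeof(u8) with a field or handler of a different
 * size would read or clobber the neighbouring bytes.) */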
.procname = "tcp_sack", .data = &init_net.ipv4.sysctl_tcp_sack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_window_scaling", .data = &init_net.ipv4.sysctl_tcp_window_scaling, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_timestamps", .data = &init_net.ipv4.sysctl_tcp_timestamps, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_early_retrans", .data = &init_net.ipv4.sysctl_tcp_early_retrans, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &four, }, { .procname = "tcp_recovery", .data = &init_net.ipv4.sysctl_tcp_recovery, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_thin_linear_timeouts", .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_slow_start_after_idle", .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_retrans_collapse", .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_stdurg", .data = &init_net.ipv4.sysctl_tcp_stdurg, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_rfc1337", .data = &init_net.ipv4.sysctl_tcp_rfc1337, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_abort_on_overflow", .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fack", .data = &init_net.ipv4.sysctl_tcp_fack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_max_reordering", @@ -1160,16 +1167,16 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_dsack", .data = &init_net.ipv4.sysctl_tcp_dsack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_app_win", .data = &init_net.ipv4.sysctl_tcp_app_win, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_adv_win_scale", @@ -1183,46 +1190,46 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_frto", .data = &init_net.ipv4.sysctl_tcp_frto, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_no_metrics_save", .data = 
&init_net.ipv4.sysctl_tcp_nometrics_save, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_no_ssthresh_metrics_save", .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "tcp_moderate_rcvbuf", .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_tso_win_divisor", .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_workaround_signed_windows", .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_limit_output_bytes", @@ -1241,11 +1248,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_min_tso_segs", .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, - .extra2 = &gso_max_segs, }, { .procname = "tcp_min_rtt_wlen", @@ -1259,9 +1265,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_autocorking", .data = &init_net.ipv4.sysctl_tcp_autocorking, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1323,18 +1329,17 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_comp_sack_nr", .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &comp_sack_nr_max, }, { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1357,9 +1362,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "fib_notify_on_flag_change", .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e6459537d4d2..82b36ec3f2f8 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -63,7 +63,7 @@ enum tcp_lp_state { * @sowd: smoothed OWD << 3 * @owd_min: min OWD * @owd_max: max OWD - * @owd_max_rsv: resrved max owd + * @owd_max_rsv: reserved max owd * @remote_hz: estimated remote HZ * @remote_ref_time: remote reference time * @local_ref_time: local reference time @@ -305,7 +305,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) /* FIXME: try to reset owd_min and owd_max here * so decrease the chance the min/max is no longer suitable - * and will 
usually within threshold when whithin inference */ + * and will usually within threshold when within inference */ lp->owd_min = lp->sowd >> 3; lp->owd_max = lp->sowd >> 2; lp->owd_max_rsv = lp->sowd >> 2; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4d02f6839e38..bfcc7f1a8a7f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2207,6 +2207,8 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + + udp_post_segment_fix_csum(skb); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); @@ -2693,9 +2695,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_GRO: lock_sock(sk); + + /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); up->gro_enabled = valbool; + up->accept_udp_l4 = valbool; release_sock(sk); break; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index c5b4b586570f..54e06b88af69 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -515,21 +515,24 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; + /* we can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream + */ NAPI_GRO_CB(skb)->is_flist = 0; - if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; + if (!sk || !udp_sk(sk)->gro_receive) { + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; - if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { - pp = call_gro_receive(udp_gro_receive_segment, head, skb); + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + pp = call_gro_receive(udp_gro_receive_segment, head, skb); return pp; } - if (!sk || NAPI_GRO_CB(skb)->encap_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid) || - !udp_sk(sk)->gro_receive) + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the tunnel gro layer */ @@ -639,6 +642,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + /* clear the encap mark, so that inner frag_list gro_complete + * can take place + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + /* Set encapsulation before calling into inner gro_complete() * functions to make them set up the inner offsets. 
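 * (Editor's note: clearing encap_mark just above lets the inner frag_list
 *  gro_complete run for tunneled packets; udp4_gro_complete below takes
 *  the fraglist path only when no outer UDP encapsulation is still
 *  pending, i.e. is_flist is set and encap_mark is clear.)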
*/ @@ -662,7 +670,8 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (NAPI_GRO_CB(skb)->is_flist) { + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ + if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) { uh->len = htons(skb->len - nhoff); skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2337fb756ac..120073ffb666 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2358,7 +2358,7 @@ regen: /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: * check if generated address is not inappropriate: * - * - Reserved IPv6 Interface Identifers + * - Reserved IPv6 Interface Identifiers * - XXX: already assigned to an address on the device */ @@ -5107,17 +5107,20 @@ next: break; } case MULTICAST_ADDR: + read_unlock_bh(&idev->lock); fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { + for (ifmca = rcu_dereference(idev->mc_list); + ifmca; + ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) break; } + read_lock_bh(&idev->lock); break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; @@ -6093,10 +6096,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - rcu_read_unlock_bh(); } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index c70c192bc91b..1d4054bb345b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -198,6 +198,12 @@ static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct s return -EAFNOSUPPORT; } +static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev) +{ + return ERR_PTR(-EAFNOSUPPORT); +} + const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, @@ -209,6 +215,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, .ipv6_fragment = eafnosupport_ipv6_fragment, + .ipv6_dev_find = eafnosupport_ipv6_dev_find, }; EXPORT_SYMBOL_GPL(ipv6_stub); @@ -250,7 +257,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list); + WARN_ON(rcu_access_pointer(idev->mc_list)); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 71de739b4a9e..2389ff702f51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -222,7 +222,7 @@ lookup_protocol: inet->mc_loop = 1; inet->mc_ttl = 1; inet->mc_index = 0; - inet->mc_list = NULL; + RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; if (net->ipv4.sysctl_ip_no_pmtu_disc) @@ -1033,6 +1033,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, + .ipv6_dev_find = ipv6_dev_find, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { diff --git 
a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fd1f896115c1..1bca2b09d77e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -944,11 +944,11 @@ static int icmpv6_rcv(struct sk_buff *skb) case ICMPV6_MGM_QUERY: igmp6_event_query(skb); - break; + return 0; case ICMPV6_MGM_REPORT: igmp6_event_report(skb); - break; + return 0; case ICMPV6_MGM_REDUCTION: case ICMPV6_NI_QUERY: @@ -1169,23 +1169,23 @@ static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "echo_ignore_all", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_multicast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_anycast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ratemask", diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1baf43aacb2e..bc224f917bbd 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -387,7 +387,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; - dev_hold(dev); ip6gre_tunnel_link(ign, nt); return nt; @@ -1496,6 +1495,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev); + dev_hold(dev); return 0; cleanup_dst_cache_init: @@ -1538,8 +1538,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) strcpy(tunnel->parms.name, dev->name); tunnel->hlen = sizeof(struct ipv6hdr) + 4; - - dev_hold(dev); } static struct inet6_protocol ip6gre_protocol __read_mostly = { @@ -1889,6 +1887,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); + dev_hold(dev); return 0; cleanup_dst_cache_init: @@ -1988,8 +1987,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); - dev_hold(dev); - out: return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3fa0eca5a06f..07a0a06a9b52 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -266,7 +266,6 @@ static int ip6_tnl_create2(struct net_device *dev) strcpy(t->parms.name, dev->name); - dev_hold(dev); ip6_tnl_link(ip6n, t); return 0; @@ -388,7 +387,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) } /** - * parse_tvl_tnl_enc_lim - handle encapsulation limit option + * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option * @skb: received socket buffer * @raw: the ICMPv6 error message data * @@ -1882,6 +1881,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev_hold(dev); return 0; destroy_dst: @@ -1925,7 +1925,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); t->parms.proto = IPPROTO_IPV6; - dev_hold(dev); rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index f10e7a72ea62..856e46ad0895 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -193,7 +193,6 @@ static int 
vti6_tnl_create2(struct net_device *dev) strcpy(t->parms.name, dev->name); - dev_hold(dev); vti6_tnl_link(ip6n, t); return 0; @@ -932,6 +931,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + dev_hold(dev); return 0; } @@ -963,7 +963,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); t->parms.proto = IPPROTO_IPV6; - dev_hold(dev); rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6c8604390266..49b0cebfdcdc 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -29,7 +29,6 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/jiffies.h> -#include <linux/times.h> #include <linux/net.h> #include <linux/in.h> #include <linux/in6.h> @@ -42,6 +41,7 @@ #include <linux/slab.h> #include <linux/pkt_sched.h> #include <net/mld.h> +#include <linux/workqueue.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -67,14 +67,13 @@ static int __mld2_query_bugs[] __attribute__((__unused__)) = { BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4) }; +static struct workqueue_struct *mld_wq; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); -static void igmp6_timer_handler(struct timer_list *t); +static void mld_mca_work(struct work_struct *work); -static void mld_gq_timer_expire(struct timer_list *t); -static void mld_ifc_timer_expire(struct timer_list *t); static void mld_ifc_event(struct inet6_dev *idev); static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); @@ -112,12 +111,49 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; /* * socket join on multicast group */ +#define mc_dereference(e, idev) \ + rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) + +#define for_each_pmc_rtnl(np, pmc) \ + for (pmc = rtnl_dereference((np)->ipv6_mc_list); \ + pmc; \ + pmc = rtnl_dereference(pmc->next)) #define for_each_pmc_rcu(np, pmc) \ - for (pmc = rcu_dereference(np->ipv6_mc_list); \ - pmc != NULL; \ + for (pmc = rcu_dereference((np)->ipv6_mc_list); \ + pmc; \ pmc = rcu_dereference(pmc->next)) +#define for_each_psf_mclock(mc, psf) \ + for (psf = mc_dereference((mc)->mca_sources, mc->idev); \ + psf; \ + psf = mc_dereference(psf->sf_next, mc->idev)) + +#define for_each_psf_rcu(mc, psf) \ + for (psf = rcu_dereference((mc)->mca_sources); \ + psf; \ + psf = rcu_dereference(psf->sf_next)) + +#define for_each_psf_tomb(mc, psf) \ + for (psf = mc_dereference((mc)->mca_tomb, mc->idev); \ + psf; \ + psf = mc_dereference(psf->sf_next, mc->idev)) + +#define for_each_mc_mclock(idev, mc) \ + for (mc = mc_dereference((idev)->mc_list, idev); \ + mc; \ + mc = mc_dereference(mc->next, idev)) + +#define for_each_mc_rcu(idev, mc) \ + for (mc = rcu_dereference((idev)->mc_list); \ + mc; \ + mc = rcu_dereference(mc->next)) + +#define for_each_mc_tomb(idev, mc) \ + for (mc = mc_dereference((idev)->mc_tomb, idev); \ + mc; \ + mc = mc_dereference(mc->next, idev)) + static int unsolicited_report_interval(struct inet6_dev *idev) { int iv; @@ -144,15 +180,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - rcu_read_lock(); - for_each_pmc_rcu(np, mc_lst) { + for_each_pmc_rtnl(np, 
mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && - ipv6_addr_equal(&mc_lst->addr, addr)) { - rcu_read_unlock(); + ipv6_addr_equal(&mc_lst->addr, addr)) return -EADDRINUSE; - } } - rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -179,8 +211,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, mc_lst->ifindex = dev->ifindex; mc_lst->sfmode = mode; - rwlock_init(&mc_lst->sflock); - mc_lst->sflist = NULL; + RCU_INIT_POINTER(mc_lst->sflist, NULL); /* * now add/increase the group membership on the device @@ -239,11 +270,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); - (void) ip6_mc_leave_src(sk, mc_lst, idev); + ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else - (void) ip6_mc_leave_src(sk, mc_lst, NULL); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); @@ -255,10 +287,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -/* called with rcu_read_lock() */ -static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, + const struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -270,19 +301,17 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, dev = rt->dst.dev; ip6_rt_put(rt); } - } else - dev = dev_get_by_index_rcu(net, ifindex); + } else { + dev = __dev_get_by_index(net, ifindex); + } if (!dev) return NULL; idev = __in6_dev_get(dev); if (!idev) return NULL; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + if (idev->dead) return NULL; - } return idev; } @@ -303,11 +332,12 @@ void __ipv6_sock_mc_close(struct sock *sk) if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); - (void) ip6_mc_leave_src(sk, mc_lst, idev); + ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else - (void) ip6_mc_leave_src(sk, mc_lst, NULL); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); @@ -336,7 +366,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct net *net = sock_net(sk); int i, j, rv; int leavegroup = 0; - int pmclocked = 0; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; @@ -345,16 +374,14 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); + if (!idev) return -ENODEV; - } err = -EADDRNOTAVAIL; - for_each_pmc_rcu(inet6, pmc) { + mutex_lock(&idev->mc_lock); + for_each_pmc_rtnl(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -365,7 +392,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; } /* if a source filter was set, must be the same mode as before */ - if (pmc->sflist) { + if (rcu_access_pointer(pmc->sflist)) { if (pmc->sfmode != omode) { err = -EINVAL; goto done; @@ -377,10 +404,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; 
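/* (Editor's note: with pmc->sflock removed, sflist is RCU-managed; this
 * writer path runs under RTNL plus idev->mc_lock, uses rtnl_dereference()
 * to read, publishes with rcu_assign_pointer() and retires the old filter
 * list with kfree_rcu(), while lockless readers such as inet6_mc_check()
 * use rcu_dereference().) */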
} - write_lock(&pmc->sflock); - pmclocked = 1; - - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -420,7 +444,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC); + newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -430,9 +454,11 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } - pmc->sflist = psl = newpsl; + psl = newpsl; + rcu_assign_pointer(pmc->sflist, psl); } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { @@ -448,10 +474,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, /* update the interface list */ ip6_mc_add_src(idev, group, omode, 1, source, 1); done: - if (pmclocked) - write_unlock(&pmc->sflock); - read_unlock_bh(&idev->lock); - rcu_read_unlock(); + mutex_unlock(&idev->mc_lock); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -477,13 +500,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + if (!idev) return -ENODEV; - } err = 0; @@ -492,7 +511,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } - for_each_pmc_rcu(inet6, pmc) { + for_each_pmc_rtnl(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -504,7 +523,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } if (gsf->gf_numsrc) { newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_ATOMIC); + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -516,32 +535,37 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } + mutex_lock(&idev->mc_lock); err = ip6_mc_add_src(idev, group, gsf->gf_fmode, - newpsl->sl_count, newpsl->sl_addr, 0); + newpsl->sl_count, newpsl->sl_addr, 0); if (err) { + mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); goto done; } + mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; - (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); + mutex_lock(&idev->mc_lock); + ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); + mutex_unlock(&idev->mc_lock); } - write_lock(&pmc->sflock); - psl = pmc->sflist; + mutex_lock(&idev->mc_lock); + psl = rtnl_dereference(pmc->sflist); if (psl) { - (void) ip6_mc_del_src(idev, group, pmc->sfmode, - psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); - } else - (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + ip6_mc_del_src(idev, group, pmc->sfmode, + psl->sl_count, psl->sl_addr, 0); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); + } else { + ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); + } + mutex_unlock(&idev->mc_lock); + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = gsf->gf_fmode; - write_unlock(&pmc->sflock); err = 0; 
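/* (Editor's sketch, not part of the patch: ip6_mc_msfilter() services the
 * MCAST_MSFILTER socket option; a hypothetical userspace caller looks
 * roughly like:
 *
 *     struct group_filter gf = {
 *             .gf_interface = ifindex,
 *             .gf_fmode     = MCAST_INCLUDE,
 *             .gf_numsrc    = 1,
 *     };
 *     memcpy(&gf.gf_group, &group_sa6, sizeof(group_sa6));
 *     memcpy(&gf.gf_slist[0], &source_sa6, sizeof(source_sa6));
 *     setsockopt(fd, IPPROTO_IPV6, MCAST_MSFILTER,
 *                &gf, GROUP_FILTER_SIZE(1));
 *
 * where group_sa6 and source_sa6 are struct sockaddr_in6 values and
 * GROUP_FILTER_SIZE() comes from linux/in.h.) */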
done: - read_unlock_bh(&idev->lock); - rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -563,13 +587,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + if (!idev) return -ENODEV; - } err = -EADDRNOTAVAIL; /* changes to the ipv6_mc_list require the socket lock and @@ -577,25 +597,22 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for_each_pmc_rcu(inet6, pmc) { + for_each_pmc_rtnl(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) break; } if (!pmc) /* must have a prior join */ - goto done; + return err; + gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; - read_unlock_bh(&idev->lock); - rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - /* changes to psl require the socket lock, and a write lock - * on pmc->sflock. We have the socket lock so reading here is safe. - */ + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -608,10 +625,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return -EFAULT; } return 0; -done: - read_unlock_bh(&idev->lock); - rcu_read_unlock(); - return err; } bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, @@ -631,8 +644,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rcu_read_unlock(); return np->mc_all; } - read_lock(&mc->sflock); - psl = mc->sflist; + psl = rcu_dereference(mc->sflist); if (!psl) { rv = mc->sfmode == MCAST_EXCLUDE; } else { @@ -647,12 +659,12 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) rv = false; } - read_unlock(&mc->sflock); rcu_read_unlock(); return rv; } +/* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; @@ -662,13 +674,11 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) IPV6_ADDR_SCOPE_LINKLOCAL) return; - spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_add(dev, buf); } - spin_unlock_bh(&mc->mca_lock); if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT)) return; @@ -689,6 +699,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) mld_ifc_event(mc->idev); } +/* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; @@ -698,28 +709,25 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) IPV6_ADDR_SCOPE_LINKLOCAL) return; - spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_del(dev, buf); } - spin_unlock_bh(&mc->mca_lock); if (mc->mca_flags & MAF_NOREPORT) return; if (!mc->idev->dead) igmp6_leave_group(mc); - spin_lock_bh(&mc->mca_lock); - if (del_timer(&mc->mca_timer)) + if (cancel_delayed_work(&mc->mca_work)) refcount_dec(&mc->mca_refcnt); - spin_unlock_bh(&mc->mca_lock); } /* * deleted ifmcaddr6 manipulation + * called with mc_lock */ static void mld_add_delrec(struct inet6_dev 
*idev, struct ifmcaddr6 *im) { @@ -731,12 +739,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC); + pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; - spin_lock_bh(&im->mca_lock); - spin_lock_init(&pmc->mca_lock); pmc->idev = im->idev; in6_dev_hold(idev); pmc->mca_addr = im->mca_addr; @@ -745,90 +751,110 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) if (pmc->mca_sfmode == MCAST_INCLUDE) { struct ip6_sf_list *psf; - pmc->mca_tomb = im->mca_tomb; - pmc->mca_sources = im->mca_sources; - im->mca_tomb = im->mca_sources = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + rcu_assign_pointer(pmc->mca_tomb, + mc_dereference(im->mca_tomb, idev)); + rcu_assign_pointer(pmc->mca_sources, + mc_dereference(im->mca_sources, idev)); + RCU_INIT_POINTER(im->mca_tomb, NULL); + RCU_INIT_POINTER(im->mca_sources, NULL); + + for_each_psf_mclock(pmc, psf) psf->sf_crcount = pmc->mca_crcount; } - spin_unlock_bh(&im->mca_lock); - spin_lock_bh(&idev->mc_lock); - pmc->next = idev->mc_tomb; - idev->mc_tomb = pmc; - spin_unlock_bh(&idev->mc_lock); + rcu_assign_pointer(pmc->next, idev->mc_tomb); + rcu_assign_pointer(idev->mc_tomb, pmc); } +/* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { - struct ifmcaddr6 *pmc, *pmc_prev; - struct ip6_sf_list *psf; + struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; + struct ifmcaddr6 *pmc, *pmc_prev; - spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; - for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) { + for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; } if (pmc) { if (pmc_prev) - pmc_prev->next = pmc->next; + rcu_assign_pointer(pmc_prev->next, pmc->next); else - idev->mc_tomb = pmc->next; + rcu_assign_pointer(idev->mc_tomb, pmc->next); } - spin_unlock_bh(&idev->mc_lock); - spin_lock_bh(&im->mca_lock); if (pmc) { im->idev = pmc->idev; if (im->mca_sfmode == MCAST_INCLUDE) { - swap(im->mca_tomb, pmc->mca_tomb); - swap(im->mca_sources, pmc->mca_sources); - for (psf = im->mca_sources; psf; psf = psf->sf_next) + tomb = rcu_replace_pointer(im->mca_tomb, + mc_dereference(pmc->mca_tomb, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_tomb, tomb); + + sources = rcu_replace_pointer(im->mca_sources, + mc_dereference(pmc->mca_sources, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_sources, sources); + for_each_psf_mclock(im, psf) psf->sf_crcount = idev->mc_qrv; } else { im->mca_crcount = idev->mc_qrv; } in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); - kfree(pmc); + kfree_rcu(pmc, rcu); } - spin_unlock_bh(&im->mca_lock); } +/* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; - spin_lock_bh(&idev->mc_lock); - pmc = idev->mc_tomb; - idev->mc_tomb = NULL; - spin_unlock_bh(&idev->mc_lock); + pmc = mc_dereference(idev->mc_tomb, idev); + RCU_INIT_POINTER(idev->mc_tomb, NULL); for (; pmc; pmc = nextpmc) { - nextpmc = pmc->next; + nextpmc = mc_dereference(pmc->next, idev); ip6_mc_clear_src(pmc); in6_dev_put(pmc->idev); - kfree(pmc); + kfree_rcu(pmc, rcu); } /* clear dead sources, too */ - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + for_each_mc_mclock(idev, pmc) { struct ip6_sf_list *psf, 
*psf_next; - spin_lock_bh(&pmc->mca_lock); - psf = pmc->mca_tomb; - pmc->mca_tomb = NULL; - spin_unlock_bh(&pmc->mca_lock); + psf = mc_dereference(pmc->mca_tomb, idev); + RCU_INIT_POINTER(pmc->mca_tomb, NULL); for (; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + psf_next = mc_dereference(psf->sf_next, idev); + kfree_rcu(psf, rcu); } } - read_unlock_bh(&idev->lock); +} + +static void mld_clear_query(struct inet6_dev *idev) +{ + struct sk_buff *skb; + + spin_lock_bh(&idev->mc_query_lock); + while ((skb = __skb_dequeue(&idev->mc_query_queue))) + kfree_skb(skb); + spin_unlock_bh(&idev->mc_query_lock); +} + +static void mld_clear_report(struct inet6_dev *idev) +{ + struct sk_buff *skb; + + spin_lock_bh(&idev->mc_report_lock); + while ((skb = __skb_dequeue(&idev->mc_report_queue))) + kfree_skb(skb); + spin_unlock_bh(&idev->mc_report_lock); } static void mca_get(struct ifmcaddr6 *mc) @@ -840,21 +866,22 @@ static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { in6_dev_put(mc->idev); - kfree(mc); + kfree_rcu(mc, rcu); } } +/* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; - mc = kzalloc(sizeof(*mc), GFP_ATOMIC); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; - timer_setup(&mc->mca_timer, igmp6_timer_handler, 0); + INIT_DELAYED_WORK(&mc->mca_work, mld_mca_work); mc->mca_addr = *addr; mc->idev = idev; /* reference taken by caller */ @@ -862,7 +889,6 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; refcount_set(&mc->mca_refcnt, 1); - spin_lock_init(&mc->mca_lock); mc->mca_sfmode = mode; mc->mca_sfcount[mode] = 1; @@ -891,18 +917,17 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, if (!idev) return -EINVAL; - write_lock_bh(&idev->lock); if (idev->dead) { - write_unlock_bh(&idev->lock); in6_dev_put(idev); return -ENODEV; } - for (mc = idev->mc_list; mc; mc = mc->next) { + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; - write_unlock_bh(&idev->lock); ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0); + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return 0; } @@ -910,22 +935,19 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mc = mca_alloc(idev, addr, mode); if (!mc) { - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENOMEM; } - mc->next = idev->mc_list; - idev->mc_list = mc; + rcu_assign_pointer(mc->next, idev->mc_list); + rcu_assign_pointer(idev->mc_list, mc); - /* Hold this for the code below before we unlock, - * it is already exposed via idev->mc_list. 
- */ mca_get(mc); - write_unlock_bh(&idev->lock); mld_del_delrec(idev, mc); igmp6_group_added(mc); + mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; } @@ -937,33 +959,35 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) EXPORT_SYMBOL(ipv6_dev_mc_inc); /* - * device multicast group del + * device multicast group del */ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { - struct ifmcaddr6 *ma, **map; + struct ifmcaddr6 *ma, __rcu **map; ASSERT_RTNL(); - write_lock_bh(&idev->lock); - for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) { + mutex_lock(&idev->mc_lock); + for (map = &idev->mc_list; + (ma = mc_dereference(*map, idev)); + map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { if (--ma->mca_users == 0) { *map = ma->next; - write_unlock_bh(&idev->lock); igmp6_group_dropped(ma); ip6_mc_clear_src(ma); + mutex_unlock(&idev->mc_lock); ma_put(ma); return 0; } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); return 0; } } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); return -ENOENT; } @@ -997,8 +1021,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - for (mc = idev->mc_list; mc; mc = mc->next) { + for_each_mc_rcu(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, group)) break; } @@ -1006,8 +1029,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, if (src_addr && !ipv6_addr_any(src_addr)) { struct ip6_sf_list *psf; - spin_lock_bh(&mc->mca_lock); - for (psf = mc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_rcu(mc, psf) { if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } @@ -1017,89 +1039,107 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, mc->mca_sfcount[MCAST_EXCLUDE]; else rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; - spin_unlock_bh(&mc->mca_lock); } else rv = true; /* don't filter unspecified source */ } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); return rv; } -static void mld_gq_start_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = prandom_u32() % idev->mc_maxdelay; idev->mc_gq_running = 1; - if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } -static void mld_gq_stop_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_gq_stop_work(struct inet6_dev *idev) { idev->mc_gq_running = 0; - if (del_timer(&idev->mc_gq_timer)) + if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } -static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay) +/* called with mc_lock */ +static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = prandom_u32() % delay; - if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } -static void mld_ifc_stop_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_ifc_stop_work(struct inet6_dev *idev) { idev->mc_ifc_count = 0; - if (del_timer(&idev->mc_ifc_timer)) + if (cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } -static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay) +/* called with mc_lock */ +static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { 
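
All of the start/stop helpers in this hunk lean on the return values of the delayed-work API to keep the inet6_dev refcount balanced: mod_delayed_work() returns false when the work was idle and has just been queued (the new pending instance needs its own reference), while cancel_delayed_work() returns true only when it removed a pending instance (so exactly one reference is dropped). The invariant, reduced to a toy object:

#include <linux/workqueue.h>
#include <linux/refcount.h>

struct obj {
	refcount_t ref;
	struct delayed_work dw;
};

static struct workqueue_struct *obj_wq;	/* assumed allocated elsewhere */

static void obj_start_work(struct obj *o, unsigned long delay)
{
	/* false: work was idle, the newly queued instance owns a ref */
	if (!mod_delayed_work(obj_wq, &o->dw, delay))
		refcount_inc(&o->ref);
}

static void obj_stop_work(struct obj *o)
{
	/* true: a pending instance was cancelled, drop its ref */
	if (cancel_delayed_work(&o->dw))
		refcount_dec(&o->ref);
}

The handler itself must drop the reference when it finally runs, which is why every mld_*_work() function in this patch ends in in6_dev_put().
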
unsigned long tv = prandom_u32() % delay; - if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } -static void mld_dad_stop_timer(struct inet6_dev *idev) +static void mld_dad_stop_work(struct inet6_dev *idev) +{ + if (cancel_delayed_work(&idev->mc_dad_work)) + __in6_dev_put(idev); +} + +static void mld_query_stop_work(struct inet6_dev *idev) { - if (del_timer(&idev->mc_dad_timer)) + spin_lock_bh(&idev->mc_query_lock); + if (cancel_delayed_work(&idev->mc_query_work)) + __in6_dev_put(idev); + spin_unlock_bh(&idev->mc_query_lock); +} + +static void mld_report_stop_work(struct inet6_dev *idev) +{ + if (cancel_delayed_work_sync(&idev->mc_report_work)) __in6_dev_put(idev); } /* - * IGMP handling (alias multicast ICMPv6 messages) + * IGMP handling (alias multicast ICMPv6 messages) + * called with mc_lock */ - static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; - /* Do not start timer for these addresses */ + /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; - if (del_timer(&ma->mca_timer)) { + if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); - delay = ma->mca_timer.expires - jiffies; + delay = ma->mca_work.timer.expires - jiffies; } if (delay >= resptime) delay = prandom_u32() % resptime; - ma->mca_timer.expires = jiffies + delay; - if (!mod_timer(&ma->mca_timer, jiffies + delay)) + if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING; } -/* mark EXCLUDE-mode sources */ +/* mark EXCLUDE-mode sources + * called with mc_lock + */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { @@ -1107,7 +1147,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, int i, scount; scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { @@ -1128,6 +1168,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, return true; } +/* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { @@ -1140,7 +1181,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, /* mark INCLUDE-mode sources */ scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { @@ -1305,10 +1346,10 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, if (v1_query) mld_set_v1_mode(idev); - /* cancel MLDv2 report timer */ - mld_gq_stop_timer(idev); - /* cancel the interface change timer */ - mld_ifc_stop_timer(idev); + /* cancel MLDv2 report work */ + mld_gq_stop_work(idev); + /* cancel the interface change work */ + mld_ifc_stop_work(idev); /* clear deleted report items */ mld_clear_delrec(idev); @@ -1332,18 +1373,41 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, /* called with rcu_read_lock() */ int igmp6_event_query(struct sk_buff *skb) { + struct inet6_dev *idev = __in6_dev_get(skb->dev); + + if (!idev) + return -EINVAL; + + if (idev->dead) { + kfree_skb(skb); + return -ENODEV; + } + + spin_lock_bh(&idev->mc_query_lock); + if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) { + 
__skb_queue_tail(&idev->mc_query_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0)) + in6_dev_hold(idev); + } + spin_unlock_bh(&idev->mc_query_lock); + + return 0; +} + +static void __mld_query_work(struct sk_buff *skb) +{ struct mld2_query *mlh2 = NULL; - struct ifmcaddr6 *ma; const struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; + struct ifmcaddr6 *ma; struct mld_msg *mld; int group_type; int mark = 0; int len, err; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - return -EINVAL; + goto kfree_skb; /* compute payload length excluding extension headers */ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); @@ -1360,11 +1424,11 @@ int igmp6_event_query(struct sk_buff *skb) ipv6_hdr(skb)->hop_limit != 1 || !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) - return -EINVAL; + goto kfree_skb; - idev = __in6_dev_get(skb->dev); + idev = in6_dev_get(skb->dev); if (!idev) - return 0; + goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); group = &mld->mld_mca; @@ -1372,60 +1436,56 @@ int igmp6_event_query(struct sk_buff *skb) if (group_type != IPV6_ADDR_ANY && !(group_type&IPV6_ADDR_MULTICAST)) - return -EINVAL; + goto out; if (len < MLD_V1_QUERY_LEN) { - return -EINVAL; + goto out; } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) { err = mld_process_v1(idev, mld, &max_delay, len == MLD_V1_QUERY_LEN); if (err < 0) - return err; + goto out; } else if (len >= MLD_V2_QUERY_LEN_MIN) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); if (!pskb_may_pull(skb, srcs_offset)) - return -EINVAL; + goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); err = mld_process_v2(idev, mlh2, &max_delay); if (err < 0) - return err; + goto out; if (group_type == IPV6_ADDR_ANY) { /* general query */ if (mlh2->mld2q_nsrcs) - return -EINVAL; /* no sources allowed */ + goto out; /* no sources allowed */ - mld_gq_start_timer(idev); - return 0; + mld_gq_start_work(idev); + goto out; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) - return -EINVAL; + goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } } else { - return -EINVAL; + goto out; } - read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { - for (ma = idev->mc_list; ma; ma = ma->next) { - spin_lock_bh(&ma->mca_lock); + for_each_mc_mclock(idev, ma) { igmp6_group_queried(ma, max_delay); - spin_unlock_bh(&ma->mca_lock); } } else { - for (ma = idev->mc_list; ma; ma = ma->next) { + for_each_mc_mclock(idev, ma) { if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; - spin_lock_bh(&ma->mca_lock); if (ma->mca_flags & MAF_TIMER_RUNNING) { /* gsquery <- gsquery && mark */ if (!mark) @@ -1440,34 +1500,91 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ma->mca_flags & MAF_GSQUERY) || mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); - spin_unlock_bh(&ma->mca_lock); break; } } - read_unlock_bh(&idev->lock); - return 0; +out: + in6_dev_put(idev); +kfree_skb: + consume_skb(skb); +} + +static void mld_query_work(struct work_struct *work) +{ + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_query_work); + struct sk_buff_head q; + struct sk_buff *skb; + bool rework = false; + int cnt = 0; + + skb_queue_head_init(&q); + + spin_lock_bh(&idev->mc_query_lock); + while ((skb = 
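
igmp6_event_query() now does almost nothing in softirq context: it queues the skb under a spinlock and kicks a work item, and the actual parsing happens later in mld_query_work(), in process context, where the new mc_lock mutex may be taken and GFP_KERNEL allocations are legal. A stripped-down sketch of that split (the MLD_MAX_QUEUE batching and the requeue-on-overflow "rework" handling above are omitted):

#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>

struct rx_ctx {
	spinlock_t lock;		/* softirq-safe queue lock */
	struct sk_buff_head queue;
	struct work_struct work;
};

/* softirq/RCU context: queue the packet and defer */
static void rx_event(struct rx_ctx *ctx, struct sk_buff *skb)
{
	spin_lock_bh(&ctx->lock);
	__skb_queue_tail(&ctx->queue, skb);
	spin_unlock_bh(&ctx->lock);
	schedule_work(&ctx->work);
}

/* process context: drain to a private list, then parse unlocked */
static void rx_work(struct work_struct *work)
{
	struct rx_ctx *ctx = container_of(work, struct rx_ctx, work);
	struct sk_buff_head q;
	struct sk_buff *skb;

	__skb_queue_head_init(&q);
	spin_lock_bh(&ctx->lock);
	skb_queue_splice_init(&ctx->queue, &q);
	spin_unlock_bh(&ctx->lock);

	while ((skb = __skb_dequeue(&q)))
		consume_skb(skb);	/* real code parses the query here */
}
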
__skb_dequeue(&idev->mc_query_queue))) { + __skb_queue_tail(&q, skb); + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; + schedule_delayed_work(&idev->mc_query_work, 0); + break; + } + } + spin_unlock_bh(&idev->mc_query_lock); + + mutex_lock(&idev->mc_lock); + while ((skb = __skb_dequeue(&q))) + __mld_query_work(skb); + mutex_unlock(&idev->mc_lock); + + if (!rework) + in6_dev_put(idev); } /* called with rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { - struct ifmcaddr6 *ma; + struct inet6_dev *idev = __in6_dev_get(skb->dev); + + if (!idev) + return -EINVAL; + + if (idev->dead) { + kfree_skb(skb); + return -ENODEV; + } + + spin_lock_bh(&idev->mc_report_lock); + if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) { + __skb_queue_tail(&idev->mc_report_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0)) + in6_dev_hold(idev); + } + spin_unlock_bh(&idev->mc_report_lock); + + return 0; +} + +static void __mld_report_work(struct sk_buff *skb) +{ struct inet6_dev *idev; + struct ifmcaddr6 *ma; struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. */ if (skb->pkt_type == PACKET_LOOPBACK) - return 0; + goto kfree_skb; /* send our report if the MC router may not have heard this report */ if (skb->pkt_type != PACKET_MULTICAST && skb->pkt_type != PACKET_BROADCAST) - return 0; + goto kfree_skb; if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) - return -EINVAL; + goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); @@ -1475,29 +1592,61 @@ int igmp6_event_report(struct sk_buff *skb) addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); if (addr_type != IPV6_ADDR_ANY && !(addr_type&IPV6_ADDR_LINKLOCAL)) - return -EINVAL; + goto kfree_skb; - idev = __in6_dev_get(skb->dev); + idev = in6_dev_get(skb->dev); if (!idev) - return -ENODEV; + goto kfree_skb; /* - * Cancel the timer for this group + * Cancel the work for this group */ - read_lock_bh(&idev->lock); - for (ma = idev->mc_list; ma; ma = ma->next) { + for_each_mc_mclock(idev, ma) { if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { - spin_lock(&ma->mca_lock); - if (del_timer(&ma->mca_timer)) + if (cancel_delayed_work(&ma->mca_work)) refcount_dec(&ma->mca_refcnt); - ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING); - spin_unlock(&ma->mca_lock); + ma->mca_flags &= ~(MAF_LAST_REPORTER | + MAF_TIMER_RUNNING); break; } } - read_unlock_bh(&idev->lock); - return 0; + + in6_dev_put(idev); +kfree_skb: + consume_skb(skb); +} + +static void mld_report_work(struct work_struct *work) +{ + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_report_work); + struct sk_buff_head q; + struct sk_buff *skb; + bool rework = false; + int cnt = 0; + + skb_queue_head_init(&q); + spin_lock_bh(&idev->mc_report_lock); + while ((skb = __skb_dequeue(&idev->mc_report_queue))) { + __skb_queue_tail(&q, skb); + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; + schedule_delayed_work(&idev->mc_report_work, 0); + break; + } + } + spin_unlock_bh(&idev->mc_report_lock); + + mutex_lock(&idev->mc_lock); + while ((skb = __skb_dequeue(&q))) + __mld_report_work(skb); + mutex_unlock(&idev->mc_lock); + + if (!rework) + in6_dev_put(idev); } static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, @@ -1550,7 +1699,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) struct ip6_sf_list *psf; int scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (!is_in(pmc, psf, type, gdeleted, 
sdeleted)) continue; scount++; @@ -1724,15 +1873,18 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, #define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) +/* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, int gdeleted, int sdeleted, int crsend) + int type, int gdeleted, int sdeleted, + int crsend) { + struct ip6_sf_list *psf, *psf_prev, *psf_next; + int scount, stotal, first, isquery, truncate; + struct ip6_sf_list __rcu **psf_list; struct inet6_dev *idev = pmc->idev; struct net_device *dev = idev->dev; - struct mld2_report *pmr; struct mld2_grec *pgr = NULL; - struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; - int scount, stotal, first, isquery, truncate; + struct mld2_report *pmr; unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) @@ -1751,7 +1903,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; - if (!*psf_list) + if (!rcu_access_pointer(*psf_list)) goto empty_source; pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL; @@ -1767,10 +1919,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } first = 1; psf_prev = NULL; - for (psf = *psf_list; psf; psf = psf_next) { + for (psf = mc_dereference(*psf_list, idev); + psf; + psf = psf_next) { struct in6_addr *psrc; - psf_next = psf->sf_next; + psf_next = mc_dereference(psf->sf_next, idev); if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) { psf_prev = psf; @@ -1817,10 +1971,12 @@ decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - *psf_list = psf->sf_next; - kfree(psf); + rcu_assign_pointer(*psf_list, + mc_dereference(psf->sf_next, idev)); + kfree_rcu(psf, rcu); continue; } } @@ -1849,72 +2005,73 @@ empty_source: return skb; } +/* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; - read_lock_bh(&idev->lock); if (!pmc) { - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) continue; - spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); - spin_unlock_bh(&pmc->mca_lock); } } else { - spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } /* * remove zero-count source records from a source filter list + * called with mc_lock */ -static void mld_clear_zeros(struct ip6_sf_list **ppsf) +static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf = *ppsf; psf; psf = psf_next) { - psf_next = psf->sf_next; + for (psf = mc_dereference(*ppsf, idev); + psf; + psf = psf_next) { + psf_next = mc_dereference(psf->sf_next, idev); if (psf->sf_crcount == 0) { if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - *ppsf = psf->sf_next; - kfree(psf); - } else + 
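
The for_each_psf_mclock()/for_each_mc_mclock() iterators used throughout are not defined in this hunk; they are presumably thin macros that dereference the __rcu pointers while asserting that mc_lock is held, roughly along the lines of the guess below (the names and the exact lockdep expression are illustrative, not the actual definitions):

/* writer-side or mutex-protected dereference of an __rcu pointer */
#define locked_deref(p, mtx) \
	rcu_dereference_protected((p), lockdep_is_held(mtx))

#define for_each_node_locked(head, n, mtx)	\
	for (n = locked_deref(*(head), (mtx));	\
	     n;					\
	     n = locked_deref(n->next, (mtx)))

The _rcu variants of the same walkers would use rcu_dereference() instead, for the read-mostly paths that only hold rcu_read_lock().
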
rcu_assign_pointer(*ppsf, + mc_dereference(psf->sf_next, idev)); + kfree_rcu(psf, rcu); + } else { psf_prev = psf; + } } } +/* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; struct sk_buff *skb = NULL; int type, dtype; - read_lock_bh(&idev->lock); - spin_lock(&idev->mc_lock); - /* deleted MCA's */ pmc_prev = NULL; - for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) { - pmc_next = pmc->next; + for (pmc = mc_dereference(idev->mc_tomb, idev); + pmc; + pmc = pmc_next) { + pmc_next = mc_dereference(pmc->next, idev); if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; @@ -1928,26 +2085,25 @@ static void mld_send_cr(struct inet6_dev *idev) } pmc->mca_crcount--; if (pmc->mca_crcount == 0) { - mld_clear_zeros(&pmc->mca_tomb); - mld_clear_zeros(&pmc->mca_sources); + mld_clear_zeros(&pmc->mca_tomb, idev); + mld_clear_zeros(&pmc->mca_sources, idev); } } - if (pmc->mca_crcount == 0 && !pmc->mca_tomb && - !pmc->mca_sources) { + if (pmc->mca_crcount == 0 && + !rcu_access_pointer(pmc->mca_tomb) && + !rcu_access_pointer(pmc->mca_sources)) { if (pmc_prev) - pmc_prev->next = pmc_next; + rcu_assign_pointer(pmc_prev->next, pmc_next); else - idev->mc_tomb = pmc_next; + rcu_assign_pointer(idev->mc_tomb, pmc_next); in6_dev_put(pmc->idev); - kfree(pmc); + kfree_rcu(pmc, rcu); } else pmc_prev = pmc; } - spin_unlock(&idev->mc_lock); /* change recs */ - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { - spin_lock_bh(&pmc->mca_lock); + for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_ALLOW_NEW_SOURCES; @@ -1967,9 +2123,7 @@ static void mld_send_cr(struct inet6_dev *idev) skb = add_grec(skb, pmc, type, 0, 0, 0); pmc->mca_crcount--; } - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (!skb) return; (void) mld_sendpack(skb); @@ -2071,6 +2225,7 @@ err_out: goto out; } +/* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { struct sk_buff *skb; @@ -2081,47 +2236,49 @@ static void mld_send_initial_cr(struct inet6_dev *idev) return; skb = NULL; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { - spin_lock_bh(&pmc->mca_lock); + for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_ALLOW_NEW_SOURCES; skb = add_grec(skb, pmc, type, 0, 0, 1); - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } void ipv6_mc_dad_complete(struct inet6_dev *idev) { + mutex_lock(&idev->mc_lock); idev->mc_dad_count = idev->mc_qrv; if (idev->mc_dad_count) { mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, - unsolicited_report_interval(idev)); + mld_dad_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); } -static void mld_dad_timer_expire(struct timer_list *t) +static void mld_dad_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer); - + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_dad_work); + mutex_lock(&idev->mc_lock); mld_send_initial_cr(idev); if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, - unsolicited_report_interval(idev)); + mld_dad_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } +/* called with 
mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2129,7 +2286,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2144,21 +2301,27 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, /* no more filters for this source */ if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - pmc->mca_sources = psf->sf_next; + rcu_assign_pointer(pmc->mca_sources, + mc_dereference(psf->sf_next, idev)); + if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && !mld_in_v1_mode(idev)) { psf->sf_crcount = idev->mc_qrv; - psf->sf_next = pmc->mca_tomb; - pmc->mca_tomb = psf; + rcu_assign_pointer(psf->sf_next, + mc_dereference(pmc->mca_tomb, idev)); + rcu_assign_pointer(pmc->mca_tomb, psf); rv = 1; - } else - kfree(psf); + } else { + kfree_rcu(psf, rcu); + } } return rv; } +/* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2169,24 +2332,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } - if (!pmc) { - /* MCA not found?? bug */ - read_unlock_bh(&idev->lock); + if (!pmc) return -ESRCH; - } - spin_lock_bh(&pmc->mca_lock); + sf_markstate(pmc); if (!delta) { - if (!pmc->mca_sfcount[sfmode]) { - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + if (!pmc->mca_sfcount[sfmode]) return -EINVAL; - } + pmc->mca_sfcount[sfmode]--; } err = 0; @@ -2206,18 +2364,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfmode = MCAST_INCLUDE; pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(pmc->idev); - } else if (sf_setstate(pmc) || changerec) + } else if (sf_setstate(pmc) || changerec) { mld_ifc_event(pmc->idev); - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + } + return err; } /* * Add multicast single-source filter to the interface list + * called with mc_lock */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) @@ -2225,40 +2384,45 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, struct ip6_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; } if (!psf) { - psf = kzalloc(sizeof(*psf), GFP_ATOMIC); + psf = kzalloc(sizeof(*psf), GFP_KERNEL); if (!psf) return -ENOBUFS; psf->sf_addr = *psfsrc; if (psf_prev) { - psf_prev->sf_next = psf; - } else - pmc->mca_sources = psf; + rcu_assign_pointer(psf_prev->sf_next, psf); + } else { + rcu_assign_pointer(pmc->mca_sources, psf); + } } psf->sf_count[sfmode]++; return 0; } +/* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; - for (psf = pmc->mca_sources; psf; psf = 
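
ip6_mc_del1_src() above moves a still-needed source record from mca_sources onto mca_tomb instead of freeing it, so a later retransmission of the state-change report can still find it. With RCU lists the move is three pointer publications, and a concurrent reader may transiently observe the node via either list, which this code tolerates. The bare pattern, with a hypothetical node type and the writer lock held:

/* Unlink @n (whose predecessor link is @src_link) and push it onto
 * the @tomb list; caller holds the writer-side lock.
 */
static void move_to_tomb(struct node __rcu **src_link, struct node *n,
			 struct node __rcu **tomb)
{
	rcu_assign_pointer(*src_link,
			   rcu_dereference_protected(n->next, 1));
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(*tomb, 1));
	rcu_assign_pointer(*tomb, n);
}
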
psf->sf_next) + for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; - } else + } else { psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0; + } + } } +/* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *dpsf; @@ -2267,7 +2431,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) int new_in, rv; rv = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -2277,8 +2441,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) if (!psf->sf_oldin) { struct ip6_sf_list *prev = NULL; - for (dpsf = pmc->mca_tomb; dpsf; - dpsf = dpsf->sf_next) { + for_each_psf_tomb(pmc, dpsf) { if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2286,10 +2449,14 @@ static int sf_setstate(struct ifmcaddr6 *pmc) } if (dpsf) { if (prev) - prev->sf_next = dpsf->sf_next; + rcu_assign_pointer(prev->sf_next, + mc_dereference(dpsf->sf_next, + pmc->idev)); else - pmc->mca_tomb = dpsf->sf_next; - kfree(dpsf); + rcu_assign_pointer(pmc->mca_tomb, + mc_dereference(dpsf->sf_next, + pmc->idev)); + kfree_rcu(dpsf, rcu); } psf->sf_crcount = qrv; rv++; @@ -2300,18 +2467,19 @@ static int sf_setstate(struct ifmcaddr6 *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next) + + for_each_psf_tomb(pmc, dpsf) if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; if (!dpsf) { - dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_KERNEL); if (!dpsf) continue; *dpsf = *psf; - /* pmc->mca_lock held by callers */ - dpsf->sf_next = pmc->mca_tomb; - pmc->mca_tomb = dpsf; + rcu_assign_pointer(dpsf->sf_next, + mc_dereference(pmc->mca_tomb, pmc->idev)); + rcu_assign_pointer(pmc->mca_tomb, dpsf); } dpsf->sf_crcount = qrv; rv++; @@ -2322,6 +2490,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) /* * Add multicast source filter list to the interface list + * called with mc_lock */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, @@ -2333,17 +2502,13 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } - if (!pmc) { - /* MCA not found?? 
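
With the per-address spinlocks gone, these paths run in process context under the mc_lock mutex, which is why the allocations in this hunk switch from GFP_ATOMIC to GFP_KERNEL: the allocator may now sleep and reclaim, so transient failures become much rarer. The context rule in miniature:

#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_slock);
static DEFINE_MUTEX(demo_mtx);

static void *alloc_under_spinlock(size_t n)
{
	void *p;

	spin_lock_bh(&demo_slock);	/* atomic section: no sleeping */
	p = kmalloc(n, GFP_ATOMIC);
	spin_unlock_bh(&demo_slock);
	return p;
}

static void *alloc_under_mutex(size_t n)
{
	void *p;

	mutex_lock(&demo_mtx);		/* may sleep: GFP_KERNEL is fine */
	p = kmalloc(n, GFP_KERNEL);
	mutex_unlock(&demo_mtx);
	return p;
}
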
bug */ - read_unlock_bh(&idev->lock); + if (!pmc) return -ESRCH; - } - spin_lock_bh(&pmc->mca_lock); sf_markstate(pmc); isexclude = pmc->mca_sfmode == MCAST_EXCLUDE; @@ -2374,36 +2539,40 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(idev); - } else if (sf_setstate(pmc)) + } else if (sf_setstate(pmc)) { mld_ifc_event(idev); - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + } return err; } +/* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; - for (psf = pmc->mca_tomb; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); + for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); + psf; + psf = nextpsf) { + nextpsf = mc_dereference(psf->sf_next, pmc->idev); + kfree_rcu(psf, rcu); } - pmc->mca_tomb = NULL; - for (psf = pmc->mca_sources; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); + RCU_INIT_POINTER(pmc->mca_tomb, NULL); + for (psf = mc_dereference(pmc->mca_sources, pmc->idev); + psf; + psf = nextpsf) { + nextpsf = mc_dereference(psf->sf_next, pmc->idev); + kfree_rcu(psf, rcu); } - pmc->mca_sources = NULL; + RCU_INIT_POINTER(pmc->mca_sources, NULL); pmc->mca_sfmode = MCAST_EXCLUDE; pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } - +/* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; @@ -2415,93 +2584,115 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) delay = prandom_u32() % unsolicited_report_interval(ma->idev); - spin_lock_bh(&ma->mca_lock); - if (del_timer(&ma->mca_timer)) { + if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); - delay = ma->mca_timer.expires - jiffies; + delay = ma->mca_work.timer.expires - jiffies; } - if (!mod_timer(&ma->mca_timer, jiffies + delay)) + if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER; - spin_unlock_bh(&ma->mca_lock); } static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, struct inet6_dev *idev) { + struct ip6_sf_socklist *psl; int err; - write_lock_bh(&iml->sflock); - if (!iml->sflist) { + psl = rtnl_dereference(iml->sflist); + + if (idev) + mutex_lock(&idev->mc_lock); + + if (!psl) { /* any-source empty exclude case */ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); } else { err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + psl->sl_count, psl->sl_addr, 0); + RCU_INIT_POINTER(iml->sflist, NULL); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } - write_unlock_bh(&iml->sflock); + + if (idev) + mutex_unlock(&idev->mc_lock); + return err; } +/* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { if (mld_in_v1_mode(ma->idev)) { - if (ma->mca_flags & MAF_LAST_REPORTER) + if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REDUCTION); + } } else { mld_add_delrec(ma->idev, ma); mld_ifc_event(ma->idev); } } -static void mld_gq_timer_expire(struct timer_list *t) +static void mld_gq_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, 
mc_gq_timer); + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_gq_work); - idev->mc_gq_running = 0; + mutex_lock(&idev->mc_lock); mld_send_report(idev, NULL); + idev->mc_gq_running = 0; + mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); } -static void mld_ifc_timer_expire(struct timer_list *t) +static void mld_ifc_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer); + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_ifc_work); + mutex_lock(&idev->mc_lock); mld_send_cr(idev); + if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) - mld_ifc_start_timer(idev, - unsolicited_report_interval(idev)); + mld_ifc_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } +/* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { if (mld_in_v1_mode(idev)) return; + idev->mc_ifc_count = idev->mc_qrv; - mld_ifc_start_timer(idev, 1); + mld_ifc_start_work(idev, 1); } -static void igmp6_timer_handler(struct timer_list *t) +static void mld_mca_work(struct work_struct *work) { - struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer); + struct ifmcaddr6 *ma = container_of(to_delayed_work(work), + struct ifmcaddr6, mca_work); + mutex_lock(&ma->idev->mc_lock); if (mld_in_v1_mode(ma->idev)) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); else mld_send_report(ma->idev, ma); - - spin_lock(&ma->mca_lock); ma->mca_flags |= MAF_LAST_REPORTER; ma->mca_flags &= ~MAF_TIMER_RUNNING; - spin_unlock(&ma->mca_lock); + mutex_unlock(&ma->idev->mc_lock); + ma_put(ma); } @@ -2513,10 +2704,10 @@ void ipv6_mc_unmap(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ - read_lock_bh(&idev->lock); - for (i = idev->mc_list; i; i = i->next) + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, i) igmp6_group_dropped(i); - read_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } void ipv6_mc_remap(struct inet6_dev *idev) @@ -2525,25 +2716,25 @@ void ipv6_mc_remap(struct inet6_dev *idev) } /* Device going down */ - void ipv6_mc_down(struct inet6_dev *idev) { struct ifmcaddr6 *i; + mutex_lock(&idev->mc_lock); /* Withdraw multicast list */ - - read_lock_bh(&idev->lock); - - for (i = idev->mc_list; i; i = i->next) + for_each_mc_mclock(idev, i) igmp6_group_dropped(i); + mutex_unlock(&idev->mc_lock); - /* Should stop timer after group drop. or we will - * start timer again in mld_ifc_event() + /* Should stop work after group drop. or we will + * start work again in mld_ifc_event() */ - mld_ifc_stop_timer(idev); - mld_gq_stop_timer(idev); - mld_dad_stop_timer(idev); - read_unlock_bh(&idev->lock); + synchronize_net(); + mld_query_stop_work(idev); + mld_report_stop_work(idev); + mld_ifc_stop_work(idev); + mld_gq_stop_work(idev); + mld_dad_stop_work(idev); } static void ipv6_mc_reset(struct inet6_dev *idev) @@ -2563,29 +2754,33 @@ void ipv6_mc_up(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ - read_lock_bh(&idev->lock); ipv6_mc_reset(idev); - for (i = idev->mc_list; i; i = i->next) { + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, i) { mld_del_delrec(idev, i); igmp6_group_added(i); } - read_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } /* IPv6 device initialization. 
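
mld_ifc_work() and mld_dad_work() are self-rearming: each pass decrements a retransmit counter and requeues itself while the counter is non-zero, all under mc_lock. Shorn of the refcounting and the dedicated workqueue shown above, the shape is:

#include <linux/workqueue.h>
#include <linux/mutex.h>

struct retrans_ctx {
	struct mutex lock;
	int count;		/* retransmissions still owed */
	struct delayed_work dw;
};

static void retrans_work(struct work_struct *work)
{
	struct retrans_ctx *c = container_of(to_delayed_work(work),
					     struct retrans_ctx, dw);

	mutex_lock(&c->lock);
	/* ... transmit one report here ... */
	if (c->count) {
		c->count--;
		if (c->count)
			schedule_delayed_work(&c->dw, HZ);	/* re-arm */
	}
	mutex_unlock(&c->lock);
}
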
*/ void ipv6_mc_init_dev(struct inet6_dev *idev) { - write_lock_bh(&idev->lock); - spin_lock_init(&idev->mc_lock); idev->mc_gq_running = 0; - timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0); - idev->mc_tomb = NULL; + INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work); + RCU_INIT_POINTER(idev->mc_tomb, NULL); idev->mc_ifc_count = 0; - timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0); - timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0); + INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work); + INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work); + INIT_DELAYED_WORK(&idev->mc_query_work, mld_query_work); + INIT_DELAYED_WORK(&idev->mc_report_work, mld_report_work); + skb_queue_head_init(&idev->mc_query_queue); + skb_queue_head_init(&idev->mc_report_queue); + spin_lock_init(&idev->mc_query_lock); + spin_lock_init(&idev->mc_report_lock); + mutex_init(&idev->mc_lock); ipv6_mc_reset(idev); - write_unlock_bh(&idev->lock); } /* @@ -2596,9 +2791,13 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) { struct ifmcaddr6 *i; - /* Deactivate timers */ + /* Deactivate works */ ipv6_mc_down(idev); + mutex_lock(&idev->mc_lock); mld_clear_delrec(idev); + mutex_unlock(&idev->mc_lock); + mld_clear_query(idev); + mld_clear_report(idev); /* Delete all-nodes address. */ /* We cannot call ipv6_dev_mc_dec() directly, our caller in @@ -2610,16 +2809,14 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) if (idev->cnf.forwarding) __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters); - write_lock_bh(&idev->lock); - while ((i = idev->mc_list) != NULL) { - idev->mc_list = i->next; + mutex_lock(&idev->mc_lock); + while ((i = mc_dereference(idev->mc_list, idev))) { + rcu_assign_pointer(idev->mc_list, mc_dereference(i->next, idev)); - write_unlock_bh(&idev->lock); ip6_mc_clear_src(i); ma_put(i); - write_lock_bh(&idev->lock); } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) @@ -2628,13 +2825,14 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) ASSERT_RTNL(); + mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) + for_each_mc_mclock(idev, pmc) igmp6_join_group(pmc); - read_unlock_bh(&idev->lock); - } else + } else { mld_send_report(idev, NULL); + } + mutex_unlock(&idev->mc_lock); } static int ipv6_mc_netdev_event(struct notifier_block *this, @@ -2681,13 +2879,12 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) idev = __in6_dev_get(state->dev); if (!idev) continue; - read_lock_bh(&idev->lock); - im = idev->mc_list; + + im = rcu_dereference(idev->mc_list); if (im) { state->idev = idev; break; } - read_unlock_bh(&idev->lock); } return im; } @@ -2696,11 +2893,8 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - im = im->next; + im = rcu_dereference(im->next); while (!im) { - if (likely(state->idev)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2709,8 +2903,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; - read_lock_bh(&state->idev->lock); - im = state->idev->mc_list; + im = rcu_dereference(state->idev->mc_list); } return im; } @@ -2744,10 +2937,8 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) { struct 
igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - if (likely(state->idev)) { - read_unlock_bh(&state->idev->lock); + if (likely(state->idev)) state->idev = NULL; - } state->dev = NULL; rcu_read_unlock(); } @@ -2762,8 +2953,8 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, &im->mca_addr, im->mca_users, im->mca_flags, - (im->mca_flags&MAF_TIMER_RUNNING) ? - jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0); + (im->mca_flags & MAF_TIMER_RUNNING) ? + jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0); return 0; } @@ -2797,19 +2988,16 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; - read_lock_bh(&idev->lock); - im = idev->mc_list; + + im = rcu_dereference(idev->mc_list); if (likely(im)) { - spin_lock_bh(&im->mca_lock); - psf = im->mca_sources; + psf = rcu_dereference(im->mca_sources); if (likely(psf)) { state->im = im; state->idev = idev; break; } - spin_unlock_bh(&im->mca_lock); } - read_unlock_bh(&idev->lock); } return psf; } @@ -2818,14 +3006,10 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - psf = psf->sf_next; + psf = rcu_dereference(psf->sf_next); while (!psf) { - spin_unlock_bh(&state->im->mca_lock); - state->im = state->im->next; + state->im = rcu_dereference(state->im->next); while (!state->im) { - if (likely(state->idev)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2834,13 +3018,11 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; - read_lock_bh(&state->idev->lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; - spin_lock_bh(&state->im->mca_lock); - psf = state->im->mca_sources; + psf = rcu_dereference(state->im->mca_sources); } out: return psf; @@ -2877,14 +3059,12 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - if (likely(state->im)) { - spin_unlock_bh(&state->im->mca_lock); + + if (likely(state->im)) state->im = NULL; - } - if (likely(state->idev)) { - read_unlock_bh(&state->idev->lock); + if (likely(state->idev)) state->idev = NULL; - } + state->dev = NULL; rcu_read_unlock(); } @@ -2965,6 +3145,7 @@ static int __net_init igmp6_net_init(struct net *net) } inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + net->ipv6.igmp_sk->sk_allocation = GFP_KERNEL; err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); @@ -3002,7 +3183,19 @@ static struct pernet_operations igmp6_net_ops = { int __init igmp6_init(void) { - return register_pernet_subsys(&igmp6_net_ops); + int err; + + err = register_pernet_subsys(&igmp6_net_ops); + if (err) + return err; + + mld_wq = create_workqueue("mld"); + if (!mld_wq) { + unregister_pernet_subsys(&igmp6_net_ops); + return -ENOMEM; + } + + return err; } int __init igmp6_late_init(void) @@ -3013,6 +3206,7 @@ int __init igmp6_late_init(void) void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); + destroy_workqueue(mld_wq); } void igmp6_late_cleanup(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ebb7519bec2a..28801ae80548 100644 --- a/net/ipv6/route.c +++ 
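
igmp6_init() now has two steps that can fail, so its error path must unwind the pernet registration when workqueue creation fails, and igmp6_cleanup() tears both down in reverse. The same unwind idiom, compressed into a stand-alone sketch (demo_net_ops is a placeholder, not a real symbol):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <net/net_namespace.h>

static struct pernet_operations demo_net_ops;	/* placeholder */
static struct workqueue_struct *demo_wq;

static int __init demo_init(void)
{
	int err;

	err = register_pernet_subsys(&demo_net_ops);
	if (err)
		return err;

	demo_wq = create_workqueue("demo");
	if (!demo_wq) {
		err = -ENOMEM;
		goto out_pernet;	/* unwind in reverse order */
	}
	return 0;

out_pernet:
	unregister_pernet_subsys(&demo_net_ops);
	return err;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);
	unregister_pernet_subsys(&demo_net_ops);
}
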
b/net/ipv6/route.c @@ -2085,13 +2085,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, if (rt->rt6i_flags & RTF_GATEWAY) { struct neighbour *neigh; - __u8 neigh_flags = 0; neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); - if (neigh) - neigh_flags = neigh->flags; - if (!(neigh_flags & NTF_ROUTER)) { + if (!(neigh && (neigh->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); rt6_remove_exception(bucket, rt6_ex); @@ -6075,7 +6072,7 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, if (!rcu_access_pointer(f6i->fib6_node)) /* The route was removed from the tree, do not send - * notfication. + * notification. */ return; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 63ccd9f2dccc..ff2ca2e7c7f5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -218,8 +218,6 @@ static int ipip6_tunnel_create(struct net_device *dev) ipip6_tunnel_clone_6rd(dev, sitn); - dev_hold(dev); - ipip6_tunnel_link(sitn, t); return 0; @@ -325,7 +323,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) rcu_read_lock(); - ca = t->prl_count < cmax ? t->prl_count : cmax; + ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for @@ -1456,7 +1454,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - + dev_hold(dev); return 0; } @@ -1472,7 +1470,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 263ab43ed06b..27102c3d6e1d 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,7 +23,6 @@ static int two = 2; static int flowlabel_reflect_max = 0x7; -static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, @@ -34,7 +33,7 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, net = container_of(table->data, struct net, ipv6.sysctl.multipath_hash_policy); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); @@ -45,39 +44,38 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "anycast_src_echo_reply", .data = &init_net.ipv6.sysctl.anycast_src_echo_reply, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_consistency", .data = &init_net.ipv6.sysctl.flowlabel_consistency, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "auto_flowlabels", .data = &init_net.ipv6.sysctl.auto_flowlabels, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &auto_flowlabels_min, + .proc_handler = proc_dou8vec_minmax, .extra2 = &auto_flowlabels_max }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, - .maxlen = sizeof(int), + 
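
Every sysctl entry converted in this file follows the same recipe: the backing field shrinks to u8, .maxlen becomes sizeof(u8), and the handler becomes proc_dou8vec_minmax(), with .extra1/.extra2 still providing the clamp. For a hypothetical boolean-ish knob the entry would read:

#include <linux/sysctl.h>

static u8 demo_flag;

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_flag",
		.data		= &demo_flag,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,	/* clamp to 0..1 */
		.extra2		= SYSCTL_ONE,
	},
	{ }
};

The prerequisite is that the struct netns_ipv6 fields these entries point at are themselves declared u8; proc_dou8vec_minmax() reads and writes exactly one byte at .data.
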
.maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "idgen_retries", @@ -96,16 +94,16 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "flowlabel_state_ranges", .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_nonlocal_bind", .data = &init_net.ipv6.sysctl.ip_nonlocal_bind, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_reflect", @@ -147,7 +145,7 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "fib_multipath_hash_policy", .data = &init_net.ipv6.sysctl.multipath_hash_policy, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, @@ -163,9 +161,9 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "fib_notify_on_flag_change", .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ef2c75bb4771..199b080d418a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -749,6 +749,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + udp_post_segment_fix_csum(skb); ret = udpv6_queue_rcv_one_skb(sk, skb); if (ret > 0) ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index faa823c24292..b3d9ed96e5ea 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -163,7 +163,8 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (NAPI_GRO_CB(skb)->is_flist) { + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ + if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) { uh->len = htons(skb->len - nhoff); skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6092d5cb7168..0fdb389c3390 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -621,7 +621,7 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, for_each_netdev_rcu(&init_net, dev) { if (!memcmp(dev->perm_addr, uid, 8)) { memcpy(iucv->src_user_id, sa->siucv_user_id, 8); - /* Check for unitialized siucv_name */ + /* Check for uninitialized siucv_name */ if (strncmp(sa->siucv_name, " ", 8) == 0) __iucv_auto_name(iucv); else @@ -2134,7 +2134,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, } /** - * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets + * afiucv_hs_callback_txnotify() - handle send notifications from HiperSockets * transport **/ static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d0b56ffbb057..6201965bd822 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -663,7 +663,7 @@ do_frag: /* Hard failure in sending message, abort this * psock since it has lost framing 
- * synchonization and retry sending the + * synchronization and retry sending the * message from the beginning. */ kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, @@ -1419,7 +1419,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, write_lock_bh(&csk->sk_callback_lock); - /* Check if sk_user_data is aready by KCM or someone else. + /* Check if sk_user_data is already by KCM or someone else. * Must be done under lock to prevent race conditions. */ if (csk->sk_user_data) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ad7730b68772..17927966abb3 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -103,7 +103,7 @@ unlock: EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id); /** - * l3mdev_master_ifindex - get index of L3 master device + * l3mdev_master_ifindex_rcu - get index of L3 master device * @dev: targeted interface */ @@ -136,7 +136,7 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev) EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); /** - * l3mdev_master_upper_ifindex_by_index - get index of upper l3 master + * l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master * device * @net: network namespace for device index lookup * @ifindex: targeted interface diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index 523fdd1cf781..d6627a80cb45 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -608,7 +608,7 @@ int llc_conn_ev_qlfy_p_flag_eq_1(struct sock *sk, struct sk_buff *skb) } /** - * conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window + * llc_conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window * @sk: current connection structure. * @skb: current event. * @@ -624,7 +624,7 @@ int llc_conn_ev_qlfy_last_frame_eq_1(struct sock *sk, struct sk_buff *skb) } /** - * conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window + * llc_conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window * @sk: current connection structure. * @skb: current event. * diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 64d4bef04e73..6e387aadffce 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -59,10 +59,10 @@ out: } /** - * llc_sap_find - searchs a SAP in station + * llc_sap_find - searches a SAP in station * @sap_value: sap to be found * - * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * Searches for a sap in the sap list of the LLC's station upon the sap ID. * If the sap is found it will be refcounted and the user will have to do * a llc_sap_put after use. * Returns the sap or %NULL if not found. diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index 792d195c8bae..63749dde542f 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -24,7 +24,7 @@ void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type) } /** - * pdu_set_pf_bit - sets poll/final bit in LLC header + * llc_pdu_set_pf_bit - sets poll/final bit in LLC header * @skb: Frame to set bit in * @bit_value: poll/final bit (0 or 1). 
* diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 7ae4cc684d3a..b554f26c68ee 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -27,7 +27,7 @@ /** - * llc_sap_action_unit_data_ind - forward UI PDU to network layer + * llc_sap_action_unitdata_ind - forward UI PDU to network layer * @sap: SAP * @skb: the event to forward * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 68a0de02b561..a0a11624a5be 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1486,7 +1486,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, - * and for unassociated stations that move to assocaited */ + * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2b7eec93c9f5..69cafaacc31b 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -882,7 +882,7 @@ fully_established: subflow->pm_notified = 1; if (subflow->mp_join) { clear_3rdack_retransmission(ssk); - mptcp_pm_subflow_established(msk, subflow); + mptcp_pm_subflow_established(msk); } else { mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); } @@ -1040,6 +1040,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_pm_add_addr_received(msk, &addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { + mptcp_pm_add_addr_echoed(msk, &addr); mptcp_pm_del_add_timer(msk, &addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 4cfd80f90003..9d00fa6d22e9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -14,7 +14,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port) + bool echo) { u8 add_addr = READ_ONCE(msk->pm.addr_signal); @@ -33,7 +33,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); if (addr->family == AF_INET6) add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - if (port) + if (addr->port) add_addr |= BIT(MPTCP_ADD_ADDR_PORT); WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; @@ -53,6 +53,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); + mptcp_pm_nl_addr_send_ack(msk); return 0; } @@ -152,8 +153,7 @@ void mptcp_pm_connection_closed(struct mptcp_sock *msk) pr_debug("msk=%p", msk); } -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) +void mptcp_pm_subflow_established(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -188,7 +188,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { - mptcp_pm_announce_addr(msk, addr, true, addr->port); + mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; @@ -197,6 +197,21 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_unlock_bh(&pm->lock); } +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_data *pm = &msk->pm; + + pr_debug("msk=%p", msk); + + spin_lock_bh(&pm->lock); + + if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) 
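
mptcp_pm_announce_addr() above now infers the PORT flag from addr->port instead of taking a separate bool, leaving echo as the only remaining flag parameter. How the addr_signal word is composed from the BIT() flags, as a free-standing illustration (the helper name is made up, and the real function also preserves previously set bits it fetched via READ_ONCE):

static u8 demo_addr_signal(const struct mptcp_addr_info *addr, bool echo)
{
	u8 sig = BIT(MPTCP_ADD_ADDR_SIGNAL);

	if (echo)
		sig |= BIT(MPTCP_ADD_ADDR_ECHO);
	if (addr->family == AF_INET6)
		sig |= BIT(MPTCP_ADD_ADDR_IPV6);
	if (addr->port)		/* port now read from the address itself */
		sig |= BIT(MPTCP_ADD_ADDR_PORT);
	return sig;
}
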
+ mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); + + spin_unlock_bh(&pm->lock); +} + void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { if (!mptcp_pm_should_add_signal(msk)) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5857b82c88bf..cadafafa1049 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -56,8 +56,6 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); - static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) { @@ -140,6 +138,24 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, return false; } +static bool lookup_subflow_by_daddr(const struct list_head *list, + struct mptcp_addr_info *daddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + struct sock_common *skc; + + list_for_each_entry(subflow, list, node) { + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + + remote_address(skc, &cur); + if (addresses_equal(&cur, daddr, daddr->port)) + return true; + } + + return false; +} + static struct mptcp_pm_addr_entry * select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) @@ -245,9 +261,9 @@ static void check_work_pending(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.work_pending, false); } -static struct mptcp_pm_add_entry * -lookup_anno_list_by_saddr(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; @@ -308,7 +324,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!mptcp_pm_should_add_signal(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port); + mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } @@ -319,6 +335,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_unlock_bh(&msk->pm.lock); + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) + mptcp_pm_subflow_established(msk); + out: __sock_put(sk); } @@ -331,7 +350,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; spin_lock_bh(&msk->pm.lock); - entry = lookup_anno_list_by_saddr(msk, addr); + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (entry) entry->retrans_times = ADD_ADDR_RETRANS_MAX; spin_unlock_bh(&msk->pm.lock); @@ -351,7 +370,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (lookup_anno_list_by_saddr(msk, &entry->addr)) + if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) return false; add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); @@ -417,8 +436,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false, local->addr.port); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); } } else { /* pick failed, avoid fourther attempts later */ @@ -468,7 +487,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) struct mptcp_addr_info remote; struct mptcp_addr_info local; unsigned int subflows_max; - bool use_port = false; add_addr_accept_max = 
mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -476,6 +494,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) pr_debug("accepted %d:%d remote family %d", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); + + if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) + goto add_addr_echo; + msk->pm.add_addr_accepted++; msk->pm.subflows++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || @@ -488,8 +510,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - else - use_port = true; memset(&local, 0, sizeof(local)); local.family = remote.family; @@ -497,28 +517,30 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) __mptcp_subflow_connect(sk, &local, &remote); spin_lock_bh(&msk->pm.lock); - mptcp_pm_announce_addr(msk, &remote, true, use_port); - mptcp_pm_nl_add_addr_send_ack(msk); +add_addr_echo: + mptcp_pm_announce_addr(msk, &msk->pm.remote, true); + mptcp_pm_nl_addr_send_ack(msk); } -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); - if (!mptcp_pm_should_add_signal(msk)) + if (!mptcp_pm_should_add_signal(msk) && + !mptcp_pm_should_rm_signal(msk)) return; __mptcp_flush_join_list(msk); subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); if (subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - u8 add_addr; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for add_addr%s%s", + pr_debug("send ack for %s%s%s", + mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); @@ -526,13 +548,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) tcp_send_ack(ssk); release_sock(ssk); spin_lock_bh(&msk->pm.lock); - - add_addr = READ_ONCE(msk->pm.addr_signal); - if (mptcp_pm_should_add_signal_ipv6(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); - if (mptcp_pm_should_add_signal_port(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT); - WRITE_ONCE(msk->pm.addr_signal, add_addr); } } @@ -571,47 +586,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; u8 i; - pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr); + pr_debug("%s rm_list_nr %d", + rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", rm_list->nr); msk_owned_by_me(msk); - if (!msk->pm.rm_list_rx.nr) + if (!rm_list->nr) return; if (list_empty(&msk->conn_list)) return; - for (i = 0; i < msk->pm.rm_list_rx.nr; i++) { + for (i = 0; i < rm_list->nr; i++) { list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow->local_id; + + if (rm_type == MPTCP_MIB_RMADDR) + id = subflow->remote_id; - if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id) + if (rm_list->ids[i] != id) continue; - pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]); + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", + i, rm_list->ids[i], subflow->local_id, subflow->remote_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_accepted--; + if (rm_type == MPTCP_MIB_RMADDR) { + msk->pm.add_addr_accepted--; + WRITE_ONCE(msk->pm.accept_addr, true); + } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { + msk->pm.local_addr_used--; + } msk->pm.subflows--; - WRITE_ONCE(msk->pm.accept_addr, true); - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR); - - break; + __MPTCP_INC_STATS(sock_net(sk), rm_type); } } } +static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); +} + +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); +} + void mptcp_pm_nl_work(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -627,7 +663,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) } if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_nl_addr_send_ack(msk); } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); @@ -645,47 +681,6 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) -{ - struct mptcp_subflow_context *subflow, *tmp; - struct sock *sk = (struct sock *)msk; - u8 i; - - pr_debug("subflow rm_list_nr %d", rm_list->nr); - - msk_owned_by_me(msk); - - if (!rm_list->nr) - return; - - if (list_empty(&msk->conn_list)) - return; - - for (i = 0; i < rm_list->nr; i++) { - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - - if (rm_list->ids[i] != subflow->local_id) - continue; - - pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]); - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - msk->pm.local_addr_used--; - msk->pm.subflows--; - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); - - break; - } - } -} - static bool address_use_port(struct mptcp_pm_addr_entry *entry) { return (entry->addr.flags & @@ -1161,6 +1156,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) } } +static int mptcp_nl_remove_id_zero_address(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + long s_slot = 0, s_num = 0; + 
struct mptcp_sock *msk; + + list.ids[list.nr++] = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info msk_local; + + if (list_empty(&msk->conn_list)) + goto next; + + local_address((struct sock_common *)msk, &msk_local); + if (!addresses_equal(&msk_local, addr, addr->port)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1173,6 +1203,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + /* the zero id address is special: the first address used by the msk + * always gets such an id, so different subflows can have different zero + * id addresses. Additionally zero id is not accounted for in id_bitmap. + * Let's use an 'mptcp_rm_list' instead of the common remove code. + */ + if (addr.addr.id == 0) + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); + spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1590b9d4cde2..171b77537dcb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2047,28 +2047,21 @@ out_err: return copied; } -static void mptcp_retransmit_handler(struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); -} - static void mptcp_retransmit_timer(struct timer_list *t) { struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - mptcp_retransmit_handler(sk); + /* we need a process context to retransmit */ + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) + mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, - &sk->sk_tsq_flags)) - sock_hold(sk); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@ -2958,17 +2951,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) } } -#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { - unsigned long flags, nflags; - for (;;) { - flags = 0; + unsigned long flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break; @@ -2983,6 +2975,8 @@ static void mptcp_release_cb(struct sock *sk) spin_unlock_bh(&sk->sk_lock.slock); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk); cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -2998,20 +2992,6 @@ static void mptcp_release_cb(struct sock *sk) */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); - - do { - flags = sk->sk_tsq_flags; - if (!(flags & MPTCP_DEFERRED_ALL)) - return; - nflags = flags & ~MPTCP_DEFERRED_ALL; - } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - - 
sock_release_ownership(sk); - - if (flags & TCPF_WRITE_TIMER_DEFERRED) { - mptcp_retransmit_handler(sk); - __sock_put(sk); - } } void mptcp_subflow_process_delegated(struct sock *ssk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1111a99b024f..e8c5ff2b8ace 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -104,6 +104,7 @@ #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 #define MPTCP_ERROR_REPORT 8 +#define MPTCP_RETRANSMIT 9 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -642,12 +643,14 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow); +void mptcp_pm_subflow_established(struct mptcp_sock *msk); void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); @@ -659,10 +662,13 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port); + bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d17d39ccdf34..6c074d3db0ed 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1081,7 +1081,7 @@ bool mptcp_subflow_data_available(struct sock *sk) * In mptcp, rwin is about the mptcp-level connection data. * * Data that is still on the ssk rx queue can thus be ignored, - * as far as mptcp peer is concerened that data is still inflight. + * as far as mptcp peer is concerned that data is still inflight. * DSS ACK is updated when skb is moved to the mptcp rx queue. 
*/ void mptcp_space(const struct sock *ssk, int *space, int *full_space) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index e37102546be6..49031f804276 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -100,7 +100,7 @@ enum { struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ u32 alpha2; /* Supported BCD encoded NCSI version */ - u8 fw_name[12]; /* Firware name string */ + u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ u32 mf_id; /* Manufacture ID */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 0c132ff9b446..128690c512df 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2398,7 +2398,7 @@ static int __net_init __ip_vs_init(struct net *net) if (ipvs == NULL) return -ENOMEM; - /* Hold the beast until a service is registerd */ + /* Hold the beast until a service is registered */ ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2ccda8ace796..91bc8df3e4b0 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Accouting handling for netfilter. */ +/* Accounting handling for netfilter. */ /* * (C) 2008 Krzysztof Piotr Oledzki <[email protected]> diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index df1b41ed73fd..ca52f5085989 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -63,7 +63,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { */ /** - * netlbl_mgmt_add - Handle an ADD message + * netlbl_mgmt_add_common - Handle an ADD message * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index da7e2112771f..5044c7db577e 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -457,7 +457,7 @@ static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, } /** - * start_poll operation + * digital_start_poll - start_poll operation * @nfc_dev: device to be polled * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index dfc820ee553a..4b46c69e14ab 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -20,6 +20,8 @@ /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff +#define QRTR_EPH_PORT_RANGE \ + XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET) /** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 @@ -106,8 +108,7 @@ static LIST_HEAD(qrtr_all_nodes); static DEFINE_MUTEX(qrtr_node_lock); /* local port allocation management */ -static DEFINE_IDR(qrtr_ports); -static DEFINE_MUTEX(qrtr_port_lock); +static DEFINE_XARRAY_ALLOC(qrtr_ports); /** * struct qrtr_node - endpoint node @@ -653,7 +654,7 @@ static struct qrtr_sock *qrtr_port_lookup(int port) port = 0; rcu_read_lock(); - ipc = idr_find(&qrtr_ports, port); + ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); @@ -695,9 +696,7 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) __sock_put(&ipc->sk); - mutex_lock(&qrtr_port_lock); - idr_remove(&qrtr_ports, port); - mutex_unlock(&qrtr_port_lock); + 
xa_erase(&qrtr_ports, port); /* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ @@ -716,29 +715,20 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { - u32 min_port; int rc; - mutex_lock(&qrtr_port_lock); if (!*port) { - min_port = QRTR_MIN_EPH_SOCKET; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, + GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { - min_port = 0; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC); + rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { - min_port = *port; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } - mutex_unlock(&qrtr_port_lock); - if (rc == -ENOSPC) + if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; @@ -752,20 +742,16 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; - int id; - - mutex_lock(&qrtr_port_lock); - idr_for_each_entry(&qrtr_ports, ipc, id) { - /* Don't reset control port */ - if (id == 0) - continue; + unsigned long index; + rcu_read_lock(); + xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } - mutex_unlock(&qrtr_port_lock); + rcu_read_unlock(); } /* Bind socket to address. diff --git a/net/rds/send.c b/net/rds/send.c index 985d0b7713ac..53444397de66 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1225,7 +1225,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow - * communicating beween link local and non-link local address. + * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f77484df097b..54e6a708d06e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3217,7 +3217,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, return false; break; default: - /* This is unkown to us, reject! */ + /* This is unknown to us, reject! */ return false; } } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index af2b7041fa4e..7632714c1e5b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1452,7 +1452,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, return 'E'; } -/* Common helper routine for both duplicate and simulataneous INIT +/* Common helper routine for both duplicate and simultaneous INIT * chunk handling. */ static enum sctp_disposition sctp_sf_do_unexpected_init( @@ -1685,7 +1685,7 @@ enum sctp_disposition sctp_sf_do_5_2_1_siminit( void *arg, struct sctp_cmd_seq *commands) { - /* Call helper to do the real work for both simulataneous and + /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. 
*/ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); @@ -1740,7 +1740,7 @@ enum sctp_disposition sctp_sf_do_5_2_2_dupinit( void *arg, struct sctp_cmd_seq *commands) { - /* Call helper to do the real work for both simulataneous and + /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. */ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); @@ -2221,11 +2221,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( break; } - /* Delete the tempory new association. */ + /* Delete the temporary new association. */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); - /* Restore association pointer to provide SCTP command interpeter + /* Restore association pointer to provide SCTP command interpreter * with a valid context in case it needs to manipulate * the queues */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a710917c5ac7..76a388b5021c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9327,7 +9327,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); - /* Set newsk security attributes from orginal sk and connection + /* Set newsk security attributes from original sk and connection * security attribute from ep. */ security_sctp_sk_clone(ep, sk, newsk); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e8e448771f85..6d6fd1397c87 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -410,7 +410,6 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev, struct smc_sock; struct smc_clc_msg_accept_confirm; -struct smc_clc_msg_local; void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate_sched(struct smc_link_group *lgr); diff --git a/net/socket.c b/net/socket.c index 84a8049c2b09..27e3e7d53f8e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3568,7 +3568,7 @@ EXPORT_SYMBOL(kernel_accept); * @addrlen: address length * @flags: flags (O_NONBLOCK, ...) * - * For datagram sockets, @addr is the addres to which datagrams are sent + * For datagram sockets, @addr is the address to which datagrams are sent * by default, and the only address from which datagrams are received. * For stream sockets, attempts to connect to @addr. * Returns 0 or an error code.
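The qrtr hunks a few files above replace the driver's IDR plus qrtr_port_lock pairing with an XArray: xa_alloc() hands out a free ephemeral port inside QRTR_EPH_PORT_RANGE, xa_insert() claims one specific slot and returns -EBUSY when it is already taken (which qrtr_port_assign() maps to -EADDRINUSE), and lookup stays a bare xa_load() under rcu_read_lock() because the XArray carries its own internal spinlock. That is what lets the patch delete qrtr_port_lock outright and walk the table with xa_for_each_start() in qrtr_reset_ports(). What follows is a minimal kernel-style sketch of that pattern, not qrtr's actual code; the my_* names and helpers are invented, and the bounds are copied from the qrtr range purely for illustration.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

/* Illustrative only: a port table keyed by small integer ids. */
#define MY_EPH_MIN 0x4000
#define MY_EPH_MAX 0x7fff

static DEFINE_XARRAY_ALLOC(my_ports);

static int my_port_assign(void *owner, u32 *port)
{
	int rc;

	if (!*port)
		/* pick any free slot in [MY_EPH_MIN, MY_EPH_MAX] */
		rc = xa_alloc(&my_ports, port, owner,
			      XA_LIMIT(MY_EPH_MIN, MY_EPH_MAX), GFP_KERNEL);
	else
		/* claim one specific slot; -EBUSY if already occupied */
		rc = xa_insert(&my_ports, *port, owner, GFP_KERNEL);

	return rc == -EBUSY ? -EADDRINUSE : rc;
}

static void *my_port_lookup(u32 port)
{
	void *owner;

	/* updates take the XArray's internal spinlock, so readers only
	 * need rcu_read_lock(); no separate mutex is required
	 */
	rcu_read_lock();
	owner = xa_load(&my_ports, port);
	rcu_read_unlock();

	return owner;
}

The move from GFP_ATOMIC to GFP_KERNEL in the same hunks fits this picture: the callers run in process context where sleeping allocations are fine, so the stricter flag was never necessary to begin with. That rationale is inferred here; the patch itself does not spell it out.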
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a4389ef08a98..443f8e5b9477 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -240,10 +240,12 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * @disc_domain: bearer domain * @prio: bearer priority * @attr: nlattr array + * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -254,20 +256,24 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; + NL_SET_ERR_MSG(extack, "Illegal name"); goto rejected; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -275,33 +281,43 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -315,6 +331,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } @@ -331,6 +348,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } @@ -909,6 +927,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -948,8 +967,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) 
+ if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -1007,7 +1028,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -1046,6 +1068,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } @@ -1086,8 +1109,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); - if (!b) + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1106,12 +1131,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif @@ -1239,6 +1270,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); media = tipc_media_find(name); if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } @@ -1275,9 +1307,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); - if (!m) + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; - + } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1293,12 +1326,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (m->type_id != TIPC_MEDIA_TYPE_UDP) + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6bf4550aa1ac..57c6a1a719e2 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -154,9 +154,9 @@ struct tipc_media { * care of initializing all other fields. 
*/ struct tipc_bearer { - void __rcu *media_ptr; /* initalized by media */ - u32 mtu; /* initalized by media */ - struct tipc_media_addr addr; /* initalized by media */ + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 6f64acef73dc..76b8428c94a7 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1492,6 +1492,8 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, /* Allocate statistic structure */ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); kfree_sensitive(c); return -ENOMEM; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 61c38eaaa298..707d0dc71fad 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2014,7 +2014,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } - /* No synching needed if only one link */ + /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 117a472a8e61..f21162aa0cf7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1450,7 +1450,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) ua = (struct tipc_uaddr *)&tsk->peer; if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; - atype = ua->addrtype; + atype = ua->addrtype; } if (unlikely(syn)) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ddea6554ec46..60b877531b66 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -49,12 +49,13 @@ struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object + * @s: host-endian copy of the user subscription + * @evt: template for events generated by subscription * @kref: reference count for this subscription * @net: network namespace associated with subscription * @timer: timer governing subscription duration (optional) * @service_list: adjacent subscriptions in name sequence's subscription list * @sub_list: adjacent subscriptions in subscriber's subscription list - * @evt: template for events generated by subscription * @conid: connection identifier of topology server * @inactive: true if this subscription is inactive * @lock: serialize up/down and timer events diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bc7fb9bf3351..92a72f0e0d94 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1855,7 +1855,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a - * peer has not connected or a local shutdown occured with the + * peer has not connected or a local shutdown occurred with the * SOCK_DONE flag. 
*/ if (sock_flag(sk, SOCK_DONE)) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 21536c48deec..68db914df642 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3404,7 +3404,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy) } /* - * Restoring regulatory settings involves ingoring any + * Restoring regulatory settings involves ignoring any * possibly stale country IE information and user regulatory * settings if so desired, this includes any beacon hints * learned as we could have traveled outside to another country diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ff687b97b2d9..44d6566dd23e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1018,7 +1018,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * current neighbour/link might impose additional limits - * on certain facilties + * on certain facilities */ x25_limit_facilities(&facilities, nb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b74f28cabe24..156347fd7e2e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -688,7 +688,7 @@ static void xfrm_hash_resize(struct work_struct *work) } /* Make sure *pol can be inserted into fastbin. - * Useful to check that later insert requests will be sucessful + * Useful to check that later insert requests will be successful * (provided xfrm_policy_lock is held throughout). */ static struct xfrm_pol_inexact_bin * diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5a0ef4361e43..df8bc8fc724c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1761,7 +1761,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived - * in netlink excl is a flag and you wouldnt need + * in netlink excl is a flag and you wouldn't need * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index b4dbda706c4d..5ec3beb637c8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -11,6 +11,7 @@ ALL_TESTS=" matchall_mirror_behind_flower_ingress_test matchall_sample_behind_flower_ingress_test matchall_mirror_behind_flower_egress_test + matchall_proto_match_test police_limits_test multi_police_test " @@ -291,6 +292,22 @@ matchall_mirror_behind_flower_egress_test() matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2" } +matchall_proto_match_test() +{ + RET=0 + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + matchall skip_sw \ + action sample group 1 rate 100 + check_fail $? 
"Incorrect success to add matchall rule with protocol match" + + tc qdisc del dev $swp1 clsact + + log_test "matchall protocol match" +} + police_limits_test() { RET=0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 57b05f042787..093bed088ad0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -34,6 +34,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" tc_sample_rate_test tc_sample_max_rate_test + tc_sample_conflict_test tc_sample_group_conflict_test tc_sample_md_iif_test tc_sample_md_lag_iif_test @@ -272,6 +273,35 @@ tc_sample_max_rate_test() log_test "tc sample maximum rate" } +tc_sample_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port, + # even when they share the same parameters. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 &> /dev/null + check_fail $? "Managed to configure second sampling rule" + + # Delete the first rule and make sure the second rule can now be + # configured. + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule after deletion" + + log_test "tc sample conflict test" + + tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall +} + tc_sample_group_conflict_test() { RET=0 diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 9f64d5c7107b..7ca1f030d209 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -24,8 +24,11 @@ function check { local code=$1 local str=$2 local exp_str=$3 + local exp_fail=$4 - if [ $code -ne 0 ]; then + [ -z "$exp_fail" ] && cop="-ne" || cop="-eq" + + if [ $code $cop 0 ]; then ((num_errors++)) return fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh new file mode 100755 index 000000000000..0c56746e9ce0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +[ a$ETHTOOL == a ] && ETHTOOL=ethtool + +set -o pipefail + +# netdevsim starts out with None/None +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: None +Active FEC encoding: None" + +# Test Auto +$ETHTOOL --set-fec $NSIM_NETDEV encoding auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: Auto +Active FEC encoding: Off" + +# Test case in-sensitivity +for o in off Off OFF; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "Configured FEC encodings: Off +Active FEC encoding: Off" +done + +for o in BaseR baser BAser; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? 
"$s" "Configured FEC encodings: BaseR +Active FEC encoding: BaseR" +done + +for o in llrs rs; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "Configured FEC encodings: ${o^^} +Active FEC encoding: ${o^^}" +done + +# Test mutliple bits +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: RS LLRS +Active FEC encoding: LLRS" + +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: Auto Off RS +Active FEC encoding: RS" + +# Make sure other link modes are rejected +$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null +check $? '' '' 1 + +$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null +check $? '' '' 1 + +# Try JSON +$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1 +if [ $? -eq 0 ]; then + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"Off"' + + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto" +"RS"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"RS"' +fi + +# Test error injection +echo 11 > $NSIM_DEV_DFS/ethtool/get_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? '' '' 1 + +echo 0 > $NSIM_DEV_DFS/ethtool/get_err +echo 11 > $NSIM_DEV_DFS/ethtool/set_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? + +$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null +check $? 
'' '' 1 + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 25f198bec0b2..2d71b283dde3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -23,6 +23,7 @@ TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh TEST_PROGS += unicast_extensions.sh +TEST_PROGS += udpgro_fwd.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 05c05e02bade..42e28c983d41 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -772,6 +772,15 @@ rate() echo $((8 * (t1 - t0) / interval)) } +packets_rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $(((t1 - t0) / interval)) +} + mac_get() { local if_name=$1 diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 160f9cccdfb7..4f9f17cb45d6 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -35,6 +35,8 @@ ALL_TESTS=" police_shared_test police_rx_mirror_test police_tx_mirror_test + police_pps_rx_test + police_pps_tx_test " NUM_NETIFS=6 source tc_common.sh @@ -290,6 +292,60 @@ police_tx_mirror_test() police_mirror_common_test $rp2 egress "police tx and mirror" } +police_pps_common_test() +{ + local test_name=$1; shift + + RET=0 + + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .packets) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .packets) + + local er=$((2000)) + local nr=$(packets_rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." 
+ + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_rx_test() +{ + # Rule to police traffic destined to $h2 on ingress of $rp1 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on rx" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_tx_test() +{ + # Rule to police traffic destined to $h2 on egress of $rp2 + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on tx" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 77bb62feb872..69d89b5d666f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -55,6 +55,7 @@ static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; +static unsigned int cfg_do_w; static int cfg_wait; static void die_usage(void) @@ -272,8 +273,8 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; - if (cfg_remove && do_w > 50) - do_w = 50; + if (cfg_remove && do_w > cfg_do_w) + do_w = cfg_do_w; bw = write(fd, buf, do_w); if (bw < 0) @@ -829,7 +830,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -840,6 +841,9 @@ static void parse_opts(int argc, char **argv) cfg_remove = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; + cfg_do_w = atoi(optarg); + if (cfg_do_w <= 0) + cfg_do_w = 50; break; case 'l': listen_mode = true; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fe990d8696a9..d2273b88e72c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -234,8 +234,10 @@ do_transfer() if [ $speed = "fast" ]; then mptcp_connect="./mptcp_connect -j" - else - mptcp_connect="./mptcp_connect -r" + elif [ $speed = "slow" ]; then + mptcp_connect="./mptcp_connect -r 50" + elif [ $speed = "least" ]; then + mptcp_connect="./mptcp_connect -r 10" fi local local_addr @@ -292,9 +294,12 @@ do_transfer() let id+=1 done fi - else + elif [ $rm_nr_ns1 -eq 8 ]; then sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush + elif [ $rm_nr_ns1 -eq 9 ]; then + sleep 1 + ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr} fi fi @@ -331,9 +336,18 @@ do_transfer() let id+=1 done fi - else + elif [ $rm_nr_ns2 -eq 8 ]; then sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush + elif [ $rm_nr_ns2 -eq 9 ]; then + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:1::2" + else + addr="10.0.1.2" + fi + sleep 1 + ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr fi fi @@ -783,6 +797,28 @@ signal_address_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 + + # signal addresses + reset 
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal addresses" 3 3 3 + chk_add_nr 3 3 + + # signal invalid addresses + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal invalid addresses" 1 1 1 + chk_add_nr 3 3 } link_failure_tests() @@ -818,6 +854,26 @@ add_addr_timeout_tests() run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 chk_add_nr 4 0 + + # signal addresses timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 least + chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2 + chk_add_nr 8 0 + + # signal invalid addresses timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 least + chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1 + chk_add_nr 8 0 } remove_tests() @@ -874,6 +930,30 @@ remove_tests() chk_add_nr 1 1 chk_rm_nr 2 2 + # addresses remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + chk_join_nr "remove addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert + + # invalid addresses remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + chk_join_nr "remove invalid addresses" 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + # subflows and signal, flush reset ip netns exec $ns1 ./pm_nl_ctl limits 0 3 @@ -908,6 +988,37 @@ remove_tests() chk_join_nr "flush addresses" 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert + + # invalid addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow + chk_join_nr "flush invalid addresses" 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + + # remove id 0 subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow + chk_join_nr "remove id 0 subflow" 1 1 1 + chk_rm_nr 
1 1 + + # remove id 0 address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow + chk_join_nr "remove id 0 address" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert } add_tests() diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index a617e293734c..3c741abe034e 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -100,12 +100,12 @@ done check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" -for i in `seq 9 256`; do +ip netns exec $ns1 ./pm_nl_ctl del 9 +for i in `seq 10 255`; do + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1)) done check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 2 flags 10.0.0.9 id 3 flags signal,backup 10.0.1.3 id 4 flags signal 10.0.1.4 id 5 flags signal 10.0.1.5 diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7b4167f3f9a2..115decfdc1ef 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -26,7 +26,7 @@ static void syntax(char *argv[]) { fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); - fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); fprintf(stderr, "\tflush\n"); @@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int16_t family; int nest_start; u_int8_t id; int off = 0; @@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, MPTCP_PM_VER); - /* the only argument is the address id */ - if (argc != 3) + /* the only argument is the address id (nonzero) */ + if (argc != 3 && argc != 4) syntax(argv); id = atoi(argv[2]); + /* zero id with the IP address */ + if (!id && argc != 4) + syntax(argv); nest_start = off; nest = (void *)(data + off); @@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(1); memcpy(RTA_DATA(rta), &id, 1); off += NLMSG_ALIGN(rta->rta_len); + + if (!id) { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[3]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } nest->rta_len = off - nest_start; do_nl_req(fd, nh, off, 0); diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index b4cca382d125..59067f64b775 100644 
--- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -2,9 +2,12 @@ /* * Test the SO_TXTIME API * - * Takes two streams of { payload, delivery time }[], one input and one output. - * Sends the input stream and verifies arrival matches the output stream. - * The two streams can differ due to out-of-order delivery and drops. + * Takes a stream of { payload, delivery time }[], to be sent across two + * processes. Start this program on two separate network namespaces or + * connected hosts, one instance in transmit mode and the other in receive + * mode using the '-r' option. Receiver will compare arrival timestamps to + * the expected stream. Sender will read transmit timestamps from the error + * queue. The streams can differ due to out-of-order delivery and drops. */ #define _GNU_SOURCE @@ -28,14 +31,17 @@ #include <sys/types.h> #include <time.h> #include <unistd.h> +#include <poll.h> static int cfg_clockid = CLOCK_TAI; -static bool cfg_do_ipv4; -static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; static int cfg_variance_us = 4000; +static uint64_t cfg_start_time_ns; +static int cfg_mark; +static bool cfg_rx; static uint64_t glob_tstart; +static uint64_t tdeliver_max; /* encode one timed transmission (of a 1B payload) */ struct timed_send { @@ -44,18 +50,21 @@ struct timed_send { }; #define MAX_NUM_PKT 8 -static struct timed_send cfg_in[MAX_NUM_PKT]; -static struct timed_send cfg_out[MAX_NUM_PKT]; +static struct timed_send cfg_buf[MAX_NUM_PKT]; static int cfg_num_pkt; static int cfg_errq_level; static int cfg_errq_type; -static uint64_t gettime_ns(void) +static struct sockaddr_storage cfg_dst_addr; +static struct sockaddr_storage cfg_src_addr; +static socklen_t cfg_alen; + +static uint64_t gettime_ns(clockid_t clock) { struct timespec ts; - if (clock_gettime(cfg_clockid, &ts)) + if (clock_gettime(clock, &ts)) error(1, errno, "gettime"); return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec; @@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_iov = &iov; msg.msg_iovlen = 1; + msg.msg_name = (struct sockaddr *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; if (ts->delay_us >= 0) { memset(control, 0, sizeof(control)); @@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_controllen = sizeof(control); tdeliver = glob_tstart + ts->delay_us * 1000; + tdeliver_max = tdeliver_max > tdeliver ? + tdeliver_max : tdeliver; cm = CMSG_FIRSTHDR(&msg); cm->cmsg_level = SOL_SOCKET; @@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts) } -static bool do_recv_one(int fdr, struct timed_send *ts) +static void do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; @@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts) ret = recv(fdr, rbuf, sizeof(rbuf), 0); if (ret == -1 && errno == EAGAIN) - return true; + error(1, EAGAIN, "recv: timeout"); if (ret == -1) error(1, errno, "read"); if (ret != 1) error(1, 0, "read: %dB", ret); - tstop = (gettime_ns() - glob_tstart) / 1000; + tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000; texpect = ts->delay_us >= 0 ? 
ts->delay_us : 0; fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n", @@ -123,8 +136,6 @@ static bool do_recv_one(int fdr, struct timed_send *ts) if (llabs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); - - return false; } static void do_recv_verify_empty(int fdr) @@ -137,18 +148,18 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } -static void do_recv_errqueue_timeout(int fdt) +static int do_recv_errqueue_timeout(int fdt) { char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr) + 1]; struct sock_extended_err *err; + int ret, num_tstamp = 0; struct msghdr msg = {0}; struct iovec iov = {0}; struct cmsghdr *cm; int64_t tstamp = 0; - int ret; iov.iov_base = data; iov.iov_len = sizeof(data); @@ -206,9 +217,47 @@ static void do_recv_errqueue_timeout(int fdt) msg.msg_flags = 0; msg.msg_controllen = sizeof(control); + num_tstamp++; } - error(1, 0, "recv: timeout"); + return num_tstamp; +} + +static void recv_errqueue_msgs(int fdt) +{ + struct pollfd pfd = { .fd = fdt, .events = POLLERR }; + const int timeout_ms = 10; + int ret, num_tstamp = 0; + + do { + ret = poll(&pfd, 1, timeout_ms); + if (ret == -1) + error(1, errno, "poll"); + + if (ret && (pfd.revents & POLLERR)) + num_tstamp += do_recv_errqueue_timeout(fdt); + + if (num_tstamp == cfg_num_pkt) + break; + + } while (gettime_ns(cfg_clockid) < tdeliver_max); +} + +static void start_time_wait(void) +{ + uint64_t now; + int err; + + if (!cfg_start_time_ns) + return; + + now = gettime_ns(CLOCK_REALTIME); + if (cfg_start_time_ns < now) + return; + + err = usleep((cfg_start_time_ns - now) / 1000); + if (err) + error(1, errno, "usleep"); } static void setsockopt_txtime(int fd) @@ -245,6 +294,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen) setsockopt_txtime(fd); + if (cfg_mark && + setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark))) + error(1, errno, "setsockopt mark"); + return fd; } @@ -266,31 +319,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen) return fd; } -static void do_test(struct sockaddr *addr, socklen_t alen) +static void do_test_tx(struct sockaddr *addr, socklen_t alen) { - int fdt, fdr, i; + int fdt, i; fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n", addr->sa_family == PF_INET ? '4' : '6', cfg_clockid == CLOCK_TAI ? 
"tai" : "monotonic"); fdt = setup_tx(addr, alen); - fdr = setup_rx(addr, alen); - glob_tstart = gettime_ns(); + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); for (i = 0; i < cfg_num_pkt; i++) - do_send_one(fdt, &cfg_in[i]); + do_send_one(fdt, &cfg_buf[i]); + + recv_errqueue_msgs(fdt); + + if (close(fdt)) + error(1, errno, "close t"); +} + +static void do_test_rx(struct sockaddr *addr, socklen_t alen) +{ + int fdr, i; + + fdr = setup_rx(addr, alen); + + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); + for (i = 0; i < cfg_num_pkt; i++) - if (do_recv_one(fdr, &cfg_out[i])) - do_recv_errqueue_timeout(fdt); + do_recv_one(fdr, &cfg_buf[i]); do_recv_verify_empty(fdr); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) - error(1, errno, "close t"); +} + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + } } static int parse_io(const char *optarg, struct timed_send *array) @@ -323,17 +415,46 @@ static int parse_io(const char *optarg, struct timed_send *array) return aoff / 2; } +static void usage(const char *progname) +{ + fprintf(stderr, "\nUsage: %s [options] <payload>\n" + "Options:\n" + " -4 only IPv4\n" + " -6 only IPv6\n" + " -c <clock> monotonic (default) or tai\n" + " -D <addr> destination IP address (server)\n" + " -S <addr> source IP address (client)\n" + " -r run rx mode\n" + " -t <nsec> start time (UTC nanoseconds)\n" + " -m <mark> socket mark\n" + "\n", + progname); + exit(1); +} + static void parse_opts(int argc, char **argv) { - int c, ilen, olen; + char *daddr = NULL, *saddr = NULL; + int domain = PF_UNSPEC; + int c; - while ((c = getopt(argc, argv, "46c:")) != -1) { + while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) { switch (c) { case '4': - cfg_do_ipv4 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; break; case '6': - cfg_do_ipv6 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; break; case 'c': if (!strcmp(optarg, "tai")) @@ -344,50 +465,50 @@ static void parse_opts(int argc, char **argv) else error(1, 0, "unknown clock id %s", optarg); break; + case 'S': + saddr = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'r': + cfg_rx = true; + break; + case 't': + cfg_start_time_ns = strtol(optarg, NULL, 0); + break; + case 'm': + cfg_mark = strtol(optarg, NULL, 0); + break; default: - error(1, 0, "parse error at %d", optind); + usage(argv[0]); } } - if (argc - optind != 2) - error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]); + if (argc - optind != 1) + usage(argv[0]); + + if (domain == PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + if (!daddr) + 
error(1, 0, "-D <server addr> required\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required\n"); - ilen = parse_io(argv[optind], cfg_in); - olen = parse_io(argv[optind + 1], cfg_out); - if (ilen != olen) - error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen); - cfg_num_pkt = ilen; + setup_sockaddr(domain, daddr, &cfg_dst_addr); + setup_sockaddr(domain, saddr, &cfg_src_addr); + + cfg_num_pkt = parse_io(argv[optind], cfg_buf); } int main(int argc, char **argv) { parse_opts(argc, argv); - if (cfg_do_ipv6) { - struct sockaddr_in6 addr6 = {0}; - - addr6.sin6_family = AF_INET6; - addr6.sin6_port = htons(cfg_port); - addr6.sin6_addr = in6addr_loopback; - - cfg_errq_level = SOL_IPV6; - cfg_errq_type = IPV6_RECVERR; - - do_test((void *)&addr6, sizeof(addr6)); - } - - if (cfg_do_ipv4) { - struct sockaddr_in addr4 = {0}; - - addr4.sin_family = AF_INET; - addr4.sin_port = htons(cfg_port); - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - cfg_errq_level = SOL_IP; - cfg_errq_type = IP_RECVERR; - - do_test((void *)&addr4, sizeof(addr4)); - } + if (cfg_rx) + do_test_rx((void *)&cfg_dst_addr, cfg_alen); + else + do_test_tx((void *)&cfg_src_addr, cfg_alen); return 0; } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 3f7800eaecb1..3f06f4d286a9 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -3,32 +3,85 @@ # # Regression tests for the SO_TXTIME interface -# Run in network namespace -if [[ $# -eq 0 ]]; then - if ! ./in_netns.sh $0 __subprocess; then - # test is time sensitive, can be flaky - echo "test failed: retry once" - ./in_netns.sh $0 __subprocess +set -e + +readonly DEV="veth0" +readonly BIN="./so_txtime" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" +} + +trap cleanup EXIT + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" netns "${NS1}" type veth \ + peer name "${DEV}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +do_test() { + local readonly IP="$1" + local readonly CLOCK="$2" + local readonly TXARGS="$3" + local readonly RXARGS="$4" + + if [[ "${IP}" == "4" ]]; then + local readonly SADDR="${SADDR4}" + local readonly DADDR="${DADDR4}" + elif [[ "${IP}" == "6" ]]; then + local readonly SADDR="${SADDR6}" + local readonly DADDR="${DADDR6}" + else + echo "Invalid IP version ${IP}" + exit 1 fi - exit $? -fi + local readonly START="$(date +%s%N --date="+ 0.1 seconds")" + ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & + ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" + wait "$!" 
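+	# editor's note: the receiver is launched first, in the background,
+	# with -r; the sender then runs in the foreground. Both share the
+	# same -t start time so their relative timestamps agree, and
+	# wait "$!" propagates the receiver's exit status so a comparison
+	# failure fails the test under 'set -e'.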
+} -set -e +ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq +do_test 4 mono a,-1 a,-1 +do_test 6 mono a,0 a,0 +do_test 6 mono a,10 a,10 +do_test 4 mono a,10,b,20 a,10,b,20 +do_test 6 mono a,20,b,10 b,20,a,20 -tc qdisc add dev lo root fq -./so_txtime -4 -6 -c mono a,-1 a,-1 -./so_txtime -4 -6 -c mono a,0 a,0 -./so_txtime -4 -6 -c mono a,10 a,10 -./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 -./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 - -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then - ! ./so_txtime -4 -6 -c tai a,-1 a,-1 - ! ./so_txtime -4 -6 -c tai a,0 a,0 - ./so_txtime -4 -6 -c tai a,10 a,10 - ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20 - ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20 +if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then + ! do_test 4 tai a,-1 a,-1 + ! do_test 6 tai a,0 a,0 + do_test 6 tai a,10 a,10 + do_test 4 tai a,10,b,20 a,10,b,20 + do_test 6 tai a,20,b,10 b,10,a,20 else echo "tc ($(tc -V)) does not support qdisc etf. skipping" fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh new file mode 100755 index 000000000000..a8fa64136282 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -0,0 +1,251 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly BASE="ns-$(mktemp -u XXXXXX)" +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +# "overlay" network used for UDP over UDP tunnel traffic +readonly OL_NET_V4=172.16.1. +readonly OL_NET_V6=2001:db8:1:: +readonly NPROCS=`nproc` + +cleanup() { + local ns + local -r jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local net + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns + ip -n $BASE$ns link set dev veth$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +} + +create_vxlan_endpoint() { + local -r netns=$1 + local -r bm_dev=$2 + local -r bm_rem_addr=$3 + local -r vxlan_dev=$4 + local -r vxlan_id=$5 + local -r vxlan_port=4789 + + ip -n $netns link set dev $bm_dev up + ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \ + dstport $vxlan_port remote $bm_rem_addr + ip -n $netns link set dev $vxlan_dev up +} + +create_vxlan_pair() { + local ns + + create_ns + + for ns in $SRC $DST; do + # note that 3 - $SRC == $DST and 3 - $DST == $SRC + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4 + ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24 + done + for ns in $SRC $DST; do + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 + ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad + done +} + +is_ipv6() { + if [[ $1 =~ .*:.* ]]; then + return 0 + fi + return 1 +} + +run_test() { + local -r msg=$1 + local -r dst=$2 + local -r pkts=$3 + local -r vxpkts=$4 + local bind=$5 + local rx_args="" + local rx_family="-4" + local family=-4 + local filter=IpInReceives + local 
ipt=iptables
+
+	printf "%-40s" "$msg"
+
+	if is_ipv6 $dst; then
+		# rx program does not support '-6' and implies ipv6 usage by default
+		rx_family=""
+		family=-6
+		filter=Ip6InReceives
+		ipt=ip6tables
+	fi
+
+	rx_args="$rx_family"
+	[ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+	# send a single GSO packet, segmented in 10 UDP frames.
+	# Always expect 10 UDP frames on RX side as rx socket does
+	# not enable GRO
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+	ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+	local spid=$!
+	sleep 0.1
+	ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+							sed -e 's/\[//' -e 's/:.*//'`
+	if [ $rcv != $pkts ]; then
+		echo " fail - received $rcv packets, expected $pkts"
+		ret=1
+		return
+	fi
+
+	local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+							sed -e 's/\[//' -e 's/:.*//'`
+
+	# upper net can generate a little noise, allow some tolerance
+	if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+		echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+		ret=1
+		return
+	fi
+	echo " ok"
+}
+
+run_bench() {
+	local -r msg=$1
+	local -r dst=$2
+	local family=-4
+
+	printf "%-40s" "$msg"
+	if [ $NPROCS -lt 2 ]; then
+		echo " skip - need 2 CPUs, found $NPROCS"
+		return
+	fi
+
+	is_ipv6 $dst && family=-6
+
+	# bind the sender and the receiver to different CPUs to try
+	# to get reproducible results
+	ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+	ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
+	local spid=$!
+	sleep 0.1
+	ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+}
+
+for family in 4 6; do
+	BM_NET=$BM_NET_V4
+	OL_NET=$OL_NET_V4
+	IPT=iptables
+	SUFFIX=24
+	VXDEV=vxlan
+
+	if [ $family = 6 ]; then
+		BM_NET=$BM_NET_V6
+		OL_NET=$OL_NET_V6
+		SUFFIX="64 nodad"
+		VXDEV=vxlan6
+		IPT=ip6tables
+	fi
+
+	echo "IPv$family"
+
+	create_ns
+	run_test "No GRO" $BM_NET$DST 10 0
+	cleanup
+
+	create_ns
+	ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+	run_test "GRO frag list" $BM_NET$DST 1 0
+	cleanup
+
+	# UDP GRO fwd skips aggregation when it finds a UDP socket with the GRO
+	# option set; if there is a UDP tunnel in the running system, such a
+	# lookup does take place.
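+
+	# editor's note: the GRO fwd runs below therefore NAT a secondary
+	# destination address to the real one, so the socket lookup misses
+	# and forwarded traffic can still be aggregated; whether that worked
+	# is read back from the rule counters, e.g. (illustrative):
+	#	ip netns exec $NS_DST iptables-save -c | grep 'dport 8000'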
+ # use NAT to circumvent GRO FWD check + create_ns + ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \ + -j DNAT --to-destination $BM_NET$DST + run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST + cleanup + + create_ns + run_bench "UDP fwd perf" $BM_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP GRO fwd perf" $BM_NET$DST + cleanup + + create_vxlan_pair + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + cleanup + + # use NAT to circumvent GRO FWD check + create_vxlan_pair + ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \ + -j DNAT --to-destination $OL_NET$DST + + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel + ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST + cleanup + + create_vxlan_pair + run_bench "UDP tunnel fwd perf" $OL_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + cleanup +done + +exit $ret diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index b8268da5adaa..8e45792703ed 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -764,5 +764,53 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "cdd7", + "name": "Add valid police action with packets per second rate limit", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "f5bc", + "name": "Add invalid police action with both bps and pps", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json index 8e8c1ae12260..e15f708b0fa4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -24,6 +24,30 @@ ] }, { + "id": "4297", + "name": "Add simple action with change command", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions change action simple sdata \"Not 
changed\" index 60", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { "id": "6d4c", "name": "Add simple action with duplicate index", "category": [ |