From 086ba26d55ddf0d49a76d88164dd6611448b53de Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sat, 9 Mar 2024 04:13:17 +0100
Subject: dt-bindings: net: wireless: brcm,bcm4329-fmac: Add CYW43439 DT
 binding

CYW43439 is a Wi-Fi + Bluetooth combo device from Infineon. The
WiFi part is capable of 802.11 b/g/n. This chip is present e.g.
on muRata 1YN module. Extend the binding with its DT compatible.

Signed-off-by: Marek Vasut <marex@denx.de>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240309031355.269835-1-marex@denx.de
---
 Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml
index 4aa521f1be8c..e564f20d8f41 100644
--- a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml
@@ -44,6 +44,7 @@ properties:
               - brcm,bcm4366-fmac
               - cypress,cyw4373-fmac
               - cypress,cyw43012-fmac
+              - infineon,cyw43439-fmac
           - const: brcm,bcm4329-fmac
       - enum:
           - brcm,bcm4329-fmac
-- 
cgit v1.2.3-73-gaa49b


From 75925fafb4f629b1246d4711f022e82590152355 Mon Sep 17 00:00:00 2001
From: Bharath SM <bharathsm.hsk@gmail.com>
Date: Sun, 24 Mar 2024 16:13:38 +0530
Subject: dns_resolver: correct module name in dns resolver documentation

Fix an incorrect module name and sysfs path in dns resolver
documentation.

Signed-off-by: Bharath SM <bharathsm@microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240324104338.44083-1-bharathsm@microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/dns_resolver.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/dns_resolver.rst b/Documentation/networking/dns_resolver.rst
index add4d59a99a5..c0364f7070af 100644
--- a/Documentation/networking/dns_resolver.rst
+++ b/Documentation/networking/dns_resolver.rst
@@ -118,7 +118,7 @@ Keys of dns_resolver type can be read from userspace using keyctl_read() or
 Mechanism
 =========
 
-The dnsresolver module registers a key type called "dns_resolver".  Keys of
+The dns_resolver module registers a key type called "dns_resolver".  Keys of
 this type are used to transport and cache DNS lookup results from userspace.
 
 When dns_query() is invoked, it calls request_key() to search the local
@@ -152,4 +152,4 @@ Debugging
 Debugging messages can be turned on dynamically by writing a 1 into the
 following file::
 
-	/sys/module/dnsresolver/parameters/debug
+	/sys/module/dns_resolver/parameters/debug
-- 
cgit v1.2.3-73-gaa49b


From 782c1084b9fa5bc96c7c1ab5b770dec30834517b Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 27 Mar 2024 20:31:29 +0800
Subject: doc/netlink/specs: Add vlan attr in rt_link spec

With command:
 # ./tools/net/ynl/cli.py \
 --spec Documentation/netlink/specs/rt_link.yaml \
 --do getlink --json '{"ifname": "eno1.2"}' --output-json | \
 jq -C '.linkinfo'

Before:
Exception: No message format for 'vlan' in sub-message spec 'linkinfo-data-msg'

After:
 {
   "kind": "vlan",
   "data": {
     "protocol": "8021q",
     "id": 2,
     "flag": {
       "flags": [
         "reorder-hdr"
       ],
       "mask": "0xffffffff"
     },
     "egress-qos": {
       "mapping": [
         {
           "from": 1,
           "to": 2
         },
         {
           "from": 4,
           "to": 4
         }
       ]
     }
   }
 }

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://lore.kernel.org/r/20240327123130.1322921-3-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_link.yaml | 80 +++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index 8e4d19adee8c..81a5a3d1b04d 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -50,7 +50,16 @@ definitions:
         name: dormant
       -
         name: echo
-
+  -
+    name: vlan-protocols
+    type: enum
+    entries:
+      -
+        name: 8021q
+        value: 33024
+      -
+        name: 8021ad
+        value: 34984
   -
     name: rtgenmsg
     type: struct
@@ -729,7 +738,38 @@ definitions:
       -
         name: filter-mask
         type: u32
-
+  -
+    name: ifla-vlan-flags
+    type: struct
+    members:
+      -
+        name: flags
+        type: u32
+        enum: vlan-flags
+        enum-as-flags: true
+      -
+        name: mask
+        type: u32
+        display-hint: hex
+  -
+    name: vlan-flags
+    type: flags
+    entries:
+      - reorder-hdr
+      - gvrp
+      - loose-binding
+      - mvrp
+      - bridge-binding
+  -
+    name: ifla-vlan-qos-mapping
+    type: struct
+    members:
+      -
+        name: from
+        type: u32
+      -
+        name: to
+        type: u32
 
 attribute-sets:
   -
@@ -1507,6 +1547,39 @@ attribute-sets:
       -
         name: num-disabled-queues
         type: u32
+  -
+    name: linkinfo-vlan-attrs
+    name-prefix: ifla-vlan-
+    attributes:
+      -
+        name: id
+        type: u16
+      -
+        name: flag
+        type: binary
+        struct: ifla-vlan-flags
+      -
+        name: egress-qos
+        type: nest
+        nested-attributes: ifla-vlan-qos
+      -
+        name: ingress-qos
+        type: nest
+        nested-attributes: ifla-vlan-qos
+      -
+        name: protocol
+        type: u16
+        enum: vlan-protocols
+        byte-order: big-endian
+  -
+    name: ifla-vlan-qos
+    name-prefix: ifla-vlan-qos
+    attributes:
+      -
+        name: mapping
+        type: binary
+        multi-attr: true
+        struct: ifla-vlan-qos-mapping
   -
     name: linkinfo-vrf-attrs
     name-prefix: ifla-vrf-
@@ -1666,6 +1739,9 @@ sub-messages:
       -
         value: tun
         attribute-set: linkinfo-tun-attrs
+      -
+        value: vlan
+        attribute-set: linkinfo-vlan-attrs
       -
         value: vrf
         attribute-set: linkinfo-vrf-attrs
-- 
cgit v1.2.3-73-gaa49b


From a87590c45c87424953118e8a43f36d967f3ca406 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Mon, 25 Mar 2024 16:34:50 +0100
Subject: dt-bindings: net: renesas,etheravb: Add optional MDIO bus node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Renesas Ethernet AVB bindings do not allow the MDIO bus to be
described. This has not been needed as only a single PHY is
supported and no MDIO bus properties have been needed.

Add an optional mdio node to the binding which allows the MDIO bus to be
described and allow bus properties to be set.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240325153451.2366083-2-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index de7ba7f345a9..21a92f179093 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -88,10 +88,16 @@ properties:
   '#address-cells':
     description: Number of address cells for the MDIO bus.
     const: 1
+    deprecated: true
 
   '#size-cells':
     description: Number of size cells on the MDIO bus.
     const: 0
+    deprecated: true
+
+  mdio:
+    $ref: /schemas/net/mdio.yaml#
+    unevaluatedProperties: false
 
   renesas,no-ether-link:
     type: boolean
@@ -110,9 +116,13 @@ properties:
   tx-internal-delay-ps:
     enum: [0, 2000]
 
+# In older bindings there where no mdio child-node to describe the MDIO bus
+# and the PHY. To not fail older bindings accept any node with an address. New
+# users should describe the PHY inside the mdio child-node.
 patternProperties:
   "@[0-9a-f]$":
     type: object
+    deprecated: true
 
 required:
   - compatible
@@ -123,8 +133,6 @@ required:
   - resets
   - phy-mode
   - phy-handle
-  - '#address-cells'
-  - '#size-cells'
 
 allOf:
   - $ref: ethernet-controller.yaml#
-- 
cgit v1.2.3-73-gaa49b


From 6e9b01909a811555ff3326cf80a5847169c57806 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 26 Mar 2024 21:02:12 -0700
Subject: net: remove gfp_mask from napi_alloc_skb()

__napi_alloc_skb() is napi_alloc_skb() with the added flexibility
of choosing gfp_mask. This is a NAPI function, so GFP_ATOMIC is
implied. The only practical choice the caller has is whether to
set __GFP_NOWARN. But that's a false choice, too, allocation failures
in atomic context will happen, and printing warnings in logs,
effectively for a packet drop, is both too much and very likely
non-actionable.

This leads me to a conclusion that most uses of napi_alloc_skb()
are simply misguided, and should use __GFP_NOWARN in the first
place. We also have a "standard" way of reporting allocation
failures via the queue stat API (qstats::rx-alloc-fail).

The direct motivation for this patch is that one of the drivers
used at Meta calls napi_alloc_skb() (so prior to this patch without
__GFP_NOWARN), and the resulting OOM warning is the top networking
warning in our fleet.

Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240327040213.3153864-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/mm/page_frags.rst                    | 2 +-
 Documentation/translations/zh_CN/mm/page_frags.rst | 2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 4 +---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c         | 3 +--
 drivers/net/ethernet/intel/iavf/iavf_txrx.c        | 4 +---
 drivers/net/ethernet/intel/ice/ice_txrx.c          | 3 +--
 drivers/net/ethernet/intel/ice/ice_xsk.c           | 3 +--
 drivers/net/ethernet/intel/idpf/idpf_txrx.c        | 5 ++---
 drivers/net/ethernet/intel/igc/igc_main.c          | 3 +--
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c       | 3 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 5 ++---
 include/linux/skbuff.h                             | 8 +-------
 net/core/skbuff.c                                  | 9 ++++-----
 13 files changed, 18 insertions(+), 36 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/mm/page_frags.rst b/Documentation/mm/page_frags.rst
index a81617e688a8..503ca6cdb804 100644
--- a/Documentation/mm/page_frags.rst
+++ b/Documentation/mm/page_frags.rst
@@ -25,7 +25,7 @@ to be disabled when executing the fragment allocation.
 The network stack uses two separate caches per CPU to handle fragment
 allocation.  The netdev_alloc_cache is used by callers making use of the
 netdev_alloc_frag and __netdev_alloc_skb calls.  The napi_alloc_cache is
-used by callers of the __napi_alloc_frag and __napi_alloc_skb calls.  The
+used by callers of the __napi_alloc_frag and napi_alloc_skb calls.  The
 main difference between these two calls is the context in which they may be
 called.  The "netdev" prefixed functions are usable in any context as these
 functions will disable interrupts, while the "napi" prefixed functions are
diff --git a/Documentation/translations/zh_CN/mm/page_frags.rst b/Documentation/translations/zh_CN/mm/page_frags.rst
index 20bd3fafdc8c..a5b22486a913 100644
--- a/Documentation/translations/zh_CN/mm/page_frags.rst
+++ b/Documentation/translations/zh_CN/mm/page_frags.rst
@@ -25,7 +25,7 @@ sk_buff->head使用，或者用于skb_shared_info的 “frags” 部分。
 
 网络堆栈在每个CPU使用两个独立的缓存来处理碎片分配。netdev_alloc_cache被使用
 netdev_alloc_frag和__netdev_alloc_skb调用的调用者使用。napi_alloc_cache
-被调用__napi_alloc_frag和__napi_alloc_skb的调用者使用。这两个调用的主要区别是
+被调用__napi_alloc_frag和napi_alloc_skb的调用者使用。这两个调用的主要区别是
 它们可能被调用的环境。“netdev” 前缀的函数可以在任何上下文中使用，因为这些函数
 将禁用中断，而 ”napi“ 前缀的函数只可以在softirq上下文中使用。
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0d7177083708..ac2fcc5ac595 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2144,9 +2144,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	 */
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-			       I40E_RX_HDR_SIZE,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, I40E_RX_HDR_SIZE);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 11500003af0d..a85b425794df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -301,8 +301,7 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
 	net_prefetch(xdp->data_meta);
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
 	if (unlikely(!skb))
 		goto out;
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index b71484c87a84..32bb604a1382 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1334,9 +1334,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
 	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-			       IAVF_RX_HDR_SIZE,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IAVF_RX_HDR_SIZE);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 97d41d6ebf1f..8bb743f78fcb 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1051,8 +1051,7 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
 	}
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 1857220d27fe..aa81d1162b81 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -555,8 +555,7 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
 	}
 	net_prefetch(xdp->data_meta);
 
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 6dd7a66bb897..f940f650cd78 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3005,8 +3005,7 @@ struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
 	/* prefetch first cache line of first page */
 	net_prefetch(va);
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE,
-			       GFP_ATOMIC);
+	skb = napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE);
 	if (unlikely(!skb)) {
 		idpf_rx_put_page(rx_buf);
 
@@ -3060,7 +3059,7 @@ static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq,
 	struct sk_buff *skb;
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC);
+	skb = napi_alloc_skb(&rxq->q_vector->napi, size);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 35ad40a803cb..0cd923c8ac9b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2712,8 +2712,7 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
 
 	net_prefetch(xdp->data_meta);
 
-	skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&ring->q_vector->napi, totalsize);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index d34d715c59eb..397cb773fabb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -220,8 +220,7 @@ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
 	net_prefetch(xdp->data_meta);
 
 	/* allocate a skb to store the frags */
-	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 24cd80490d19..bcdde68a099a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5109,9 +5109,8 @@ static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch,
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct sk_buff *skb;
 
-	skb = __napi_alloc_skb(&ch->rxtx_napi,
-			       xdp->data_end - xdp->data_hard_start,
-			       GFP_ATOMIC | __GFP_NOWARN);
+	skb = napi_alloc_skb(&ch->rxtx_napi,
+			     xdp->data_end - xdp->data_hard_start);
 	if (unlikely(!skb))
 		return NULL;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 517e546a120a..b7f1ecdaec38 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3350,13 +3350,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz,
 	return __napi_alloc_frag_align(fragsz, -align);
 }
 
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
-				 unsigned int length, gfp_t gfp_mask);
-static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
-					     unsigned int length)
-{
-	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
-}
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length);
 void napi_consume_skb(struct sk_buff *skb, int budget);
 
 void napi_skb_free_stolen_head(struct sk_buff *skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17617c29be2d..a1be84be5d35 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -775,10 +775,9 @@ skb_fail:
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
 /**
- *	__napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
+ *	napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
  *	@napi: napi instance this buffer was allocated for
  *	@len: length to allocate
- *	@gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
  *
  *	Allocate a new sk_buff for use in NAPI receive.  This buffer will
  *	attempt to allocate the head from a special reserved region used
@@ -787,9 +786,9 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
  *
  *	%NULL is returned if there is no free memory.
  */
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
-				 gfp_t gfp_mask)
+struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 {
+	gfp_t gfp_mask = GFP_ATOMIC | __GFP_NOWARN;
 	struct napi_alloc_cache *nc;
 	struct sk_buff *skb;
 	bool pfmemalloc;
@@ -860,7 +859,7 @@ skb_success:
 skb_fail:
 	return skb;
 }
-EXPORT_SYMBOL(__napi_alloc_skb);
+EXPORT_SYMBOL(napi_alloc_skb);
 
 void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
 			    int off, int size, unsigned int truesize)
-- 
cgit v1.2.3-73-gaa49b


From ca7e324e8ad385a2da15049953c04ea7310687f7 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Wed, 27 Mar 2024 15:22:39 +0100
Subject: compiler_types: add Endianness-dependent __counted_by_{le,be}

Some structures contain flexible arrays at the end and the counter for
them, but the counter has explicit Endianness and thus __counted_by()
can't be used directly.

To increase test coverage for potential problems without breaking
anything, introduce __counted_by_{le,be}() defined depending on
platform's Endianness to either __counted_by() when applicable or noop
otherwise.
Maybe it would be a good idea to introduce such attributes on compiler
level if possible, but for now let's stop on what we have.

Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://lore.kernel.org/r/20240327142241.1745989-2-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/conf.py          |  2 ++
 include/linux/compiler_types.h | 11 +++++++++++
 scripts/kernel-doc             |  1 +
 3 files changed, 14 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/conf.py b/Documentation/conf.py
index d148f3e8dd57..0c2205d536b3 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -75,6 +75,8 @@ if major >= 3:
             "__rcu",
             "__user",
             "__force",
+            "__counted_by_le",
+            "__counted_by_be",
 
             # include/linux/compiler_attributes.h:
             "__alias",
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 2abaa3a825a9..a29ba6ef1e27 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -282,6 +282,17 @@ struct ftrace_likely_data {
 #define __no_sanitize_or_inline __always_inline
 #endif
 
+/*
+ * Apply __counted_by() when the Endianness matches to increase test coverage.
+ */
+#ifdef __LITTLE_ENDIAN
+#define __counted_by_le(member)	__counted_by(member)
+#define __counted_by_be(member)
+#else
+#define __counted_by_le(member)
+#define __counted_by_be(member)	__counted_by(member)
+#endif
+
 /* Do not trap wrapping arithmetic within an annotated function. */
 #ifdef CONFIG_UBSAN_SIGNED_WRAP
 # define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 967f1abb0edb..1474e95dbe4f 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1143,6 +1143,7 @@ sub dump_struct($$) {
         $members =~ s/\s*$attribute/ /gi;
         $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
         $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos;
+        $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos;
         $members =~ s/\s*__packed\s*/ /gos;
         $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
         $members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
-- 
cgit v1.2.3-73-gaa49b


From 2434ba2bc8518eafdb04852e70f5a2ba2ba1feee Mon Sep 17 00:00:00 2001
From: Eric Woudstra <ericwouds@gmail.com>
Date: Tue, 26 Mar 2024 17:23:04 +0100
Subject: dt-bindings: net: airoha,en8811h: Add en8811h

Add the Airoha EN8811H 2.5 Gigabit PHY.

The en8811h phy can be set with serdes polarity reversed on rx and/or tx.

Signed-off-by: Eric Woudstra <ericwouds@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20240326162305.303598-2-ericwouds@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/airoha,en8811h.yaml    | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/airoha,en8811h.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/airoha,en8811h.yaml b/Documentation/devicetree/bindings/net/airoha,en8811h.yaml
new file mode 100644
index 000000000000..ecb5149ec6b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/airoha,en8811h.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/airoha,en8811h.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha EN8811H PHY
+
+maintainers:
+  - Eric Woudstra <ericwouds@gmail.com>
+
+description:
+  The Airoha EN8811H PHY has the ability to reverse polarity
+  on the lines to and/or from the MAC. It is reversed by
+  the booleans in the devicetree node of the phy.
+
+allOf:
+  - $ref: ethernet-phy.yaml#
+
+properties:
+  compatible:
+    enum:
+      - ethernet-phy-id03a2.a411
+
+  reg:
+    maxItems: 1
+
+  airoha,pnswap-rx:
+    type: boolean
+    description:
+      Reverse rx polarity of the SERDES. This is the receiving
+      side of the lines from the MAC towards the EN881H.
+
+  airoha,pnswap-tx:
+    type: boolean
+    description:
+      Reverse tx polarity of SERDES. This is the transmitting
+      side of the lines from EN8811H towards the MAC.
+
+required:
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethernet-phy@1 {
+            compatible = "ethernet-phy-id03a2.a411";
+            reg = <1>;
+            airoha,pnswap-rx;
+        };
+    };
-- 
cgit v1.2.3-73-gaa49b


From 929107d3d2a3a363302c4cece3c73742863ef94b Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@foss.st.com>
Date: Thu, 28 Mar 2024 19:53:37 +0100
Subject: dt-bindings: net: dwmac: Document STM32 property st,ext-phyclk

The Linux kernel dwmac-stm32 driver currently supports three DT
properties used to configure whether PHY clock are generated by
the MAC or supplied to the MAC from the PHY.

Originally there were two properties, st,eth-clk-sel and
st,eth-ref-clk-sel, each used to configure MAC clocking in
different bus mode and for different MAC clock frequency.
Since it is possible to determine the MAC 'eth-ck' clock
frequency from the clock subsystem and PHY bus mode from
the 'phy-mode' property, two disparate DT properties are
no longer required to configure MAC clocking.

Linux kernel commit 1bb694e20839 ("net: ethernet: stmmac: simplify phy modes management for stm32")
introduced a third, unified, property st,ext-phyclk. This property
covers both use cases of st,eth-clk-sel and st,eth-ref-clk-sel DT
properties, as well as a new use case for 25 MHz clock generated
by the MAC.

The third property st,ext-phyclk is so far undocumented,
document it.

Below table summarizes the clock requirement and clock sources for
supported PHY interface modes.
 __________________________________________________________________________
|PHY_MODE | Normal | PHY wo crystal|   PHY wo crystal   |No 125Mhz from PHY|
|         |        |      25MHz    |        50MHz       |                  |

---------------------------------------------------------------------------
|  MII    |    -   |     eth-ck    |        n/a         |       n/a        |
|         |        | st,ext-phyclk |                    |                  |

---------------------------------------------------------------------------
|  GMII   |    -   |     eth-ck    |        n/a         |       n/a        |
|         |        | st,ext-phyclk |                    |                  |

---------------------------------------------------------------------------
| RGMII   |    -   |     eth-ck    |        n/a         |      eth-ck      |
|         |        | st,ext-phyclk |                    | st,eth-clk-sel or|
|         |        |               |                    | st,ext-phyclk    |

---------------------------------------------------------------------------
| RMII    |    -   |     eth-ck    |      eth-ck        |       n/a        |
|         |        | st,ext-phyclk | st,eth-ref-clk-sel |                  |
|         |        |               | or st,ext-phyclk   |                  |

---------------------------------------------------------------------------

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Marek Vasut <marex@denx.de>
Signed-off-by: Christophe Roullier <christophe.roullier@foss.st.com>
Link: https://lore.kernel.org/r/20240328185337.332703-2-christophe.roullier@foss.st.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/stm32-dwmac.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
index fc8c96b08d7d..857d58949b02 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
@@ -82,6 +82,13 @@ properties:
       Should be phandle/offset pair. The phandle to the syscon node which
       encompases the glue register, and the offset of the control register
 
+  st,ext-phyclk:
+    description:
+      set this property in RMII mode when you have PHY without crystal 50MHz and want to
+      select RCC clock instead of ETH_REF_CLK. OR in RGMII mode when you want to select
+      RCC clock instead of ETH_CLK125.
+    type: boolean
+
   st,eth-clk-sel:
     description:
       set this property in RGMII PHY when you want to select RCC clock instead of ETH_CLK125.
-- 
cgit v1.2.3-73-gaa49b


From 2dddf8aaf67fe5c5e24e2afa5cbcaabcc7dd4e2a Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 29 Mar 2024 13:50:21 +0000
Subject: doc: netlink: Update tc spec with missing definitions

The tc spec referenced tc-u32-mark and tc-act-police-attrs but did not
define them. The missing definitions were discovered when building the
docs with generated hyperlinks because the hyperlink target labels were
missing.

Add definitions for tc-u32-mark and tc-act-police-attrs.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://lore.kernel.org/r/20240329135021.52534-4-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/tc.yaml | 51 +++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index 324fa182cd14..6068c105c5ee 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -1099,6 +1099,19 @@ definitions:
       -
         name: offmask
         type: s32
+  -
+    name: tc-u32-mark
+    type: struct
+    members:
+      -
+        name: val
+        type: u32
+      -
+        name: mask
+        type: u32
+      -
+        name: success
+        type: u32
   -
     name: tc-u32-sel
     type: struct
@@ -1774,6 +1787,44 @@ attribute-sets:
       -
         name: key-ex
         type: binary
+  -
+    name: tc-act-police-attrs
+    attributes:
+      -
+        name: tbf
+        type: binary
+        struct: tc-police
+      -
+        name: rate
+        type: binary # TODO
+      -
+        name: peakrate
+        type: binary # TODO
+      -
+        name: avrate
+        type: u32
+      -
+        name: result
+        type: u32
+      -
+        name: tm
+        type: binary
+        struct: tcf-t
+      -
+        name: pad
+        type: pad
+      -
+        name: rate64
+        type: u64
+      -
+        name: peakrate64
+        type: u64
+      -
+        name: pktrate64
+        type: u64
+      -
+        name: pktburst64
+        type: u64
   -
     name: tc-act-simple-attrs
     attributes:
-- 
cgit v1.2.3-73-gaa49b


From 8da891720cd407edcb11242879c92b5dba3e3f7a Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Sat, 30 Mar 2024 14:12:28 +0100
Subject: dt-bindings: net: renesas,ethertsn: Create child-node for MDIO bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bindings for Renesas Ethernet TSN was just merged in v6.9 and the
design for the bindings followed that of other Renesas Ethernet drivers
and thus did not force a child-node for the MDIO bus. As there
are no upstream drivers or users of this binding yet take the
opportunity to correct this and force the usage of a child-node for the
MDIO bus.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240330131228.1541227-1-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/renesas,ethertsn.yaml  | 33 +++++++++-------------
 1 file changed, 14 insertions(+), 19 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
index ea35d19be829..b4680a1d0a06 100644
--- a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml
@@ -71,16 +71,8 @@ properties:
     enum: [0, 2000]
     default: 0
 
-  '#address-cells':
-    const: 1
-
-  '#size-cells':
-    const: 0
-
-patternProperties:
-  "^ethernet-phy@[0-9a-f]$":
-    type: object
-    $ref: ethernet-phy.yaml#
+  mdio:
+    $ref: /schemas/net/mdio.yaml#
     unevaluatedProperties: false
 
 required:
@@ -94,8 +86,7 @@ required:
   - resets
   - phy-mode
   - phy-handle
-  - '#address-cells'
-  - '#size-cells'
+  - mdio
 
 additionalProperties: false
 
@@ -122,14 +113,18 @@ examples:
         tx-internal-delay-ps = <2000>;
         phy-handle = <&phy3>;
 
-        #address-cells = <1>;
-        #size-cells = <0>;
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
 
-        phy3: ethernet-phy@3 {
-            compatible = "ethernet-phy-ieee802.3-c45";
-            reg = <0>;
-            interrupt-parent = <&gpio4>;
-            interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
             reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>;
+            reset-post-delay-us = <4000>;
+
+            phy3: ethernet-phy@0 {
+                compatible = "ethernet-phy-ieee802.3-c45";
+                reg = <0>;
+                interrupt-parent = <&gpio4>;
+                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+            };
         };
     };
-- 
cgit v1.2.3-73-gaa49b


From 387724cbf4153aba141daa98f7e2a619de113840 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 1 Apr 2024 11:10:01 +0800
Subject: Documentation: netlink: add a YAML spec for team

Add a YAML specification for team.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20240401031004.1159713-2-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/team.yaml | 204 ++++++++++++++++++++++++++++++++++
 MAINTAINERS                           |   1 +
 2 files changed, 205 insertions(+)
 create mode 100644 Documentation/netlink/specs/team.yaml

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/team.yaml b/Documentation/netlink/specs/team.yaml
new file mode 100644
index 000000000000..c13529e011c9
--- /dev/null
+++ b/Documentation/netlink/specs/team.yaml
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: team
+
+protocol: genetlink-legacy
+
+doc: |
+  Network team device driver.
+
+c-family-name: team-genl-name
+c-version-name: team-genl-version
+kernel-policy: global
+uapi-header: linux/if_team.h
+
+definitions:
+  -
+    name: string-max-len
+    type: const
+    value: 32
+  -
+    name: genl-change-event-mc-grp-name
+    type: const
+    value: change_event
+
+attribute-sets:
+  -
+    name: team
+    doc:
+      The team nested layout of get/set msg looks like
+          [TEAM_ATTR_LIST_OPTION]
+              [TEAM_ATTR_ITEM_OPTION]
+                  [TEAM_ATTR_OPTION_*], ...
+              [TEAM_ATTR_ITEM_OPTION]
+                  [TEAM_ATTR_OPTION_*], ...
+              ...
+          [TEAM_ATTR_LIST_PORT]
+              [TEAM_ATTR_ITEM_PORT]
+                  [TEAM_ATTR_PORT_*], ...
+              [TEAM_ATTR_ITEM_PORT]
+                  [TEAM_ATTR_PORT_*], ...
+              ...
+    name-prefix: team-attr-
+    attributes:
+      -
+        name: unspec
+        type: unused
+        value: 0
+      -
+        name: team-ifindex
+        type: u32
+      -
+        name: list-option
+        type: nest
+        nested-attributes: item-option
+      -
+        name: list-port
+        type: nest
+        nested-attributes: item-port
+  -
+    name: item-option
+    name-prefix: team-attr-item-
+    attr-cnt-name: __team-attr-item-option-max
+    attr-max-name: team-attr-item-option-max
+    attributes:
+      -
+        name: option-unspec
+        type: unused
+        value: 0
+      -
+        name: option
+        type: nest
+        nested-attributes: attr-option
+  -
+    name: attr-option
+    name-prefix: team-attr-option-
+    attributes:
+      -
+        name: unspec
+        type: unused
+        value: 0
+      -
+        name: name
+        type: string
+        checks:
+          max-len: string-max-len
+          unterminated-ok: true
+      -
+        name: changed
+        type: flag
+      -
+        name: type
+        type: u8
+      -
+        name: data
+        type: binary
+      -
+        name: removed
+        type: flag
+      -
+        name: port-ifindex
+        type: u32
+        doc: for per-port options
+      -
+        name: array-index
+        type: u32
+        doc: for array options
+  -
+    name: item-port
+    name-prefix: team-attr-item-
+    attr-cnt-name: __team-attr-item-port-max
+    attr-max-name: team-attr-item-port-max
+    attributes:
+      -
+        name: port-unspec
+        type: unused
+        value: 0
+      -
+        name: port
+        type: nest
+        nested-attributes: attr-port
+  -
+    name: attr-port
+    name-prefix: team-attr-port-
+    attributes:
+      -
+        name: unspec
+        type: unused
+        value: 0
+      -
+        name: ifindex
+        type: u32
+      -
+        name: changed
+        type: flag
+      -
+        name: linkup
+        type: flag
+      -
+        name: speed
+        type: u32
+      -
+        name: duplex
+        type: u8
+      -
+        name: removed
+        type: flag
+
+operations:
+  list:
+    -
+      name: noop
+      doc: No operation
+      value: 0
+      attribute-set: team
+      dont-validate: [ strict ]
+
+      do:
+        # Actually it only reply the team netlink family
+        reply:
+          attributes:
+            - team-ifindex
+
+    -
+      name: options-set
+      doc: Set team options
+      attribute-set: team
+      dont-validate: [ strict ]
+      flags: [ admin-perm ]
+
+      do:
+        request: &option_attrs
+          attributes:
+            - team-ifindex
+            - list-option
+        reply: *option_attrs
+
+    -
+      name: options-get
+      doc: Get team options info
+      attribute-set: team
+      dont-validate: [ strict ]
+      flags: [ admin-perm ]
+
+      do:
+        request:
+          attributes:
+            - team-ifindex
+        reply: *option_attrs
+
+    -
+      name: port-list-get
+      doc: Get team ports info
+      attribute-set: team
+      dont-validate: [ strict ]
+      flags: [ admin-perm ]
+
+      do:
+        request:
+          attributes:
+            - team-ifindex
+        reply: &port_attrs
+          attributes:
+            - team-ifindex
+            - list-port
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a233e1a3cf2..909c2c531d8e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21665,6 +21665,7 @@ TEAM DRIVER
 M:	Jiri Pirko <jiri@resnulli.us>
 L:	netdev@vger.kernel.org
 S:	Supported
+F:	Documentation/netlink/specs/team.yaml
 F:	drivers/net/team/
 F:	include/linux/if_team.h
 F:	include/uapi/linux/if_team.h
-- 
cgit v1.2.3-73-gaa49b


From 992c287d87780abd184c67a303dec3361b7cb408 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 1 Apr 2024 15:44:22 -0500
Subject: dt-bindings: net: snps,dwmac: Align 'snps,priority' type definition

'snps,priority' is also defined in dma/snps,dw-axi-dmac.yaml as a
uint32-array. It's preferred to have a single type for a given property
name, so update the type in snps,dwmac schema to match.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240401204422.1692359-2-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 6b0341a8e0ea..15073627c53a 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -242,7 +242,8 @@ properties:
             type: boolean
             description: Multicast & Broadcast Packets
           snps,priority:
-            $ref: /schemas/types.yaml#/definitions/uint32
+            $ref: /schemas/types.yaml#/definitions/uint32-array
+            maxItems: 1
             description: Bitmask of the tagged frames priorities assigned to the queue
         allOf:
           - if:
@@ -393,7 +394,8 @@ properties:
             $ref: /schemas/types.yaml#/definitions/uint32
             description: max read outstanding req. limit
           snps,priority:
-            $ref: /schemas/types.yaml#/definitions/uint32
+            $ref: /schemas/types.yaml#/definitions/uint32-array
+            maxItems: 1
             description:
               Bitmask of the tagged frames priorities assigned to the queue.
               When a PFC frame is received with priorities matching the bitmask,
-- 
cgit v1.2.3-73-gaa49b


From 1d056bf9a4c15d433e9aa49558e58c1cf8d7f70c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 2 Apr 2024 19:34:20 -0700
Subject: netlink: specs: define ethtool header flags

When interfacing with the ethtool commands it's handy to
be able to use the names of the flags. Example:

    ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
                                "flags": {'stats'}}})

Note that not all commands accept all the flags,
but the meaning of the bits does not change command
to command.

Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20240403023426.1762996-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 197208f419dc..d0e4a47e0f21 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -16,6 +16,11 @@ definitions:
     name: stringset
     type: enum
     entries: []
+  -
+    name: header-flags
+    enum-name:
+    type: flags
+    entries: [ compact-bitsets, omit-reply, stats ]
 
 attribute-sets:
   -
@@ -30,6 +35,7 @@ attribute-sets:
       -
         name: flags
         type: u32
+        enum: header-flags
 
   -
     name: bitset-bit
-- 
cgit v1.2.3-73-gaa49b


From 1a9de5646559d89bc2f90e0286cb257cbae5c562 Mon Sep 17 00:00:00 2001
From: Tan Chun Hau <chunhau.tan@starfivetech.com>
Date: Wed, 3 Apr 2024 03:05:49 -0700
Subject: dt-bindings: net: starfive,jh7110-dwmac: Add StarFive JH8100 support

Add StarFive JH8100 dwmac support.
The JH8100 dwmac shares the same driver code as the JH7110 dwmac
and has only one reset signal.

Please refer to below:

  JH8100: reset-names = "stmmaceth";
  JH7110: reset-names = "stmmaceth", "ahb";
  JH7100: reset-names = "ahb";

Example usage of JH8100 in the device tree:

gmac0: ethernet@16030000 {
        compatible = "starfive,jh8100-dwmac",
                     "starfive,jh7110-dwmac",
                     "snps,dwmac-5.20";
        ...
};

Signed-off-by: Tan Chun Hau <chunhau.tan@starfivetech.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240403100549.78719-2-chunhau.tan@starfivetech.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../bindings/net/starfive,jh7110-dwmac.yaml        | 28 ++++++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
index 0d1962980f57..313a15331661 100644
--- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -30,6 +30,10 @@ properties:
       - items:
           - const: starfive,jh7110-dwmac
           - const: snps,dwmac-5.20
+      - items:
+          - const: starfive,jh8100-dwmac
+          - const: starfive,jh7110-dwmac
+          - const: snps,dwmac-5.20
 
   reg:
     maxItems: 1
@@ -116,11 +120,25 @@ allOf:
           minItems: 3
           maxItems: 3
 
-        resets:
-          minItems: 2
-
-        reset-names:
-          minItems: 2
+      if:
+        properties:
+          compatible:
+            contains:
+              const: starfive,jh8100-dwmac
+      then:
+        properties:
+          resets:
+            maxItems: 1
+
+          reset-names:
+            const: stmmaceth
+      else:
+        properties:
+          resets:
+            minItems: 2
+
+          reset-names:
+            minItems: 2
 
 unevaluatedProperties: false
 
-- 
cgit v1.2.3-73-gaa49b


From 158fff51b4c326d053e42c146e7798ae5748087f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 4 Apr 2024 13:17:25 +0300
Subject: dt-bindings: net: wireless: ath10k: describe firmware-name property

For WCN3990 platforms we need to look for the platform / board specific
firmware-N.mbn file which corresponds to the wlanmdsp.mbn loaded to the
modem DSP via the TQFTPserv. Add firmware-name property describing this
classifier.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/20240306-wcn3990-firmware-path-v2-1-f89e98e71a57@linaro.org
---
 Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
index 9b3ef4bc3732..5c4498b762c8 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
@@ -73,6 +73,12 @@ properties:
       - sky85703-11
       - sky85803
 
+  firmware-name:
+    maxItems: 1
+    description:
+      If present, a board or platform specific string used to lookup firmware
+      files for the device.
+
   wifi-firmware:
     type: object
     additionalProperties: false
-- 
cgit v1.2.3-73-gaa49b


From 00d5d22a5b42c3ffdfd1b29526885bbcec2d2231 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler1968@googlemail.com>
Date: Fri, 5 Apr 2024 08:52:45 -0700
Subject: bpf, docs: Editorial nits in instruction-set.rst

This patch addresses a number of editorial nits including
spelling, punctuation, grammar, and wording consistency issues
in instruction-set.rst.

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20240405155245.3618-1-dthaler1968@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/standardization/instruction-set.rst        | 47 ++++++++++++----------
 1 file changed, 26 insertions(+), 21 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index a5ab00ac0b14..8d0781f0bd17 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -43,7 +43,7 @@ a type's signedness (`S`) and bit width (`N`), respectively.
   ===== =========
 
 For example, `u32` is a type whose valid values are all the 32-bit unsigned
-numbers and `s16` is a types whose valid values are all the 16-bit signed
+numbers and `s16` is a type whose valid values are all the 16-bit signed
 numbers.
 
 Functions
@@ -108,7 +108,7 @@ conformance group means it must support all instructions in that conformance
 group.
 
 The use of named conformance groups enables interoperability between a runtime
-that executes instructions, and tools as such compilers that generate
+that executes instructions, and tools such as compilers that generate
 instructions for the runtime.  Thus, capability discovery in terms of
 conformance groups might be done manually by users or automatically by tools.
 
@@ -181,10 +181,13 @@ A basic instruction is encoded as follows::
     (`64-bit immediate instructions`_ reuse this field for other purposes)
 
   **dst_reg**
-    destination register number (0-10)
+    destination register number (0-10), unless otherwise specified
+    (future instructions might reuse this field for other purposes)
 
 **offset**
-  signed integer offset used with pointer arithmetic
+  signed integer offset used with pointer arithmetic, except where
+  otherwise specified (some arithmetic instructions reuse this field
+  for other purposes)
 
 **imm**
   signed integer immediate value
@@ -228,10 +231,12 @@ This is depicted in the following figure::
   operation to perform, encoded as explained above
 
 **regs**
-  The source and destination register numbers, encoded as explained above
+  The source and destination register numbers (unless otherwise
+  specified), encoded as explained above
 
 **offset**
-  signed integer offset used with pointer arithmetic
+  signed integer offset used with pointer arithmetic, unless
+  otherwise specified
 
 **imm**
   signed integer immediate value
@@ -342,8 +347,8 @@ where '(u32)' indicates that the upper 32 bits are zeroed.
 
   dst = dst ^ imm
 
-Note that most instructions have instruction offset of 0. Only three instructions
-(``SDIV``, ``SMOD``, ``MOVSX``) have a non-zero offset.
+Note that most arithmetic instructions have 'offset' set to 0. Only three instructions
+(``SDIV``, ``SMOD``, ``MOVSX``) have a non-zero 'offset'.
 
 Division, multiplication, and modulo operations for ``ALU`` are part
 of the "divmul32" conformance group, and division, multiplication, and
@@ -370,10 +375,10 @@ etc. This specification requires that signed modulo use truncated division
    a % n = a - n * trunc(a / n)
 
 The ``MOVSX`` instruction does a move operation with sign extension.
-``{MOVSX, X, ALU}`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into 32
-bit operands, and zeroes the remaining upper 32 bits.
+``{MOVSX, X, ALU}`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into
+32-bit operands, and zeroes the remaining upper 32 bits.
 ``{MOVSX, X, ALU64}`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
-operands into 64 bit operands.  Unlike other arithmetic instructions,
+operands into 64-bit operands.  Unlike other arithmetic instructions,
 ``MOVSX`` is only defined for register source operands (``X``).
 
 The ``NEG`` instruction is only defined when the source bit is clear
@@ -411,19 +416,19 @@ conformance group.
 
 Examples:
 
-``{END, TO_LE, ALU}`` with imm = 16/32/64 means::
+``{END, TO_LE, ALU}`` with 'imm' = 16/32/64 means::
 
   dst = htole16(dst)
   dst = htole32(dst)
   dst = htole64(dst)
 
-``{END, TO_BE, ALU}`` with imm = 16/32/64 means::
+``{END, TO_BE, ALU}`` with 'imm' = 16/32/64 means::
 
   dst = htobe16(dst)
   dst = htobe32(dst)
   dst = htobe64(dst)
 
-``{END, TO_LE, ALU64}`` with imm = 16/32/64 means::
+``{END, TO_LE, ALU64}`` with 'imm' = 16/32/64 means::
 
   dst = bswap16(dst)
   dst = bswap32(dst)
@@ -475,7 +480,7 @@ where 's>=' indicates a signed '>=' comparison.
 
   gotol +imm
 
-where 'imm' means the branch offset comes from insn 'imm' field.
+where 'imm' means the branch offset comes from the 'imm' field.
 
 Note that there are two flavors of ``JA`` instructions. The
 ``JMP`` class permits a 16-bit jump offset specified by the 'offset'
@@ -494,25 +499,25 @@ Helper functions are a concept whereby BPF programs can call into a
 set of function calls exposed by the underlying platform.
 
 Historically, each helper function was identified by an address
-encoded in the imm field.  The available helper functions may differ
+encoded in the 'imm' field.  The available helper functions may differ
 for each program type, but address values are unique across all program types.
 
 Platforms that support the BPF Type Format (BTF) support identifying
-a helper function by a BTF ID encoded in the imm field, where the BTF ID
+a helper function by a BTF ID encoded in the 'imm' field, where the BTF ID
 identifies the helper name and type.
 
 Program-local functions
 ~~~~~~~~~~~~~~~~~~~~~~~
 Program-local functions are functions exposed by the same BPF program as the
 caller, and are referenced by offset from the call instruction, similar to
-``JA``.  The offset is encoded in the imm field of the call instruction.
-A ``EXIT`` within the program-local function will return to the caller.
+``JA``.  The offset is encoded in the 'imm' field of the call instruction.
+An ``EXIT`` within the program-local function will return to the caller.
 
 Load and store instructions
 ===========================
 
 For load and store instructions (``LD``, ``LDX``, ``ST``, and ``STX``), the
-8-bit 'opcode' field is divided as::
+8-bit 'opcode' field is divided as follows::
 
   +-+-+-+-+-+-+-+-+
   |mode |sz |class|
@@ -580,7 +585,7 @@ instructions that transfer data between a register and memory.
 
   dst = *(signed size *) (src + offset)
 
-Where size is one of: ``B``, ``H``, or ``W``, and
+Where '<size>' is one of: ``B``, ``H``, or ``W``, and
 'signed size' is one of: s8, s16, or s32.
 
 Atomic operations
-- 
cgit v1.2.3-73-gaa49b


From 0e9c127729be19adcdf2f5cc1f3450a4322fdf3a Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 3 Apr 2024 14:28:39 -0700
Subject: ethtool: add interface to read Tx hardware timestamping statistics

Multiple network devices that support hardware timestamping appear to have
common behavior with regards to timestamp handling. Implement common Tx
hardware timestamping statistics in a tx_stats struct_group. Common Rx
hardware timestamping statistics can subsequently be implemented in a
rx_stats struct_group for ethtool_ts_stats.

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Link: https://lore.kernel.org/r/20240403212931.128541-2-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml     | 17 +++++++++
 Documentation/networking/ethtool-netlink.rst |  9 +++++
 include/linux/ethtool.h                      | 27 ++++++++++++++-
 include/uapi/linux/ethtool_netlink.h         | 14 ++++++++
 net/ethtool/tsinfo.c                         | 52 +++++++++++++++++++++++++++-
 5 files changed, 117 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index d0e4a47e0f21..b76916394ec9 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -565,6 +565,18 @@ attribute-sets:
       -
         name: tx-lpi-timer
         type: u32
+  -
+    name: ts-stat
+    attributes:
+      -
+        name: tx-pkts
+        type: uint
+      -
+        name: tx-lost
+        type: uint
+      -
+        name: tx-err
+        type: uint
   -
     name: tsinfo
     attributes:
@@ -587,6 +599,10 @@ attribute-sets:
       -
         name: phc-index
         type: u32
+      -
+        name: stats
+        type: nest
+        nested-attributes: ts-stat
   -
     name: cable-result
     attributes:
@@ -1394,6 +1410,7 @@ operations:
             - tx-types
             - rx-filters
             - phc-index
+            - stats
       dump: *tsinfo-get-op
     -
       name: cable-test-act
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index d583d9abf2f8..08d330b0f50f 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1237,12 +1237,21 @@ Kernel response contents:
   ``ETHTOOL_A_TSINFO_TX_TYPES``          bitset  supported Tx types
   ``ETHTOOL_A_TSINFO_RX_FILTERS``        bitset  supported Rx filters
   ``ETHTOOL_A_TSINFO_PHC_INDEX``         u32     PTP hw clock index
+  ``ETHTOOL_A_TSINFO_STATS``             nested  HW timestamping statistics
   =====================================  ======  ==========================
 
 ``ETHTOOL_A_TSINFO_PHC_INDEX`` is absent if there is no associated PHC (there
 is no special value for this case). The bitset attributes are omitted if they
 would be empty (no bit set).
 
+Additional hardware timestamping statistics response contents:
+
+  =====================================  ======  ===================================
+  ``ETHTOOL_A_TS_STAT_TX_PKTS``          u64     Packets with Tx HW timestamps
+  ``ETHTOOL_A_TS_STAT_TX_LOST``          u64     Tx HW timestamp not arrived count
+  ``ETHTOOL_A_TS_STAT_TX_ERR``           u64     HW error request Tx timestamp count
+  =====================================  ======  ===================================
+
 CABLE_TEST
 ==========
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 9901e563f706..6fd9107d3cc0 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -480,6 +480,26 @@ struct ethtool_rmon_stats {
 	);
 };
 
+/**
+ * struct ethtool_ts_stats - HW timestamping statistics
+ * @pkts: Number of packets successfully timestamped by the hardware.
+ * @lost: Number of hardware timestamping requests where the timestamping
+ *	information from the hardware never arrived for submission with
+ *	the skb.
+ * @err: Number of arbitrary timestamp generation error events that the
+ *	hardware encountered, exclusive of @lost statistics. Cases such
+ *	as resource exhaustion, unavailability, firmware errors, and
+ *	detected illogical timestamp values not submitted with the skb
+ *	are inclusive to this counter.
+ */
+struct ethtool_ts_stats {
+	struct_group(tx_stats,
+		u64 pkts;
+		u64 lost;
+		u64 err;
+	);
+};
+
 #define ETH_MODULE_EEPROM_PAGE_LEN	128
 #define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
 
@@ -755,7 +775,10 @@ struct ethtool_rxfh_param {
  * @get_ts_info: Get the time stamping and PTP hardware clock capabilities.
  *	It may be called with RCU, or rtnl or reference on the device.
  *	Drivers supporting transmit time stamps in software should set this to
- *	ethtool_op_get_ts_info().
+ *	ethtool_op_get_ts_info(). Drivers must not zero statistics which they
+ *	don't report. The stats	structure is initialized to ETHTOOL_STAT_NOT_SET
+ *	indicating driver does not report statistics.
+ * @get_ts_stats: Query the device hardware timestamping statistics.
  * @get_module_info: Get the size and type of the eeprom contained within
  *	a plug-in module.
  * @get_module_eeprom: Get the eeprom information from the plug-in module
@@ -898,6 +921,8 @@ struct ethtool_ops {
 				 struct ethtool_dump *, void *);
 	int	(*set_dump)(struct net_device *, struct ethtool_dump *);
 	int	(*get_ts_info)(struct net_device *, struct ethtool_ts_info *);
+	void	(*get_ts_stats)(struct net_device *dev,
+				struct ethtool_ts_stats *ts_stats);
 	int     (*get_module_info)(struct net_device *,
 				   struct ethtool_modinfo *);
 	int     (*get_module_eeprom)(struct net_device *,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index accbb1a231df..708272026d80 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -478,12 +478,26 @@ enum {
 	ETHTOOL_A_TSINFO_TX_TYPES,			/* bitset */
 	ETHTOOL_A_TSINFO_RX_FILTERS,			/* bitset */
 	ETHTOOL_A_TSINFO_PHC_INDEX,			/* u32 */
+	ETHTOOL_A_TSINFO_STATS,				/* nest - _A_TSINFO_STAT */
 
 	/* add new constants above here */
 	__ETHTOOL_A_TSINFO_CNT,
 	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_TS_STAT_UNSPEC,
+
+	ETHTOOL_A_TS_STAT_TX_PKTS,			/* u64 */
+	ETHTOOL_A_TS_STAT_TX_LOST,			/* u64 */
+	ETHTOOL_A_TS_STAT_TX_ERR,			/* u64 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_TS_STAT_CNT,
+	ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1)
+
+};
+
 /* PHC VCLOCKS */
 
 enum {
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 9daed0aab162..be2755c8d8fd 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -13,14 +13,18 @@ struct tsinfo_req_info {
 struct tsinfo_reply_data {
 	struct ethnl_reply_data		base;
 	struct ethtool_ts_info		ts_info;
+	struct ethtool_ts_stats		stats;
 };
 
 #define TSINFO_REPDATA(__reply_base) \
 	container_of(__reply_base, struct tsinfo_reply_data, base)
 
+#define ETHTOOL_TS_STAT_CNT \
+	(__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1))
+
 const struct nla_policy ethnl_tsinfo_get_policy[] = {
 	[ETHTOOL_A_TSINFO_HEADER]		=
-		NLA_POLICY_NESTED(ethnl_header_policy),
+		NLA_POLICY_NESTED(ethnl_header_policy_stats),
 };
 
 static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
@@ -34,6 +38,12 @@ static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		return ret;
+	if (req_base->flags & ETHTOOL_FLAG_STATS &&
+	    dev->ethtool_ops->get_ts_stats) {
+		ethtool_stats_init((u64 *)&data->stats,
+				   sizeof(data->stats) / sizeof(u64));
+		dev->ethtool_ops->get_ts_stats(dev, &data->stats);
+	}
 	ret = __ethtool_get_ts_info(dev, &data->ts_info);
 	ethnl_ops_complete(dev);
 
@@ -79,10 +89,47 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base,
 	}
 	if (ts_info->phc_index >= 0)
 		len += nla_total_size(sizeof(u32));	/* _TSINFO_PHC_INDEX */
+	if (req_base->flags & ETHTOOL_FLAG_STATS)
+		len += nla_total_size(0) + /* _TSINFO_STATS */
+		       nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT;
 
 	return len;
 }
 
+static int tsinfo_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+	if (val == ETHTOOL_STAT_NOT_SET)
+		return 0;
+	if (nla_put_uint(skb, attrtype, val))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int tsinfo_put_stats(struct sk_buff *skb,
+			    const struct ethtool_ts_stats *stats)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_STATS);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (tsinfo_put_stat(skb, stats->tx_stats.pkts,
+			    ETHTOOL_A_TS_STAT_TX_PKTS) ||
+	    tsinfo_put_stat(skb, stats->tx_stats.lost,
+			    ETHTOOL_A_TS_STAT_TX_LOST) ||
+	    tsinfo_put_stat(skb, stats->tx_stats.err,
+			    ETHTOOL_A_TS_STAT_TX_ERR))
+		goto err_cancel;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+err_cancel:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static int tsinfo_fill_reply(struct sk_buff *skb,
 			     const struct ethnl_req_info *req_base,
 			     const struct ethnl_reply_data *reply_base)
@@ -119,6 +166,9 @@ static int tsinfo_fill_reply(struct sk_buff *skb,
 	if (ts_info->phc_index >= 0 &&
 	    nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index))
 		return -EMSGSIZE;
+	if (req_base->flags & ETHTOOL_FLAG_STATS &&
+	    tsinfo_put_stats(skb, &data->stats))
+		return -EMSGSIZE;
 
 	return 0;
 }
-- 
cgit v1.2.3-73-gaa49b


From adda5401807882f5575364442cd583462fc40ae4 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 3 Apr 2024 14:28:40 -0700
Subject: net/mlx5e: Introduce lost_cqe statistic counter for PTP Tx port
 timestamping CQ

Track the number of times a CQE was expected to not be delivered on PTP Tx
port timestamping CQ. A CQE is expected to not be delivered if a certain
amount of time passes since the corresponding CQE containing the DMA
timestamp information has arrived. Increment the late_cqe counter when such
a CQE does manage to be delivered to the CQ.

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20240403212931.128541-3-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/device_drivers/ethernet/mellanox/mlx5/counters.rst   | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c                    | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c                  | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h                  | 1 +
 4 files changed, 9 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
index f69ee1ebee01..5464cd9e2694 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -702,6 +702,12 @@ the software port.
        the device typically ensures not posting the CQE.
      - Error
 
+   * - `ptp_cq[i]_lost_cqe`
+     - Number of times a CQE is expected to not be delivered on the PTP
+       timestamping CQE by the device due to a time delta elapsing. If such a
+       CQE is somehow delivered, `ptp_cq[i]_late_cqe` is incremented.
+     - Error
+
 .. [#ring_global] The corresponding ring and global counters do not share the
                   same name (i.e. do not follow the common naming scheme).
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index d0af7271da34..afd654583b6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -169,6 +169,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
 		WARN_ON_ONCE(!pos->inuse);
 		pos->inuse = false;
 		list_del(&pos->entry);
+		ptpsq->cq_stats->lost_cqe++;
 	}
 	spin_unlock_bh(&cqe_list->tracker_list_lock);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4f372cb2fc9a..3004778e253e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -2178,6 +2178,7 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) },
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, late_cqe) },
+	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, lost_cqe) },
 };
 
 static const struct counter_desc ptp_rq_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index b71e3fdf92c5..ccc67a471f68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -463,6 +463,7 @@ struct mlx5e_ptp_cq_stats {
 	u64 abort;
 	u64 abort_abs_diff_ns;
 	u64 late_cqe;
+	u64 lost_cqe;
 };
 
 struct mlx5e_rep_stats {
-- 
cgit v1.2.3-73-gaa49b


From cd429012f078395c5bfa558e2e81e882b48023b0 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 3 Apr 2024 14:28:41 -0700
Subject: net/mlx5e: Introduce timestamps statistic counter for Tx DMA layer

Count number of transmitted packets that were hardware timestamped at the
device DMA layer.

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Link: https://lore.kernel.org/r/20240403212931.128541-4-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/device_drivers/ethernet/mellanox/mlx5/counters.rst   | 5 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c                  | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h                  | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c                     | 6 ++++--
 4 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
index 5464cd9e2694..fed821ef9b09 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -300,6 +300,11 @@ the software port.
        in the beginning of the queue. This is a normal condition.
      - Informative
 
+   * - `tx[i]_timestamps`
+     - Transmitted packets that were hardware timestamped at the device's DMA
+       layer.
+     - Informative
+
    * - `tx[i]_added_vlan_packets`
      - The number of packets sent where vlan tag insertion was offloaded to the
        hardware.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 3004778e253e..aa1de8bf0841 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -2066,6 +2066,7 @@ static const struct counter_desc sq_stats_desc[] = {
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, timestamps) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
 #ifdef CONFIG_MLX5_EN_TLS
@@ -2218,6 +2219,7 @@ static const struct counter_desc qos_sq_stats_desc[] = {
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, timestamps) },
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
 #ifdef CONFIG_MLX5_EN_TLS
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index ccc67a471f68..b0090ad133aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -431,6 +431,7 @@ struct mlx5e_sq_stats {
 	u64 stopped;
 	u64 dropped;
 	u64 recover;
+	u64 timestamps;
 	/* dirtied @completion */
 	u64 cqes ____cacheline_aligned_in_smp;
 	u64 wake;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2fa076b23fbe..09a592909101 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -750,11 +750,13 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		u64 ts = get_cqe_ts(cqe);
 
 		hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
-		if (sq->ptpsq)
+		if (sq->ptpsq) {
 			mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
 						      hwts.hwtstamp, sq->ptpsq->cq_stats);
-		else
+		} else {
 			skb_tstamp_tx(skb, &hwts);
+			sq->stats->timestamps++;
+		}
 	}
 
 	napi_consume_skb(skb, napi_budget);
-- 
cgit v1.2.3-73-gaa49b


From ff8877b04ef282b2bdb16c9dccc2e42216a34f62 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 5 Apr 2024 22:22:38 -0700
Subject: netlink: specs: ethtool: define header-flags as an enum

Recent changes added header flags to the spec.
Use an enum instead of defines for more seamless codegen.

[Jakub: drop the already applied parts and rewrite message]

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Link: https://lore.kernel.org/r/20240403212931.128541-6-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml |  1 -
 include/uapi/linux/ethtool_netlink.h     | 11 +++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index b76916394ec9..87ae7b397984 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -18,7 +18,6 @@ definitions:
     entries: []
   -
     name: header-flags
-    enum-name:
     type: flags
     entries: [ compact-bitsets, omit-reply, stats ]
 
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 708272026d80..e06a34cded78 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -117,12 +117,11 @@ enum {
 
 /* request header */
 
-/* use compact bitsets in reply */
-#define ETHTOOL_FLAG_COMPACT_BITSETS	(1 << 0)
-/* provide optional reply for SET or ACT requests */
-#define ETHTOOL_FLAG_OMIT_REPLY	(1 << 1)
-/* request statistics, if supported by the driver */
-#define ETHTOOL_FLAG_STATS		(1 << 2)
+enum ethtool_header_flags {
+	ETHTOOL_FLAG_COMPACT_BITSETS	= 1 << 0,	/* use compact bitsets in reply */
+	ETHTOOL_FLAG_OMIT_REPLY		= 1 << 1,	/* provide optional reply for SET or ACT requests */
+	ETHTOOL_FLAG_STATS		= 1 << 2,	/* request statistics, if supported by the driver */
+};
 
 #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
 			  ETHTOOL_FLAG_OMIT_REPLY | \
-- 
cgit v1.2.3-73-gaa49b


From aa6485d813ad6d884d23e1e9cf3913be29a1f229 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 4 Apr 2024 14:31:12 +0800
Subject: ynl: rename array-nest to indexed-array

Some implementations, like bonding, has nest array with same attr type.
To support all kinds of entries under one nest array. As discussed[1],
let's rename array-nest to indexed-array, and assuming the value is
a nest by passing the type via sub-type.

[1] https://lore.kernel.org/netdev/20240312100105.16a59086@kernel.org/

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240404063114.1221532-2-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-c.yaml              |  2 +-
 Documentation/netlink/genetlink-legacy.yaml         |  2 +-
 Documentation/netlink/genetlink.yaml                |  2 +-
 Documentation/netlink/netlink-raw.yaml              |  2 +-
 Documentation/netlink/specs/nlctrl.yaml             |  6 ++++--
 Documentation/netlink/specs/rt_link.yaml            |  3 ++-
 Documentation/netlink/specs/tc.yaml                 | 21 ++++++++++++++-------
 .../userspace-api/netlink/genetlink-legacy.rst      | 12 +++++++++---
 tools/net/ynl/lib/ynl.py                            | 13 ++++++++-----
 tools/net/ynl/ynl-gen-c.py                          | 18 ++++++++++++------
 10 files changed, 53 insertions(+), 28 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 4dfd899a1661..4f803eaac6d8 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -158,7 +158,7 @@ properties:
               type: &attr-type
                 enum: [ unused, pad, flag, binary,
                         uint, sint, u8, u16, u32, u64, s32, s64,
-                        string, nest, array-nest, nest-type-value ]
+                        string, nest, indexed-array, nest-type-value ]
               doc:
                 description: Documentation of the attribute.
                 type: string
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index b48ad3b1cc32..8db0e22fa72c 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -201,7 +201,7 @@ properties:
                 description: The netlink attribute type
                 enum: [ unused, pad, flag, binary, bitfield32,
                         uint, sint, u8, u16, u32, u64, s32, s64,
-                        string, nest, array-nest, nest-type-value ]
+                        string, nest, indexed-array, nest-type-value ]
               doc:
                 description: Documentation of the attribute.
                 type: string
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index ebd6ee743fcc..b036227b46f1 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -124,7 +124,7 @@ properties:
               type: &attr-type
                 enum: [ unused, pad, flag, binary,
                         uint, sint, u8, u16, u32, u64, s32, s64,
-                        string, nest, array-nest, nest-type-value ]
+                        string, nest, indexed-array, nest-type-value ]
               doc:
                 description: Documentation of the attribute.
                 type: string
diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml
index a76e54cbadbc..914aa1c0a273 100644
--- a/Documentation/netlink/netlink-raw.yaml
+++ b/Documentation/netlink/netlink-raw.yaml
@@ -222,7 +222,7 @@ properties:
                 description: The netlink attribute type
                 enum: [ unused, pad, flag, binary, bitfield32,
                         u8, u16, u32, u64, s8, s16, s32, s64,
-                        string, nest, array-nest, nest-type-value,
+                        string, nest, indexed-array, nest-type-value,
                         sub-message ]
               doc:
                 description: Documentation of the attribute.
diff --git a/Documentation/netlink/specs/nlctrl.yaml b/Documentation/netlink/specs/nlctrl.yaml
index b1632b95f725..a36535350bdb 100644
--- a/Documentation/netlink/specs/nlctrl.yaml
+++ b/Documentation/netlink/specs/nlctrl.yaml
@@ -65,11 +65,13 @@ attribute-sets:
         type: u32
       -
         name: ops
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: op-attrs
       -
         name: mcast-groups
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: mcast-group-attrs
       -
         name: policy
diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index 81a5a3d1b04d..e5dcb2cf1724 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -1690,7 +1690,8 @@ attribute-sets:
         type: binary
       -
         name: hw-s-info
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: hw-s-info-one
       -
         name: l3-stats
diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml
index 6068c105c5ee..8c01e4e13195 100644
--- a/Documentation/netlink/specs/tc.yaml
+++ b/Documentation/netlink/specs/tc.yaml
@@ -1988,7 +1988,8 @@ attribute-sets:
         nested-attributes: tc-ematch-attrs
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
       -
         name: police
@@ -2128,7 +2129,8 @@ attribute-sets:
         type: u32
       -
         name: tin-stats
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-cake-tin-stats-attrs
       -
         name: deficit
@@ -2348,7 +2350,8 @@ attribute-sets:
         type: string
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
       -
         name: key-eth-dst
@@ -2849,7 +2852,8 @@ attribute-sets:
         type: string
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
       -
         name: mask
@@ -3002,7 +3006,8 @@ attribute-sets:
         type: u32
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
       -
         name: flags
@@ -3375,7 +3380,8 @@ attribute-sets:
         nested-attributes: tc-police-attrs
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
   -
     name: tc-taprio-attrs
@@ -3593,7 +3599,8 @@ attribute-sets:
         nested-attributes: tc-police-attrs
       -
         name: act
-        type: array-nest
+        type: indexed-array
+        sub-type: nest
         nested-attributes: tc-act-attrs
       -
         name: indev
diff --git a/Documentation/userspace-api/netlink/genetlink-legacy.rst b/Documentation/userspace-api/netlink/genetlink-legacy.rst
index 70a77387f6c4..54e8fb25e093 100644
--- a/Documentation/userspace-api/netlink/genetlink-legacy.rst
+++ b/Documentation/userspace-api/netlink/genetlink-legacy.rst
@@ -46,10 +46,16 @@ For reference the ``multi-attr`` array may look like this::
 
 where ``ARRAY-ATTR`` is the array entry type.
 
-array-nest
-~~~~~~~~~~
+indexed-array
+~~~~~~~~~~~~~
+
+``indexed-array`` wraps the entire array in an extra attribute (hence
+limiting its size to 64kB). The ``ENTRY`` nests are special and have the
+index of the entry as their type instead of normal attribute type.
 
-``array-nest`` creates the following structure::
+A ``sub-type`` is needed to describe what type in the ``ENTRY``. A ``nest``
+``sub-type`` means there are nest arrays in the ``ENTRY``, with the structure
+looks like::
 
   [SOME-OTHER-ATTR]
   [ARRAY-ATTR]
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index b30210f537f7..d671efea808a 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -631,15 +631,18 @@ class YnlFamily(SpecFamily):
                 decoded = self._formatted_string(decoded, attr_spec.display_hint)
         return decoded
 
-    def _decode_array_nest(self, attr, attr_spec):
+    def _decode_array_attr(self, attr, attr_spec):
         decoded = []
         offset = 0
         while offset < len(attr.raw):
             item = NlAttr(attr.raw, offset)
             offset += item.full_len
 
-            subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
-            decoded.append({ item.type: subattrs })
+            if attr_spec["sub-type"] == 'nest':
+                subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
+                decoded.append({ item.type: subattrs })
+            else:
+                raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
         return decoded
 
     def _decode_nest_type_value(self, attr, attr_spec):
@@ -733,8 +736,8 @@ class YnlFamily(SpecFamily):
                 decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
                 if 'enum' in attr_spec:
                     decoded = self._decode_enum(decoded, attr_spec)
-            elif attr_spec["type"] == 'array-nest':
-                decoded = self._decode_array_nest(attr, attr_spec)
+            elif attr_spec["type"] == 'indexed-array':
+                decoded = self._decode_array_attr(attr, attr_spec)
             elif attr_spec["type"] == 'bitfield32':
                 value, selector = struct.unpack("II", attr.raw)
                 if 'enum' in attr_spec:
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index a451cbfbd781..c0b90c104d92 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -841,8 +841,11 @@ class AttrSet(SpecAttrSet):
             t = TypeBitfield32(self.family, self, elem, value)
         elif elem['type'] == 'nest':
             t = TypeNest(self.family, self, elem, value)
-        elif elem['type'] == 'array-nest':
-            t = TypeArrayNest(self.family, self, elem, value)
+        elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
+            if elem["sub-type"] == 'nest':
+                t = TypeArrayNest(self.family, self, elem, value)
+            else:
+                raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
         elif elem['type'] == 'nest-type-value':
             t = TypeNestTypeValue(self.family, self, elem, value)
         else:
@@ -1055,7 +1058,7 @@ class Family(SpecFamily):
                     if nested in self.root_sets:
                         raise Exception("Inheriting members to a space used as root not supported")
                     inherit.update(set(spec['type-value']))
-                elif spec['type'] == 'array-nest':
+                elif spec['type'] == 'indexed-array':
                     inherit.add('idx')
                 self.pure_nested_structs[nested].set_inherited(inherit)
 
@@ -1619,9 +1622,12 @@ def _multi_parse(ri, struct, init_lines, local_vars):
     multi_attrs = set()
     needs_parg = False
     for arg, aspec in struct.member_list():
-        if aspec['type'] == 'array-nest':
-            local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
-            array_nests.add(arg)
+        if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
+            if aspec["sub-type"] == 'nest':
+                local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
+                array_nests.add(arg)
+            else:
+                raise Exception(f'Not supported sub-type {aspec["sub-type"]}')
         if 'multi-attr' in aspec:
             multi_attrs.add(arg)
         needs_parg |= 'nested-attributes' in aspec
-- 
cgit v1.2.3-73-gaa49b


From a7408b56e5f9e96c486cec9aaf7490452b713ebf Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 4 Apr 2024 14:31:13 +0800
Subject: ynl: support binary and integer sub-type for indexed-array

Add binary and integer sub-type support for indexed-array to display bond
arp and ns targets. Here is what the result looks like:

 # ip link add bond0 type bond mode 1 \
   arp_ip_target 192.168.1.1,192.168.1.2 ns_ip6_target 2001::1,2001::2
 # ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_link.yaml \
   --do getlink --json '{"ifname": "bond0"}' --output-json | jq '.linkinfo'

    "arp-ip-target": [
      "192.168.1.1",
      "192.168.1.2"
    ],
    [...]
    "ns-ip6-target": [
      "2001::1",
      "2001::2"
    ],

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240404063114.1221532-3-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/userspace-api/netlink/genetlink-legacy.rst | 10 +++++++---
 tools/net/ynl/lib/ynl.py                                 | 10 ++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/userspace-api/netlink/genetlink-legacy.rst b/Documentation/userspace-api/netlink/genetlink-legacy.rst
index 54e8fb25e093..fa005989193a 100644
--- a/Documentation/userspace-api/netlink/genetlink-legacy.rst
+++ b/Documentation/userspace-api/netlink/genetlink-legacy.rst
@@ -66,9 +66,13 @@ looks like::
       [MEMBER1]
       [MEMBER2]
 
-It wraps the entire array in an extra attribute (hence limiting its size
-to 64kB). The ``ENTRY`` nests are special and have the index of the entry
-as their type instead of normal attribute type.
+Other ``sub-type`` like ``u32`` means there is only one member as described
+in ``sub-type`` in the ``ENTRY``. The structure looks like::
+
+  [SOME-OTHER-ATTR]
+  [ARRAY-ATTR]
+    [ENTRY u32]
+    [ENTRY u32]
 
 type-value
 ~~~~~~~~~~
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index d671efea808a..0ba5f6fb8747 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -641,6 +641,16 @@ class YnlFamily(SpecFamily):
             if attr_spec["sub-type"] == 'nest':
                 subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
                 decoded.append({ item.type: subattrs })
+            elif attr_spec["sub-type"] == 'binary':
+                subattrs = item.as_bin()
+                if attr_spec.display_hint:
+                    subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
+                decoded.append(subattrs)
+            elif attr_spec["sub-type"] in NlAttr.type_formats:
+                subattrs = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order)
+                if attr_spec.display_hint:
+                    subattrs = self._formatted_string(subattrs, attr_spec.display_hint)
+                decoded.append(subattrs)
             else:
                 raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
         return decoded
-- 
cgit v1.2.3-73-gaa49b


From 841942bc62122c59f654f799c2d00ce3fda93efa Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Thu, 4 Apr 2024 11:29:55 +0200
Subject: net: ethtool: Allow passing a phy index for some commands

Some netlink commands are target towards ethernet PHYs, to control some
of their features. As there's several such commands, add the ability to
pass a PHY index in the ethnl request, which will populate the generic
ethnl_req_info with the relevant phydev when the command targets a PHY.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst |  7 ++++
 include/uapi/linux/ethtool_netlink.h         |  1 +
 net/ethtool/netlink.c                        | 48 ++++++++++++++++++++++++++--
 net/ethtool/netlink.h                        |  5 +++
 4 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 08d330b0f50f..5dc42f7ce429 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -57,6 +57,7 @@ Structure of this header is
   ``ETHTOOL_A_HEADER_DEV_INDEX``  u32     device ifindex
   ``ETHTOOL_A_HEADER_DEV_NAME``   string  device name
   ``ETHTOOL_A_HEADER_FLAGS``      u32     flags common for all requests
+  ``ETHTOOL_A_HEADER_PHY_INDEX``  u32     phy device index
   ==============================  ======  =============================
 
 ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the
@@ -81,6 +82,12 @@ the behaviour is backward compatible, i.e. requests from old clients not aware
 of the flag should be interpreted the way the client expects. A client must
 not set flags it does not understand.
 
+``ETHTOOL_A_HEADER_PHY_INDEX`` identifies the Ethernet PHY the message relates to.
+As there are numerous commands that are related to PHY configuration, and because
+there may be more than one PHY on the link, the PHY index can be passed in the
+request for the commands that needs it. It is, however, not mandatory, and if it
+is not passed for commands that target a PHY, the net_device.phydev pointer
+is used.
 
 Bit sets
 ========
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index e06a34cded78..23e225f00fb0 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -132,6 +132,7 @@ enum {
 	ETHTOOL_A_HEADER_DEV_INDEX,		/* u32 */
 	ETHTOOL_A_HEADER_DEV_NAME,		/* string */
 	ETHTOOL_A_HEADER_FLAGS,			/* u32 - ETHTOOL_FLAG_* */
+	ETHTOOL_A_HEADER_PHY_INDEX,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_HEADER_CNT,
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index bd04f28d5cf4..563e94e0cbd8 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -4,6 +4,7 @@
 #include <linux/ethtool_netlink.h>
 #include <linux/pm_runtime.h>
 #include "netlink.h"
+#include <linux/phy_link_topology.h>
 
 static struct genl_family ethtool_genl_family;
 
@@ -30,6 +31,24 @@ const struct nla_policy ethnl_header_policy_stats[] = {
 							  ETHTOOL_FLAGS_STATS),
 };
 
+const struct nla_policy ethnl_header_policy_phy[] = {
+	[ETHTOOL_A_HEADER_DEV_INDEX]	= { .type = NLA_U32 },
+	[ETHTOOL_A_HEADER_DEV_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = ALTIFNAMSIZ - 1 },
+	[ETHTOOL_A_HEADER_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
+							  ETHTOOL_FLAGS_BASIC),
+	[ETHTOOL_A_HEADER_PHY_INDEX]		= NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+const struct nla_policy ethnl_header_policy_phy_stats[] = {
+	[ETHTOOL_A_HEADER_DEV_INDEX]	= { .type = NLA_U32 },
+	[ETHTOOL_A_HEADER_DEV_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = ALTIFNAMSIZ - 1 },
+	[ETHTOOL_A_HEADER_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
+							  ETHTOOL_FLAGS_STATS),
+	[ETHTOOL_A_HEADER_PHY_INDEX]		= NLA_POLICY_MIN(NLA_U32, 1),
+};
+
 int ethnl_ops_begin(struct net_device *dev)
 {
 	int ret;
@@ -89,8 +108,9 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 			       const struct nlattr *header, struct net *net,
 			       struct netlink_ext_ack *extack, bool require_dev)
 {
-	struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
+	struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
 	const struct nlattr *devname_attr;
+	struct phy_device *phydev = NULL;
 	struct net_device *dev = NULL;
 	u32 flags = 0;
 	int ret;
@@ -104,7 +124,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 	/* No validation here, command policy should have a nested policy set
 	 * for the header, therefore validation should have already been done.
 	 */
-	ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
+	ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
 			       NULL, extack);
 	if (ret < 0)
 		return ret;
@@ -145,6 +165,30 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 		return -EINVAL;
 	}
 
+	if (dev) {
+		if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
+			struct nlattr *phy_id;
+
+			phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX];
+			phydev = phy_link_topo_get_phy(dev->link_topo,
+						       nla_get_u32(phy_id));
+			if (!phydev) {
+				NL_SET_BAD_ATTR(extack, phy_id);
+				return -ENODEV;
+			}
+		} else {
+			/* If we need a PHY but no phy index is specified, fallback
+			 * to dev->phydev
+			 */
+			phydev = dev->phydev;
+		}
+	} else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
+		NL_SET_ERR_MSG_ATTR(extack, header,
+				    "can't target a PHY without a netdev");
+		return -EINVAL;
+	}
+
+	req_info->phydev = phydev;
 	req_info->dev = dev;
 	req_info->flags = flags;
 	return 0;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9a333a8d04c1..d57a890b5d9e 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -250,6 +250,7 @@ static inline unsigned int ethnl_reply_header_size(void)
  * @dev:   network device the request is for (may be null)
  * @dev_tracker: refcount tracker for @dev reference
  * @flags: request flags common for all request types
+ * @phydev: phy_device connected to @dev this request is for (may be null)
  *
  * This is a common base for request specific structures holding data from
  * parsed userspace request. These always embed struct ethnl_req_info at
@@ -259,6 +260,7 @@ struct ethnl_req_info {
 	struct net_device	*dev;
 	netdevice_tracker	dev_tracker;
 	u32			flags;
+	struct phy_device	*phydev;
 };
 
 static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -395,9 +397,12 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
 extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
 extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
 extern const struct ethnl_request_ops ethnl_mm_request_ops;
+extern const struct ethnl_request_ops ethnl_phy_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
+extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
+extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
 extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
 extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
 extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
-- 
cgit v1.2.3-73-gaa49b


From 5af3e3876d567fb79a355bec1cb48e432d69b4fb Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Sat, 6 Apr 2024 04:05:37 +0300
Subject: devlink: Support setting max_io_eqs

Many devices send event notifications for the IO queues,
such as tx and rx queues, through event queues.

Enable a privileged owner, such as a hypervisor PF, to set the number
of IO event queues for the VF and SF during the provisioning stage.

example:
Get maximum IO event queues of the VF device::

  $ devlink port show pci/0000:06:00.0/2
  pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
      function:
          hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 10

Set maximum IO event queues of the VF device::

  $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32

  $ devlink port show pci/0000:06:00.0/2
  pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
      function:
          hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 32

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-port.rst | 33 ++++++++++++++
 include/net/devlink.h                             | 14 ++++++
 include/uapi/linux/devlink.h                      |  1 +
 net/devlink/port.c                                | 53 +++++++++++++++++++++++
 4 files changed, 101 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 562f46b41274..9d22d41a7cd1 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -134,6 +134,9 @@ Users may also set the IPsec crypto capability of the function using
 Users may also set the IPsec packet capability of the function using
 `devlink port function set ipsec_packet` command.
 
+Users may also set the maximum IO event queues of the function
+using `devlink port function set max_io_eqs` command.
+
 Function attributes
 ===================
 
@@ -295,6 +298,36 @@ policy is processed in software by the kernel.
         function:
             hw_addr 00:00:00:00:00:00 ipsec_packet enabled
 
+Maximum IO events queues setup
+------------------------------
+When user sets maximum number of IO event queues for a SF or
+a VF, such function driver is limited to consume only enforced
+number of IO event queues.
+
+IO event queues deliver events related to IO queues, including network
+device transmit and receive queues (txq and rxq) and RDMA Queue Pairs (QPs).
+For example, the number of netdevice channels and RDMA device completion
+vectors are derived from the function's IO event queues. Usually, the number
+of interrupt vectors consumed by the driver is limited by the number of IO
+event queues per device, as each of the IO event queues is connected to an
+interrupt vector.
+
+- Get maximum IO event queues of the VF device::
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 10
+
+- Set maximum IO event queues of the VF device::
+
+    $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 32
+
 Subfunction
 ============
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 9ac394bdfbe4..bb1af599d101 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1602,6 +1602,14 @@ void devlink_free(struct devlink *devlink);
  *			      capability. Should be used by device drivers to
  *			      enable/disable ipsec_packet capability of a
  *			      function managed by the devlink port.
+ * @port_fn_max_io_eqs_get: Callback used to get port function's maximum number
+ *			    of event queues. Should be used by device drivers to
+ *			    report the maximum event queues of a function
+ *			    managed by the devlink port.
+ * @port_fn_max_io_eqs_set: Callback used to set port function's maximum number
+ *			    of event queues. Should be used by device drivers to
+ *			    configure maximum number of event queues
+ *			    of a function managed by the devlink port.
  *
  * Note: Driver should return -EOPNOTSUPP if it doesn't support
  * port function (@port_fn_*) handling for a particular port.
@@ -1651,6 +1659,12 @@ struct devlink_port_ops {
 	int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
 					bool enable,
 					struct netlink_ext_ack *extack);
+	int (*port_fn_max_io_eqs_get)(struct devlink_port *devlink_port,
+				      u32 *max_eqs,
+				      struct netlink_ext_ack *extack);
+	int (*port_fn_max_io_eqs_set)(struct devlink_port *devlink_port,
+				      u32 max_eqs,
+				      struct netlink_ext_ack *extack);
 };
 
 void devlink_port_init(struct devlink *devlink,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 2da0c7eb6710..9401aa343673 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -686,6 +686,7 @@ enum devlink_port_function_attr {
 	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
 	DEVLINK_PORT_FN_ATTR_CAPS,	/* bitfield32 */
 	DEVLINK_PORT_FN_ATTR_DEVLINK,	/* nested */
+	DEVLINK_PORT_FN_ATTR_MAX_IO_EQS,	/* u32 */
 
 	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
 	DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 118d130d2afd..be9158b4453c 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -16,6 +16,7 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
 				 DEVLINK_PORT_FN_STATE_ACTIVE),
 	[DEVLINK_PORT_FN_ATTR_CAPS] =
 		NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+	[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 },
 };
 
 #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port)				\
@@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
 	return 0;
 }
 
+static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port,
+					   struct sk_buff *msg,
+					   struct netlink_ext_ack *extack,
+					   bool *msg_updated)
+{
+	u32 max_io_eqs;
+	int err;
+
+	if (!port->ops->port_fn_max_io_eqs_get)
+		return 0;
+
+	err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, max_io_eqs);
+	if (err)
+		return err;
+	*msg_updated = true;
+	return 0;
+}
+
 int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
 {
 	if (devlink_nl_put_handle(msg, devlink_port->devlink))
@@ -409,6 +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
 	return 0;
 }
 
+static int
+devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port,
+			       const struct nlattr *attr,
+			       struct netlink_ext_ack *extack)
+{
+	u32 max_io_eqs;
+
+	max_io_eqs = nla_get_u32(attr);
+	return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port,
+							 max_io_eqs, extack);
+}
+
 static int
 devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
 				   struct netlink_ext_ack *extack)
@@ -428,6 +465,9 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
 	if (err)
 		goto out;
 	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+	if (err)
+		goto out;
+	err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, &msg_updated);
 	if (err)
 		goto out;
 	err = devlink_rel_devlink_handle_put(msg, port->devlink,
@@ -726,6 +766,12 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
 			}
 		}
 	}
+	if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] &&
+	    !ops->port_fn_max_io_eqs_set) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS],
+				    "Function does not support max_io_eqs setting");
+		return -EOPNOTSUPP;
+	}
 	return 0;
 }
 
@@ -761,6 +807,13 @@ static int devlink_port_function_set(struct devlink_port *port,
 			return err;
 	}
 
+	attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS];
+	if (attr) {
+		err = devlink_port_fn_max_io_eqs_set(port, attr, extack);
+		if (err)
+			return err;
+	}
+
 	/* Keep this as the last function attribute set, so that when
 	 * multiple port function attributes are set along with state,
 	 * Those can be applied first before activating the state.
-- 
cgit v1.2.3-73-gaa49b


From dc073430db8d3f28460ea3ec1901e34bf7e8c0f2 Mon Sep 17 00:00:00 2001
From: Diogo Ivo <diogo.ivo@siemens.com>
Date: Wed, 3 Apr 2024 11:48:11 +0100
Subject: dt-bindings: net: Add support for AM65x SR1.0 in ICSSG

Silicon Revision 1.0 of the AM65x came with a slightly different ICSSG
support: Only 2 PRUs per slice are available and instead 2 additional
DMA channels are used for management purposes. We have no restrictions
on specified PRUs, but the DMA channels need to be adjusted.

Co-developed-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Diogo Ivo <diogo.ivo@siemens.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Reviewed-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../devicetree/bindings/net/ti,icssg-prueth.yaml   | 35 ++++++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
index 229c8f32019f..e253fa786092 100644
--- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
+++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
@@ -13,14 +13,12 @@ description:
   Ethernet based on the Programmable Real-Time Unit and Industrial
   Communication Subsystem.
 
-allOf:
-  - $ref: /schemas/remoteproc/ti,pru-consumer.yaml#
-
 properties:
   compatible:
     enum:
-      - ti,am642-icssg-prueth  # for AM64x SoC family
-      - ti,am654-icssg-prueth  # for AM65x SoC family
+      - ti,am642-icssg-prueth      # for AM64x SoC family
+      - ti,am654-icssg-prueth      # for AM65x SoC family
+      - ti,am654-sr1-icssg-prueth  # for AM65x SoC family, SR1.0
 
   sram:
     $ref: /schemas/types.yaml#/definitions/phandle
@@ -28,9 +26,11 @@ properties:
       phandle to MSMC SRAM node
 
   dmas:
-    maxItems: 10
+    minItems: 10
+    maxItems: 12
 
   dma-names:
+    minItems: 10
     items:
       - const: tx0-0
       - const: tx0-1
@@ -42,6 +42,8 @@ properties:
       - const: tx1-3
       - const: rx0
       - const: rx1
+      - const: rxmgm0
+      - const: rxmgm1
 
   ti,mii-g-rt:
     $ref: /schemas/types.yaml#/definitions/phandle
@@ -132,6 +134,27 @@ required:
   - interrupts
   - interrupt-names
 
+allOf:
+  - $ref: /schemas/remoteproc/ti,pru-consumer.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,am654-sr1-icssg-prueth
+    then:
+      properties:
+        dmas:
+          minItems: 12
+        dma-names:
+          minItems: 12
+    else:
+      properties:
+        dmas:
+          maxItems: 10
+        dma-names:
+          maxItems: 10
+
 unevaluatedProperties: false
 
 examples:
-- 
cgit v1.2.3-73-gaa49b


From 220d63f249ecfa71a9b89dd232383506b56d0073 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 8 Apr 2024 08:44:10 +0200
Subject: dt-bindings: net: rockchip-dwmac: use rgmii-id in example

The dwmac supports specifying the RGMII clock delays, but it is
recommended to use rgmii-id and to specify the delays in the phy node
instead [1].

Change the example accordingly to no longer promote this undesired
setting.

[1] https://lore.kernel.org/all/1a0de7b4-f0f7-4080-ae48-f5ffa9e76be3@lunn.ch/

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Dragan Simic <dsimic@manjaro.org>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240408-rockchip-dwmac-rgmii-id-binding-v1-1-3886d1a8bd54@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/rockchip-dwmac.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 70bbc4220e2a..6bbe96e35250 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -137,8 +137,6 @@ examples:
         assigned-clock-parents = <&ext_gmac>;
 
         rockchip,grf = <&grf>;
-        phy-mode = "rgmii";
+        phy-mode = "rgmii-id";
         clock_in_out = "input";
-        tx_delay = <0x30>;
-        rx_delay = <0x10>;
     };
-- 
cgit v1.2.3-73-gaa49b


From 65f35aa76c0e21b0243fd734e513fd2263f22a18 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Tue, 9 Apr 2024 16:25:16 -0700
Subject: ethtool: update tsinfo statistics attribute docs with correct type

nla_put_uint can either write a u32 or u64 netlink attribute value. The
size depends on whether the value can be represented with a u32 or requires
a u64. Use a uint annotation in various documentation to represent this.

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Link: https://lore.kernel.org/r/20240409232520.237613-2-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst | 6 +++---
 include/uapi/linux/ethtool_netlink.h         | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 5dc42f7ce429..4e63d3708ed9 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1254,9 +1254,9 @@ would be empty (no bit set).
 Additional hardware timestamping statistics response contents:
 
   =====================================  ======  ===================================
-  ``ETHTOOL_A_TS_STAT_TX_PKTS``          u64     Packets with Tx HW timestamps
-  ``ETHTOOL_A_TS_STAT_TX_LOST``          u64     Tx HW timestamp not arrived count
-  ``ETHTOOL_A_TS_STAT_TX_ERR``           u64     HW error request Tx timestamp count
+  ``ETHTOOL_A_TS_STAT_TX_PKTS``          uint    Packets with Tx HW timestamps
+  ``ETHTOOL_A_TS_STAT_TX_LOST``          uint    Tx HW timestamp not arrived count
+  ``ETHTOOL_A_TS_STAT_TX_ERR``           uint    HW error request Tx timestamp count
   =====================================  ======  ===================================
 
 CABLE_TEST
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 23e225f00fb0..b4f0d233d048 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -488,9 +488,9 @@ enum {
 enum {
 	ETHTOOL_A_TS_STAT_UNSPEC,
 
-	ETHTOOL_A_TS_STAT_TX_PKTS,			/* u64 */
-	ETHTOOL_A_TS_STAT_TX_LOST,			/* u64 */
-	ETHTOOL_A_TS_STAT_TX_ERR,			/* u64 */
+	ETHTOOL_A_TS_STAT_TX_PKTS,			/* uint */
+	ETHTOOL_A_TS_STAT_TX_LOST,			/* uint */
+	ETHTOOL_A_TS_STAT_TX_ERR,			/* uint */
 
 	/* add new constants above here */
 	__ETHTOOL_A_TS_STAT_CNT,
-- 
cgit v1.2.3-73-gaa49b


From 4ede457542a615b08b1f25a25cb514402d033968 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 9 Apr 2024 16:35:04 +0800
Subject: doc/netlink/specs: Add bond support to rt_link.yaml

Add bond support to rt_link.yaml. Here is an example output:

 $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_link.yaml \
   --do getlink --json '{"ifname": "bond0"}' --output-json | jq '.linkinfo'
 {
   "kind": "bond",
   "data": {
     "mode": 4,
     "miimon": 100,
     ...
     "arp-interval": 0,
     "arp-ip-target": [
       "192.168.1.1",
       "192.168.1.2"
     ],
     "arp-validate": 0,
     "arp-all-targets": 0,
     "ns-ip6-target": [
       "2001::1",
       "2001::2"
     ],
     "primary-reselect": 0,
     ...
     "missed-max": 2,
     "ad-info": {
       "aggregator": 1,
       "num-ports": 1,
       "actor-key": 0,
       "partner-key": 1,
       "partner-mac": "00:00:00:00:00:00"
     }
   }
 }

And here is the downlink info.

 $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_link.yaml \
   --do getlink --json '{"ifname": "dummy0"}' --output-json | jq '.linkinfo'
 {
   "kind": "dummy",
   "slave-kind": "bond",
   "slave-data": {
     "state": 0,
     "mii-status": 0,
     "link-failure-count": 0,
     "perm-hwaddr": "f2:82:f7:cc:47:13",
     "queue-id": 0,
     "prio": 0
   }
 }

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240409083504.3900877-1-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_link.yaml | 163 +++++++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index e5dcb2cf1724..113ecd17c880 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -1035,6 +1035,165 @@ attribute-sets:
         type: sub-message
         sub-message: linkinfo-member-data-msg
         selector: slave-kind
+  -
+    name: linkinfo-bond-attrs
+    name-prefix: ifla-bond-
+    attributes:
+      -
+        name: mode
+        type: u8
+      -
+        name: active-slave
+        type: u32
+      -
+        name: miimon
+        type: u32
+      -
+        name: updelay
+        type: u32
+      -
+        name: downdelay
+        type: u32
+      -
+        name: use-carrier
+        type: u8
+      -
+        name: arp-interval
+        type: u32
+      -
+        name: arp-ip-target
+        type: indexed-array
+        sub-type: u32
+        byte-order: big-endian
+        display-hint: ipv4
+      -
+        name: arp-validate
+        type: u32
+      -
+        name: arp-all-targets
+        type: u32
+      -
+        name: primary
+        type: u32
+      -
+        name: primary-reselect
+        type: u8
+      -
+        name: fail-over-mac
+        type: u8
+      -
+        name: xmit-hash-policy
+        type: u8
+      -
+        name: resend-igmp
+        type: u32
+      -
+        name: num-peer-notif
+        type: u8
+      -
+        name: all-slaves-active
+        type: u8
+      -
+        name: min-links
+        type: u32
+      -
+        name: lp-interval
+        type: u32
+      -
+        name: packets-per-slave
+        type: u32
+      -
+        name: ad-lacp-rate
+        type: u8
+      -
+        name: ad-select
+        type: u8
+      -
+        name: ad-info
+        type: nest
+        nested-attributes: bond-ad-info-attrs
+      -
+        name: ad-actor-sys-prio
+        type: u16
+      -
+        name: ad-user-port-key
+        type: u16
+      -
+        name: ad-actor-system
+        type: binary
+        display-hint: mac
+      -
+        name: tlb-dynamic-lb
+        type: u8
+      -
+        name: peer-notif-delay
+        type: u32
+      -
+        name: ad-lacp-active
+        type: u8
+      -
+        name: missed-max
+        type: u8
+      -
+        name: ns-ip6-target
+        type: indexed-array
+        sub-type: binary
+        display-hint: ipv6
+      -
+        name: coupled-control
+        type: u8
+  -
+    name: bond-ad-info-attrs
+    name-prefix: ifla-bond-ad-info-
+    attributes:
+      -
+        name: aggregator
+        type: u16
+      -
+        name: num-ports
+        type: u16
+      -
+        name: actor-key
+        type: u16
+      -
+        name: partner-key
+        type: u16
+      -
+        name: partner-mac
+        type: binary
+        display-hint: mac
+  -
+    name: bond-slave-attrs
+    name-prefix: ifla-bond-slave-
+    attributes:
+      -
+        name: state
+        type: u8
+      -
+        name: mii-status
+        type: u8
+      -
+        name: link-failure-count
+        type: u32
+      -
+        name: perm-hwaddr
+        type: binary
+        display-hint: mac
+      -
+        name: queue-id
+        type: u16
+      -
+        name: ad-aggregator-id
+        type: u16
+      -
+        name: ad-actor-oper-port-state
+        type: u8
+      -
+        name: ad-partner-oper-port-state
+        type: u16
+      -
+        name: prio
+        type: u32
   -
     name: linkinfo-bridge-attrs
     name-prefix: ifla-br-
@@ -1716,6 +1875,9 @@ sub-messages:
   -
     name: linkinfo-data-msg
     formats:
+      -
+        value: bond
+        attribute-set: linkinfo-bond-attrs
       -
         value: bridge
         attribute-set: linkinfo-bridge-attrs
@@ -1754,6 +1916,7 @@ sub-messages:
         attribute-set: linkinfo-brport-attrs
       -
         value: bond
+        attribute-set: bond-slave-attrs
 
 operations:
   enum-model: directional
-- 
cgit v1.2.3-73-gaa49b


From 3bb946c9d323f4b47b87bd73134125d562f72448 Mon Sep 17 00:00:00 2001
From: Fei Qin <fei.qin@corigine.com>
Date: Wed, 10 Apr 2024 13:26:35 +0200
Subject: devlink: add a new info version tag

Add definition and documentation for the new generic
info "board.part_number".

The new one is for part number specific use, and board.id
is modified to match the documentation in devlink-info.

Signed-off-by: Fei Qin <fei.qin@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-info.rst | 5 +++++
 include/net/devlink.h                             | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst
index 1242b0e6826b..23073bc219d8 100644
--- a/Documentation/networking/devlink/devlink-info.rst
+++ b/Documentation/networking/devlink/devlink-info.rst
@@ -146,6 +146,11 @@ board.manufacture
 
 An identifier of the company or the facility which produced the part.
 
+board.part_number
+-----------------
+
+Part number of the board and its components.
+
 fw
 --
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index bb1af599d101..d31769a116ce 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -599,12 +599,14 @@ enum devlink_param_generic_id {
 	.validate = _validate,						\
 }
 
-/* Part number, identifier of board design */
+/* Identifier of board design */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID	"board.id"
 /* Revision of board design */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV	"board.rev"
 /* Maker of the board */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE	"board.manufacture"
+/* Part number of the board and its components */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_PART_NUMBER	"board.part_number"
 
 /* Part number, identifier of asic design */
 #define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID	"asic.id"
-- 
cgit v1.2.3-73-gaa49b


From 8910f93b95706b4b11485bb0f318f88918a46b04 Mon Sep 17 00:00:00 2001
From: Fei Qin <fei.qin@corigine.com>
Date: Wed, 10 Apr 2024 13:26:36 +0200
Subject: nfp: update devlink device info output

Newer NIC will introduce a new part number, now add it
into devlink device info.

This patch also updates the information of "board.id" in
nfp.rst to match the devlink-info.rst.

Signed-off-by: Fei Qin <fei.qin@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/nfp.rst         | 5 ++++-
 drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/devlink/nfp.rst b/Documentation/networking/devlink/nfp.rst
index a1717db0dfcc..3093642bdae4 100644
--- a/Documentation/networking/devlink/nfp.rst
+++ b/Documentation/networking/devlink/nfp.rst
@@ -32,7 +32,7 @@ The ``nfp`` driver reports the following versions
      - Description
    * - ``board.id``
      - fixed
-     - Part number identifying the board design
+     - Identifier of the board design
    * - ``board.rev``
      - fixed
      - Revision of the board design
@@ -42,6 +42,9 @@ The ``nfp`` driver reports the following versions
    * - ``board.model``
      - fixed
      - Model name of the board design
+   * - ``board.part_number``
+     - fixed
+     - Part number of the board and its components
    * - ``fw.bundle_id``
      - stored, running
      - Firmware bundle id
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 635d33c0d6d3..ea75b9a06313 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -160,6 +160,7 @@ static const struct nfp_devlink_versions_simple {
 	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_REV,	"assembly.revision", },
 	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE, "assembly.vendor", },
 	{ "board.model", /* code name */		"assembly.model", },
+	{ DEVLINK_INFO_VERSION_GENERIC_BOARD_PART_NUMBER, "pn", },
 };
 
 static int
-- 
cgit v1.2.3-73-gaa49b


From a1e5de0d07a3b2db047fe8ba0063d129672f979a Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao418@huawei.com>
Date: Wed, 10 Apr 2024 20:53:54 +0800
Subject: net: hns3: add support to query scc version by devlink info

Add support to query scc version by devlink info for device V3.

Signed-off-by: Hao Chen <chenhao418@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Link: https://lore.kernel.org/r/20240410125354.2177067-5-shaojijie@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/devlink/hns3.rst          |  5 +++
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  9 +++++
 .../hisilicon/hns3/hns3_common/hclge_comm_cmd.h    |  8 ++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c | 44 ++++++++++++++++++++--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h |  2 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 18 +++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 +
 7 files changed, 84 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst
index 4562a6e4782f..72bc1b9f3785 100644
--- a/Documentation/networking/devlink/hns3.rst
+++ b/Documentation/networking/devlink/hns3.rst
@@ -23,3 +23,8 @@ The ``hns3`` driver reports the following versions
    * - ``fw``
      - running
      - Used to represent the firmware version.
+   * - ``fw.scc``
+     - running
+     - Used to represent the Soft Congestion Control (SSC) firmware version.
+       SCC is a firmware component which provides multiple RDMA congestion
+       control algorithms, including DCQCN.
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e9266c65b331..7c2c8bea4c06 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -366,6 +366,15 @@ struct hnae3_vector_info {
 #define HNAE3_FW_VERSION_BYTE0_SHIFT	0
 #define HNAE3_FW_VERSION_BYTE0_MASK	GENMASK(7, 0)
 
+#define HNAE3_SCC_VERSION_BYTE3_SHIFT	24
+#define HNAE3_SCC_VERSION_BYTE3_MASK	GENMASK(31, 24)
+#define HNAE3_SCC_VERSION_BYTE2_SHIFT	16
+#define HNAE3_SCC_VERSION_BYTE2_MASK	GENMASK(23, 16)
+#define HNAE3_SCC_VERSION_BYTE1_SHIFT	8
+#define HNAE3_SCC_VERSION_BYTE1_MASK	GENMASK(15, 8)
+#define HNAE3_SCC_VERSION_BYTE0_SHIFT	0
+#define HNAE3_SCC_VERSION_BYTE0_MASK	GENMASK(7, 0)
+
 struct hnae3_ring_chain_node {
 	struct hnae3_ring_chain_node *next;
 	u32 tqp_index;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index a2bc5a9adaa3..2c2a2f1e0d7a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -247,6 +247,9 @@ enum hclge_opcode_type {
 	HCLGE_OPC_QCN_AJUST_INIT	= 0x1A07,
 	HCLGE_OPC_QCN_DFX_CNT_STATUS    = 0x1A08,
 
+	/* SCC commands */
+	HCLGE_OPC_QUERY_SCC_VER		= 0x1A84,
+
 	/* Mailbox command */
 	HCLGEVF_OPC_MBX_PF_TO_VF	= 0x2000,
 	HCLGEVF_OPC_MBX_VF_TO_PF	= 0x2001,
@@ -394,6 +397,11 @@ struct hclge_comm_query_version_cmd {
 	__le32 caps[HCLGE_COMM_QUERY_CAP_LENGTH]; /* capabilities of device */
 };
 
+struct hclge_comm_query_scc_cmd {
+	__le32 scc_version;
+	u8 rsv[20];
+};
+
 #define HCLGE_DESC_DATA_LEN		6
 struct hclge_desc {
 	__le16 opcode;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
index 9a939c0b217f..a1571c108678 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
@@ -5,6 +5,34 @@
 
 #include "hclge_devlink.h"
 
+static int hclge_devlink_scc_info_get(struct devlink *devlink,
+				      struct devlink_info_req *req)
+{
+	struct hclge_devlink_priv *priv = devlink_priv(devlink);
+	char scc_version[HCLGE_DEVLINK_FW_SCC_LEN];
+	struct hclge_dev *hdev = priv->hdev;
+	u32 scc_version_tmp;
+	int ret;
+
+	ret = hclge_query_scc_version(hdev, &scc_version_tmp);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get scc version, ret = %d\n", ret);
+		return ret;
+	}
+
+	snprintf(scc_version, sizeof(scc_version), "%lu.%lu.%lu.%lu",
+		 hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE3_MASK,
+				 HNAE3_FW_VERSION_BYTE3_SHIFT),
+		 hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE2_MASK,
+				 HNAE3_FW_VERSION_BYTE2_SHIFT),
+		 hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE1_MASK,
+				 HNAE3_FW_VERSION_BYTE1_SHIFT),
+		 hnae3_get_field(scc_version_tmp, HNAE3_SCC_VERSION_BYTE0_MASK,
+				 HNAE3_FW_VERSION_BYTE0_SHIFT));
+	return devlink_info_version_running_put(req, "fw.scc", scc_version);
+}
+
 static int hclge_devlink_info_get(struct devlink *devlink,
 				  struct devlink_info_req *req,
 				  struct netlink_ext_ack *extack)
@@ -13,6 +41,7 @@ static int hclge_devlink_info_get(struct devlink *devlink,
 	struct hclge_devlink_priv *priv = devlink_priv(devlink);
 	char version_str[HCLGE_DEVLINK_FW_STRING_LEN];
 	struct hclge_dev *hdev = priv->hdev;
+	int ret;
 
 	snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
 		 hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
@@ -24,9 +53,18 @@ static int hclge_devlink_info_get(struct devlink *devlink,
 		 hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
 				 HNAE3_FW_VERSION_BYTE0_SHIFT));
 
-	return devlink_info_version_running_put(req,
-						DEVLINK_INFO_VERSION_GENERIC_FW,
-						version_str);
+	ret = devlink_info_version_running_put(req,
+					       DEVLINK_INFO_VERSION_GENERIC_FW,
+					       version_str);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "failed to set running version of fw\n");
+		return ret;
+	}
+
+	if (hdev->pdev->revision > HNAE3_DEVICE_VERSION_V2)
+		ret = hclge_devlink_scc_info_get(devlink, req);
+
+	return ret;
 }
 
 static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
index 918be04507a5..148effa5ea89 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
@@ -6,6 +6,8 @@
 
 #include "hclge_main.h"
 
+#define	HCLGE_DEVLINK_FW_SCC_LEN	32
+
 struct hclge_devlink_priv {
 	struct hclge_dev *hdev;
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index eb31a7e9c8fc..28336cf17170 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10883,6 +10883,24 @@ static u32 hclge_get_fw_version(struct hnae3_handle *handle)
 	return hdev->fw_version;
 }
 
+int hclge_query_scc_version(struct hclge_dev *hdev, u32 *scc_version)
+{
+	struct hclge_comm_query_scc_cmd *resp;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_SCC_VER, 1);
+	resp = (struct hclge_comm_query_scc_cmd *)desc.data;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		return ret;
+
+	*scc_version = le32_to_cpu(resp->scc_version);
+
+	return 0;
+}
+
 static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index e821dd2f1528..df3c10098349 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1169,4 +1169,5 @@ int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en);
 int hclge_mac_update_stats(struct hclge_dev *hdev);
 struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf);
 int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type);
+int hclge_query_scc_version(struct hclge_dev *hdev, u32 *scc_version);
 #endif
-- 
cgit v1.2.3-73-gaa49b


From 427f6acbc88154f40f68391dcfea36b821fcd06a Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Mon, 15 Apr 2024 18:36:21 +0800
Subject: dt-bindings: net: nxp,dwmac-imx: allow nvmem cells property

Allow nvmem-cells and nvmem-cell-names to get mac_address from onchip
fuse.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240415103621.1644735-1-peng.fan@oss.nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
index 4c01cae7c93a..87bc4416eadf 100644
--- a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
@@ -66,6 +66,10 @@ properties:
       Should be phandle/offset pair. The phandle to the syscon node which
       encompases the GPR register, and the offset of the GPR register.
 
+  nvmem-cells: true
+
+  nvmem-cell-names: true
+
   snps,rmii_refclk_ext:
     $ref: /schemas/types.yaml#/definitions/flag
     description:
-- 
cgit v1.2.3-73-gaa49b


From 30ba0022b30ab421f138fd0a0ccb2f70173cf798 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Sun, 11 Jun 2023 14:37:31 +0200
Subject: dt-bindings: net: wireless: ath11k: add ieee80211-freq-limit property

This is an existing optional property that ieee80211.yaml/cfg80211
provides. It's useful to further restrict supported frequencies
for a specified device through device-tree.

Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/fc606d2550d047a53b4289235dd3c0fe23d5daac.1686486468.git.chunkeey@gmail.com
---
 Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
index 672282cdfc2f..a2d55bf4c7a5 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -59,6 +59,8 @@ properties:
     minItems: 1
     maxItems: 2
 
+  ieee80211-freq-limit: true
+
   wifi-firmware:
     type: object
     description: |
@@ -88,6 +90,7 @@ required:
 additionalProperties: false
 
 allOf:
+  - $ref: ieee80211.yaml#
   - if:
       properties:
         compatible:
-- 
cgit v1.2.3-73-gaa49b


From b58be8db6327b21e0c7fbee559b8eb47f5110efe Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:49 +0200
Subject: ethtool: Expand Ethernet Power Equipment with c33 (PoE) alongside
 PoDL

In the current PSE interface for Ethernet Power Equipment, support is
limited to PoDL. This patch extends the interface to accommodate the
objects specified in IEEE 802.3-2022 145.2 for Power sourcing
Equipment (PSE).

The following objects are now supported and considered mandatory:
- IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus
- IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
- IEEE 802.3-2022 30.9.1.2.1 aPSEAdminControl

To avoid confusion between "PoDL PSE" and "PoE PSE", which have similar
names but distinct values, we have followed the suggestion of Oleksij
Rempel and Andrew Lunn to maintain separate naming schemes for each,
using c33 (clause 33) prefix for "PoE PSE".
You can find more details in the discussion threads here:
https://lore.kernel.org/netdev/20230912110637.GI780075@pengutronix.de/
https://lore.kernel.org/netdev/2539b109-72ad-470a-9dae-9f53de4f64ec@lunn.ch/

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-1-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/index.rst               |  1 +
 Documentation/networking/pse-pd/index.rst        |  9 +++
 Documentation/networking/pse-pd/introduction.rst | 73 ++++++++++++++++++++++++
 include/linux/pse-pd/pse.h                       |  9 +++
 include/uapi/linux/ethtool.h                     | 43 ++++++++++++++
 include/uapi/linux/ethtool_netlink.h             |  3 +
 6 files changed, 138 insertions(+)
 create mode 100644 Documentation/networking/pse-pd/index.rst
 create mode 100644 Documentation/networking/pse-pd/introduction.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 473d72c36d61..7664c0bfe461 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -93,6 +93,7 @@ Contents:
    plip
    ppp_generic
    proc_net_tcp
+   pse-pd/index
    radiotap-headers
    rds
    regulatory
diff --git a/Documentation/networking/pse-pd/index.rst b/Documentation/networking/pse-pd/index.rst
new file mode 100644
index 000000000000..18197bc7303d
--- /dev/null
+++ b/Documentation/networking/pse-pd/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Power Sourcing Equipment (PSE) Documentation
+============================================
+
+.. toctree::
+   :maxdepth: 2
+
+   introduction
diff --git a/Documentation/networking/pse-pd/introduction.rst b/Documentation/networking/pse-pd/introduction.rst
new file mode 100644
index 000000000000..e3d3faaef717
--- /dev/null
+++ b/Documentation/networking/pse-pd/introduction.rst
@@ -0,0 +1,73 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Power Sourcing Equipment (PSE) in IEEE 802.3 Standard
+=====================================================
+
+Overview
+--------
+
+Power Sourcing Equipment (PSE) is essential in networks for delivering power
+along with data over Ethernet cables. It usually refers to devices like
+switches and hubs that supply power to Powered Devices (PDs) such as IP
+cameras, VoIP phones, and wireless access points.
+
+PSE vs. PoDL PSE
+----------------
+
+PSE in the IEEE 802.3 standard generally refers to equipment that provides
+power alongside data over Ethernet cables, typically associated with Power over
+Ethernet (PoE).
+
+PoDL PSE, or Power over Data Lines PSE, specifically denotes PSEs operating
+with single balanced twisted-pair PHYs, as per Clause 104 of IEEE 802.3. PoDL
+is significant in contexts like automotive and industrial controls where power
+and data delivery over a single pair is advantageous.
+
+IEEE 802.3-2018 Addendums and Related Clauses
+---------------------------------------------
+
+Key addenda to the IEEE 802.3-2018 standard relevant to power delivery over
+Ethernet are as follows:
+
+- **802.3af (Approved in 2003-06-12)**: Known as PoE in the market, detailed in
+  Clause 33, delivering up to 15.4W of power.
+- **802.3at (Approved in 2009-09-11)**: Marketed as PoE+, enhancing PoE as
+  covered in Clause 33, increasing power delivery to up to 30W.
+- **802.3bt (Approved in 2018-09-27)**: Known as 4PPoE in the market, outlined
+  in Clause 33. Type 3 delivers up to 60W, and Type 4 up to 100W.
+- **802.3bu (Approved in 2016-12-07)**: Formerly referred to as PoDL, detailed
+  in Clause 104. Introduces Classes 0 - 9. Class 9 PoDL PSE delivers up to ~65W
+
+Kernel Naming Convention Recommendations
+----------------------------------------
+
+For clarity and consistency within the Linux kernel's networking subsystem, the
+following naming conventions are recommended:
+
+- For general PSE (PoE) code, use "c33_pse" key words. For example:
+  ``enum ethtool_c33_pse_admin_state c33_admin_control;``.
+  This aligns with Clause 33, encompassing various PoE forms.
+
+- For PoDL PSE - specific code, use "podl_pse". For example:
+  ``enum ethtool_podl_pse_admin_state podl_admin_control;`` to differentiate
+  PoDL PSE settings according to Clause 104.
+
+Summary of Clause 33: Data Terminal Equipment (DTE) Power via Media Dependent Interface (MDI)
+---------------------------------------------------------------------------------------------
+
+Clause 33 of the IEEE 802.3 standard defines the functional and electrical
+characteristics of Powered Device (PD) and Power Sourcing Equipment (PSE).
+These entities enable power delivery using the same generic cabling as for data
+transmission, integrating power with data communication for devices such as
+10BASE-T, 100BASE-TX, or 1000BASE-T.
+
+Summary of Clause 104: Power over Data Lines (PoDL) of Single Balanced Twisted-Pair Ethernet
+--------------------------------------------------------------------------------------------
+
+Clause 104 of the IEEE 802.3 standard delineates the functional and electrical
+characteristics of PoDL Powered Devices (PDs) and PoDL Power Sourcing Equipment
+(PSEs). These are designed for use with single balanced twisted-pair Ethernet
+Physical Layers. In this clause, 'PSE' refers specifically to PoDL PSE, and
+'PD' to PoDL PD. The key intent is to provide devices with a unified interface
+for both data and the power required to process this data over a single
+balanced twisted-pair Ethernet connection.
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 199cf4ae3cf2..be4e5754eb24 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -17,9 +17,12 @@ struct pse_controller_dev;
  *
  * @podl_admin_control: set PoDL PSE admin control as described in
  *	IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl
+ * @c33_admin_control: set PSE admin control as described in
+ *	IEEE 802.3-2022 30.9.1.2.1 acPSEAdminControl
  */
 struct pse_control_config {
 	enum ethtool_podl_pse_admin_state podl_admin_control;
+	enum ethtool_c33_pse_admin_state c33_admin_control;
 };
 
 /**
@@ -29,10 +32,16 @@ struct pse_control_config {
  *	functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
  * @podl_pw_status: power detection status of the PoDL PSE.
  *	IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @c33_admin_state: operational state of the PSE
+ *	functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
+ * @c33_pw_status: power detection status of the PSE.
+ *	IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus:
  */
 struct pse_control_status {
 	enum ethtool_podl_pse_admin_state podl_admin_state;
 	enum ethtool_podl_pse_pw_d_status podl_pw_status;
+	enum ethtool_c33_pse_admin_state c33_admin_state;
+	enum ethtool_c33_pse_pw_d_status c33_pw_status;
 };
 
 /**
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 95c2f09f0d0a..7b9a3d890949 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -752,6 +752,49 @@ enum ethtool_module_power_mode {
 	ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_c33_pse_admin_state - operational state of the PoDL PSE
+ *	functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN: state of PSE functions is unknown
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED: PSE functions are disabled
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED: PSE functions are enabled
+ */
+enum ethtool_c33_pse_admin_state {
+	ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN = 1,
+	ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
+	ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_c33_pse_pw_d_status - power detection status of the PSE.
+ *	IEEE 802.3-2022 30.9.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN: PSE status is unknown
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED: The enumeration "disabled"
+ *	indicates that the PSE State diagram is in the state DISABLED.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING: The enumeration "searching"
+ *	indicates the PSE State diagram is in a state other than those
+ *	listed.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING: The enumeration
+ *	"deliveringPower" indicates that the PSE State diagram is in the
+ *	state POWER_ON.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_TEST: The enumeration "test" indicates that
+ *	the PSE State diagram is in the state TEST_MODE.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_FAULT: The enumeration "fault" indicates that
+ *	the PSE State diagram is in the state TEST_ERROR.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT: The enumeration "otherFault"
+ *	indicates that the PSE State diagram is in the state IDLE due to
+ *	the variable error_condition = true.
+ */
+enum ethtool_c33_pse_pw_d_status {
+	ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN = 1,
+	ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED,
+	ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING,
+	ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING,
+	ETHTOOL_C33_PSE_PW_D_STATUS_TEST,
+	ETHTOOL_C33_PSE_PW_D_STATUS_FAULT,
+	ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT,
+};
+
 /**
  * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
  *	functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index b4f0d233d048..f17dbe54bf5e 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -913,6 +913,9 @@ enum {
 	ETHTOOL_A_PODL_PSE_ADMIN_STATE,		/* u32 */
 	ETHTOOL_A_PODL_PSE_ADMIN_CONTROL,	/* u32 */
 	ETHTOOL_A_PODL_PSE_PW_D_STATUS,		/* u32 */
+	ETHTOOL_A_C33_PSE_ADMIN_STATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_D_STATUS,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_PSE_CNT,
-- 
cgit v1.2.3-73-gaa49b


From 4d18e3ddf427d93a2895a1c9d61477e1f1726cad Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:51 +0200
Subject: net: ethtool: pse-pd: Expand pse commands with the PSE PoE interface

Add PSE PoE interface support in the ethtool pse command.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-3-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst | 20 ++++++++++
 net/ethtool/pse-pd.c                         | 60 +++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 4e63d3708ed9..8bc71f249448 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1733,6 +1733,10 @@ Kernel response contents:
                                                   PSE functions
   ``ETHTOOL_A_PODL_PSE_PW_D_STATUS``         u32  power detection status of the
                                                   PoDL PSE.
+  ``ETHTOOL_A_C33_PSE_ADMIN_STATE``          u32  Operational state of the PoE
+                                                  PSE functions.
+  ``ETHTOOL_A_C33_PSE_PW_D_STATUS``          u32  power detection status of the
+                                                  PoE PSE.
   ======================================  ======  =============================
 
 When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies
@@ -1744,6 +1748,12 @@ aPoDLPSEAdminState. Possible values are:
 .. kernel-doc:: include/uapi/linux/ethtool.h
     :identifiers: ethtool_podl_pse_admin_state
 
+The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` implementing
+``IEEE 802.3-2022`` 30.9.1.1.2 aPSEAdminState.
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+    :identifiers: ethtool_c33_pse_admin_state
+
 When set, the optional ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` attribute identifies
 the power detection status of the PoDL PSE.  The status depend on internal PSE
 state machine and automatic PD classification support. This option is
@@ -1753,6 +1763,12 @@ Possible values are:
 .. kernel-doc:: include/uapi/linux/ethtool.h
     :identifiers: ethtool_podl_pse_pw_d_status
 
+The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_PW_D_STATUS`` implementing
+``IEEE 802.3-2022`` 30.9.1.1.5 aPSEPowerDetectionStatus.
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+    :identifiers: ethtool_c33_pse_pw_d_status
+
 PSE_SET
 =======
 
@@ -1763,6 +1779,7 @@ Request contents:
   ======================================  ======  =============================
   ``ETHTOOL_A_PSE_HEADER``                nested  request header
   ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL``       u32  Control PoDL PSE Admin state
+  ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL``        u32  Control PSE Admin state
   ======================================  ======  =============================
 
 When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` attribute is used
@@ -1770,6 +1787,9 @@ to control PoDL PSE Admin functions. This option is implementing
 ``IEEE 802.3-2018`` 30.15.1.2.1 acPoDLPSEAdminControl. See
 ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` for supported values.
 
+The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` implementing
+``IEEE 802.3-2022`` 30.9.1.2.1 acPSEAdminControl.
+
 RSS_GET
 =======
 
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index aef57a058f0d..2c981d443f27 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -82,6 +82,10 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
 		len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */
 	if (st->podl_pw_status > 0)
 		len += nla_total_size(sizeof(u32)); /* _PODL_PSE_PW_D_STATUS */
+	if (st->c33_admin_state > 0)
+		len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */
+	if (st->c33_pw_status > 0)
+		len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */
 
 	return len;
 }
@@ -103,6 +107,16 @@ static int pse_fill_reply(struct sk_buff *skb,
 			st->podl_pw_status))
 		return -EMSGSIZE;
 
+	if (st->c33_admin_state > 0 &&
+	    nla_put_u32(skb, ETHTOOL_A_C33_PSE_ADMIN_STATE,
+			st->c33_admin_state))
+		return -EMSGSIZE;
+
+	if (st->c33_pw_status > 0 &&
+	    nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_D_STATUS,
+			st->c33_pw_status))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
@@ -113,25 +127,18 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
 	[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
 		NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
 				 ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
+	[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] =
+		NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
+				 ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED),
 };
 
 static int
 ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
-{
-	return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL];
-}
-
-static int
-ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
 {
 	struct net_device *dev = req_info->dev;
-	struct pse_control_config config = {};
 	struct nlattr **tb = info->attrs;
 	struct phy_device *phydev;
 
-	/* this values are already validated by the ethnl_pse_set_policy */
-	config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
-
 	phydev = dev->phydev;
 	if (!phydev) {
 		NL_SET_ERR_MSG(info->extack, "No PHY is attached");
@@ -143,6 +150,39 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
 		return -EOPNOTSUPP;
 	}
 
+	if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] &&
+	    !pse_has_podl(phydev->psec)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL],
+				    "setting PoDL PSE admin control not supported");
+		return -EOPNOTSUPP;
+	}
+	if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL] &&
+	    !pse_has_c33(phydev->psec)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL],
+				    "setting C33 PSE admin control not supported");
+		return -EOPNOTSUPP;
+	}
+
+	return 1;
+}
+
+static int
+ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+	struct net_device *dev = req_info->dev;
+	struct pse_control_config config = {};
+	struct nlattr **tb = info->attrs;
+	struct phy_device *phydev;
+
+	phydev = dev->phydev;
+	/* These values are already validated by the ethnl_pse_set_policy */
+	if (pse_has_podl(phydev->psec))
+		config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+	if (pse_has_c33(phydev->psec))
+		config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]);
+
 	/* Return errno directly - PSE has no notification */
 	return pse_ethtool_set_config(phydev->psec, info->extack, &config);
 }
-- 
cgit v1.2.3-73-gaa49b


From 57b30d2a5475eb837fe1124f15f93a2a230c7bf3 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:52 +0200
Subject: netlink: specs: Modify pse attribute prefix

Remove podl from the attribute prefix to prepare the support of PoE pse
netlink spec.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-4-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 87ae7b397984..9a454cb924f7 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -899,17 +899,17 @@ attribute-sets:
         type: nest
         nested-attributes: header
       -
-        name: admin-state
+        name: podl-pse-admin-state
         type: u32
-        name-prefix: ethtool-a-podl-pse-
+        name-prefix: ethtool-a-
       -
-        name: admin-control
+        name: podl-pse-admin-control
         type: u32
-        name-prefix: ethtool-a-podl-pse-
+        name-prefix: ethtool-a-
       -
-        name: pw-d-status
+        name: podl-pse-pw-d-status
         type: u32
-        name-prefix: ethtool-a-podl-pse-
+        name-prefix: ethtool-a-
   -
     name: rss
     attributes:
@@ -1593,9 +1593,9 @@ operations:
         reply:
           attributes: &pse
             - header
-            - admin-state
-            - admin-control
-            - pw-d-status
+            - podl-pse-admin-state
+            - podl-pse-admin-control
+            - podl-pse-pw-d-status
       dump: *pse-get-op
     -
       name: pse-set
-- 
cgit v1.2.3-73-gaa49b


From f8586411e40ea8152c7a030e4f1d9bd9005e2628 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:53 +0200
Subject: netlink: specs: Expand the pse netlink command with PoE interface

Add the PoE pse attributes prefix to be able to use PoE interface.

Example usage:
./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema --do pse-get \
             --json '{"header":{"dev-name":"eth0"}}'
{'header': {'dev-index': 4, 'dev-name': 'eth0'},
 'c33-pse-admin-state': 3,
 'c33-pse-pw-d-status': 4}

./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema --do pse-set \
             --json '{"header":{"dev-name":"eth0"},
		     "c33-pse-admin-control":3}'

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-5-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 9a454cb924f7..00dc61358be8 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -910,6 +910,18 @@ attribute-sets:
         name: podl-pse-pw-d-status
         type: u32
         name-prefix: ethtool-a-
+      -
+        name: c33-pse-admin-state
+        type: u32
+        name-prefix: ethtool-a-
+      -
+        name: c33-pse-admin-control
+        type: u32
+        name-prefix: ethtool-a-
+      -
+        name: c33-pse-pw-d-status
+        type: u32
+        name-prefix: ethtool-a-
   -
     name: rss
     attributes:
@@ -1596,6 +1608,9 @@ operations:
             - podl-pse-admin-state
             - podl-pse-admin-control
             - podl-pse-pw-d-status
+            - c33-pse-admin-state
+            - c33-pse-admin-control
+            - c33-pse-pw-d-status
       dump: *pse-get-op
     -
       name: pse-set
-- 
cgit v1.2.3-73-gaa49b


From 9be9567a7c59b7314ea776f56945fe3fc28efe99 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:55 +0200
Subject: net: pse-pd: Add support for PSE PIs

The Power Sourcing Equipment Power Interface (PSE PI) plays a pivotal role
in the architecture of Power over Ethernet (PoE) systems. It is essentially
a blueprint that outlines how one or multiple power sources are connected
to the eight-pin modular jack, commonly known as the Ethernet RJ45 port.
This connection scheme is crucial for enabling the delivery of power
alongside data over Ethernet cables.

This patch adds support for getting the PSE controller node through PSE PI
device subnode.

This supports adds a way to get the PSE PI id from the pse_pi devicetree
subnode of a PSE controller node simply by reading the reg property.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-7-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/pse-pd/index.rst  |   1 +
 Documentation/networking/pse-pd/pse-pi.rst | 301 +++++++++++++++++++++++++++++
 drivers/net/pse-pd/pse_core.c              | 262 +++++++++++++++++++++----
 include/linux/pse-pd/pse.h                 |  38 +++-
 4 files changed, 565 insertions(+), 37 deletions(-)
 create mode 100644 Documentation/networking/pse-pd/pse-pi.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/pse-pd/index.rst b/Documentation/networking/pse-pd/index.rst
index 18197bc7303d..de28a5aee316 100644
--- a/Documentation/networking/pse-pd/index.rst
+++ b/Documentation/networking/pse-pd/index.rst
@@ -7,3 +7,4 @@ Power Sourcing Equipment (PSE) Documentation
    :maxdepth: 2
 
    introduction
+   pse-pi
diff --git a/Documentation/networking/pse-pd/pse-pi.rst b/Documentation/networking/pse-pd/pse-pi.rst
new file mode 100644
index 000000000000..5cad14fedc13
--- /dev/null
+++ b/Documentation/networking/pse-pd/pse-pi.rst
@@ -0,0 +1,301 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+PSE Power Interface (PSE PI) Documentation
+==========================================
+
+The Power Sourcing Equipment Power Interface (PSE PI) plays a pivotal role in
+the architecture of Power over Ethernet (PoE) systems. It is essentially a
+blueprint that outlines how one or multiple power sources are connected to the
+eight-pin modular jack, commonly known as the Ethernet RJ45 port. This
+connection scheme is crucial for enabling the delivery of power alongside data
+over Ethernet cables.
+
+Documentation and Standards
+---------------------------
+
+The IEEE 802.3 standard provides detailed documentation on the PSE PI.
+Specifically:
+
+- Section "33.2.3 PI pin assignments" covers the pin assignments for PoE
+  systems that utilize two pairs for power delivery.
+- Section "145.2.4 PSE PI" addresses the configuration for PoE systems that
+  deliver power over all four pairs of an Ethernet cable.
+
+PSE PI and Single Pair Ethernet
+-------------------------------
+
+Single Pair Ethernet (SPE) represents a different approach to Ethernet
+connectivity, utilizing just one pair of conductors for both data and power
+transmission. Unlike the configurations detailed in the PSE PI for standard
+Ethernet, which can involve multiple power sourcing arrangements across four or
+two pairs of wires, SPE operates on a simpler model due to its single-pair
+design. As a result, the complexities of choosing between alternative pin
+assignments for power delivery, as described in the PSE PI for multi-pair
+Ethernet, are not applicable to SPE.
+
+Understanding PSE PI
+--------------------
+
+The Power Sourcing Equipment Power Interface (PSE PI) is a framework defining
+how Power Sourcing Equipment (PSE) delivers power to Powered Devices (PDs) over
+Ethernet cables. It details two main configurations for power delivery, known
+as Alternative A and Alternative B, which are distinguished not only by their
+method of power transmission but also by the implications for polarity and data
+transmission direction.
+
+Alternative A and B Overview
+----------------------------
+
+- **Alternative A:** Utilizes RJ45 conductors 1, 2, 3 and 6. In either case of
+  networks 10/100BaseT or 1G/2G/5G/10GBaseT, the pairs used are carrying data.
+  The power delivery's polarity in this alternative can vary based on the MDI
+  (Medium Dependent Interface) or MDI-X (Medium Dependent Interface Crossover)
+  configuration.
+
+- **Alternative B:** Utilizes RJ45 conductors 4, 5, 7 and 8. In case of
+  10/100BaseT network the pairs used are spare pairs without data and are less
+  influenced by data transmission direction. This is not the case for
+  1G/2G/5G/10GBaseT network. Alternative B includes two configurations with
+  different polarities, known as variant X and variant S, to accommodate
+  different network requirements and device specifications.
+
+Table 145-3 PSE Pinout Alternatives
+-----------------------------------
+
+The following table outlines the pin configurations for both Alternative A and
+Alternative B.
+
++------------+-------------------+-----------------+-----------------+-----------------+
+| Conductor  | Alternative A     | Alternative A   | Alternative B   | Alternative B   |
+|            |    (MDI-X)        |      (MDI)      |        (X)      |        (S)      |
++============+===================+=================+=================+=================+
+| 1          | Negative V        | Positive V      | -               | -               |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 2          | Negative V        | Positive V      | -               | -               |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 3          | Positive V        | Negative V      | -               | -               |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 4          | -                 | -               | Negative V      | Positive V      |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 5          | -                 | -               | Negative V      | Positive V      |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 6          | Positive V        | Negative V      | -               | -               |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 7          | -                 | -               | Positive V      | Negative V      |
++------------+-------------------+-----------------+-----------------+-----------------+
+| 8          | -                 | -               | Positive V      | Negative V      |
++------------+-------------------+-----------------+-----------------+-----------------+
+
+.. note::
+    - "Positive V" and "Negative V" indicate the voltage polarity for each pin.
+    - "-" indicates that the pin is not used for power delivery in that
+      specific configuration.
+
+PSE PI compatibilities
+----------------------
+
+The following table outlines the compatibility between the pinout alternative
+and the 1000/2.5G/5G/10GBaseT in the PSE 2 pairs connection.
+
++---------+---------------+---------------------+-----------------------+
+| Variant | Alternative   | Power Feeding Type  | Compatibility with    |
+|         | (A/B)         | (Direct/Phantom)    | 1000/2.5G/5G/10GBaseT |
++=========+===============+=====================+=======================+
+| 1       | A             | Phantom             | Yes                   |
++---------+---------------+---------------------+-----------------------+
+| 2       | B             | Phantom             | Yes                   |
++---------+---------------+---------------------+-----------------------+
+| 3       | B             | Direct              | No                    |
++---------+---------------+---------------------+-----------------------+
+
+.. note::
+    - "Direct" indicate a variant where the power is injected directly to pairs
+       without using magnetics in case of spare pairs.
+    - "Phantom" indicate power path over coils/magnetics as it is done for
+       Alternative A variant.
+
+In case of PSE 4 pairs, a PSE supporting only 10/100BaseT (which mean Direct
+Power on pinout Alternative B) is not compatible with a 4 pairs
+1000/2.5G/5G/10GBaseT.
+
+PSE Power Interface (PSE PI) Connection Diagram
+-----------------------------------------------
+
+The diagram below illustrates the connection architecture between the RJ45
+port, the Ethernet PHY (Physical Layer), and the PSE PI (Power Sourcing
+Equipment Power Interface), demonstrating how power and data are delivered
+simultaneously through an Ethernet cable. The RJ45 port serves as the physical
+interface for these connections, with each of its eight pins connected to both
+the Ethernet PHY for data transmission and the PSE PI for power delivery.
+
+.. code-block::
+
+    +--------------------------+
+    |                          |
+    |          RJ45 Port       |
+    |                          |
+    +--+--+--+--+--+--+--+--+--+                +-------------+
+      1| 2| 3| 4| 5| 6| 7| 8|                   |             |
+       |  |  |  |  |  |  |  o-------------------+             |
+       |  |  |  |  |  |  o--|-------------------+             +<--- PSE 1
+       |  |  |  |  |  o--|--|-------------------+             |
+       |  |  |  |  o--|--|--|-------------------+             |
+       |  |  |  o--|--|--|--|-------------------+  PSE PI     |
+       |  |  o--|--|--|--|--|-------------------+             |
+       |  o--|--|--|--|--|--|-------------------+             +<--- PSE 2 (optional)
+       o--|--|--|--|--|--|--|-------------------+             |
+       |  |  |  |  |  |  |  |                   |             |
+    +--+--+--+--+--+--+--+--+--+                +-------------+
+    |                          |
+    |       Ethernet PHY       |
+    |                          |
+    +--------------------------+
+
+Simple PSE PI Configuration for Alternative A
+---------------------------------------------
+
+The diagram below illustrates a straightforward PSE PI (Power Sourcing
+Equipment Power Interface) configuration designed to support the Alternative A
+setup for Power over Ethernet (PoE). This implementation is tailored to provide
+power delivery through the data-carrying pairs of an Ethernet cable, suitable
+for either MDI or MDI-X configurations, albeit supporting one variation at a
+time.
+
+.. code-block::
+
+         +-------------+
+         |    PSE PI   |
+ 8  -----+                             +-------------+
+ 7  -----+                    Rail 1   |
+ 6  -----+------+----------------------+
+ 5  -----+      |                      |
+ 4  -----+      |             Rail 2   |  PSE 1
+ 3  -----+------/         +------------+
+ 2  -----+--+-------------/            |
+ 1  -----+--/                          +-------------+
+         |
+         +-------------+
+
+In this configuration:
+
+- Pins 1 and 2, as well as pins 3 and 6, are utilized for power delivery in
+  addition to data transmission. This aligns with the standard wiring for
+  10/100BaseT Ethernet networks where these pairs are used for data.
+- Rail 1 and Rail 2 represent the positive and negative voltage rails, with
+  Rail 1 connected to pins 1 and 2, and Rail 2 connected to pins 3 and 6.
+  More advanced PSE PI configurations may include integrated or external
+  switches to change the polarity of the voltage rails, allowing for
+  compatibility with both MDI and MDI-X configurations.
+
+More complex PSE PI configurations may include additional components, to support
+Alternative B, or to provide additional features such as power management, or
+additional power delivery capabilities such as 2-pair or 4-pair power delivery.
+
+.. code-block::
+
+         +-------------+
+         |    PSE PI   |
+         |        +---+
+ 8  -----+--------+   |                 +-------------+
+ 7  -----+--------+   |       Rail 1   |
+ 6  -----+--------+   +-----------------+
+ 5  -----+--------+   |                |
+ 4  -----+--------+   |       Rail 2   |  PSE 1
+ 3  -----+--------+   +----------------+
+ 2  -----+--------+   |                |
+ 1  -----+--------+   |                 +-------------+
+         |        +---+
+         +-------------+
+
+Device Tree Configuration: Describing PSE PI Configurations
+-----------------------------------------------------------
+
+The necessity for a separate PSE PI node in the device tree is influenced by
+the intricacy of the Power over Ethernet (PoE) system's setup. Here are
+descriptions of both simple and complex PSE PI configurations to illustrate
+this decision-making process:
+
+**Simple PSE PI Configuration:**
+In a straightforward scenario, the PSE PI setup involves a direct, one-to-one
+connection between a single PSE controller and an Ethernet port. This setup
+typically supports basic PoE functionality without the need for dynamic
+configuration or management of multiple power delivery modes. For such simple
+configurations, detailing the PSE PI within the existing PSE controller's node
+may suffice, as the system does not encompass additional complexity that
+warrants a separate node. The primary focus here is on the clear and direct
+association of power delivery to a specific Ethernet port.
+
+**Complex PSE PI Configuration:**
+Contrastingly, a complex PSE PI setup may encompass multiple PSE controllers or
+auxiliary circuits that collectively manage power delivery to one Ethernet
+port. Such configurations might support a range of PoE standards and require
+the capability to dynamically configure power delivery based on the operational
+mode (e.g., PoE2 versus PoE4) or specific requirements of connected devices. In
+these instances, a dedicated PSE PI node becomes essential for accurately
+documenting the system architecture. This node would serve to detail the
+interactions between different PSE controllers, the support for various PoE
+modes, and any additional logic required to coordinate power delivery across
+the network infrastructure.
+
+**Guidance:**
+
+For simple PSE setups, including PSE PI information in the PSE controller node
+might suffice due to the straightforward nature of these systems. However,
+complex configurations, involving multiple components or advanced PoE features,
+benefit from a dedicated PSE PI node. This method adheres to IEEE 802.3
+specifications, improving documentation clarity and ensuring accurate
+representation of the PoE system's complexity.
+
+PSE PI Node: Essential Information
+----------------------------------
+
+The PSE PI (Power Sourcing Equipment Power Interface) node in a device tree can
+include several key pieces of information critical for defining the power
+delivery capabilities and configurations of a PoE (Power over Ethernet) system.
+Below is a list of such information, along with explanations for their
+necessity and reasons why they might not be found within a PSE controller node:
+
+1. **Powered Pairs Configuration**
+
+   - *Description:* Identifies the pairs used for power delivery in the
+     Ethernet cable.
+   - *Necessity:* Essential to ensure the correct pairs are powered according
+     to the board's design.
+   - *PSE Controller Node:* Typically lacks details on physical pair usage,
+     focusing on power regulation.
+
+2. **Polarity of Powered Pairs**
+
+   - *Description:* Specifies the polarity (positive or negative) for each
+     powered pair.
+   - *Necessity:* Critical for safe and effective power transmission to PDs.
+   - *PSE Controller Node:* Polarity management may exceed the standard
+     functionalities of PSE controllers.
+
+3. **PSE Cells Association**
+
+   - *Description:* Details the association of PSE cells with Ethernet ports or
+     pairs in multi-cell configurations.
+   - *Necessity:* Allows for optimized power resource allocation in complex
+     systems.
+   - *PSE Controller Node:* Controllers may not manage cell associations
+     directly, focusing instead on power flow regulation.
+
+4. **Support for PoE Standards**
+
+   - *Description:* Lists the PoE standards and configurations supported by the
+     system.
+   - *Necessity:* Ensures system compatibility with various PDs and adherence
+     to industry standards.
+   - *PSE Controller Node:* Specific capabilities may depend on the overall PSE
+     PI design rather than the controller alone. Multiple PSE cells per PI
+     do not necessarily imply support for multiple PoE standards.
+
+5. **Protection Mechanisms**
+
+   - *Description:* Outlines additional protection mechanisms, such as
+     overcurrent protection and thermal management.
+   - *Necessity:* Provides extra safety and stability, complementing PSE
+     controller protections.
+   - *PSE Controller Node:* Some protections may be implemented via
+     board-specific hardware or algorithms external to the controller.
diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c
index fed006cbc185..ca5ced8e0d8a 100644
--- a/drivers/net/pse-pd/pse_core.c
+++ b/drivers/net/pse-pd/pse_core.c
@@ -27,38 +27,182 @@ struct pse_control {
 	struct kref refcnt;
 };
 
+static int of_load_single_pse_pi_pairset(struct device_node *node,
+					 struct pse_pi *pi,
+					 int pairset_num)
+{
+	struct device_node *pairset_np;
+	const char *name;
+	int ret;
+
+	ret = of_property_read_string_index(node, "pairset-names",
+					    pairset_num, &name);
+	if (ret)
+		return ret;
+
+	if (!strcmp(name, "alternative-a")) {
+		pi->pairset[pairset_num].pinout = ALTERNATIVE_A;
+	} else if (!strcmp(name, "alternative-b")) {
+		pi->pairset[pairset_num].pinout = ALTERNATIVE_B;
+	} else {
+		pr_err("pse: wrong pairset-names value %s (%pOF)\n",
+		       name, node);
+		return -EINVAL;
+	}
+
+	pairset_np = of_parse_phandle(node, "pairsets", pairset_num);
+	if (!pairset_np)
+		return -ENODEV;
+
+	pi->pairset[pairset_num].np = pairset_np;
+
+	return 0;
+}
+
 /**
- * of_pse_zero_xlate - dummy function for controllers with one only control
- * @pcdev: a pointer to the PSE controller device
- * @pse_spec: PSE line specifier as found in the device tree
+ * of_load_pse_pi_pairsets - load PSE PI pairsets pinout and polarity
+ * @node: a pointer of the device node
+ * @pi: a pointer of the PSE PI to fill
+ * @npairsets: the number of pairsets (1 or 2) used by the PI
  *
- * This static translation function is used by default if of_xlate in
- * :c:type:`pse_controller_dev` is not set. It is useful for all PSE
- * controllers with #pse-cells = <0>.
+ * Return: 0 on success and failure value on error
  */
-static int of_pse_zero_xlate(struct pse_controller_dev *pcdev,
-			     const struct of_phandle_args *pse_spec)
+static int of_load_pse_pi_pairsets(struct device_node *node,
+				   struct pse_pi *pi,
+				   int npairsets)
 {
-	return 0;
+	int i, ret;
+
+	ret = of_property_count_strings(node, "pairset-names");
+	if (ret != npairsets) {
+		pr_err("pse: amount of pairsets and pairset-names is not equal %d != %d (%pOF)\n",
+		       npairsets, ret, node);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < npairsets; i++) {
+		ret = of_load_single_pse_pi_pairset(node, pi, i);
+		if (ret)
+			goto out;
+	}
+
+	if (npairsets == 2 &&
+	    pi->pairset[0].pinout == pi->pairset[1].pinout) {
+		pr_err("pse: two PI pairsets can not have identical pinout (%pOF)",
+		       node);
+		ret = -EINVAL;
+	}
+
+out:
+	/* If an error appears, release all the pairset device node kref */
+	if (ret) {
+		of_node_put(pi->pairset[0].np);
+		pi->pairset[0].np = NULL;
+		of_node_put(pi->pairset[1].np);
+		pi->pairset[1].np = NULL;
+	}
+
+	return ret;
+}
+
+static void pse_release_pis(struct pse_controller_dev *pcdev)
+{
+	int i;
+
+	for (i = 0; i <= pcdev->nr_lines; i++) {
+		of_node_put(pcdev->pi[i].pairset[0].np);
+		of_node_put(pcdev->pi[i].pairset[1].np);
+		of_node_put(pcdev->pi[i].np);
+	}
+	kfree(pcdev->pi);
 }
 
 /**
- * of_pse_simple_xlate - translate pse_spec to the PSE line number
+ * of_load_pse_pis - load all the PSE PIs
  * @pcdev: a pointer to the PSE controller device
- * @pse_spec: PSE line specifier as found in the device tree
  *
- * This static translation function is used by default if of_xlate in
- * :c:type:`pse_controller_dev` is not set. It is useful for all PSE
- * controllers with 1:1 mapping, where PSE lines can be indexed by number
- * without gaps.
+ * Return: 0 on success and failure value on error
  */
-static int of_pse_simple_xlate(struct pse_controller_dev *pcdev,
-			       const struct of_phandle_args *pse_spec)
+static int of_load_pse_pis(struct pse_controller_dev *pcdev)
 {
-	if (pse_spec->args[0] >= pcdev->nr_lines)
-		return -EINVAL;
+	struct device_node *np = pcdev->dev->of_node;
+	struct device_node *node, *pis;
+	int ret;
 
-	return pse_spec->args[0];
+	if (!np)
+		return -ENODEV;
+
+	pis = of_get_child_by_name(np, "pse-pis");
+	if (!pis) {
+		/* no description of PSE PIs */
+		pcdev->no_of_pse_pi = true;
+		return 0;
+	}
+
+	pcdev->pi = kcalloc(pcdev->nr_lines, sizeof(*pcdev->pi), GFP_KERNEL);
+	if (!pcdev->pi) {
+		of_node_put(pis);
+		return -ENOMEM;
+	}
+
+	for_each_child_of_node(pis, node) {
+		struct pse_pi pi = {0};
+		u32 id;
+
+		if (!of_node_name_eq(node, "pse-pi"))
+			continue;
+
+		ret = of_property_read_u32(node, "reg", &id);
+		if (ret) {
+			dev_err(pcdev->dev,
+				"can't get reg property for node '%pOF'",
+				node);
+			goto out;
+		}
+
+		if (id >= pcdev->nr_lines) {
+			dev_err(pcdev->dev,
+				"reg value (%u) is out of range (%u) (%pOF)\n",
+				id, pcdev->nr_lines, node);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (pcdev->pi[id].np) {
+			dev_err(pcdev->dev,
+				"other node with same reg value was already registered. %pOF : %pOF\n",
+				pcdev->pi[id].np, node);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = of_count_phandle_with_args(node, "pairsets", NULL);
+		/* npairsets is limited to value one or two */
+		if (ret == 1 || ret == 2) {
+			ret = of_load_pse_pi_pairsets(node, &pi, ret);
+			if (ret)
+				goto out;
+		} else if (ret != ENOENT) {
+			dev_err(pcdev->dev,
+				"error: wrong number of pairsets. Should be 1 or 2, got %d (%pOF)\n",
+				ret, node);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		of_node_get(node);
+		pi.np = node;
+		memcpy(&pcdev->pi[id], &pi, sizeof(pi));
+	}
+
+	of_node_put(pis);
+	return 0;
+
+out:
+	pse_release_pis(pcdev);
+	of_node_put(node);
+	of_node_put(pis);
+	return ret;
 }
 
 /**
@@ -67,16 +211,18 @@ static int of_pse_simple_xlate(struct pse_controller_dev *pcdev,
  */
 int pse_controller_register(struct pse_controller_dev *pcdev)
 {
-	if (!pcdev->of_xlate) {
-		if (pcdev->of_pse_n_cells == 0)
-			pcdev->of_xlate = of_pse_zero_xlate;
-		else if (pcdev->of_pse_n_cells == 1)
-			pcdev->of_xlate = of_pse_simple_xlate;
-	}
+	int ret;
 
 	mutex_init(&pcdev->lock);
 	INIT_LIST_HEAD(&pcdev->pse_control_head);
 
+	if (!pcdev->nr_lines)
+		pcdev->nr_lines = 1;
+
+	ret = of_load_pse_pis(pcdev);
+	if (ret)
+		return ret;
+
 	mutex_lock(&pse_list_mutex);
 	list_add(&pcdev->list, &pse_controller_list);
 	mutex_unlock(&pse_list_mutex);
@@ -91,6 +237,7 @@ EXPORT_SYMBOL_GPL(pse_controller_register);
  */
 void pse_controller_unregister(struct pse_controller_dev *pcdev)
 {
+	pse_release_pis(pcdev);
 	mutex_lock(&pse_list_mutex);
 	list_del(&pcdev->list);
 	mutex_unlock(&pse_list_mutex);
@@ -203,8 +350,48 @@ pse_control_get_internal(struct pse_controller_dev *pcdev, unsigned int index)
 	return psec;
 }
 
-struct pse_control *
-of_pse_control_get(struct device_node *node)
+/**
+ * of_pse_match_pi - Find the PSE PI id matching the device node phandle
+ * @pcdev: a pointer to the PSE controller device
+ * @np: a pointer to the device node
+ *
+ * Return: id of the PSE PI, -EINVAL if not found
+ */
+static int of_pse_match_pi(struct pse_controller_dev *pcdev,
+			   struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i <= pcdev->nr_lines; i++) {
+		if (pcdev->pi[i].np == np)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * psec_id_xlate - translate pse_spec to the PSE line number according
+ *		   to the number of pse-cells in case of no pse_pi node
+ * @pcdev: a pointer to the PSE controller device
+ * @pse_spec: PSE line specifier as found in the device tree
+ *
+ * Return: 0 if #pse-cells = <0>. Return PSE line number otherwise.
+ */
+static int psec_id_xlate(struct pse_controller_dev *pcdev,
+			 const struct of_phandle_args *pse_spec)
+{
+	if (!pcdev->of_pse_n_cells)
+		return 0;
+
+	if (pcdev->of_pse_n_cells > 1 ||
+	    pse_spec->args[0] >= pcdev->nr_lines)
+		return -EINVAL;
+
+	return pse_spec->args[0];
+}
+
+struct pse_control *of_pse_control_get(struct device_node *node)
 {
 	struct pse_controller_dev *r, *pcdev;
 	struct of_phandle_args args;
@@ -222,7 +409,14 @@ of_pse_control_get(struct device_node *node)
 	mutex_lock(&pse_list_mutex);
 	pcdev = NULL;
 	list_for_each_entry(r, &pse_controller_list, list) {
-		if (args.np == r->dev->of_node) {
+		if (!r->no_of_pse_pi) {
+			ret = of_pse_match_pi(r, args.np);
+			if (ret >= 0) {
+				pcdev = r;
+				psec_id = ret;
+				break;
+			}
+		} else if (args.np == r->dev->of_node) {
 			pcdev = r;
 			break;
 		}
@@ -238,10 +432,12 @@ of_pse_control_get(struct device_node *node)
 		goto out;
 	}
 
-	psec_id = pcdev->of_xlate(pcdev, &args);
-	if (psec_id < 0) {
-		psec = ERR_PTR(psec_id);
-		goto out;
+	if (pcdev->no_of_pse_pi) {
+		psec_id = psec_id_xlate(pcdev, &args);
+		if (psec_id < 0) {
+			psec = ERR_PTR(psec_id);
+			goto out;
+		}
 	}
 
 	/* pse_list_mutex also protects the pcdev's pse_control list */
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 19589571157f..e19d58b5e777 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -64,6 +64,36 @@ struct device_node;
 struct of_phandle_args;
 struct pse_control;
 
+/* PSE PI pairset pinout can either be Alternative A or Alternative B */
+enum pse_pi_pairset_pinout {
+	ALTERNATIVE_A,
+	ALTERNATIVE_B,
+};
+
+/**
+ * struct pse_pi_pairset - PSE PI pairset entity describing the pinout
+ *			   alternative ant its phandle
+ *
+ * @pinout: description of the pinout alternative
+ * @np: device node pointer describing the pairset phandle
+ */
+struct pse_pi_pairset {
+	enum pse_pi_pairset_pinout pinout;
+	struct device_node *np;
+};
+
+/**
+ * struct pse_pi - PSE PI (Power Interface) entity as described in
+ *		   IEEE 802.3-2022 145.2.4
+ *
+ * @pairset: table of the PSE PI pinout alternative for the two pairset
+ * @np: device node pointer of the PSE PI node
+ */
+struct pse_pi {
+	struct pse_pi_pairset pairset[2];
+	struct device_node *np;
+};
+
 /**
  * struct pse_controller_dev - PSE controller entity that might
  *                             provide multiple PSE controls
@@ -73,11 +103,11 @@ struct pse_control;
  * @pse_control_head: head of internal list of requested PSE controls
  * @dev: corresponding driver model device struct
  * @of_pse_n_cells: number of cells in PSE line specifiers
- * @of_xlate: translation function to translate from specifier as found in the
- *            device tree to id as given to the PSE control ops
  * @nr_lines: number of PSE controls in this controller device
  * @lock: Mutex for serialization access to the PSE controller
  * @types: types of the PSE controller
+ * @pi: table of PSE PIs described in this controller device
+ * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used
  */
 struct pse_controller_dev {
 	const struct pse_controller_ops *ops;
@@ -86,11 +116,11 @@ struct pse_controller_dev {
 	struct list_head pse_control_head;
 	struct device *dev;
 	int of_pse_n_cells;
-	int (*of_xlate)(struct pse_controller_dev *pcdev,
-			const struct of_phandle_args *pse_spec);
 	unsigned int nr_lines;
 	struct mutex lock;
 	enum ethtool_pse_types types;
+	struct pse_pi *pi;
+	bool no_of_pse_pi;
 };
 
 #if IS_ENABLED(CONFIG_PSE_CONTROLLER)
-- 
cgit v1.2.3-73-gaa49b


From b17181a88fb90d7eab51ab7576e8985cc2a8d1bc Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:56 +0200
Subject: dt-bindings: net: pse-pd: Add another way of describing several PSE
 PIs

PSE PI setup may encompass multiple PSE controllers or auxiliary circuits
that collectively manage power delivery to one Ethernet port.
Such configurations might support a range of PoE standards and require
the capability to dynamically configure power delivery based on the
operational mode (e.g., PoE2 versus PoE4) or specific requirements of
connected devices. In these instances, a dedicated PSE PI node becomes
essential for accurately documenting the system architecture. This node
would serve to detail the interactions between different PSE controllers,
the support for various PoE modes, and any additional logic required to
coordinate power delivery across the network infrastructure.

The old usage of "#pse-cells" is unsuficient as it carries only the PSE PI
index information.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-8-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../bindings/net/pse-pd/pse-controller.yaml        | 101 ++++++++++++++++++++-
 1 file changed, 98 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
index 2d382faca0e6..a12cda8aa764 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
@@ -13,6 +13,7 @@ description: Binding for the Power Sourcing Equipment (PSE) as defined in the
 
 maintainers:
   - Oleksij Rempel <o.rempel@pengutronix.de>
+  - Kory Maincent <kory.maincent@bootlin.com>
 
 properties:
   $nodename:
@@ -22,11 +23,105 @@ properties:
     description:
       Used to uniquely identify a PSE instance within an IC. Will be
       0 on PSE nodes with only a single output and at least 1 on nodes
-      controlling several outputs.
+      controlling several outputs which are not described in the pse-pis
+      subnode. This property is deprecated, please use pse-pis instead.
     enum: [0, 1]
 
-required:
-  - "#pse-cells"
+  pse-pis:
+    type: object
+    description:
+      Overview of the PSE PIs provided by the controller.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+
+    patternProperties:
+      "^pse-pi@[0-9a-f]+$":
+        type: object
+        description:
+          PSE PI for power delivery via pairsets, compliant with IEEE
+          802.3-2022, Section 145.2.4. Each pairset comprises a positive and
+          a negative VPSE pair, adhering to the pinout configurations
+          detailed in the standard.
+          See Documentation/networking/pse-pd/pse-pi.rst for details.
+
+        properties:
+          reg:
+            description:
+              Address describing the PSE PI index.
+            maxItems: 1
+
+          "#pse-cells":
+            const: 0
+
+          pairset-names:
+            $ref: /schemas/types.yaml#/definitions/string-array
+            description:
+              Names of the pairsets as per IEEE 802.3-2022, Section 145.2.4.
+              Each name should correspond to a phandle in the 'pairset'
+              property pointing to the power supply for that pairset.
+            minItems: 1
+            maxItems: 2
+            items:
+              enum:
+                - alternative-a
+                - alternative-b
+
+          pairsets:
+            $ref: /schemas/types.yaml#/definitions/phandle-array
+            description:
+              List of phandles, each pointing to the power supply for the
+              corresponding pairset named in 'pairset-names'. This property
+              aligns with IEEE 802.3-2022, Section 33.2.3 and 145.2.4.
+              PSE Pinout Alternatives (as per IEEE 802.3-2022 Table 145\u20133)
+              |-----------|---------------|---------------|---------------|---------------|
+              | Conductor | Alternative A | Alternative A | Alternative B | Alternative B |
+              |           |    (MDI-X)    |     (MDI)     |      (X)      |      (S)      |
+              |-----------|---------------|---------------|---------------|---------------|
+              | 1         | Negative VPSE | Positive VPSE | -             | -             |
+              | 2         | Negative VPSE | Positive VPSE | -             | -             |
+              | 3         | Positive VPSE | Negative VPSE | -             | -             |
+              | 4         | -             | -             | Negative VPSE | Positive VPSE |
+              | 5         | -             | -             | Negative VPSE | Positive VPSE |
+              | 6         | Positive VPSE | Negative VPSE | -             | -             |
+              | 7         | -             | -             | Positive VPSE | Negative VPSE |
+              | 8         | -             | -             | Positive VPSE | Negative VPSE |
+            minItems: 1
+            maxItems: 2
+
+          polarity-supported:
+            $ref: /schemas/types.yaml#/definitions/string-array
+            description:
+              Polarity configuration supported by the PSE PI pairsets.
+            minItems: 1
+            maxItems: 4
+            items:
+              enum:
+                - MDI-X
+                - MDI
+                - X
+                - S
+
+          vpwr-supply:
+            description: Regulator power supply for the PSE PI.
+
+        required:
+          - reg
+          - "#pse-cells"
+
+oneOf:
+  - required:
+      - "#pse-cells"
+  - required:
+      - pse-pis
 
 additionalProperties: true
 
-- 
cgit v1.2.3-73-gaa49b


From 9c1de033afad216d02fb4efec22d40dd000e72c2 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:39:59 +0200
Subject: dt-bindings: net: pse-pd: Add bindings for PD692x0 PSE controller

Add the PD692x0 I2C Power Sourcing Equipment controller device tree
bindings documentation.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-11-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../bindings/net/pse-pd/microchip,pd692x0.yaml     | 169 +++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
new file mode 100644
index 000000000000..828439398fdf
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pse-pd/microchip,pd692x0.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PD692x0 Power Sourcing Equipment controller
+
+maintainers:
+  - Kory Maincent <kory.maincent@bootlin.com>
+
+allOf:
+  - $ref: pse-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - microchip,pd69200
+      - microchip,pd69210
+      - microchip,pd69220
+
+  reg:
+    maxItems: 1
+
+  managers:
+    type: object
+    description:
+      List of the PD69208T4/PD69204T4/PD69208M PSE managers. Each manager
+      have 4 or 8 physical ports according to the chip version. No need to
+      specify the SPI chip select as it is automatically detected by the
+      PD692x0 PSE controller. The PSE managers have to be described from
+      the lowest chip select to the greatest one, which is the detection
+      behavior of the PD692x0 PSE controller. The PD692x0 support up to
+      12 PSE managers which can expose up to 96 physical ports. All
+      physical ports available on a manager have to be described in the
+      incremental order even if they are not used.
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+    required:
+      - "#address-cells"
+      - "#size-cells"
+
+    patternProperties:
+      "^manager@0[0-9a-b]$":
+        type: object
+        description:
+          PD69208T4/PD69204T4/PD69208M PSE manager exposing 4 or 8 physical
+          ports.
+
+        properties:
+          reg:
+            description:
+              Incremental index of the PSE manager starting from 0, ranging
+              from lowest to highest chip select, up to 11.
+            maxItems: 1
+
+          "#address-cells":
+            const: 1
+
+          "#size-cells":
+            const: 0
+
+        patternProperties:
+          '^port@[0-7]$':
+            type: object
+            required:
+              - reg
+            additionalProperties: false
+
+        required:
+          - reg
+          - "#address-cells"
+          - "#size-cells"
+
+required:
+  - compatible
+  - reg
+  - pse-pis
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      ethernet-pse@3c {
+        compatible = "microchip,pd69200";
+        reg = <0x3c>;
+
+        managers {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          manager@0 {
+            reg = <0>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            phys0: port@0 {
+              reg = <0>;
+            };
+
+            phys1: port@1 {
+              reg = <1>;
+            };
+
+            phys2: port@2 {
+              reg = <2>;
+            };
+
+            phys3: port@3 {
+              reg = <3>;
+            };
+          };
+
+          manager@1 {
+            reg = <1>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            phys4: port@0 {
+              reg = <0>;
+            };
+
+            phys5: port@1 {
+              reg = <1>;
+            };
+
+            phys6: port@2 {
+              reg = <2>;
+            };
+
+            phys7: port@3 {
+              reg = <3>;
+            };
+          };
+        };
+
+        pse-pis {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          pse_pi0: pse-pi@0 {
+            reg = <0>;
+            #pse-cells = <0>;
+            pairset-names = "alternative-a", "alternative-b";
+            pairsets = <&phys0>, <&phys1>;
+            polarity-supported = "MDI", "S";
+            vpwr-supply = <&vpwr1>;
+          };
+          pse_pi1: pse-pi@1 {
+            reg = <1>;
+            #pse-cells = <0>;
+            pairset-names = "alternative-a";
+            pairsets = <&phys2>;
+            polarity-supported = "MDI";
+            vpwr-supply = <&vpwr2>;
+          };
+        };
+      };
+    };
-- 
cgit v1.2.3-73-gaa49b


From f562202fedadacdd39f954a371259c2d820aa9b4 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Wed, 17 Apr 2024 16:40:01 +0200
Subject: dt-bindings: net: pse-pd: Add bindings for TPS23881 PSE controller

Add the TPS23881 I2C Power Sourcing Equipment controller device tree
bindings documentation.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20240417-feature_poe-v9-13-242293fd1900@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../bindings/net/pse-pd/ti,tps23881.yaml           | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
new file mode 100644
index 000000000000..4147adb11e10
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pse-pd/ti,tps23881.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI TPS23881 Power Sourcing Equipment controller
+
+maintainers:
+  - Kory Maincent <kory.maincent@bootlin.com>
+
+allOf:
+  - $ref: pse-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - ti,tps23881
+
+  reg:
+    maxItems: 1
+
+  '#pse-cells':
+    const: 1
+
+  channels:
+    description: each set of 8 ports can be assigned to one physical
+      channels or two for PoE4. This parameter describes the configuration
+      of the ports conversion matrix that establishes relationship between
+      the logical ports and the physical channels.
+    type: object
+
+    patternProperties:
+      '^channel@[0-7]$':
+        type: object
+        required:
+          - reg
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      ethernet-pse@20 {
+        compatible = "ti,tps23881";
+        reg = <0x20>;
+
+        channels {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          phys0: channel@0 {
+            reg = <0>;
+          };
+
+          phys1: channel@1 {
+            reg = <1>;
+          };
+
+          phys2: channel@2 {
+            reg = <2>;
+          };
+        };
+
+        pse-pis {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          pse_pi0: pse-pi@0 {
+            reg = <0>;
+            #pse-cells = <0>;
+            pairset-names = "alternative-a", "alternative-b";
+            pairsets = <&phys0>, <&phys1>;
+            polarity-supported = "MDI", "S";
+            vpwr-supply = <&vpwr1>;
+          };
+
+          pse_pi1: pse-pi@1 {
+            reg = <1>;
+            #pse-cells = <0>;
+            pairset-names = "alternative-a";
+            pairsets = <&phys2>;
+            polarity-supported = "MDI";
+            vpwr-supply = <&vpwr2>;
+          };
+        };
+      };
+    };
-- 
cgit v1.2.3-73-gaa49b


From db50040d09cc511889b2c949a0c93d017e44f081 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler1968@googlemail.com>
Date: Fri, 19 Apr 2024 13:36:17 -0700
Subject: bpf, docs: Clarify helper ID and pointer terms in instruction-set.rst

Per IETF 119 meeting discussion and mailing list discussion at
https://mailarchive.ietf.org/arch/msg/bpf/2JwWQwFdOeMGv0VTbD0CKWwAOEA/
the following changes are made.

First, say call by "static ID" rather than call by "address"

Second, change "pointer" to "address"

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20240419203617.6850-1-dthaler1968@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/standardization/instruction-set.rst        | 48 +++++++++++-----------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 8d0781f0bd17..1f92551a34b9 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -443,27 +443,27 @@ otherwise identical operations, and indicates the base64 conformance
 group unless otherwise specified.
 The 'code' field encodes the operation as below:
 
-========  =====  =======  ===============================  ===================================================
-code      value  src_reg  description                      notes
-========  =====  =======  ===============================  ===================================================
-JA        0x0    0x0      PC += offset                     {JA, K, JMP} only
-JA        0x0    0x0      PC += imm                        {JA, K, JMP32} only
+========  =====  =======  =================================  ===================================================
+code      value  src_reg  description                        notes
+========  =====  =======  =================================  ===================================================
+JA        0x0    0x0      PC += offset                       {JA, K, JMP} only
+JA        0x0    0x0      PC += imm                          {JA, K, JMP32} only
 JEQ       0x1    any      PC += offset if dst == src
-JGT       0x2    any      PC += offset if dst > src        unsigned
-JGE       0x3    any      PC += offset if dst >= src       unsigned
+JGT       0x2    any      PC += offset if dst > src          unsigned
+JGE       0x3    any      PC += offset if dst >= src         unsigned
 JSET      0x4    any      PC += offset if dst & src
 JNE       0x5    any      PC += offset if dst != src
-JSGT      0x6    any      PC += offset if dst > src        signed
-JSGE      0x7    any      PC += offset if dst >= src       signed
-CALL      0x8    0x0      call helper function by address  {CALL, K, JMP} only, see `Helper functions`_
-CALL      0x8    0x1      call PC += imm                   {CALL, K, JMP} only, see `Program-local functions`_
-CALL      0x8    0x2      call helper function by BTF ID   {CALL, K, JMP} only, see `Helper functions`_
-EXIT      0x9    0x0      return                           {CALL, K, JMP} only
-JLT       0xa    any      PC += offset if dst < src        unsigned
-JLE       0xb    any      PC += offset if dst <= src       unsigned
-JSLT      0xc    any      PC += offset if dst < src        signed
-JSLE      0xd    any      PC += offset if dst <= src       signed
-========  =====  =======  ===============================  ===================================================
+JSGT      0x6    any      PC += offset if dst > src          signed
+JSGE      0x7    any      PC += offset if dst >= src         signed
+CALL      0x8    0x0      call helper function by static ID  {CALL, K, JMP} only, see `Helper functions`_
+CALL      0x8    0x1      call PC += imm                     {CALL, K, JMP} only, see `Program-local functions`_
+CALL      0x8    0x2      call helper function by BTF ID     {CALL, K, JMP} only, see `Helper functions`_
+EXIT      0x9    0x0      return                             {CALL, K, JMP} only
+JLT       0xa    any      PC += offset if dst < src          unsigned
+JLE       0xb    any      PC += offset if dst <= src         unsigned
+JSLT      0xc    any      PC += offset if dst < src          signed
+JSLE      0xd    any      PC += offset if dst <= src         signed
+========  =====  =======  =================================  ===================================================
 
 The BPF program needs to store the return value into register R0 before doing an
 ``EXIT``.
@@ -498,9 +498,9 @@ Helper functions
 Helper functions are a concept whereby BPF programs can call into a
 set of function calls exposed by the underlying platform.
 
-Historically, each helper function was identified by an address
+Historically, each helper function was identified by a static ID
 encoded in the 'imm' field.  The available helper functions may differ
-for each program type, but address values are unique across all program types.
+for each program type, but static IDs are unique across all program types.
 
 Platforms that support the BPF Type Format (BTF) support identifying
 a helper function by a BTF ID encoded in the 'imm' field, where the BTF ID
@@ -667,11 +667,11 @@ src_reg  pseudocode                                 imm type     dst type
 =======  =========================================  ===========  ==============
 0x0      dst = (next_imm << 32) | imm               integer      integer
 0x1      dst = map_by_fd(imm)                       map fd       map
-0x2      dst = map_val(map_by_fd(imm)) + next_imm   map fd       data pointer
-0x3      dst = var_addr(imm)                        variable id  data pointer
-0x4      dst = code_addr(imm)                       integer      code pointer
+0x2      dst = map_val(map_by_fd(imm)) + next_imm   map fd       data address
+0x3      dst = var_addr(imm)                        variable id  data address
+0x4      dst = code_addr(imm)                       integer      code address
 0x5      dst = map_by_idx(imm)                      map index    map
-0x6      dst = map_val(map_by_idx(imm)) + next_imm  map index    data pointer
+0x6      dst = map_val(map_by_idx(imm)) + next_imm  map index    data address
 =======  =========================================  ===========  ==============
 
 where
-- 
cgit v1.2.3-73-gaa49b


From 735f5b8a7ccf383e50d76f7d1c25769eee474812 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler1968@googlemail.com>
Date: Fri, 19 Apr 2024 14:38:26 -0700
Subject: bpf, docs: Fix formatting nit in instruction-set.rst

Other places that had pseudocode were prefixed with ::
so as to appear in a literal block, but one place was inconsistent.
This patch fixes that inconsistency.

Signed-off-by: Dave Thaler <dthaler1968@googlemail.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20240419213826.7301-1-dthaler1968@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/standardization/instruction-set.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 1f92551a34b9..d03d90afbd7d 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -370,7 +370,7 @@ Note that there are varying definitions of the signed modulo operation
 when the dividend or divisor are negative, where implementations often
 vary by language such that Python, Ruby, etc.  differ from C, Go, Java,
 etc. This specification requires that signed modulo use truncated division
-(where -13 % 3 == -1) as implemented in C, Go, etc.:
+(where -13 % 3 == -1) as implemented in C, Go, etc.::
 
    a % n = a - n * trunc(a / n)
 
-- 
cgit v1.2.3-73-gaa49b


From 9afff0de30db149a1bf440db26a3ddd6a4f260d8 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Fri, 19 Apr 2024 04:08:54 -0400
Subject: ice: Document tx_scheduling_layers parameter

New driver specific parameter 'tx_scheduling_layers' was introduced.
Describe parameter in the documentation.

Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Co-developed-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 Documentation/networking/devlink/ice.rst | 47 ++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst
index 7f30ebd5debb..830c04354222 100644
--- a/Documentation/networking/devlink/ice.rst
+++ b/Documentation/networking/devlink/ice.rst
@@ -21,6 +21,53 @@ Parameters
    * - ``enable_iwarp``
      - runtime
      - mutually exclusive with ``enable_roce``
+   * - ``tx_scheduling_layers``
+     - permanent
+     - The ice hardware uses hierarchical scheduling for Tx with a fixed
+       number of layers in the scheduling tree. Each of them are decision
+       points. Root node represents a port, while all the leaves represent
+       the queues. This way of configuring the Tx scheduler allows features
+       like DCB or devlink-rate (documented below) to configure how much
+       bandwidth is given to any given queue or group of queues, enabling
+       fine-grained control because scheduling parameters can be configured
+       at any given layer of the tree.
+
+       The default 9-layer tree topology was deemed best for most workloads,
+       as it gives an optimal ratio of performance to configurability. However,
+       for some specific cases, this 9-layer topology might not be desired.
+       One example would be sending traffic to queues that are not a multiple
+       of 8. Because the maximum radix is limited to 8 in 9-layer topology,
+       the 9th queue has a different parent than the rest, and it's given
+       more bandwidth credits. This causes a problem when the system is
+       sending traffic to 9 queues:
+
+       | tx_queue_0_packets: 24163396
+       | tx_queue_1_packets: 24164623
+       | tx_queue_2_packets: 24163188
+       | tx_queue_3_packets: 24163701
+       | tx_queue_4_packets: 24163683
+       | tx_queue_5_packets: 24164668
+       | tx_queue_6_packets: 23327200
+       | tx_queue_7_packets: 24163853
+       | tx_queue_8_packets: 91101417 < Too much traffic is sent from 9th
+
+       To address this need, you can switch to a 5-layer topology, which
+       changes the maximum topology radix to 512. With this enhancement,
+       the performance characteristic is equal as all queues can be assigned
+       to the same parent in the tree. The obvious drawback of this solution
+       is a lower configuration depth of the tree.
+
+       Use the ``tx_scheduling_layer`` parameter with the devlink command
+       to change the transmit scheduler topology. To use 5-layer topology,
+       use a value of 5. For example:
+       $ devlink dev param set pci/0000:16:00.0 name tx_scheduling_layers
+       value 5 cmode permanent
+       Use a value of 9 to set it back to the default value.
+
+       You must do PCI slot powercycle for the selected topology to take effect.
+
+       To verify that value has been set:
+       $ devlink dev param show pci/0000:16:00.0 name tx_scheduling_layers
 
 Info versions
 =============
-- 
cgit v1.2.3-73-gaa49b


From 1ee73168713758515b5a0d6346f58556c9aa72ae Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Thu, 18 Apr 2024 11:47:34 +0100
Subject: doc/netlink/specs: Add draft nftables spec

Add a spec for nftables that has nearly complete coverage of the ops,
but limited coverage of rule types and subexpressions.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://lore.kernel.org/r/20240418104737.77914-2-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/nftables.yaml | 1264 +++++++++++++++++++++++++++++
 1 file changed, 1264 insertions(+)
 create mode 100644 Documentation/netlink/specs/nftables.yaml

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/nftables.yaml b/Documentation/netlink/specs/nftables.yaml
new file mode 100644
index 000000000000..dff2a18f3d90
--- /dev/null
+++ b/Documentation/netlink/specs/nftables.yaml
@@ -0,0 +1,1264 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: nftables
+protocol: netlink-raw
+protonum: 12
+
+doc:
+  Netfilter nftables configuration over netlink.
+
+definitions:
+  -
+    name: nfgenmsg
+    type: struct
+    members:
+      -
+        name: nfgen-family
+        type: u8
+      -
+        name: version
+        type: u8
+      -
+        name: res-id
+        byte-order: big-endian
+        type: u16
+  -
+    name: meta-keys
+    type: enum
+    entries:
+      - len
+      - protocol
+      - priority
+      - mark
+      - iif
+      - oif
+      - iifname
+      - oifname
+      - iftype
+      - oiftype
+      - skuid
+      - skgid
+      - nftrace
+      - rtclassid
+      - secmark
+      - nfproto
+      - l4-proto
+      - bri-iifname
+      - bri-oifname
+      - pkttype
+      - cpu
+      - iifgroup
+      - oifgroup
+      - cgroup
+      - prandom
+      - secpath
+      - iifkind
+      - oifkind
+      - bri-iifpvid
+      - bri-iifvproto
+      - time-ns
+      - time-day
+      - time-hour
+      - sdif
+      - sdifname
+      - bri-broute
+  -
+    name: cmp-ops
+    type: enum
+    entries:
+      - eq
+      - neq
+      - lt
+      - lte
+      - gt
+      - gte
+  -
+    name: object-type
+    type: enum
+    entries:
+      - unspec
+      - counter
+      - quota
+      - ct-helper
+      - limit
+      - connlimit
+      - tunnel
+      - ct-timeout
+      - secmark
+      - ct-expect
+      - synproxy
+  -
+    name: nat-range-flags
+    type: flags
+    entries:
+      - map-ips
+      - proto-specified
+      - proto-random
+      - persistent
+      - proto-random-fully
+      - proto-offset
+      - netmap
+  -
+    name: table-flags
+    type: flags
+    entries:
+      - dormant
+      - owner
+      - persist
+  -
+    name: chain-flags
+    type: flags
+    entries:
+      - base
+      - hw-offload
+      - binding
+  -
+    name: set-flags
+    type: flags
+    entries:
+      - anonymous
+      - constant
+      - interval
+      - map
+      - timeout
+      - eval
+      - object
+      - concat
+      - expr
+
+attribute-sets:
+  -
+    name: empty-attrs
+    attributes:
+      -
+        name: name
+        type: string
+  -
+    name: batch-attrs
+    attributes:
+      -
+        name: genid
+        type: u32
+        byte-order: big-endian
+  -
+    name: table-attrs
+    attributes:
+      -
+        name: name
+        type: string
+        doc: name of the table
+      -
+        name: flags
+        type: u32
+        byte-order: big-endian
+        doc: bitmask of flags
+        enum: table-flags
+        enum-as-flags: true
+      -
+        name: use
+        type: u32
+        byte-order: big-endian
+        doc: number of chains in this table
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+        doc: numeric handle of the table
+      -
+        name: userdata
+        type: binary
+        doc: user data
+  -
+    name: chain-attrs
+    attributes:
+      -
+        name: table
+        type: string
+        doc: name of the table containing the chain
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+        doc: numeric handle of the chain
+      -
+        name: name
+        type: string
+        doc: name of the chain
+      -
+        name: hook
+        type: nest
+        nested-attributes: nft-hook-attrs
+        doc: hook specification for basechains
+      -
+        name: policy
+        type: u32
+        byte-order: big-endian
+        doc: numeric policy of the chain
+      -
+        name: use
+        type: u32
+        byte-order: big-endian
+        doc: number of references to this chain
+      -
+        name: type
+        type: string
+        doc: type name of the chain
+      -
+        name: counters
+        type: nest
+        nested-attributes: nft-counter-attrs
+        doc: counter specification of the chain
+      -
+        name: flags
+        type: u32
+        byte-order: big-endian
+        doc: chain flags
+        enum: chain-flags
+        enum-as-flags: true
+      -
+        name: id
+        type: u32
+        byte-order: big-endian
+        doc: uniquely identifies a chain in a transaction
+      -
+        name: userdata
+        type: binary
+        doc: user data
+  -
+    name: counter-attrs
+    attributes:
+      -
+        name: bytes
+        type: u64
+        byte-order: big-endian
+      -
+        name: packets
+        type: u64
+        byte-order: big-endian
+      -
+        name: pad
+        type: pad
+  -
+    name: nft-hook-attrs
+    attributes:
+      -
+        name: num
+        type: u32
+        byte-order: big-endian
+      -
+        name: priority
+        type: s32
+        byte-order: big-endian
+      -
+        name: dev
+        type: string
+        doc: net device name
+      -
+        name: devs
+        type: nest
+        nested-attributes: hook-dev-attrs
+        doc: list of net devices
+  -
+    name: hook-dev-attrs
+    attributes:
+      -
+        name: name
+        type: string
+        multi-attr: true
+  -
+    name: nft-counter-attrs
+    attributes:
+      -
+        name: bytes
+        type: u64
+      -
+        name: packets
+        type: u64
+  -
+    name: rule-attrs
+    attributes:
+      -
+        name: table
+        type: string
+        doc: name of the table containing the rule
+      -
+        name: chain
+        type: string
+        doc: name of the chain containing the rule
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+        doc: numeric handle of the rule
+      -
+        name: expressions
+        type: nest
+        nested-attributes: expr-list-attrs
+        doc: list of expressions
+      -
+        name: compat
+        type: nest
+        nested-attributes: rule-compat-attrs
+        doc: compatibility specifications of the rule
+      -
+        name: position
+        type: u64
+        byte-order: big-endian
+        doc: numeric handle of the previous rule
+      -
+        name: userdata
+        type: binary
+        doc: user data
+      -
+        name: id
+        type: u32
+        doc: uniquely identifies a rule in a transaction
+      -
+        name: position-id
+        type: u32
+        doc: transaction unique identifier of the previous rule
+      -
+        name: chain-id
+        type: u32
+        doc: add the rule to chain by ID, alternative to chain name
+  -
+    name: expr-list-attrs
+    attributes:
+      -
+        name: elem
+        type: nest
+        nested-attributes: expr-attrs
+        multi-attr: true
+  -
+    name: expr-attrs
+    attributes:
+      -
+        name: name
+        type: string
+        doc: name of the expression type
+      -
+        name: data
+        type: sub-message
+        sub-message: expr-ops
+        selector: name
+        doc: type specific data
+  -
+    name: rule-compat-attrs
+    attributes:
+      -
+        name: proto
+        type: binary
+        doc: numeric value of the handled protocol
+      -
+        name: flags
+        type: binary
+        doc: bitmask of flags
+  -
+    name: set-attrs
+    attributes:
+      -
+        name: table
+        type: string
+        doc: table name
+      -
+        name: name
+        type: string
+        doc: set name
+      -
+        name: flags
+        type: u32
+        enum: set-flags
+        byte-order: big-endian
+        doc: bitmask of enum nft_set_flags
+      -
+        name: key-type
+        type: u32
+        byte-order: big-endian
+        doc: key data type, informational purpose only
+      -
+        name: key-len
+        type: u32
+        byte-order: big-endian
+        doc: key data length
+      -
+        name: data-type
+        type: u32
+        byte-order: big-endian
+        doc: mapping data type
+      -
+        name: data-len
+        type: u32
+        byte-order: big-endian
+        doc: mapping data length
+      -
+        name: policy
+        type: u32
+        byte-order: big-endian
+        doc: selection policy
+      -
+        name: desc
+        type: nest
+        nested-attributes: set-desc-attrs
+        doc: set description
+      -
+        name: id
+        type: u32
+        doc: uniquely identifies a set in a transaction
+      -
+        name: timeout
+        type: u64
+        doc: default timeout value
+      -
+        name: gc-interval
+        type: u32
+        doc: garbage collection interval
+      -
+        name: userdata
+        type: binary
+        doc: user data
+      -
+        name: pad
+        type: pad
+      -
+        name: obj-type
+        type: u32
+        byte-order: big-endian
+        doc: stateful object type
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+        doc: set handle
+      -
+        name: expr
+        type: nest
+        nested-attributes: expr-attrs
+        doc: set expression
+        multi-attr: true
+      -
+        name: expressions
+        type: nest
+        nested-attributes: set-list-attrs
+        doc: list of expressions
+  -
+    name: set-desc-attrs
+    attributes:
+      -
+        name: size
+        type: u32
+        byte-order: big-endian
+        doc: number of elements in set
+      -
+        name: concat
+        type: nest
+        nested-attributes: set-desc-concat-attrs
+        doc: description of field concatenation
+        multi-attr: true
+  -
+    name: set-desc-concat-attrs
+    attributes:
+      -
+        name: elem
+        type: nest
+        nested-attributes: set-field-attrs
+  -
+    name: set-field-attrs
+    attributes:
+      -
+        name: len
+        type: u32
+        byte-order: big-endian
+  -
+    name: set-list-attrs
+    attributes:
+      -
+        name: elem
+        type: nest
+        nested-attributes: expr-attrs
+        multi-attr: true
+  -
+    name: setelem-attrs
+    attributes:
+      -
+        name: key
+        type: nest
+        nested-attributes: data-attrs
+        doc: key value
+      -
+        name: data
+        type: nest
+        nested-attributes: data-attrs
+        doc: data value of mapping
+      -
+        name: flags
+        type: binary
+        doc: bitmask of nft_set_elem_flags
+      -
+        name: timeout
+        type: u64
+        doc: timeout value
+      -
+        name: expiration
+        type: u64
+        doc: expiration time
+      -
+        name: userdata
+        type: binary
+        doc: user data
+      -
+        name: expr
+        type: nest
+        nested-attributes: expr-attrs
+        doc: expression
+      -
+        name: objref
+        type: string
+        doc: stateful object reference
+      -
+        name: key-end
+        type: nest
+        nested-attributes: data-attrs
+        doc: closing key value
+      -
+        name: expressions
+        type: nest
+        nested-attributes: expr-list-attrs
+        doc: list of expressions
+  -
+    name: setelem-list-elem-attrs
+    attributes:
+      -
+        name: elem
+        type: nest
+        nested-attributes: setelem-attrs
+        multi-attr: true
+  -
+    name: setelem-list-attrs
+    attributes:
+      -
+        name: table
+        type: string
+      -
+        name: set
+        type: string
+      -
+        name: elements
+        type: nest
+        nested-attributes: setelem-list-elem-attrs
+      -
+        name: set-id
+        type: u32
+  -
+    name: gen-attrs
+    attributes:
+      -
+        name: id
+        type: u32
+        byte-order: big-endian
+        doc: ruleset generation id
+      -
+        name: proc-pid
+        type: u32
+        byte-order: big-endian
+      -
+        name: proc-name
+        type: string
+  -
+    name: obj-attrs
+    attributes:
+      -
+        name: table
+        type: string
+        doc: name of the table containing the expression
+      -
+        name: name
+        type: string
+        doc: name of this expression type
+      -
+        name: type
+        type: u32
+        enum: object-type
+        byte-order: big-endian
+        doc: stateful object type
+      -
+        name: data
+        type: sub-message
+        sub-message: obj-data
+        selector: type
+        doc: stateful object data
+      -
+        name: use
+        type: u32
+        byte-order: big-endian
+        doc: number of references to this expression
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+        doc: object handle
+      -
+        name: pad
+        type: pad
+      -
+        name: userdata
+        type: binary
+        doc: user data
+  -
+    name: quota-attrs
+    attributes:
+      -
+        name: bytes
+        type: u64
+        byte-order: big-endian
+      -
+        name: flags # TODO
+        type: u32
+        byte-order: big-endian
+      -
+        name: pad
+        type: pad
+      -
+        name: consumed
+        type: u64
+        byte-order: big-endian
+  -
+    name: flowtable-attrs
+    attributes:
+      -
+        name: table
+        type: string
+      -
+        name: name
+        type: string
+      -
+        name: hook
+        type: nest
+        nested-attributes: flowtable-hook-attrs
+      -
+        name: use
+        type: u32
+        byte-order: big-endian
+      -
+        name: handle
+        type: u64
+        byte-order: big-endian
+      -
+        name: pad
+        type: pad
+      -
+        name: flags
+        type: u32
+        byte-order: big-endian
+  -
+    name: flowtable-hook-attrs
+    attributes:
+      -
+        name: num
+        type: u32
+        byte-order: big-endian
+      -
+        name: priority
+        type: u32
+        byte-order: big-endian
+      -
+        name: devs
+        type: nest
+        nested-attributes: hook-dev-attrs
+  -
+    name: expr-cmp-attrs
+    attributes:
+      -
+        name: sreg
+        type: u32
+        byte-order: big-endian
+      -
+        name: op
+        type: u32
+        byte-order: big-endian
+        enum: cmp-ops
+      -
+        name: data
+        type: nest
+        nested-attributes: data-attrs
+  -
+    name: data-attrs
+    attributes:
+      -
+        name: value
+        type: binary
+        # sub-type: u8
+      -
+        name: verdict
+        type: nest
+        nested-attributes: verdict-attrs
+  -
+    name: verdict-attrs
+    attributes:
+      -
+        name: code
+        type: u32
+        byte-order: big-endian
+      -
+        name: chain
+        type: string
+      -
+        name: chain-id
+        type: u32
+  -
+    name: expr-counter-attrs
+    attributes:
+      -
+        name: bytes
+        type: u64
+        doc: Number of bytes
+      -
+        name: packets
+        type: u64
+        doc: Number of packets
+      -
+        name: pad
+        type: pad
+  -
+    name: expr-flow-offload-attrs
+    attributes:
+      -
+        name: name
+        type: string
+        doc: Flow offload table name
+  -
+    name: expr-immediate-attrs
+    attributes:
+      -
+        name: dreg
+        type: u32
+        byte-order: big-endian
+      -
+        name: data
+        type: nest
+        nested-attributes: data-attrs
+  -
+    name: expr-meta-attrs
+    attributes:
+      -
+        name: dreg
+        type: u32
+        byte-order: big-endian
+      -
+        name: key
+        type: u32
+        byte-order: big-endian
+        enum: meta-keys
+      -
+        name: sreg
+        type: u32
+        byte-order: big-endian
+  -
+    name: expr-nat-attrs
+    attributes:
+      -
+        name: type
+        type: u32
+        byte-order: big-endian
+      -
+        name: family
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-addr-min
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-addr-max
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-proto-min
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-proto-max
+        type: u32
+        byte-order: big-endian
+      -
+        name: flags
+        type: u32
+        byte-order: big-endian
+        enum: nat-range-flags
+        enum-as-flags: true
+  -
+    name: expr-payload-attrs
+    attributes:
+      -
+        name: dreg
+        type: u32
+        byte-order: big-endian
+      -
+        name: base
+        type: u32
+        byte-order: big-endian
+      -
+        name: offset
+        type: u32
+        byte-order: big-endian
+      -
+        name: len
+        type: u32
+        byte-order: big-endian
+      -
+        name: sreg
+        type: u32
+        byte-order: big-endian
+      -
+        name: csum-type
+        type: u32
+        byte-order: big-endian
+      -
+        name: csum-offset
+        type: u32
+        byte-order: big-endian
+      -
+        name: csum-flags
+        type: u32
+        byte-order: big-endian
+  -
+    name: expr-tproxy-attrs
+    attributes:
+      -
+        name: family
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-addr
+        type: u32
+        byte-order: big-endian
+      -
+        name: reg-port
+        type: u32
+        byte-order: big-endian
+
+sub-messages:
+  -
+    name: expr-ops
+    formats:
+      -
+        value: bitwise # TODO
+      -
+        value: cmp
+        attribute-set: expr-cmp-attrs
+      -
+        value: counter
+        attribute-set: expr-counter-attrs
+      -
+        value: ct # TODO
+      -
+        value: flow_offload
+        attribute-set: expr-flow-offload-attrs
+      -
+        value: immediate
+        attribute-set: expr-immediate-attrs
+      -
+        value: lookup # TODO
+      -
+        value: meta
+        attribute-set: expr-meta-attrs
+      -
+        value: nat
+        attribute-set: expr-nat-attrs
+      -
+        value: payload
+        attribute-set: expr-payload-attrs
+      -
+        value: tproxy
+        attribute-set: expr-tproxy-attrs
+  -
+    name: obj-data
+    formats:
+      -
+        value: counter
+        attribute-set: counter-attrs
+      -
+        value: quota
+        attribute-set: quota-attrs
+
+operations:
+  enum-model: directional
+  list:
+    -
+      name: batch-begin
+      doc: Start a batch of operations
+      attribute-set: batch-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0x10
+          attributes:
+            - genid
+        reply:
+          value: 0x10
+          attributes:
+            - genid
+    -
+      name: batch-end
+      doc: Finish a batch of operations
+      attribute-set: batch-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0x11
+          attributes:
+            - genid
+    -
+      name: newtable
+      doc: Create a new table.
+      attribute-set: table-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa00
+          attributes:
+            - name
+    -
+      name: gettable
+      doc: Get / dump tables.
+      attribute-set: table-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa01
+          attributes:
+            - name
+        reply:
+          value: 0xa00
+          attributes:
+            - name
+    -
+      name: deltable
+      doc: Delete an existing table.
+      attribute-set: table-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa02
+          attributes:
+            - name
+    -
+      name: destroytable
+      doc: Delete an existing table with destroy semantics (ignoring ENOENT errors).
+      attribute-set: table-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1a
+          attributes:
+            - name
+    -
+      name: newchain
+      doc: Create a new chain.
+      attribute-set: chain-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa03
+          attributes:
+            - name
+    -
+      name: getchain
+      doc: Get / dump chains.
+      attribute-set: chain-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa04
+          attributes:
+            - name
+        reply:
+          value: 0xa03
+          attributes:
+            - name
+    -
+      name: delchain
+      doc: Delete an existing chain.
+      attribute-set: chain-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa05
+          attributes:
+            - name
+    -
+      name: destroychain
+      doc: Delete an existing chain with destroy semantics (ignoring ENOENT errors).
+      attribute-set: chain-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1b
+          attributes:
+            - name
+    -
+      name: newrule
+      doc: Create a new rule.
+      attribute-set: rule-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa06
+          attributes:
+            - name
+    -
+      name: getrule
+      doc: Get / dump rules.
+      attribute-set: rule-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa07
+          attributes:
+            - name
+        reply:
+          value: 0xa06
+          attributes:
+            - name
+    -
+      name: getrule-reset
+      doc: Get / dump rules and reset stateful expressions.
+      attribute-set: rule-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa19
+          attributes:
+            - name
+        reply:
+          value: 0xa06
+          attributes:
+            - name
+    -
+      name: delrule
+      doc: Delete an existing rule.
+      attribute-set: rule-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa08
+          attributes:
+            - name
+    -
+      name: destroyrule
+      doc: Delete an existing rule with destroy semantics (ignoring ENOENT errors).
+      attribute-set: rule-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1c
+          attributes:
+            - name
+    -
+      name: newset
+      doc: Create a new set.
+      attribute-set: set-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa09
+          attributes:
+            - name
+    -
+      name: getset
+      doc: Get / dump sets.
+      attribute-set: set-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa0a
+          attributes:
+            - name
+        reply:
+          value: 0xa09
+          attributes:
+            - name
+    -
+      name: delset
+      doc: Delete an existing set.
+      attribute-set: set-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa0b
+          attributes:
+            - name
+    -
+      name: destroyset
+      doc: Delete an existing set with destroy semantics (ignoring ENOENT errors).
+      attribute-set: set-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1d
+          attributes:
+            - name
+    -
+      name: newsetelem
+      doc: Create a new set element.
+      attribute-set: setelem-list-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa0c
+          attributes:
+            - name
+    -
+      name: getsetelem
+      doc: Get / dump set elements.
+      attribute-set: setelem-list-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa0d
+          attributes:
+            - name
+        reply:
+          value: 0xa0c
+          attributes:
+            - name
+    -
+      name: getsetelem-reset
+      doc: Get / dump set elements and reset stateful expressions.
+      attribute-set: setelem-list-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa21
+          attributes:
+            - name
+        reply:
+          value: 0xa0c
+          attributes:
+            - name
+    -
+      name: delsetelem
+      doc: Delete an existing set element.
+      attribute-set: setelem-list-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa0e
+          attributes:
+            - name
+    -
+      name: destroysetelem
+      doc: Delete an existing set element with destroy semantics.
+      attribute-set: setelem-list-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1e
+          attributes:
+            - name
+    -
+      name: getgen
+      doc: Get / dump rule-set generation.
+      attribute-set: gen-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa10
+          attributes:
+            - name
+        reply:
+          value: 0xa0f
+          attributes:
+            - name
+    -
+      name: newobj
+      doc: Create a new stateful object.
+      attribute-set: obj-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa12
+          attributes:
+            - name
+    -
+      name: getobj
+      doc: Get / dump stateful objects.
+      attribute-set: obj-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa13
+          attributes:
+            - name
+        reply:
+          value: 0xa12
+          attributes:
+            - name
+    -
+      name: delobj
+      doc: Delete an existing stateful object.
+      attribute-set: obj-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa14
+          attributes:
+            - name
+    -
+      name: destroyobj
+      doc: Delete an existing stateful object with destroy semantics.
+      attribute-set: obj-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa1f
+          attributes:
+            - name
+    -
+      name: newflowtable
+      doc: Create a new flow table.
+      attribute-set: flowtable-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa16
+          attributes:
+            - name
+    -
+      name: getflowtable
+      doc: Get / dump flow tables.
+      attribute-set: flowtable-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa17
+          attributes:
+            - name
+        reply:
+          value: 0xa16
+          attributes:
+            - name
+    -
+      name: delflowtable
+      doc: Delete an existing flow table.
+      attribute-set: flowtable-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa18
+          attributes:
+            - name
+    -
+      name: destroyflowtable
+      doc: Delete an existing flow table with destroy semantics.
+      attribute-set: flowtable-attrs
+      fixed-header: nfgenmsg
+      do:
+        request:
+          value: 0xa20
+          attributes:
+            - name
+
+mcast-groups:
+  list:
+    -
+      name: mgmt
-- 
cgit v1.2.3-73-gaa49b


From ce05d0f20368b583b43c99a7c8673e8a7187b76b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 19 Apr 2024 19:35:39 -0700
Subject: netdev: support dumping a single netdev in qstats

Having to filter the right ifindex in the tests is a bit tedious.
Add support for dumping qstats for a single ifindex.

Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240420023543.3300306-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/netdev.yaml |  1 +
 net/core/netdev-genl-gen.c              |  1 +
 net/core/netdev-genl.c                  | 52 ++++++++++++++++++++++++---------
 3 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 76352dbd2be4..679c4130707c 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -486,6 +486,7 @@ operations:
       dump:
         request:
           attributes:
+            - ifindex
             - scope
         reply:
           attributes:
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 8d8ace9ef87f..8350a0afa9ec 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -70,6 +70,7 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN
 
 /* NETDEV_CMD_QSTATS_GET - dump */
 static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = {
+	[NETDEV_A_QSTATS_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
 	[NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
 };
 
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 7004b3399c2b..dd6510f2c652 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -639,6 +639,24 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int
+netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope,
+			      struct sk_buff *skb, const struct genl_info *info,
+			      struct netdev_nl_dump_ctx *ctx)
+{
+	if (!netdev->stat_ops)
+		return 0;
+
+	switch (scope) {
+	case 0:
+		return netdev_nl_stats_by_netdev(netdev, skb, info);
+	case NETDEV_QSTATS_SCOPE_QUEUE:
+		return netdev_nl_stats_by_queue(netdev, skb, info, ctx);
+	}
+
+	return -EINVAL;	/* Should not happen, per netlink policy */
+}
+
 int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
 				struct netlink_callback *cb)
 {
@@ -646,6 +664,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
 	const struct genl_info *info = genl_info_dump(cb);
 	struct net *net = sock_net(skb->sk);
 	struct net_device *netdev;
+	unsigned int ifindex;
 	unsigned int scope;
 	int err = 0;
 
@@ -653,21 +672,28 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
 	if (info->attrs[NETDEV_A_QSTATS_SCOPE])
 		scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]);
 
-	rtnl_lock();
-	for_each_netdev_dump(net, netdev, ctx->ifindex) {
-		if (!netdev->stat_ops)
-			continue;
+	ifindex = 0;
+	if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
+		ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
 
-		switch (scope) {
-		case 0:
-			err = netdev_nl_stats_by_netdev(netdev, skb, info);
-			break;
-		case NETDEV_QSTATS_SCOPE_QUEUE:
-			err = netdev_nl_stats_by_queue(netdev, skb, info, ctx);
-			break;
+	rtnl_lock();
+	if (ifindex) {
+		netdev = __dev_get_by_index(net, ifindex);
+		if (netdev && netdev->stat_ops) {
+			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+							    info, ctx);
+		} else {
+			NL_SET_BAD_ATTR(info->extack,
+					info->attrs[NETDEV_A_QSTATS_IFINDEX]);
+			err = netdev ? -EOPNOTSUPP : -ENODEV;
+		}
+	} else {
+		for_each_netdev_dump(net, netdev, ctx->ifindex) {
+			err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+							    info, ctx);
+			if (err < 0)
+				break;
 		}
-		if (err < 0)
-			break;
 	}
 	rtnl_unlock();
 
-- 
cgit v1.2.3-73-gaa49b


From e51b907d40329d4b4517a155e0bc0bf593d6767d Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler1968@googlemail.com>
Date: Mon, 22 Apr 2024 12:09:42 -0700
Subject: bpf, docs: Add introduction for use in the ISA Internet Draft

The proposed intro paragraph text is derived from the first paragraph
of the IETF BPF WG charter at https://datatracker.ietf.org/wg/bpf/about/

Signed-off-by: Dave Thaler <dthaler1968@gmail.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20240422190942.24658-1-dthaler1968@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/standardization/instruction-set.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index d03d90afbd7d..b44bdacd0195 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -5,7 +5,11 @@
 BPF Instruction Set Architecture (ISA)
 ======================================
 
-This document specifies the BPF instruction set architecture (ISA).
+eBPF (which is no longer an acronym for anything), also commonly
+referred to as BPF, is a technology with origins in the Linux kernel
+that can run untrusted programs in a privileged context such as an
+operating system kernel. This document specifies the BPF instruction
+set architecture (ISA).
 
 Documentation conventions
 =========================
-- 
cgit v1.2.3-73-gaa49b


From 07801a24e2f18624cd2400ce15f14569eb416c9a Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler1968@googlemail.com>
Date: Fri, 26 Apr 2024 16:11:26 -0700
Subject: bpf, docs: Clarify PC use in instruction-set.rst

This patch elaborates on the use of PC by expanding the PC acronym,
explaining the units, and the relative position to which the offset
applies.

Signed-off-by: Dave Thaler <dthaler1968@googlemail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20240426231126.5130-1-dthaler1968@gmail.com
---
 Documentation/bpf/standardization/instruction-set.rst | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index b44bdacd0195..997560abadab 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -469,6 +469,12 @@ JSLT      0xc    any      PC += offset if dst < src          signed
 JSLE      0xd    any      PC += offset if dst <= src         signed
 ========  =====  =======  =================================  ===================================================
 
+where 'PC' denotes the program counter, and the offset to increment by
+is in units of 64-bit instructions relative to the instruction following
+the jump instruction.  Thus 'PC += 1' skips execution of the next
+instruction if it's a basic instruction or results in undefined behavior
+if the next instruction is a 128-bit wide instruction.
+
 The BPF program needs to store the return value into register R0 before doing an
 ``EXIT``.
 
-- 
cgit v1.2.3-73-gaa49b


From 0cfe71f45f420e412fda2395807a56c453a6e0b6 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Fri, 26 Apr 2024 11:39:27 +0800
Subject: netdev: add queue stats

These stats are commonly. Support reporting those via netdev-genl queue
stats.

name: rx-hw-drops
name: rx-hw-drop-overruns
name: rx-csum-unnecessary
name: rx-csum-none
name: rx-csum-bad
name: rx-hw-gro-packets
name: rx-hw-gro-bytes
name: rx-hw-gro-wire-packets
name: rx-hw-gro-wire-bytes
name: rx-hw-drop-ratelimits
name: tx-hw-drops
name: tx-hw-drop-errors
name: tx-csum-none
name: tx-needs-csum
name: tx-hw-gso-packets
name: tx-hw-gso-bytes
name: tx-hw-gso-wire-packets
name: tx-hw-gso-wire-bytes
name: tx-hw-drop-ratelimits

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/netlink/specs/netdev.yaml | 104 ++++++++++++++++++++++++++++++++
 include/net/netdev_queues.h             |  27 +++++++++
 include/uapi/linux/netdev.h             |  19 ++++++
 net/core/netdev-genl.c                  |  23 ++++++-
 tools/include/uapi/linux/netdev.h       |  19 ++++++
 5 files changed, 190 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 679c4130707c..2be4b3714d17 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -335,6 +335,110 @@ attribute-sets:
           Allocation failure may, or may not result in a packet drop, depending
           on driver implementation and whether system recovers quickly.
         type: uint
+      -
+        name: rx-hw-drops
+        doc: |
+          Number of all packets which entered the device, but never left it,
+          including but not limited to: packets dropped due to lack of buffer
+          space, processing errors, explicit or implicit policies and packet
+          filters.
+        type: uint
+      -
+        name: rx-hw-drop-overruns
+        doc: |
+          Number of packets dropped due to transient lack of resources, such as
+          buffer space, host descriptors etc.
+        type: uint
+      -
+        name: rx-csum-unnecessary
+        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
+        type: uint
+      -
+        name: rx-csum-none
+        doc: Number of packets that were not checksummed by device.
+        type: uint
+      -
+        name: rx-csum-bad
+        doc: |
+          Number of packets with bad checksum. The packets are not discarded,
+          but still delivered to the stack.
+        type: uint
+      -
+        name: rx-hw-gro-packets
+        doc: |
+          Number of packets that were coalesced from smaller packets by the device.
+          Counts only packets coalesced with the HW-GRO netdevice feature,
+          LRO-coalesced packets are not counted.
+        type: uint
+      -
+        name: rx-hw-gro-bytes
+        doc: See `rx-hw-gro-packets`.
+        type: uint
+      -
+        name: rx-hw-gro-wire-packets
+        doc: |
+          Number of packets that were coalesced to bigger packetss with the HW-GRO
+          netdevice feature. LRO-coalesced packets are not counted.
+        type: uint
+      -
+        name: rx-hw-gro-wire-bytes
+        doc: See `rx-hw-gro-wire-packets`.
+        type: uint
+      -
+        name: rx-hw-drop-ratelimits
+        doc: |
+          Number of the packets dropped by the device due to the received
+          packets bitrate exceeding the device rate limit.
+        type: uint
+      -
+        name: tx-hw-drops
+        doc: |
+          Number of packets that arrived at the device but never left it,
+          encompassing packets dropped for reasons such as processing errors, as
+          well as those affected by explicitly defined policies and packet
+          filtering criteria.
+        type: uint
+      -
+        name: tx-hw-drop-errors
+        doc: Number of packets dropped because they were invalid or malformed.
+        type: uint
+      -
+        name: tx-csum-none
+        doc: |
+          Number of packets that did not require the device to calculate the
+          checksum.
+        type: uint
+      -
+        name: tx-needs-csum
+        doc: |
+          Number of packets that required the device to calculate the checksum.
+        type: uint
+      -
+        name: tx-hw-gso-packets
+        doc: |
+          Number of packets that necessitated segmentation into smaller packets
+          by the device.
+        type: uint
+      -
+        name: tx-hw-gso-bytes
+        doc: See `tx-hw-gso-packets`.
+        type: uint
+      -
+        name: tx-hw-gso-wire-packets
+        doc: |
+          Number of wire-sized packets generated by processing
+          `tx-hw-gso-packets`
+        type: uint
+      -
+        name: tx-hw-gso-wire-bytes
+        doc: See `tx-hw-gso-wire-packets`.
+        type: uint
+      -
+        name: tx-hw-drop-ratelimits
+        doc: |
+          Number of the packets dropped by the device due to the transmit
+          packets bitrate exceeding the device rate limit.
+        type: uint
 
 operations:
   list:
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 1ec408585373..c7ac4539eafc 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -9,11 +9,38 @@ struct netdev_queue_stats_rx {
 	u64 bytes;
 	u64 packets;
 	u64 alloc_fail;
+
+	u64 hw_drops;
+	u64 hw_drop_overruns;
+
+	u64 csum_unnecessary;
+	u64 csum_none;
+	u64 csum_bad;
+
+	u64 hw_gro_packets;
+	u64 hw_gro_bytes;
+	u64 hw_gro_wire_packets;
+	u64 hw_gro_wire_bytes;
+
+	u64 hw_drop_ratelimits;
 };
 
 struct netdev_queue_stats_tx {
 	u64 bytes;
 	u64 packets;
+
+	u64 hw_drops;
+	u64 hw_drop_errors;
+
+	u64 csum_none;
+	u64 needs_csum;
+
+	u64 hw_gso_packets;
+	u64 hw_gso_bytes;
+	u64 hw_gso_wire_packets;
+	u64 hw_gso_wire_bytes;
+
+	u64 hw_drop_ratelimits;
 };
 
 /**
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index bb65ee840cda..cf24f1d9adf8 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -146,6 +146,25 @@ enum {
 	NETDEV_A_QSTATS_TX_PACKETS,
 	NETDEV_A_QSTATS_TX_BYTES,
 	NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+	NETDEV_A_QSTATS_RX_HW_DROPS,
+	NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+	NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
+	NETDEV_A_QSTATS_RX_CSUM_NONE,
+	NETDEV_A_QSTATS_RX_CSUM_BAD,
+	NETDEV_A_QSTATS_RX_HW_GRO_PACKETS,
+	NETDEV_A_QSTATS_RX_HW_GRO_BYTES,
+	NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS,
+	NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES,
+	NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS,
+	NETDEV_A_QSTATS_TX_HW_DROPS,
+	NETDEV_A_QSTATS_TX_HW_DROP_ERRORS,
+	NETDEV_A_QSTATS_TX_CSUM_NONE,
+	NETDEV_A_QSTATS_TX_NEEDS_CSUM,
+	NETDEV_A_QSTATS_TX_HW_GSO_PACKETS,
+	NETDEV_A_QSTATS_TX_HW_GSO_BYTES,
+	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
+	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
+	NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
 
 	__NETDEV_A_QSTATS_MAX,
 	NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index dd6510f2c652..4b5054087309 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -489,7 +489,17 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
 {
 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) ||
 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) ||
-	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail))
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits))
 		return -EMSGSIZE;
 	return 0;
 }
@@ -498,7 +508,16 @@ static int
 netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
 {
 	if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) ||
-	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes))
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits))
 		return -EMSGSIZE;
 	return 0;
 }
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index bb65ee840cda..cf24f1d9adf8 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -146,6 +146,25 @@ enum {
 	NETDEV_A_QSTATS_TX_PACKETS,
 	NETDEV_A_QSTATS_TX_BYTES,
 	NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+	NETDEV_A_QSTATS_RX_HW_DROPS,
+	NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+	NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
+	NETDEV_A_QSTATS_RX_CSUM_NONE,
+	NETDEV_A_QSTATS_RX_CSUM_BAD,
+	NETDEV_A_QSTATS_RX_HW_GRO_PACKETS,
+	NETDEV_A_QSTATS_RX_HW_GRO_BYTES,
+	NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS,
+	NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES,
+	NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS,
+	NETDEV_A_QSTATS_TX_HW_DROPS,
+	NETDEV_A_QSTATS_TX_HW_DROP_ERRORS,
+	NETDEV_A_QSTATS_TX_CSUM_NONE,
+	NETDEV_A_QSTATS_TX_NEEDS_CSUM,
+	NETDEV_A_QSTATS_TX_HW_GSO_PACKETS,
+	NETDEV_A_QSTATS_TX_HW_GSO_BYTES,
+	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
+	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
+	NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
 
 	__NETDEV_A_QSTATS_MAX,
 	NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
-- 
cgit v1.2.3-73-gaa49b


From a2af49293db6dbdfb5313029bdd47d002abc971e Mon Sep 17 00:00:00 2001
From: Flavio Suligoi <f.suligoi@asem.it>
Date: Mon, 29 Apr 2024 11:26:54 +0200
Subject: dt-bindings: net: snps, dwmac: remove tx-sched-sp property

Strict priority for the tx scheduler is by default in Linux driver, so the
tx-sched-sp property was removed in commit aed6864035b1 ("net: stmmac:
platform: Delete a redundant condition branch").

This property is still in use in the following DT (and it will be removed
in a separate patch series):

- arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi
- arch/arm64/boot/dts/freescale/imx8mp-evk.dts
- arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi
- arch/arm64/boot/dts/qcom/sa8540p-ride.dts
- arch/arm64/boot/dts/qcom/sa8775p-ride.dts

There is no problem if that property is still used in the DTs above,
since, as seen above, it is a default property of the driver.

Signed-off-by: Flavio Suligoi <f.suligoi@asem.it>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Adam Ford <aford173@gmail.com>
Link: https://lore.kernel.org/r/20240429092654.31390-2-f.suligoi@asem.it
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 15073627c53a..21cc27e75f50 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -328,9 +328,6 @@ properties:
       snps,tx-sched-dwrr:
         type: boolean
         description: Deficit Weighted Round Robin
-      snps,tx-sched-sp:
-        type: boolean
-        description: Strict priority
     allOf:
       - if:
           required:
@@ -339,7 +336,6 @@ properties:
           properties:
             snps,tx-sched-wfq: false
             snps,tx-sched-dwrr: false
-            snps,tx-sched-sp: false
       - if:
           required:
             - snps,tx-sched-wfq
@@ -347,7 +343,6 @@ properties:
           properties:
             snps,tx-sched-wrr: false
             snps,tx-sched-dwrr: false
-            snps,tx-sched-sp: false
       - if:
           required:
             - snps,tx-sched-dwrr
@@ -355,15 +350,6 @@ properties:
           properties:
             snps,tx-sched-wrr: false
             snps,tx-sched-wfq: false
-            snps,tx-sched-sp: false
-      - if:
-          required:
-            - snps,tx-sched-sp
-        then:
-          properties:
-            snps,tx-sched-wrr: false
-            snps,tx-sched-wfq: false
-            snps,tx-sched-dwrr: false
     patternProperties:
       "^queue[0-9]$":
         description: Each subnode represents a queue.
-- 
cgit v1.2.3-73-gaa49b


From 601a0867f86cbb5e137ce485a7eb60cbf9fc5180 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Tue, 30 Apr 2024 09:09:09 +0200
Subject: xfrm: Add dir validation to "out" data path lookup

Introduces validation for the x->dir attribute within the XFRM output
data lookup path. If the configured direction does not match the expected
direction, output, increment the XfrmOutStateDirError counter and drop
the packet to ensure data integrity and correct flow handling.

grep -vw 0 /proc/net/xfrm_stat
XfrmOutPolError         	1
XfrmOutStateDirError    	1

Signed-off-by: Antony Antony <antony.antony@secunet.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 Documentation/networking/xfrm_proc.rst | 3 +++
 include/uapi/linux/snmp.h              | 1 +
 net/xfrm/xfrm_policy.c                 | 6 ++++++
 net/xfrm/xfrm_proc.c                   | 1 +
 4 files changed, 11 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/xfrm_proc.rst b/Documentation/networking/xfrm_proc.rst
index 0a771c5a7399..5ac3acf4cf51 100644
--- a/Documentation/networking/xfrm_proc.rst
+++ b/Documentation/networking/xfrm_proc.rst
@@ -111,3 +111,6 @@ XfrmOutPolError:
 
 XfrmOutStateInvalid:
 	State is invalid, perhaps expired
+
+XfrmOutStateDirError:
+        State direction mismatch (lookup found an input state on the output path, expected output or no direction)
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index a0819c6a5988..23792b8412bd 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -337,6 +337,7 @@ enum
 	LINUX_MIB_XFRMFWDHDRERROR,		/* XfrmFwdHdrError*/
 	LINUX_MIB_XFRMOUTSTATEINVALID,		/* XfrmOutStateInvalid */
 	LINUX_MIB_XFRMACQUIREERROR,		/* XfrmAcquireError */
+	LINUX_MIB_XFRMOUTSTATEDIRERROR,		/* XfrmOutStateDirError */
 	__LINUX_MIB_XFRMMAX
 };
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8..298b3a9eb48d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2489,6 +2489,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
 				    family, policy->if_id);
+		if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR);
+			xfrm_state_put(x);
+			error = -EINVAL;
+			goto fail;
+		}
 
 		if (x && x->km.state == XFRM_STATE_VALID) {
 			xfrm[nx++] = x;
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 5f9bf8e5c933..98606f1078f7 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -41,6 +41,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
 	SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
 	SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
+	SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3-73-gaa49b


From 304b44f0d5a4c2f91f82f7c31538d00485fb484c Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Tue, 30 Apr 2024 09:09:29 +0200
Subject: xfrm: Add dir validation to "in" data path lookup

Introduces validation for the x->dir attribute within the XFRM input
data lookup path. If the configured direction does not match the
expected direction, input, increment the XfrmInStateDirError counter
and drop the packet to ensure data integrity and correct flow handling.

grep -vw 0 /proc/net/xfrm_stat
XfrmInStateDirError     	1

Signed-off-by: Antony Antony <antony.antony@secunet.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 Documentation/networking/xfrm_proc.rst |  3 +++
 include/uapi/linux/snmp.h              |  1 +
 net/ipv6/xfrm6_input.c                 |  7 +++++++
 net/xfrm/xfrm_input.c                  | 11 +++++++++++
 net/xfrm/xfrm_proc.c                   |  1 +
 5 files changed, 23 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/xfrm_proc.rst b/Documentation/networking/xfrm_proc.rst
index 5ac3acf4cf51..973d1571acac 100644
--- a/Documentation/networking/xfrm_proc.rst
+++ b/Documentation/networking/xfrm_proc.rst
@@ -73,6 +73,9 @@ XfrmAcquireError:
 XfrmFwdHdrError:
 	Forward routing of a packet is not allowed
 
+XfrmInStateDirError:
+        State direction mismatch (lookup found an output state on the input path, expected input or no direction)
+
 Outbound errors
 ~~~~~~~~~~~~~~~
 XfrmOutError:
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 23792b8412bd..adf5fd78dd50 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -338,6 +338,7 @@ enum
 	LINUX_MIB_XFRMOUTSTATEINVALID,		/* XfrmOutStateInvalid */
 	LINUX_MIB_XFRMACQUIREERROR,		/* XfrmAcquireError */
 	LINUX_MIB_XFRMOUTSTATEDIRERROR,		/* XfrmOutStateDirError */
+	LINUX_MIB_XFRMINSTATEDIRERROR,		/* XfrmInStateDirError */
 	__LINUX_MIB_XFRMMAX
 };
 
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 2c6aeb090b7a..d5bac0d76b6e 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -266,6 +266,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		if (!x)
 			continue;
 
+		if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+			xfrm_state_put(x);
+			x = NULL;
+			continue;
+		}
+
 		spin_lock(&x->lock);
 
 		if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 161f535c8b94..71b42de6e3c9 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -466,6 +466,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 	if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
 		x = xfrm_input_state(skb);
 
+		if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+			goto drop;
+		}
+
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
 			if (x->km.state == XFRM_STATE_ACQ)
 				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -571,6 +576,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
+		if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+			xfrm_state_put(x);
+			goto drop;
+		}
+
 		skb->mark = xfrm_smark_get(skb->mark, x);
 
 		sp->xvec[sp->len++] = x;
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 98606f1078f7..eeb984be03a7 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -42,6 +42,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
 	SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
 	SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
+	SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3-73-gaa49b


From f9a6e7fb521cb6e1ff1a654a2a7f9331611f8140 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 19 Apr 2024 13:39:31 +0200
Subject: netfilter: conntrack: documentation: remove reference to non-existent
 sysctl

The referenced sysctl doesn't exist anymore.

Fixes: 4592ee7f525c ("netfilter: conntrack: remove offload_pickup sysctl again")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 Documentation/networking/nf_conntrack-sysctl.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index c383a394c665..238b66d0e059 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -222,11 +222,11 @@ nf_flowtable_tcp_timeout - INTEGER (seconds)
 
         Control offload timeout for tcp connections.
         TCP connections may be offloaded from nf conntrack to nf flow table.
-        Once aged, the connection is returned to nf conntrack with tcp pickup timeout.
+        Once aged, the connection is returned to nf conntrack.
 
 nf_flowtable_udp_timeout - INTEGER (seconds)
         default 30
 
         Control offload timeout for udp connections.
         UDP connections may be offloaded from nf conntrack to nf flow table.
-        Once aged, the connection is returned to nf conntrack with udp pickup timeout.
+        Once aged, the connection is returned to nf conntrack.
-- 
cgit v1.2.3-73-gaa49b


From 3a2a192b0ef1a849f4a17a5e8e277619a88256dd Mon Sep 17 00:00:00 2001
From: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Date: Mon, 6 May 2024 21:47:57 -0500
Subject: dt-bindings: net: ipq4019-mdio: add IPQ9574 compatible

Add a compatible property specific to IPQ9574. This should be used
along with the IPQ4019 compatible. This second compatible serves the
same purpose as the ipq{5,6,8} compatibles. This is to indicate that
the clocks properties are required.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240507024758.2810514-1-mr.nuke.me@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
index 0029e197a825..a94480e819ac 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
@@ -20,6 +20,7 @@ properties:
           - enum:
               - qcom,ipq6018-mdio
               - qcom,ipq8074-mdio
+              - qcom,ipq9574-mdio
           - const: qcom,ipq4019-mdio
 
   "#address-cells":
@@ -76,6 +77,7 @@ allOf:
               - qcom,ipq5018-mdio
               - qcom,ipq6018-mdio
               - qcom,ipq8074-mdio
+              - qcom,ipq9574-mdio
     then:
       required:
         - clocks
-- 
cgit v1.2.3-73-gaa49b


From e497c3228a4e09cdc956f19200ee1d9e84b63f96 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Tue, 7 May 2024 11:36:03 +0100
Subject: netlink/specs: Add VF attributes to rt_link spec

Add support for retrieving VFs as part of link info. For example:

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_link.yaml \
  --do getlink --json '{"ifi-index": 38, "ext-mask": ["vf", "skip-stats"]}'
{'address': 'b6:75:91:f2:64:65',
 [snip]
 'vfinfo-list': {'info': [{'broadcast': b'\xff\xff\xff\xff\xff\xff\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00',
                           'link-state': {'link-state': 'auto', 'vf': 0},
                           'mac': {'mac': b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00',
                                   'vf': 0},
                           'rate': {'max-tx-rate': 0,
                                    'min-tx-rate': 0,
                                    'vf': 0},
                           'rss-query-en': {'setting': 0, 'vf': 0},
                           'spoofchk': {'setting': 0, 'vf': 0},
                           'trust': {'setting': 0, 'vf': 0},
                           'tx-rate': {'rate': 0, 'vf': 0},
                           'vlan': {'qos': 0, 'vf': 0, 'vlan': 0},
                           'vlan-list': {'info': [{'qos': 0,
                                                   'vf': 0,
                                                   'vlan': 0,
                                                   'vlan-proto': 0}]}},
                          {'broadcast': b'\xff\xff\xff\xff\xff\xff\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00',
                           'link-state': {'link-state': 'auto', 'vf': 1},
                           'mac': {'mac': b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                          b'\x00\x00\x00\x00\x00\x00\x00\x00',
                                   'vf': 1},
                           'rate': {'max-tx-rate': 0,
                                    'min-tx-rate': 0,
                                    'vf': 1},
                           'rss-query-en': {'setting': 0, 'vf': 1},
                           'spoofchk': {'setting': 0, 'vf': 1},
                           'trust': {'setting': 0, 'vf': 1},
                           'tx-rate': {'rate': 0, 'vf': 1},
                           'vlan': {'qos': 0, 'vf': 1, 'vlan': 0},
                           'vlan-list': {'info': [{'qos': 0,
                                                   'vf': 1,
                                                   'vlan': 0,
                                                   'vlan-proto': 0}]}}]},
 'xdp': {'attached': 0}}

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://lore.kernel.org/r/20240507103603.23017-1-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_link.yaml | 237 ++++++++++++++++++++++++++++++-
 1 file changed, 235 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index 113ecd17c880..0f02b9579b5a 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -770,6 +770,139 @@ definitions:
       -
         name: to
         type: u32
+  -
+    name: ifla-vf-mac
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: mac
+        type: binary
+        len: 32
+  -
+    name: ifla-vf-vlan
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: vlan
+        type: u32
+      -
+        name: qos
+        type: u32
+  -
+    name: ifla-vf-tx-rate
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: rate
+        type: u32
+  -
+    name: ifla-vf-spoofchk
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: setting
+        type: u32
+  -
+    name: ifla-vf-link-state
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: link-state
+        type: u32
+        enum: ifla-vf-link-state-enum
+  -
+    name: ifla-vf-link-state-enum
+    type: enum
+    entries:
+      - auto
+      - enable
+      - disable
+  -
+    name: ifla-vf-rate
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: min-tx-rate
+        type: u32
+      -
+        name: max-tx-rate
+        type: u32
+  -
+    name: ifla-vf-rss-query-en
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: setting
+        type: u32
+  -
+    name: ifla-vf-trust
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: setting
+        type: u32
+  -
+    name: ifla-vf-guid
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: guid
+        type: u64
+  -
+    name: ifla-vf-vlan-info
+    type: struct
+    members:
+      -
+        name: vf
+        type: u32
+      -
+        name: vlan
+        type: u32
+      -
+        name: qos
+        type: u32
+      -
+        name: vlan-proto
+        type: u32
+  -
+    name: rtext-filter
+    type: flags
+    entries:
+      - vf
+      - brvlan
+      - brvlan-compressed
+      - skip-stats
+      - mrp
+      - cfm-config
+      - cfm-status
+      - mst
 
 attribute-sets:
   -
@@ -847,7 +980,7 @@ attribute-sets:
       -
         name: vfinfo-list
         type: nest
-        nested-attributes: vfinfo-attrs
+        nested-attributes: vfinfo-list-attrs
       -
         name: stats64
         type: binary
@@ -873,6 +1006,8 @@ attribute-sets:
       -
         name: ext-mask
         type: u32
+        enum: rtext-filter
+        enum-as-flags: true
       -
         name: promiscuity
         type: u32
@@ -1004,9 +1139,107 @@ attribute-sets:
         type: nest
         value: 45
         nested-attributes: mctp-attrs
+  -
+    name: vfinfo-list-attrs
+    attributes:
+      -
+        name: info
+        type: nest
+        nested-attributes: vfinfo-attrs
+        multi-attr: true
   -
     name: vfinfo-attrs
-    attributes: []
+    attributes:
+      -
+        name: mac
+        type: binary
+        struct: ifla-vf-mac
+      -
+        name: vlan
+        type: binary
+        struct: ifla-vf-vlan
+      -
+        name: tx-rate
+        type: binary
+        struct: ifla-vf-tx-rate
+      -
+        name: spoofchk
+        type: binary
+        struct: ifla-vf-spoofchk
+      -
+        name: link-state
+        type: binary
+        struct: ifla-vf-link-state
+      -
+        name: rate
+        type: binary
+        struct: ifla-vf-rate
+      -
+        name: rss-query-en
+        type: binary
+        struct: ifla-vf-rss-query-en
+      -
+        name: stats
+        type: nest
+        nested-attributes: vf-stats-attrs
+      -
+        name: trust
+        type: binary
+        struct: ifla-vf-trust
+      -
+        name: ib-node-guid
+        type: binary
+        struct: ifla-vf-guid
+      -
+        name: ib-port-guid
+        type: binary
+        struct: ifla-vf-guid
+      -
+        name: vlan-list
+        type: nest
+        nested-attributes: vf-vlan-attrs
+      -
+        name: broadcast
+        type: binary
+  -
+    name: vf-stats-attrs
+    attributes:
+      -
+        name: rx-packets
+        type: u64
+        value: 0
+      -
+        name: tx-packets
+        type: u64
+      -
+        name: rx-bytes
+        type: u64
+      -
+        name: tx-bytes
+        type: u64
+      -
+        name: broadcast
+        type: u64
+      -
+        name: multicast
+        type: u64
+      -
+        name: pad
+        type: pad
+      -
+        name: rx-dropped
+        type: u64
+      -
+        name: tx-dropped
+        type: u64
+  -
+    name: vf-vlan-attrs
+    attributes:
+      -
+        name: info
+        type: binary
+        struct: ifla-vf-vlan-info
+        multi-attr: true
   -
     name: vf-ports-attrs
     attributes: []
-- 
cgit v1.2.3-73-gaa49b


From f122668ddcce450c2585f0be4bf4478d6fd6176b Mon Sep 17 00:00:00 2001
From: Shahab Vahedi <shahab@synopsys.com>
Date: Tue, 30 Apr 2024 16:56:04 +0200
Subject: ARC: Add eBPF JIT support

This will add eBPF JIT support to the 32-bit ARCv2 processors. The
implementation is qualified by running the BPF tests on a Synopsys HSDK
board with "ARC HS38 v2.1c at 500 MHz" as the 4-core CPU.

The test_bpf.ko reports 2-10 fold improvements in execution time of its
tests. For instance:

test_bpf: #33 tcpdump port 22 jited:0 704 1766 2104 PASS
test_bpf: #33 tcpdump port 22 jited:1 120  224  260 PASS

test_bpf: #141 ALU_DIV_X: 4294967295 / 4294967295 = 1 jited:0 238 PASS
test_bpf: #141 ALU_DIV_X: 4294967295 / 4294967295 = 1 jited:1  23 PASS

test_bpf: #776 JMP32_JGE_K: all ... magnitudes jited:0 2034681 PASS
test_bpf: #776 JMP32_JGE_K: all ... magnitudes jited:1 1020022 PASS

Deployment and structure
------------------------
The related codes are added to "arch/arc/net":

- bpf_jit.h       -- The interface that a back-end translator must provide
- bpf_jit_core.c  -- Knows how to handle the input eBPF byte stream
- bpf_jit_arcv2.c -- The back-end code that knows the translation logic

The bpf_int_jit_compile() at the end of bpf_jit_core.c is the entrance
to the whole process. Normally, the translation is done in one pass,
namely the "normal pass". In case some relocations are not known during
this pass, some data (arc_jit_data) is allocated for the next pass to
come. This possible next (and last) pass is called the "extra pass".

1. Normal pass       # The necessary pass
     1a. Dry run       # Get the whole JIT length, epilogue offset, etc.
     1b. Emit phase    # Allocate memory and start emitting instructions
2. Extra pass        # Only needed if there are relocations to be fixed
     2a. Patch relocations

Support status
--------------
The JIT compiler supports BPF instructions up to "cpu=v4". However, it
does not yet provide support for:

- Tail calls
- Atomic operations
- 64-bit division/remainder
- BPF_PROBE_MEM* (exception table)

The result of "test_bpf" test suite on an HSDK board is:

hsdk-lnx# insmod test_bpf.ko test_suite=test_bpf

  test_bpf: Summary: 863 PASSED, 186 FAILED, [851/851 JIT'ed]

All the failing test cases are due to the ones that were not JIT'ed.
Categorically, they can be represented as:

  .-----------.------------.-------------.
  | test type |   opcodes  | # of cases  |
  |-----------+------------+-------------|
  | atomic    | 0xC3, 0xDB |         149 |
  | div64     | 0x37, 0x3F |          22 |
  | mod64     | 0x97, 0x9F |          15 |
  `-----------^------------+-------------|
                           | (total) 186 |
                           `-------------'

Setup: build config
-------------------
The following configs must be set to have a working JIT test:

  CONFIG_BPF_JIT=y
  CONFIG_BPF_JIT_ALWAYS_ON=y
  CONFIG_TEST_BPF=m

The following options are not necessary for the tests module,
but are good to have:

  CONFIG_DEBUG_INFO=y             # prerequisite for below
  CONFIG_DEBUG_INFO_BTF=y         # so bpftool can generate vmlinux.h

  CONFIG_FTRACE=y                 #
  CONFIG_BPF_SYSCALL=y            # all these options lead to
  CONFIG_KPROBE_EVENTS=y          # having CONFIG_BPF_EVENTS=y
  CONFIG_PERF_EVENTS=y            #

Some BPF programs provide data through /sys/kernel/debug:
  CONFIG_DEBUG_FS=y
arc# mount -t debugfs debugfs /sys/kernel/debug

Setup: elfutils
---------------
The libdw.{so,a} library that is used by pahole for processing
the final binary must come from elfutils 0.189 or newer. The
support for ARCv2 [1] has been added since that version.

[1]
https://sourceware.org/git/?p=elfutils.git;a=commit;h=de3d46b3e7

Setup: pahole
-------------
The line below in linux/scripts/Makefile.btf must be commented out:

pahole-flags-$(call test-ge, $(pahole-ver), 121) += --btf_gen_floats

Or else, the build will fail:

$ make V=1
  ...
  BTF     .btf.vmlinux.bin.o
pahole -J --btf_gen_floats                    \
       -j --lang_exclude=rust                 \
       --skip_encoding_btf_inconsistent_proto \
       --btf_gen_optimized .tmp_vmlinux.btf
Complex, interval and imaginary float types are not supported
Encountered error while encoding BTF.
  ...
  BTFIDS  vmlinux
./tools/bpf/resolve_btfids/resolve_btfids vmlinux
libbpf: failed to find '.BTF' ELF section in vmlinux
FAILED: load BTF from vmlinux: No data available

This is due to the fact that the ARC toolchains generate
"complex float" DIE entries in libgcc and at the moment, pahole
can't handle such entries.

Running the tests
-----------------
host$ scp /bld/linux/lib/test_bpf.ko arc:
arc # sysctl net.core.bpf_jit_enable=1
arc # insmod test_bpf.ko test_suite=test_bpf
      ...
      test_bpf: #1048 Staggered jumps: JMP32_JSLE_X jited:1 697811 PASS
      test_bpf: Summary: 863 PASSED, 186 FAILED, [851/851 JIT'ed]

Acknowledgments
---------------
- Claudiu Zissulescu for his unwavering support
- Yuriy Kolerov for testing and troubleshooting
- Vladimir Isaev for the pahole workaround
- Sergey Matyukevich for paving the road by adding the interpreter support

Signed-off-by: Shahab Vahedi <shahab@synopsys.com>
Link: https://lore.kernel.org/r/20240430145604.38592-1-list+bpf@vahedi.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/admin-guide/sysctl/net.rst |    1 +
 Documentation/networking/filter.rst      |    4 +-
 MAINTAINERS                              |    6 +
 arch/arc/Kbuild                          |    1 +
 arch/arc/Kconfig                         |    1 +
 arch/arc/net/Makefile                    |    6 +
 arch/arc/net/bpf_jit.h                   |  164 ++
 arch/arc/net/bpf_jit_arcv2.c             | 3005 ++++++++++++++++++++++++++++++
 arch/arc/net/bpf_jit_core.c              | 1425 ++++++++++++++
 9 files changed, 4611 insertions(+), 2 deletions(-)
 create mode 100644 arch/arc/net/Makefile
 create mode 100644 arch/arc/net/bpf_jit.h
 create mode 100644 arch/arc/net/bpf_jit_arcv2.c
 create mode 100644 arch/arc/net/bpf_jit_core.c

(limited to 'Documentation')

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 7250c0542828..7b0c4291c686 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -72,6 +72,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
   - riscv64
   - riscv32
   - loongarch64
+  - arc
 
 And the older cBPF JIT supported on the following archs:
 
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index 7d8c5380492f..8eb9a5d40f31 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -513,7 +513,7 @@ JIT compiler
 ------------
 
 The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC,
-PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through
+PowerPC, ARM, ARM64, MIPS, RISC-V, s390, and ARC and can be enabled through
 CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each
 attached filter from user space or for internal kernel users if it has
 been previously enabled by root::
@@ -650,7 +650,7 @@ before a conversion to the new layout is being done behind the scenes!
 
 Currently, the classic BPF format is being used for JITing on most
 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
-sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation
+sparc64, arm32, riscv64, riscv32, loongarch64, arc perform JIT compilation
 from eBPF instruction set.
 
 Testing
diff --git a/MAINTAINERS b/MAINTAINERS
index 943921d642ad..b6a946d24f00 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3712,6 +3712,12 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml
 F:	drivers/iio/imu/bmi323/
 
+BPF JIT for ARC
+M:	Shahab Vahedi <shahab@synopsys.com>
+L:	bpf@vger.kernel.org
+S:	Maintained
+F:	arch/arc/net/
+
 BPF JIT for ARM
 M:	Russell King <linux@armlinux.org.uk>
 M:	Puranjay Mohan <puranjay12@gmail.com>
diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild
index b94102fff68b..20ea7dd482d4 100644
--- a/arch/arc/Kbuild
+++ b/arch/arc/Kbuild
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += kernel/
 obj-y += mm/
+obj-y += net/
 
 # for cleaning
 subdir- += boot
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 99d2845f3feb..551fc65a9898 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -52,6 +52,7 @@ config ARC
 	select PCI_SYSCALL if PCI
 	select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
 	select TRACE_IRQFLAGS_SUPPORT
+	select HAVE_EBPF_JIT if ISA_ARCV2
 
 config LOCKDEP_SUPPORT
 	def_bool y
diff --git a/arch/arc/net/Makefile b/arch/arc/net/Makefile
new file mode 100644
index 000000000000..ea5790952e9a
--- /dev/null
+++ b/arch/arc/net/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+ifeq ($(CONFIG_ISA_ARCV2),y)
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_arcv2.o
+endif
diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
new file mode 100644
index 000000000000..ec44873c42d1
--- /dev/null
+++ b/arch/arc/net/bpf_jit.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The interface that a back-end should provide to bpf_jit_core.c.
+ *
+ * Copyright (c) 2024 Synopsys Inc.
+ * Author: Shahab Vahedi <shahab@synopsys.com>
+ */
+
+#ifndef _ARC_BPF_JIT_H
+#define _ARC_BPF_JIT_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* Print debug info and assert. */
+//#define ARC_BPF_JIT_DEBUG
+
+/* Determine the address type of the target. */
+#ifdef CONFIG_ISA_ARCV2
+#define ARC_ADDR u32
+#endif
+
+/*
+ * For the translation of some BPF instructions, a temporary register
+ * might be needed for some interim data.
+ */
+#define JIT_REG_TMP MAX_BPF_JIT_REG
+
+/*
+ * Buffer access: If buffer "b" is not NULL, advance by "n" bytes.
+ *
+ * This macro must be used in any place that potentially requires a
+ * "buf + len". This way, we make sure that the "buf" argument for
+ * the underlying "arc_*(buf, ...)" ends up as NULL instead of something
+ * like "0+4" or "0+8", etc. Those "arc_*()" functions check their "buf"
+ * value to decide if instructions should be emitted or not.
+ */
+#define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b))
+
+/************** Functions that the back-end must provide **************/
+/* Extension for 32-bit operations. */
+inline u8 zext(u8 *buf, u8 rd);
+/***** Moves *****/
+u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
+u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
+u8 mov_r64(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
+u8 mov_r64_i32(u8 *buf, u8 reg, s32 imm);
+u8 mov_r64_i64(u8 *buf, u8 reg, u32 lo, u32 hi);
+/***** Loads and stores *****/
+u8 load_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size, bool sign_ext);
+u8 store_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size);
+u8 store_i(u8 *buf, s32 imm, u8 rd, s16 off, u8 size);
+/***** Addition *****/
+u8 add_r32(u8 *buf, u8 rd, u8 rs);
+u8 add_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 add_r64(u8 *buf, u8 rd, u8 rs);
+u8 add_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Subtraction *****/
+u8 sub_r32(u8 *buf, u8 rd, u8 rs);
+u8 sub_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 sub_r64(u8 *buf, u8 rd, u8 rs);
+u8 sub_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Multiplication *****/
+u8 mul_r32(u8 *buf, u8 rd, u8 rs);
+u8 mul_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 mul_r64(u8 *buf, u8 rd, u8 rs);
+u8 mul_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Division *****/
+u8 div_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext);
+u8 div_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext);
+/***** Remainder *****/
+u8 mod_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext);
+u8 mod_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext);
+/***** Bitwise AND *****/
+u8 and_r32(u8 *buf, u8 rd, u8 rs);
+u8 and_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 and_r64(u8 *buf, u8 rd, u8 rs);
+u8 and_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise OR *****/
+u8 or_r32(u8 *buf, u8 rd, u8 rs);
+u8 or_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 or_r64(u8 *buf, u8 rd, u8 rs);
+u8 or_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise XOR *****/
+u8 xor_r32(u8 *buf, u8 rd, u8 rs);
+u8 xor_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 xor_r64(u8 *buf, u8 rd, u8 rs);
+u8 xor_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise Negate *****/
+u8 neg_r32(u8 *buf, u8 r);
+u8 neg_r64(u8 *buf, u8 r);
+/***** Bitwise left shift *****/
+u8 lsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 lsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 lsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 lsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise right shift (logical) *****/
+u8 rsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 rsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 rsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 rsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise right shift (arithmetic) *****/
+u8 arsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 arsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 arsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Frame related *****/
+u32 mask_for_used_regs(u8 bpf_reg, bool is_call);
+u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size);
+u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size);
+/***** Jumps *****/
+/*
+ * Different sorts of conditions (ARC enum as opposed to BPF_*).
+ *
+ * Do not change the order of enums here. ARC_CC_SLE+1 is used
+ * to determine the number of JCCs.
+ */
+enum ARC_CC {
+	ARC_CC_UGT = 0,		/* unsigned >  */
+	ARC_CC_UGE,		/* unsigned >= */
+	ARC_CC_ULT,		/* unsigned <  */
+	ARC_CC_ULE,		/* unsigned <= */
+	ARC_CC_SGT,		/*   signed >  */
+	ARC_CC_SGE,		/*   signed >= */
+	ARC_CC_SLT,		/*   signed <  */
+	ARC_CC_SLE,		/*   signed <= */
+	ARC_CC_AL,		/* always      */
+	ARC_CC_EQ,		/*          == */
+	ARC_CC_NE,		/*          != */
+	ARC_CC_SET,		/* test        */
+	ARC_CC_LAST
+};
+
+/*
+ * A few notes:
+ *
+ * - check_jmp_*() are prerequisites before calling the gen_jmp_*().
+ *   They return "true" if the jump is possible and "false" otherwise.
+ *
+ * - The notion of "*_off" is to emphasize that these parameters are
+ *   merely offsets in the JIT stream and not absolute addresses. One
+ *   can look at them as addresses if the JIT code would start from
+ *   address 0x0000_0000. Nonetheless, since the buffer address for the
+ *   JIT is on a word-aligned address, this works and actually makes
+ *   things simpler (offsets are in the range of u32 which is more than
+ *   enough).
+ */
+bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond);
+bool check_jmp_64(u32 curr_off, u32 targ_off, u8 cond);
+u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
+u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
+/***** Miscellaneous *****/
+u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func);
+u8 arc_to_bpf_return(u8 *buf);
+/*
+ * - Perform byte swaps on "rd" based on the "size".
+ * - If "force" is set, do it unconditionally. Otherwise, consider the
+ *   desired "endian"ness and the host endianness.
+ * - For data "size"s up to 32 bits, perform a zero-extension if asked
+ *   by the "do_zext" boolean.
+ */
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext);
+
+#endif /* _ARC_BPF_JIT_H */
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
new file mode 100644
index 000000000000..31bfb6e9ce00
--- /dev/null
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -0,0 +1,3005 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The ARCv2 backend of Just-In-Time compiler for eBPF bytecode.
+ *
+ * Copyright (c) 2024 Synopsys Inc.
+ * Author: Shahab Vahedi <shahab@synopsys.com>
+ */
+#include <linux/bug.h>
+#include "bpf_jit.h"
+
+/* ARC core registers. */
+enum {
+	ARC_R_0,  ARC_R_1,  ARC_R_2,  ARC_R_3,  ARC_R_4,  ARC_R_5,
+	ARC_R_6,  ARC_R_7,  ARC_R_8,  ARC_R_9,  ARC_R_10, ARC_R_11,
+	ARC_R_12, ARC_R_13, ARC_R_14, ARC_R_15, ARC_R_16, ARC_R_17,
+	ARC_R_18, ARC_R_19, ARC_R_20, ARC_R_21, ARC_R_22, ARC_R_23,
+	ARC_R_24, ARC_R_25, ARC_R_26, ARC_R_FP, ARC_R_SP, ARC_R_ILINK,
+	ARC_R_30, ARC_R_BLINK,
+	/*
+	 * Having ARC_R_IMM encoded as source register means there is an
+	 * immediate that must be interpreted from the next 4 bytes. If
+	 * encoded as the destination register though, it implies that the
+	 * output of the operation is not assigned to any register. The
+	 * latter is helpful if we only care about updating the CPU status
+	 * flags.
+	 */
+	ARC_R_IMM = 62
+};
+
+/*
+ * Remarks about the rationale behind the chosen mapping:
+ *
+ * - BPF_REG_{1,2,3,4} are the argument registers and must be mapped to
+ *   argument registers in ARCv2 ABI: r0-r7. The r7 registers is the last
+ *   argument register in the ABI. Therefore BPF_REG_5, as the fifth
+ *   argument, must be pushed onto the stack. This is a must for calling
+ *   in-kernel functions.
+ *
+ * - In ARCv2 ABI, the return value is in r0 for 32-bit results and (r1,r0)
+ *   for 64-bit results. However, because they're already used for BPF_REG_1,
+ *   the next available scratch registers, r8 and r9, are the best candidates
+ *   for BPF_REG_0. After a "call" to a(n) (in-kernel) function, the result
+ *   is "mov"ed to these registers. At a BPF_EXIT, their value is "mov"ed to
+ *   (r1,r0).
+ *   It is worth mentioning that scratch registers are the best choice for
+ *   BPF_REG_0, because it is very popular in BPF instruction encoding.
+ *
+ * - JIT_REG_TMP is an artifact needed to translate some BPF instructions.
+ *   Its life span is one single BPF instruction. Since during the
+ *   analyze_reg_usage(), it is not known if temporary registers are used,
+ *   it is mapped to ARC's scratch registers: r10 and r11. Therefore, they
+ *   don't matter in analysing phase and don't need saving. This temporary
+ *   register is added as yet another index in the bpf2arc array, so it will
+ *   unfold like the rest of registers during the code generation process.
+ *
+ * - Mapping of callee-saved BPF registers, BPF_REG_{6,7,8,9}, starts from
+ *   (r15,r14) register pair. The (r13,r12) is not a good choice, because
+ *   in ARCv2 ABI, r12 is not a callee-saved register and this can cause
+ *   problem when calling an in-kernel function. Theoretically, the mapping
+ *   could start from (r14,r13), but it is not a conventional ARCv2 register
+ *   pair. To have a future proof design, I opted for this arrangement.
+ *   If/when we decide to add ARCv2 instructions that do use register pairs,
+ *   the mapping, hopefully, doesn't need to be revisited.
+ */
+const u8 bpf2arc[][2] = {
+	/* Return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = {ARC_R_8, ARC_R_9},
+	/* Arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = {ARC_R_0, ARC_R_1},
+	[BPF_REG_2] = {ARC_R_2, ARC_R_3},
+	[BPF_REG_3] = {ARC_R_4, ARC_R_5},
+	[BPF_REG_4] = {ARC_R_6, ARC_R_7},
+	/* Remaining arguments, to be passed on the stack per 32-bit ABI */
+	[BPF_REG_5] = {ARC_R_22, ARC_R_23},
+	/* Callee-saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = {ARC_R_14, ARC_R_15},
+	[BPF_REG_7] = {ARC_R_16, ARC_R_17},
+	[BPF_REG_8] = {ARC_R_18, ARC_R_19},
+	[BPF_REG_9] = {ARC_R_20, ARC_R_21},
+	/* Read-only frame pointer to access the eBPF stack. 32-bit only. */
+	[BPF_REG_FP] = {ARC_R_FP, },
+	/* Register for blinding constants */
+	[BPF_REG_AX] = {ARC_R_24, ARC_R_25},
+	/* Temporary registers for internal use */
+	[JIT_REG_TMP] = {ARC_R_10, ARC_R_11}
+};
+
+#define ARC_CALLEE_SAVED_REG_FIRST ARC_R_13
+#define ARC_CALLEE_SAVED_REG_LAST  ARC_R_25
+
+#define REG_LO(r) (bpf2arc[(r)][0])
+#define REG_HI(r) (bpf2arc[(r)][1])
+
+/*
+ * To comply with ARCv2 ABI, BPF's arg5 must be put on stack. After which,
+ * the stack needs to be restored by ARG5_SIZE.
+ */
+#define ARG5_SIZE 8
+
+/* Instruction lengths in bytes. */
+enum {
+	INSN_len_normal = 4,	/* Normal instructions length. */
+	INSN_len_imm = 4	/* Length of an extra 32-bit immediate. */
+};
+
+/* ZZ defines the size of operation in encodings that it is used. */
+enum {
+	ZZ_1_byte = 1,
+	ZZ_2_byte = 2,
+	ZZ_4_byte = 0,
+	ZZ_8_byte = 3
+};
+
+/*
+ * AA is mostly about address write back mode. It determines if the
+ * address in question should be updated before usage or after:
+ *   addr += offset; data = *addr;
+ *   data = *addr; addr += offset;
+ *
+ * In "scaling" mode, the effective address will become the sum
+ * of "address" + "index"*"size". The "size" is specified by the
+ * "ZZ" field. There is no write back when AA is set for scaling:
+ *   data = *(addr + offset<<zz)
+ */
+enum {
+	AA_none  = 0,
+	AA_pre   = 1,	/* in assembly known as "a/aw". */
+	AA_post  = 2,	/* in assembly known as "ab". */
+	AA_scale = 3	/* in assembly known as "as". */
+};
+
+/* X flag determines the mode of extension. */
+enum {
+	X_zero = 0,
+	X_sign = 1
+};
+
+/* Condition codes. */
+enum {
+	CC_always     = 0,	/* condition is true all the time */
+	CC_equal      = 1,	/* if status32.z flag is set */
+	CC_unequal    = 2,	/* if status32.z flag is clear */
+	CC_positive   = 3,	/* if status32.n flag is clear */
+	CC_negative   = 4,	/* if status32.n flag is set */
+	CC_less_u     = 5,	/* less than (unsigned) */
+	CC_less_eq_u  = 14,	/* less than or equal (unsigned) */
+	CC_great_eq_u = 6,	/* greater than or equal (unsigned) */
+	CC_great_u    = 13,	/* greater than (unsigned) */
+	CC_less_s     = 11,	/* less than (signed) */
+	CC_less_eq_s  = 12,	/* less than or equal (signed) */
+	CC_great_eq_s = 10,	/* greater than or equal (signed) */
+	CC_great_s    = 9	/* greater than (signed) */
+};
+
+#define IN_U6_RANGE(x)	((x) <= (0x40      - 1) && (x) >= 0)
+#define IN_S9_RANGE(x)	((x) <= (0x100     - 1) && (x) >= -0x100)
+#define IN_S12_RANGE(x)	((x) <= (0x800     - 1) && (x) >= -0x800)
+#define IN_S21_RANGE(x)	((x) <= (0x100000  - 1) && (x) >= -0x100000)
+#define IN_S25_RANGE(x)	((x) <= (0x1000000 - 1) && (x) >= -0x1000000)
+
+/* Operands in most of the encodings. */
+#define OP_A(x)	((x) & 0x03f)
+#define OP_B(x)	((((x) & 0x07) << 24) | (((x) & 0x38) <<  9))
+#define OP_C(x)	(((x) & 0x03f) << 6)
+#define OP_IMM	(OP_C(ARC_R_IMM))
+#define COND(x)	(OP_A((x) & 31))
+#define FLAG(x)	(((x) & 1) << 15)
+
+/*
+ * The 4-byte encoding of "mov b,c":
+ *
+ * 0010_0bbb 0000_1010 0BBB_cccc cc00_0000
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_MOV		0x200a0000
+
+/*
+ * The 4-byte encoding of "mov b,s12" (used for moving small immediates):
+ *
+ * 0010_0bbb 1000_1010 0BBB_ssss ssSS_SSSS
+ *
+ * b:  BBBbbb		destination register
+ * s:  SSSSSSssssss	source immediate (signed)
+ */
+#define OPC_MOVI	0x208a0000
+#define MOVI_S12(x)	((((x) & 0xfc0) >> 6) | (((x) & 0x3f) << 6))
+
+/*
+ * The 4-byte encoding of "mov[.qq] b,u6", used for conditional
+ * moving of even smaller immediates:
+ *
+ * 0010_0bbb 1100_1010 0BBB_cccc cciq_qqqq
+ *
+ * qq: qqqqq		condition code
+ * i:			If set, c is considered a 6-bit immediate, else a reg.
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source
+ */
+#define OPC_MOV_CC	0x20ca0000
+#define MOV_CC_I	BIT(5)
+#define OPC_MOVU_CC	(OPC_MOV_CC | MOV_CC_I)
+
+/*
+ * The 4-byte encoding of "sexb b,c" (8-bit sign extension):
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_0101
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SEXB	0x202f0005
+
+/*
+ * The 4-byte encoding of "sexh b,c" (16-bit sign extension):
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_0110
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SEXH	0x202f0006
+
+/*
+ * The 4-byte encoding of "ld[zz][.x][.aa] c,[b,s9]":
+ *
+ * 0001_0bbb ssss_ssss SBBB_0aaz zxcc_cccc
+ *
+ * zz:			size mode
+ * aa:			address write back mode
+ * x:			extension mode
+ *
+ * s9: S_ssss_ssss	9-bit signed number
+ * b:  BBBbbb		source reg for address
+ * c:  cccccc		destination register
+ */
+#define OPC_LOAD	0x10000000
+#define LOAD_X(x)	((x) << 6)
+#define LOAD_ZZ(x)	((x) << 7)
+#define LOAD_AA(x)	((x) << 9)
+#define LOAD_S9(x)	((((x) & 0x0ff) << 16) | (((x) & 0x100) <<  7))
+#define LOAD_C(x)	((x) & 0x03f)
+/* Unsigned and signed loads. */
+#define OPC_LDU		(OPC_LOAD | LOAD_X(X_zero))
+#define OPC_LDS		(OPC_LOAD | LOAD_X(X_sign))
+/* 32-bit load. */
+#define OPC_LD32	(OPC_LDU | LOAD_ZZ(ZZ_4_byte))
+/* "pop reg" is merely a "ld.ab reg,[sp,4]". */
+#define OPC_POP		\
+	(OPC_LD32 | LOAD_AA(AA_post) | LOAD_S9(4) | OP_B(ARC_R_SP))
+
+/*
+ * The 4-byte encoding of "st[zz][.aa] c,[b,s9]":
+ *
+ * 0001_1bbb ssss_ssss SBBB_cccc cc0a_azz0
+ *
+ * zz: zz		size mode
+ * aa: aa		address write back mode
+ *
+ * s9: S_ssss_ssss	9-bit signed number
+ * b:  BBBbbb		source reg for address
+ * c:  cccccc		source reg to be stored
+ */
+#define OPC_STORE	0x18000000
+#define STORE_ZZ(x)	((x) << 1)
+#define STORE_AA(x)	((x) << 3)
+#define STORE_S9(x)	((((x) & 0x0ff) << 16) | (((x) & 0x100) <<  7))
+/* 32-bit store. */
+#define OPC_ST32	(OPC_STORE | STORE_ZZ(ZZ_4_byte))
+/* "push reg" is merely a "st.aw reg,[sp,-4]". */
+#define OPC_PUSH	\
+	(OPC_ST32 | STORE_AA(AA_pre) | STORE_S9(-4) | OP_B(ARC_R_SP))
+
+/*
+ * The 4-byte encoding of "add a,b,c":
+ *
+ * 0010_0bbb 0i00_0000 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ * i:			If set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_ADD		0x20000000
+/* Addition with updating the pertinent flags in "status32" register. */
+#define OPC_ADDF	(OPC_ADD | FLAG(1))
+#define ADDI		BIT(22)
+#define ADDI_U6(x)	OP_C(x)
+#define OPC_ADDI	(OPC_ADD | ADDI)
+#define OPC_ADDIF	(OPC_ADDI | FLAG(1))
+#define OPC_ADD_I	(OPC_ADD | OP_IMM)
+
+/*
+ * The 4-byte encoding of "adc a,b,c" (addition with carry):
+ *
+ * 0010_0bbb 0i00_0001 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_ADC		0x20010000
+#define ADCI		BIT(22)
+#define ADCI_U6(x)	OP_C(x)
+#define OPC_ADCI	(OPC_ADC | ADCI)
+
+/*
+ * The 4-byte encoding of "sub a,b,c":
+ *
+ * 0010_0bbb 0i00_0010 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_SUB		0x20020000
+/* Subtraction with updating the pertinent flags in "status32" register. */
+#define OPC_SUBF	(OPC_SUB | FLAG(1))
+#define SUBI		BIT(22)
+#define SUBI_U6(x)	OP_C(x)
+#define OPC_SUBI	(OPC_SUB | SUBI)
+#define OPC_SUB_I	(OPC_SUB | OP_IMM)
+
+/*
+ * The 4-byte encoding of "sbc a,b,c" (subtraction with carry):
+ *
+ * 0010_0bbb 0000_0011 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_SBC		0x20030000
+
+/*
+ * The 4-byte encoding of "cmp[.qq] b,c":
+ *
+ * 0010_0bbb 1100_1100 1BBB_cccc cc0q_qqqq
+ *
+ * qq:	qqqqq		condition code
+ *
+ * b:  BBBbbb		the 1st operand
+ * c:  cccccc		the 2nd operand
+ */
+#define OPC_CMP		0x20cc8000
+
+/*
+ * The 4-byte encoding of "neg a,b":
+ *
+ * 0010_0bbb 0100_1110 0BBB_0000 00aa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		input
+ */
+#define OPC_NEG		0x204e0000
+
+/*
+ * The 4-byte encoding of "mpy a,b,c".
+ * mpy is the signed 32-bit multiplication with the lower 32-bit
+ * of the product as the result.
+ *
+ * 0010_0bbb 0001_1010 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_MPY		0x201a0000
+#define OPC_MPYI	(OPC_MPY | OP_IMM)
+
+/*
+ * The 4-byte encoding of "mpydu a,b,c".
+ * mpydu is the unsigned 32-bit multiplication with the lower 32-bit of
+ * the product in register "a" and the higher 32-bit in register "a+1".
+ *
+ * 0010_1bbb 0001_1001 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		64-bit result in registers (R_a+1,R_a)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_MPYDU	0x28190000
+#define OPC_MPYDUI	(OPC_MPYDU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "divu a,b,c" (unsigned division):
+ *
+ * 0010_1bbb 0000_0101 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (quotient)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_DIVU	0x28050000
+#define OPC_DIVUI	(OPC_DIVU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "div a,b,c" (signed division):
+ *
+ * 0010_1bbb 0000_0100 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (quotient)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_DIVS	0x28040000
+#define OPC_DIVSI	(OPC_DIVS | OP_IMM)
+
+/*
+ * The 4-byte encoding of "remu a,b,c" (unsigned remainder):
+ *
+ * 0010_1bbb 0000_1001 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (remainder)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_REMU	0x28090000
+#define OPC_REMUI	(OPC_REMU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "rem a,b,c" (signed remainder):
+ *
+ * 0010_1bbb 0000_1000 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (remainder)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_REMS	0x28080000
+#define OPC_REMSI	(OPC_REMS | OP_IMM)
+
+/*
+ * The 4-byte encoding of "and a,b,c":
+ *
+ * 0010_0bbb 0000_0100 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if zero and negative flags should be updated
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_AND		0x20040000
+#define OPC_ANDI	(OPC_AND | OP_IMM)
+
+/*
+ * The 4-byte encoding of "tst[.qq] b,c".
+ * Checks if the two input operands have any bit set at the same
+ * position.
+ *
+ * 0010_0bbb 1100_1011 1BBB_cccc cc0q_qqqq
+ *
+ * qq:	qqqqq		condition code
+ *
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_TST		0x20cb8000
+
+/*
+ * The 4-byte encoding of "or a,b,c":
+ *
+ * 0010_0bbb 0000_0101 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_OR		0x20050000
+#define OPC_ORI		(OPC_OR | OP_IMM)
+
+/*
+ * The 4-byte encoding of "xor a,b,c":
+ *
+ * 0010_0bbb 0000_0111 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_XOR		0x20070000
+#define OPC_XORI	(OPC_XOR | OP_IMM)
+
+/*
+ * The 4-byte encoding of "not b,c":
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_1010
+ *
+ * b:  BBBbbb		result
+ * c:  cccccc		input
+ */
+#define OPC_NOT		0x202f000a
+
+/*
+ * The 4-byte encoding of "btst b,u6":
+ *
+ * 0010_0bbb 0101_0001 1BBB_uuuu uu00_0000
+ *
+ * b:  BBBbbb		input number to check
+ * u6: uuuuuu		6-bit unsigned number specifying bit position to check
+ */
+#define OPC_BTSTU6	0x20518000
+#define BTST_U6(x)	(OP_C((x) & 63))
+
+/*
+ * The 4-byte encoding of "asl[.qq] b,b,c" (arithmetic shift left):
+ *
+ * 0010_1bbb 0i00_0000 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 5-bit immediate, else a reg.
+ *
+ * b:  BBBbbb		result and the first operand (number to be shifted)
+ * c:  cccccc		amount to be shifted
+ */
+#define OPC_ASL		0x28000000
+#define ASL_I		BIT(22)
+#define ASLI_U6(x)	OP_C((x) & 31)
+#define OPC_ASLI	(OPC_ASL | ASL_I)
+
+/*
+ * The 4-byte encoding of "asr a,b,c" (arithmetic shift right):
+ *
+ * 0010_1bbb 0i00_0010 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		first input:  number to be shifted
+ * c:  cccccc		second input: amount to be shifted
+ */
+#define OPC_ASR		0x28020000
+#define ASR_I		ASL_I
+#define ASRI_U6(x)	ASLI_U6(x)
+#define OPC_ASRI	(OPC_ASR | ASR_I)
+
+/*
+ * The 4-byte encoding of "lsr a,b,c" (logical shift right):
+ *
+ * 0010_1bbb 0i00_0001 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		first input:  number to be shifted
+ * c:  cccccc		second input: amount to be shifted
+ */
+#define OPC_LSR		0x28010000
+#define LSR_I		ASL_I
+#define LSRI_U6(x)	ASLI_U6(x)
+#define OPC_LSRI	(OPC_LSR | LSR_I)
+
+/*
+ * The 4-byte encoding of "swape b,c":
+ *
+ * 0010_1bbb 0010_1111 0bbb_cccc cc00_1001
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SWAPE	0x282f0009
+
+/*
+ * Encoding for jump to an address in register:
+ * j reg_c
+ *
+ * 0010_0000 1110_0000 0000_cccc cc00_0000
+ *
+ * c:  cccccc		register holding the destination address
+ */
+#define OPC_JMP		0x20e00000
+/* Jump to "branch-and-link" register, which effectively is a "return". */
+#define OPC_J_BLINK	(OPC_JMP | OP_C(ARC_R_BLINK))
+
+/*
+ * Encoding for jump-and-link to an address in register:
+ * jl reg_c
+ *
+ * 0010_0000 0010_0010 0000_cccc cc00_0000
+ *
+ * c:  cccccc		register holding the destination address
+ */
+#define OPC_JL		0x20220000
+
+/*
+ * Encoding for (conditional) branch to an offset from the current location
+ * that is word aligned: (PC & 0xffff_fffc) + s21
+ * B[qq] s21
+ *
+ * 0000_0sss ssss_sss0 SSSS_SSSS SS0q_qqqq
+ *
+ * qq:	qqqqq				condition code
+ * s21:	SSSS SSSS_SSss ssss_ssss	The displacement (21-bit signed)
+ *
+ * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore,
+ * it should be a multiple of 2. Hence, there is an implied '0' bit at its
+ * LSB: S_SSSS SSSS_Ssss ssss_sss0
+ */
+#define OPC_BCC		0x00000000
+#define BCC_S21(d)	((((d) & 0x7fe) << 16) | (((d) & 0x1ff800) >> 5))
+
+/*
+ * Encoding for unconditional branch to an offset from the current location
+ * that is word aligned: (PC & 0xffff_fffc) + s25
+ * B s25
+ *
+ * 0000_0sss ssss_sss1 SSSS_SSSS SS00_TTTT
+ *
+ * s25:	TTTT SSSS SSSS_SSss ssss_ssss	The displacement (25-bit signed)
+ *
+ * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore,
+ * it should be a multiple of 2. Hence, there is an implied '0' bit at its
+ * LSB: T TTTS_SSSS SSSS_Ssss ssss_sss0
+ */
+#define OPC_B		0x00010000
+#define B_S25(d)	((((d) & 0x1e00000) >> 21) | BCC_S21(d))
+
+static inline void emit_2_bytes(u8 *buf, u16 bytes)
+{
+	*((u16 *)buf) = bytes;
+}
+
+static inline void emit_4_bytes(u8 *buf, u32 bytes)
+{
+	emit_2_bytes(buf, bytes >> 16);
+	emit_2_bytes(buf + 2, bytes & 0xffff);
+}
+
+static inline u8 bpf_to_arc_size(u8 size)
+{
+	switch (size) {
+	case BPF_B:
+		return ZZ_1_byte;
+	case BPF_H:
+		return ZZ_2_byte;
+	case BPF_W:
+		return ZZ_4_byte;
+	case BPF_DW:
+		return ZZ_8_byte;
+	default:
+		return ZZ_4_byte;
+	}
+}
+
+/************** Encoders (Deal with ARC regs) ************/
+
+/* Move an immediate to register with a 4-byte instruction. */
+static u8 arc_movi_r(u8 *buf, u8 reg, s16 imm)
+{
+	const u32 insn = OPC_MOVI | OP_B(reg) | MOVI_S12(imm);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* rd <- rs */
+static u8 arc_mov_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_MOV | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* The emitted code may have different sizes based on "imm". */
+static u8 arc_mov_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_MOV | OP_B(rd) | OP_IMM;
+
+	if (IN_S12_RANGE(imm))
+		return arc_movi_r(buf, rd, imm);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* The emitted code will always have the same size (8). */
+static u8 arc_mov_i_fixed(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_MOV | OP_B(rd) | OP_IMM;
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* Conditional move. */
+static u8 arc_mov_cc_r(u8 *buf, u8 cc, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_MOV_CC | OP_B(rd) | OP_C(rs) | COND(cc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* Conditional move of a small immediate to rd. */
+static u8 arc_movu_cc_r(u8 *buf, u8 cc, u8 rd, u8 imm)
+{
+	const u32 insn = OPC_MOVU_CC | OP_B(rd) | OP_C(imm) | COND(cc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* Sign extension from a byte. */
+static u8 arc_sexb_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_SEXB | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* Sign extension from two bytes. */
+static u8 arc_sexh_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_SEXH | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* st reg, [reg_mem, off] */
+static u8 arc_st_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz)
+{
+	const u32 insn = OPC_STORE | STORE_ZZ(zz) | OP_C(reg) |
+		OP_B(reg_mem) | STORE_S9(off);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* st.aw reg, [sp, -4] */
+static u8 arc_push_r(u8 *buf, u8 reg)
+{
+	const u32 insn = OPC_PUSH | OP_C(reg);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* ld reg, [reg_mem, off] (unsigned) */
+static u8 arc_ld_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz)
+{
+	const u32 insn = OPC_LDU | LOAD_ZZ(zz) | LOAD_C(reg) |
+		OP_B(reg_mem) | LOAD_S9(off);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* ld.x reg, [reg_mem, off] (sign extend) */
+static u8 arc_ldx_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz)
+{
+	const u32 insn = OPC_LDS | LOAD_ZZ(zz) | LOAD_C(reg) |
+		OP_B(reg_mem) | LOAD_S9(off);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* ld.ab reg,[sp,4] */
+static u8 arc_pop_r(u8 *buf, u8 reg)
+{
+	const u32 insn = OPC_POP | LOAD_C(reg);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* add Ra,Ra,Rc */
+static u8 arc_add_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_ADD | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* add.f Ra,Ra,Rc */
+static u8 arc_addf_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_ADDF | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* add.f Ra,Ra,u6 */
+static u8 arc_addif_r(u8 *buf, u8 ra, u8 u6)
+{
+	const u32 insn = OPC_ADDIF | OP_A(ra) | OP_B(ra) | ADDI_U6(u6);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* add Ra,Ra,u6 */
+static u8 arc_addi_r(u8 *buf, u8 ra, u8 u6)
+{
+	const u32 insn = OPC_ADDI | OP_A(ra) | OP_B(ra) | ADDI_U6(u6);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* add Ra,Rb,imm */
+static u8 arc_add_i(u8 *buf, u8 ra, u8 rb, s32 imm)
+{
+	const u32 insn = OPC_ADD_I | OP_A(ra) | OP_B(rb);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* adc Ra,Ra,Rc */
+static u8 arc_adc_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_ADC | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* adc Ra,Ra,u6 */
+static u8 arc_adci_r(u8 *buf, u8 ra, u8 u6)
+{
+	const u32 insn = OPC_ADCI | OP_A(ra) | OP_B(ra) | ADCI_U6(u6);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* sub Ra,Ra,Rc */
+static u8 arc_sub_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_SUB | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* sub.f Ra,Ra,Rc */
+static u8 arc_subf_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_SUBF | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* sub Ra,Ra,u6 */
+static u8 arc_subi_r(u8 *buf, u8 ra, u8 u6)
+{
+	const u32 insn = OPC_SUBI | OP_A(ra) | OP_B(ra) | SUBI_U6(u6);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* sub Ra,Ra,imm */
+static u8 arc_sub_i(u8 *buf, u8 ra, s32 imm)
+{
+	const u32 insn = OPC_SUB_I | OP_A(ra) | OP_B(ra);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* sbc Ra,Ra,Rc */
+static u8 arc_sbc_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_SBC | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* cmp Rb,Rc */
+static u8 arc_cmp_r(u8 *buf, u8 rb, u8 rc)
+{
+	const u32 insn = OPC_CMP | OP_B(rb) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/*
+ * cmp.z Rb,Rc
+ *
+ * This "cmp.z" variant of compare instruction is used on lower
+ * 32-bits of register pairs after "cmp"ing their upper parts. If the
+ * upper parts are equal (z), then this one will proceed to check the
+ * rest.
+ */
+static u8 arc_cmpz_r(u8 *buf, u8 rb, u8 rc)
+{
+	const u32 insn = OPC_CMP | OP_B(rb) | OP_C(rc) | CC_equal;
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* neg Ra,Rb */
+static u8 arc_neg_r(u8 *buf, u8 ra, u8 rb)
+{
+	const u32 insn = OPC_NEG | OP_A(ra) | OP_B(rb);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* mpy Ra,Rb,Rc */
+static u8 arc_mpy_r(u8 *buf, u8 ra, u8 rb, u8 rc)
+{
+	const u32 insn = OPC_MPY | OP_A(ra) | OP_B(rb) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* mpy Ra,Rb,imm */
+static u8 arc_mpy_i(u8 *buf, u8 ra, u8 rb, s32 imm)
+{
+	const u32 insn = OPC_MPYI | OP_A(ra) | OP_B(rb);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* mpydu Ra,Ra,Rc */
+static u8 arc_mpydu_r(u8 *buf, u8 ra, u8 rc)
+{
+	const u32 insn = OPC_MPYDU | OP_A(ra) | OP_B(ra) | OP_C(rc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* mpydu Ra,Ra,imm */
+static u8 arc_mpydu_i(u8 *buf, u8 ra, s32 imm)
+{
+	const u32 insn = OPC_MPYDUI | OP_A(ra) | OP_B(ra);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* divu Rd,Rd,Rs */
+static u8 arc_divu_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_DIVU | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* divu Rd,Rd,imm */
+static u8 arc_divu_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_DIVUI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* div Rd,Rd,Rs */
+static u8 arc_divs_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_DIVS | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* div Rd,Rd,imm */
+static u8 arc_divs_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_DIVSI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* remu Rd,Rd,Rs */
+static u8 arc_remu_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_REMU | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* remu Rd,Rd,imm */
+static u8 arc_remu_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_REMUI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* rem Rd,Rd,Rs */
+static u8 arc_rems_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_REMS | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* rem Rd,Rd,imm */
+static u8 arc_rems_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_REMSI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* and Rd,Rd,Rs */
+static u8 arc_and_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_AND | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/* and Rd,Rd,limm */
+static u8 arc_and_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_ANDI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+/* tst Rd,Rs */
+static u8 arc_tst_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_TST | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/*
+ * This particular version, "tst.z ...", is meant to be used after a
+ * "tst" on the low 32-bit of register pairs. If that "tst" is not
+ * zero, then we don't need to test the upper 32-bits lest it sets
+ * the zero flag.
+ */
+static u8 arc_tstz_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_TST | OP_B(rd) | OP_C(rs) | CC_equal;
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_or_r(u8 *buf, u8 rd, u8 rs1, u8 rs2)
+{
+	const u32 insn = OPC_OR | OP_A(rd) | OP_B(rs1) | OP_C(rs2);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_or_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_ORI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+static u8 arc_xor_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_XOR | OP_A(rd) | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_xor_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u32 insn = OPC_XORI | OP_A(rd) | OP_B(rd);
+
+	if (buf) {
+		emit_4_bytes(buf, insn);
+		emit_4_bytes(buf + INSN_len_normal, imm);
+	}
+	return INSN_len_normal + INSN_len_imm;
+}
+
+static u8 arc_not_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_NOT | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_btst_i(u8 *buf, u8 rs, u8 imm)
+{
+	const u32 insn = OPC_BTSTU6 | OP_B(rs) | BTST_U6(imm);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_asl_r(u8 *buf, u8 rd, u8 rs1, u8 rs2)
+{
+	const u32 insn = OPC_ASL | OP_A(rd) | OP_B(rs1) | OP_C(rs2);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_asli_r(u8 *buf, u8 rd, u8 rs, u8 imm)
+{
+	const u32 insn = OPC_ASLI | OP_A(rd) | OP_B(rs) | ASLI_U6(imm);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_asr_r(u8 *buf, u8 rd, u8 rs1, u8 rs2)
+{
+	const u32 insn = OPC_ASR | OP_A(rd) | OP_B(rs1) | OP_C(rs2);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_asri_r(u8 *buf, u8 rd, u8 rs, u8 imm)
+{
+	const u32 insn = OPC_ASRI | OP_A(rd) | OP_B(rs) | ASRI_U6(imm);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_lsr_r(u8 *buf, u8 rd, u8 rs1, u8 rs2)
+{
+	const u32 insn = OPC_LSR | OP_A(rd) | OP_B(rs1) | OP_C(rs2);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_lsri_r(u8 *buf, u8 rd, u8 rs, u8 imm)
+{
+	const u32 insn = OPC_LSRI | OP_A(rd) | OP_B(rs) | LSRI_U6(imm);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_swape_r(u8 *buf, u8 r)
+{
+	const u32 insn = OPC_SWAPE | OP_B(r) | OP_C(r);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+static u8 arc_jmp_return(u8 *buf)
+{
+	if (buf)
+		emit_4_bytes(buf, OPC_J_BLINK);
+	return INSN_len_normal;
+}
+
+static u8 arc_jl(u8 *buf, u8 reg)
+{
+	const u32 insn = OPC_JL | OP_C(reg);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/*
+ * Conditional jump to an address that is max 21 bits away (signed).
+ *
+ * b<cc> s21
+ */
+static u8 arc_bcc(u8 *buf, u8 cc, int offset)
+{
+	const u32 insn = OPC_BCC | BCC_S21(offset) | COND(cc);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/*
+ * Unconditional jump to an address that is max 25 bits away (signed).
+ *
+ * b     s25
+ */
+static u8 arc_b(u8 *buf, s32 offset)
+{
+	const u32 insn = OPC_B | B_S25(offset);
+
+	if (buf)
+		emit_4_bytes(buf, insn);
+	return INSN_len_normal;
+}
+
+/************* Packers (Deal with BPF_REGs) **************/
+
+inline u8 zext(u8 *buf, u8 rd)
+{
+	if (rd != BPF_REG_FP)
+		return arc_movi_r(buf, REG_HI(rd), 0);
+	else
+		return 0;
+}
+
+u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext)
+{
+	u8 len = 0;
+
+	if (sign_ext) {
+		if (sign_ext == 8)
+			len = arc_sexb_r(buf, REG_LO(rd), REG_LO(rs));
+		else if (sign_ext == 16)
+			len = arc_sexh_r(buf, REG_LO(rd), REG_LO(rs));
+		else if (sign_ext == 32 && rd != rs)
+			len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs));
+
+		return len;
+	}
+
+	/* Unsigned move. */
+
+	if (rd != rs)
+		len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs));
+
+	return len;
+}
+
+u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm)
+{
+	return arc_mov_i(buf, REG_LO(reg), imm);
+}
+
+u8 mov_r64(u8 *buf, u8 rd, u8 rs, u8 sign_ext)
+{
+	u8 len = 0;
+
+	if (sign_ext) {
+		/* First handle the low 32-bit part. */
+		len = mov_r32(buf, rd, rs, sign_ext);
+
+		/* Now propagate the sign bit of LO to HI. */
+		if (sign_ext == 8 || sign_ext == 16 || sign_ext == 32) {
+			len += arc_asri_r(BUF(buf, len),
+					  REG_HI(rd), REG_LO(rd), 31);
+		}
+
+		return len;
+	}
+
+	/* Unsigned move. */
+
+	if (rd == rs)
+		return 0;
+
+	len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs));
+
+	if (rs != BPF_REG_FP)
+		len += arc_mov_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	/* BPF_REG_FP is mapped to 32-bit "fp" register. */
+	else
+		len += arc_movi_r(BUF(buf, len), REG_HI(rd), 0);
+
+	return len;
+}
+
+/* Sign extend the 32-bit immediate into 64-bit register pair. */
+u8 mov_r64_i32(u8 *buf, u8 reg, s32 imm)
+{
+	u8 len = 0;
+
+	len = arc_mov_i(buf, REG_LO(reg), imm);
+
+	/* BPF_REG_FP is mapped to 32-bit "fp" register. */
+	if (reg != BPF_REG_FP) {
+		if (imm >= 0)
+			len += arc_movi_r(BUF(buf, len), REG_HI(reg), 0);
+		else
+			len += arc_movi_r(BUF(buf, len), REG_HI(reg), -1);
+	}
+
+	return len;
+}
+
+/*
+ * This is merely used for translation of "LD R, IMM64" instructions
+ * of the BPF. These sort of instructions are sometimes used for
+ * relocations. If during the normal pass, the relocation value is
+ * not known, the BPF instruction may look something like:
+ *
+ * LD R <- 0x0000_0001_0000_0001
+ *
+ * Which will nicely translate to two 4-byte ARC instructions:
+ *
+ * mov R_lo, 1               # imm is small enough to be s12
+ * mov R_hi, 1               # same
+ *
+ * However, during the extra pass, the IMM64 will have changed
+ * to the resolved address and looks something like:
+ *
+ * LD R <- 0x0000_0000_1234_5678
+ *
+ * Now, the translated code will require 12 bytes:
+ *
+ * mov R_lo, 0x12345678      # this is an 8-byte instruction
+ * mov R_hi, 0               # still 4 bytes
+ *
+ * Which in practice will result in overwriting the following
+ * instruction. To avoid such cases, we will always emit codes
+ * with fixed sizes.
+ */
+u8 mov_r64_i64(u8 *buf, u8 reg, u32 lo, u32 hi)
+{
+	u8 len;
+
+	len  = arc_mov_i_fixed(buf, REG_LO(reg), lo);
+	len += arc_mov_i_fixed(BUF(buf, len), REG_HI(reg), hi);
+
+	return len;
+}
+
+/*
+ * If the "off"set is too big (doesn't encode as S9) for:
+ *
+ *   {ld,st}  r, [rm, off]
+ *
+ * Then emit:
+ *
+ *   add r10, REG_LO(rm), off
+ *
+ * and make sure that r10 becomes the effective address:
+ *
+ *   {ld,st}  r, [r10, 0]
+ */
+static u8 adjust_mem_access(u8 *buf, s16 *off, u8 size,
+			    u8 rm, u8 *arc_reg_mem)
+{
+	u8 len = 0;
+	*arc_reg_mem = REG_LO(rm);
+
+	if (!IN_S9_RANGE(*off) ||
+	    (size == BPF_DW && !IN_S9_RANGE(*off + 4))) {
+		len += arc_add_i(BUF(buf, len),
+				 REG_LO(JIT_REG_TMP), REG_LO(rm), (u32)(*off));
+		*arc_reg_mem = REG_LO(JIT_REG_TMP);
+		*off = 0;
+	}
+
+	return len;
+}
+
+/* store rs, [rd, off] */
+u8 store_r(u8 *buf, u8 rs, u8 rd, s16 off, u8 size)
+{
+	u8 len, arc_reg_mem;
+
+	len = adjust_mem_access(buf, &off, size, rd, &arc_reg_mem);
+
+	if (size == BPF_DW) {
+		len += arc_st_r(BUF(buf, len), REG_LO(rs), arc_reg_mem,
+				off, ZZ_4_byte);
+		len += arc_st_r(BUF(buf, len), REG_HI(rs), arc_reg_mem,
+				off + 4, ZZ_4_byte);
+	} else {
+		u8 zz = bpf_to_arc_size(size);
+
+		len += arc_st_r(BUF(buf, len), REG_LO(rs), arc_reg_mem,
+				off, zz);
+	}
+
+	return len;
+}
+
+/*
+ * For {8,16,32}-bit stores:
+ *   mov r21, imm
+ *   st  r21, [...]
+ * For 64-bit stores:
+ *   mov r21, imm
+ *   st  r21, [...]
+ *   mov r21, {0,-1}
+ *   st  r21, [...+4]
+ */
+u8 store_i(u8 *buf, s32 imm, u8 rd, s16 off, u8 size)
+{
+	u8 len, arc_reg_mem;
+	/* REG_LO(JIT_REG_TMP) might be used by "adjust_mem_access()". */
+	const u8 arc_rs = REG_HI(JIT_REG_TMP);
+
+	len = adjust_mem_access(buf, &off, size, rd, &arc_reg_mem);
+
+	if (size == BPF_DW) {
+		len += arc_mov_i(BUF(buf, len), arc_rs, imm);
+		len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem,
+				off, ZZ_4_byte);
+		imm = (imm >= 0 ? 0 : -1);
+		len += arc_mov_i(BUF(buf, len), arc_rs, imm);
+		len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem,
+				off + 4, ZZ_4_byte);
+	} else {
+		u8 zz = bpf_to_arc_size(size);
+
+		len += arc_mov_i(BUF(buf, len), arc_rs, imm);
+		len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem, off, zz);
+	}
+
+	return len;
+}
+
+/*
+ * For the calling convention of a little endian machine, the LO part
+ * must be on top of the stack.
+ */
+static u8 push_r64(u8 *buf, u8 reg)
+{
+	u8 len = 0;
+
+#ifdef __LITTLE_ENDIAN
+	/* BPF_REG_FP is mapped to 32-bit "fp" register. */
+	if (reg != BPF_REG_FP)
+		len += arc_push_r(BUF(buf, len), REG_HI(reg));
+	len += arc_push_r(BUF(buf, len), REG_LO(reg));
+#else
+	len += arc_push_r(BUF(buf, len), REG_LO(reg));
+	if (reg != BPF_REG_FP)
+		len += arc_push_r(BUF(buf, len), REG_HI(reg));
+#endif
+
+	return len;
+}
+
+/* load rd, [rs, off] */
+u8 load_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size, bool sign_ext)
+{
+	u8 len, arc_reg_mem;
+
+	len = adjust_mem_access(buf, &off, size, rs, &arc_reg_mem);
+
+	if (size == BPF_B || size == BPF_H || size == BPF_W) {
+		const u8 zz = bpf_to_arc_size(size);
+
+		/* Use LD.X only if the data size is less than 32-bit. */
+		if (sign_ext && (zz == ZZ_1_byte || zz == ZZ_2_byte)) {
+			len += arc_ldx_r(BUF(buf, len), REG_LO(rd),
+					 arc_reg_mem, off, zz);
+		} else {
+			len += arc_ld_r(BUF(buf, len), REG_LO(rd),
+					arc_reg_mem, off, zz);
+		}
+
+		if (sign_ext) {
+			/* Propagate the sign bit to the higher reg. */
+			len += arc_asri_r(BUF(buf, len),
+					  REG_HI(rd), REG_LO(rd), 31);
+		} else {
+			len += arc_movi_r(BUF(buf, len), REG_HI(rd), 0);
+		}
+	} else if (size == BPF_DW) {
+		/*
+		 * We are about to issue 2 consecutive loads:
+		 *
+		 *   ld rx, [rb, off+0]
+		 *   ld ry, [rb, off+4]
+		 *
+		 * If "rx" and "rb" are the same registers, then the order
+		 * should change to guarantee that "rb" remains intact
+		 * during these 2 operations:
+		 *
+		 *   ld ry, [rb, off+4]
+		 *   ld rx, [rb, off+0]
+		 */
+		if (REG_LO(rd) != arc_reg_mem) {
+			len += arc_ld_r(BUF(buf, len), REG_LO(rd), arc_reg_mem,
+					off, ZZ_4_byte);
+			len += arc_ld_r(BUF(buf, len), REG_HI(rd), arc_reg_mem,
+					off + 4, ZZ_4_byte);
+		} else {
+			len += arc_ld_r(BUF(buf, len), REG_HI(rd), arc_reg_mem,
+					off + 4, ZZ_4_byte);
+			len += arc_ld_r(BUF(buf, len), REG_LO(rd), arc_reg_mem,
+					off, ZZ_4_byte);
+		}
+	}
+
+	return len;
+}
+
+u8 add_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_add_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 add_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	if (IN_U6_RANGE(imm))
+		return arc_addi_r(buf, REG_LO(rd), imm);
+	else
+		return arc_add_i(buf, REG_LO(rd), REG_LO(rd), imm);
+}
+
+u8 add_r64(u8 *buf, u8 rd, u8 rs)
+{
+	u8 len;
+
+	len  = arc_addf_r(buf, REG_LO(rd), REG_LO(rs));
+	len += arc_adc_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	return len;
+}
+
+u8 add_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	u8 len;
+
+	if (IN_U6_RANGE(imm)) {
+		len  = arc_addif_r(buf, REG_LO(rd), imm);
+		len += arc_adci_r(BUF(buf, len), REG_HI(rd), 0);
+	} else {
+		len  = mov_r64_i32(buf, JIT_REG_TMP, imm);
+		len += add_r64(BUF(buf, len), rd, JIT_REG_TMP);
+	}
+	return len;
+}
+
+u8 sub_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_sub_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 sub_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	if (IN_U6_RANGE(imm))
+		return arc_subi_r(buf, REG_LO(rd), imm);
+	else
+		return arc_sub_i(buf, REG_LO(rd), imm);
+}
+
+u8 sub_r64(u8 *buf, u8 rd, u8 rs)
+{
+	u8 len;
+
+	len  = arc_subf_r(buf, REG_LO(rd), REG_LO(rs));
+	len += arc_sbc_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	return len;
+}
+
+u8 sub_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	u8 len;
+
+	len  = mov_r64_i32(buf, JIT_REG_TMP, imm);
+	len += sub_r64(BUF(buf, len), rd, JIT_REG_TMP);
+	return len;
+}
+
+static u8 cmp_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_cmp_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 neg_r32(u8 *buf, u8 r)
+{
+	return arc_neg_r(buf, REG_LO(r), REG_LO(r));
+}
+
+/* In a two's complement system, -r is (~r + 1). */
+u8 neg_r64(u8 *buf, u8 r)
+{
+	u8 len;
+
+	len  = arc_not_r(buf, REG_LO(r), REG_LO(r));
+	len += arc_not_r(BUF(buf, len), REG_HI(r), REG_HI(r));
+	len += add_r64_i32(BUF(buf, len), r, 1);
+	return len;
+}
+
+u8 mul_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_mpy_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+}
+
+u8 mul_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	return arc_mpy_i(buf, REG_LO(rd), REG_LO(rd), imm);
+}
+
+/*
+ * MUL B, C
+ * --------
+ * mpy       t0, B_hi, C_lo
+ * mpy       t1, B_lo, C_hi
+ * mpydu   B_lo, B_lo, C_lo
+ * add     B_hi, B_hi,   t0
+ * add     B_hi, B_hi,   t1
+ */
+u8 mul_r64(u8 *buf, u8 rd, u8 rs)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 t1   = REG_HI(JIT_REG_TMP);
+	const u8 C_lo = REG_LO(rs);
+	const u8 C_hi = REG_HI(rs);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	u8 len;
+
+	len  = arc_mpy_r(buf, t0, B_hi, C_lo);
+	len += arc_mpy_r(BUF(buf, len), t1, B_lo, C_hi);
+	len += arc_mpydu_r(BUF(buf, len), B_lo, C_lo);
+	len += arc_add_r(BUF(buf, len), B_hi, t0);
+	len += arc_add_r(BUF(buf, len), B_hi, t1);
+
+	return len;
+}
+
+/*
+ * MUL B, imm
+ * ----------
+ *
+ *  To get a 64-bit result from a signed 64x32 multiplication:
+ *
+ *         B_hi             B_lo   *
+ *         sign             imm
+ *  -----------------------------
+ *  HI(B_lo*imm)     LO(B_lo*imm)  +
+ *     B_hi*imm                    +
+ *     B_lo*sign
+ *  -----------------------------
+ *        res_hi           res_lo
+ *
+ * mpy     t1, B_lo, sign(imm)
+ * mpy     t0, B_hi, imm
+ * mpydu B_lo, B_lo, imm
+ * add   B_hi, B_hi,  t0
+ * add   B_hi, B_hi,  t1
+ *
+ * Note: We can't use signed double multiplication, "mpyd", instead of an
+ * unsigned version, "mpydu", and then get rid of the sign adjustments
+ * calculated in "t1". The signed multiplication, "mpyd", will consider
+ * both operands, "B_lo" and "imm", as signed inputs. However, for this
+ * 64x32 multiplication, "B_lo" must be treated as an unsigned number.
+ */
+u8 mul_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 t1   = REG_HI(JIT_REG_TMP);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	u8 len = 0;
+
+	if (imm == 1)
+		return 0;
+
+	/* Is the sign-extension of the immediate "-1"? */
+	if (imm < 0)
+		len += arc_neg_r(BUF(buf, len), t1, B_lo);
+
+	len += arc_mpy_i(BUF(buf, len), t0, B_hi, imm);
+	len += arc_mpydu_i(BUF(buf, len), B_lo, imm);
+	len += arc_add_r(BUF(buf, len), B_hi, t0);
+
+	/* Add the "sign*B_lo" part, if necessary. */
+	if (imm < 0)
+		len += arc_add_r(BUF(buf, len), B_hi, t1);
+
+	return len;
+}
+
+u8 div_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext)
+{
+	if (sign_ext)
+		return arc_divs_r(buf, REG_LO(rd), REG_LO(rs));
+	else
+		return arc_divu_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 div_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext)
+{
+	if (imm == 0)
+		return 0;
+
+	if (sign_ext)
+		return arc_divs_i(buf, REG_LO(rd), imm);
+	else
+		return arc_divu_i(buf, REG_LO(rd), imm);
+}
+
+u8 mod_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext)
+{
+	if (sign_ext)
+		return arc_rems_r(buf, REG_LO(rd), REG_LO(rs));
+	else
+		return arc_remu_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 mod_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext)
+{
+	if (imm == 0)
+		return 0;
+
+	if (sign_ext)
+		return arc_rems_i(buf, REG_LO(rd), imm);
+	else
+		return arc_remu_i(buf, REG_LO(rd), imm);
+}
+
+u8 and_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_and_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 and_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	return arc_and_i(buf, REG_LO(rd), imm);
+}
+
+u8 and_r64(u8 *buf, u8 rd, u8 rs)
+{
+	u8 len;
+
+	len  = arc_and_r(buf, REG_LO(rd), REG_LO(rs));
+	len += arc_and_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	return len;
+}
+
+u8 and_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	u8 len;
+
+	len  = mov_r64_i32(buf, JIT_REG_TMP, imm);
+	len += and_r64(BUF(buf, len), rd, JIT_REG_TMP);
+	return len;
+}
+
+static u8 tst_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_tst_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 or_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_or_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+}
+
+u8 or_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	return arc_or_i(buf, REG_LO(rd), imm);
+}
+
+u8 or_r64(u8 *buf, u8 rd, u8 rs)
+{
+	u8 len;
+
+	len  = arc_or_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+	len += arc_or_r(BUF(buf, len), REG_HI(rd), REG_HI(rd), REG_HI(rs));
+	return len;
+}
+
+u8 or_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	u8 len;
+
+	len  = mov_r64_i32(buf, JIT_REG_TMP, imm);
+	len += or_r64(BUF(buf, len), rd, JIT_REG_TMP);
+	return len;
+}
+
+u8 xor_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_xor_r(buf, REG_LO(rd), REG_LO(rs));
+}
+
+u8 xor_r32_i32(u8 *buf, u8 rd, s32 imm)
+{
+	return arc_xor_i(buf, REG_LO(rd), imm);
+}
+
+u8 xor_r64(u8 *buf, u8 rd, u8 rs)
+{
+	u8 len;
+
+	len  = arc_xor_r(buf, REG_LO(rd), REG_LO(rs));
+	len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	return len;
+}
+
+u8 xor_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	u8 len;
+
+	len  = mov_r64_i32(buf, JIT_REG_TMP, imm);
+	len += xor_r64(BUF(buf, len), rd, JIT_REG_TMP);
+	return len;
+}
+
+/* "asl a,b,c" --> "a = (b << (c & 31))". */
+u8 lsh_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_asl_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+}
+
+u8 lsh_r32_i32(u8 *buf, u8 rd, u8 imm)
+{
+	return arc_asli_r(buf, REG_LO(rd), REG_LO(rd), imm);
+}
+
+/*
+ * algorithm
+ * ---------
+ * if (n <= 32)
+ *   to_hi = lo >> (32-n)   # (32-n) is the negate of "n" in a 5-bit width.
+ *   lo <<= n
+ *   hi <<= n
+ *   hi |= to_hi
+ * else
+ *   hi = lo << (n-32)
+ *   lo = 0
+ *
+ * assembly translation for "LSH B, C"
+ * (heavily influenced by ARC gcc)
+ * -----------------------------------
+ * not    t0, C_lo            # The first 3 lines are almost the same as:
+ * lsr    t1, B_lo, 1         #   neg   t0, C_lo
+ * lsr    t1, t1, t0          #   lsr   t1, B_lo, t0   --> t1 is "to_hi"
+ * mov    t0, C_lo*           # with one important difference. In "neg"
+ * asl    B_lo, B_lo, t0      # version, when C_lo=0, t1 becomes B_lo while
+ * asl    B_hi, B_hi, t0      # it should be 0. The "not" approach instead,
+ * or     B_hi, B_hi, t1      # "shift"s t1 once and 31 times, practically
+ * btst   t0, 5               # setting it to 0 when C_lo=0.
+ * mov.ne B_hi, B_lo**
+ * mov.ne B_lo, 0
+ *
+ * *The "mov t0, C_lo" is necessary to cover the cases that C is the same
+ * register as B.
+ *
+ * **ARC performs a shift in this manner: B <<= (C & 31)
+ * For 32<=n<64, "n-32" and "n&31" are the same. Therefore, "B << n" and
+ * "B << (n-32)" yield the same results. e.g. the results of "B << 35" and
+ * "B << 3" are the same.
+ *
+ * The behaviour is undefined for n >= 64.
+ */
+u8 lsh_r64(u8 *buf, u8 rd, u8 rs)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 t1   = REG_HI(JIT_REG_TMP);
+	const u8 C_lo = REG_LO(rs);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	u8 len;
+
+	len  = arc_not_r(buf, t0, C_lo);
+	len += arc_lsri_r(BUF(buf, len), t1, B_lo, 1);
+	len += arc_lsr_r(BUF(buf, len), t1, t1, t0);
+	len += arc_mov_r(BUF(buf, len), t0, C_lo);
+	len += arc_asl_r(BUF(buf, len), B_lo, B_lo, t0);
+	len += arc_asl_r(BUF(buf, len), B_hi, B_hi, t0);
+	len += arc_or_r(BUF(buf, len), B_hi, B_hi, t1);
+	len += arc_btst_i(BUF(buf, len), t0, 5);
+	len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_hi, B_lo);
+	len += arc_movu_cc_r(BUF(buf, len), CC_unequal, B_lo, 0);
+
+	return len;
+}
+
+/*
+ * if (n < 32)
+ *   to_hi = B_lo >> 32-n          # extract upper n bits
+ *   lo <<= n
+ *   hi <<=n
+ *   hi |= to_hi
+ * else if (n < 64)
+ *   hi = lo << n-32
+ *   lo = 0
+ */
+u8 lsh_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	const u8 n    = (u8)imm;
+	u8 len = 0;
+
+	if (n == 0) {
+		return 0;
+	} else if (n <= 31) {
+		len  = arc_lsri_r(buf, t0, B_lo, 32 - n);
+		len += arc_asli_r(BUF(buf, len), B_lo, B_lo, n);
+		len += arc_asli_r(BUF(buf, len), B_hi, B_hi, n);
+		len += arc_or_r(BUF(buf, len), B_hi, B_hi, t0);
+	} else if (n <= 63) {
+		len  = arc_asli_r(buf, B_hi, B_lo, n - 32);
+		len += arc_movi_r(BUF(buf, len), B_lo, 0);
+	}
+	/* n >= 64 is undefined behaviour. */
+
+	return len;
+}
+
+/* "lsr a,b,c" --> "a = (b >> (c & 31))". */
+u8 rsh_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_lsr_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+}
+
+u8 rsh_r32_i32(u8 *buf, u8 rd, u8 imm)
+{
+	return arc_lsri_r(buf, REG_LO(rd), REG_LO(rd), imm);
+}
+
+/*
+ * For better commentary, see lsh_r64().
+ *
+ * algorithm
+ * ---------
+ * if (n <= 32)
+ *   to_lo = hi << (32-n)
+ *   hi >>= n
+ *   lo >>= n
+ *   lo |= to_lo
+ * else
+ *   lo = hi >> (n-32)
+ *   hi = 0
+ *
+ * RSH    B,C
+ * ----------
+ * not    t0, C_lo
+ * asl    t1, B_hi, 1
+ * asl    t1, t1, t0
+ * mov    t0, C_lo
+ * lsr    B_hi, B_hi, t0
+ * lsr    B_lo, B_lo, t0
+ * or     B_lo, B_lo, t1
+ * btst   t0, 5
+ * mov.ne B_lo, B_hi
+ * mov.ne B_hi, 0
+ */
+u8 rsh_r64(u8 *buf, u8 rd, u8 rs)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 t1   = REG_HI(JIT_REG_TMP);
+	const u8 C_lo = REG_LO(rs);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	u8 len;
+
+	len  = arc_not_r(buf, t0, C_lo);
+	len += arc_asli_r(BUF(buf, len), t1, B_hi, 1);
+	len += arc_asl_r(BUF(buf, len), t1, t1, t0);
+	len += arc_mov_r(BUF(buf, len), t0, C_lo);
+	len += arc_lsr_r(BUF(buf, len), B_hi, B_hi, t0);
+	len += arc_lsr_r(BUF(buf, len), B_lo, B_lo, t0);
+	len += arc_or_r(BUF(buf, len), B_lo, B_lo, t1);
+	len += arc_btst_i(BUF(buf, len), t0, 5);
+	len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_lo, B_hi);
+	len += arc_movu_cc_r(BUF(buf, len), CC_unequal, B_hi, 0);
+
+	return len;
+}
+
+/*
+ * if (n < 32)
+ *   to_lo = B_lo << 32-n     # extract lower n bits, right-padded with 32-n 0s
+ *   lo >>=n
+ *   hi >>=n
+ *   hi |= to_lo
+ * else if (n < 64)
+ *   lo = hi >> n-32
+ *   hi = 0
+ */
+u8 rsh_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	const u8 n    = (u8)imm;
+	u8 len = 0;
+
+	if (n == 0) {
+		return 0;
+	} else if (n <= 31) {
+		len  = arc_asli_r(buf, t0, B_hi, 32 - n);
+		len += arc_lsri_r(BUF(buf, len), B_lo, B_lo, n);
+		len += arc_lsri_r(BUF(buf, len), B_hi, B_hi, n);
+		len += arc_or_r(BUF(buf, len), B_lo, B_lo, t0);
+	} else if (n <= 63) {
+		len  = arc_lsri_r(buf, B_lo, B_hi, n - 32);
+		len += arc_movi_r(BUF(buf, len), B_hi, 0);
+	}
+	/* n >= 64 is undefined behaviour. */
+
+	return len;
+}
+
+/* "asr a,b,c" --> "a = (b s>> (c & 31))". */
+u8 arsh_r32(u8 *buf, u8 rd, u8 rs)
+{
+	return arc_asr_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs));
+}
+
+u8 arsh_r32_i32(u8 *buf, u8 rd, u8 imm)
+{
+	return arc_asri_r(buf, REG_LO(rd), REG_LO(rd), imm);
+}
+
+/*
+ * For comparison, see rsh_r64().
+ *
+ * algorithm
+ * ---------
+ * if (n <= 32)
+ *   to_lo = hi << (32-n)
+ *   hi s>>= n
+ *   lo  >>= n
+ *   lo |= to_lo
+ * else
+ *   hi_sign = hi s>>31
+ *   lo = hi s>> (n-32)
+ *   hi = hi_sign
+ *
+ * ARSH   B,C
+ * ----------
+ * not    t0, C_lo
+ * asl    t1, B_hi, 1
+ * asl    t1, t1, t0
+ * mov    t0, C_lo
+ * asr    B_hi, B_hi, t0
+ * lsr    B_lo, B_lo, t0
+ * or     B_lo, B_lo, t1
+ * btst   t0, 5
+ * asr    t0, B_hi, 31        # now, t0 = 0 or -1 based on B_hi's sign
+ * mov.ne B_lo, B_hi
+ * mov.ne B_hi, t0
+ */
+u8 arsh_r64(u8 *buf, u8 rd, u8 rs)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 t1   = REG_HI(JIT_REG_TMP);
+	const u8 C_lo = REG_LO(rs);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	u8 len;
+
+	len  = arc_not_r(buf, t0, C_lo);
+	len += arc_asli_r(BUF(buf, len), t1, B_hi, 1);
+	len += arc_asl_r(BUF(buf, len), t1, t1, t0);
+	len += arc_mov_r(BUF(buf, len), t0, C_lo);
+	len += arc_asr_r(BUF(buf, len), B_hi, B_hi, t0);
+	len += arc_lsr_r(BUF(buf, len), B_lo, B_lo, t0);
+	len += arc_or_r(BUF(buf, len), B_lo, B_lo, t1);
+	len += arc_btst_i(BUF(buf, len), t0, 5);
+	len += arc_asri_r(BUF(buf, len), t0, B_hi, 31);
+	len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_lo, B_hi);
+	len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_hi, t0);
+
+	return len;
+}
+
+/*
+ * if (n < 32)
+ *   to_lo = lo << 32-n     # extract lower n bits, right-padded with 32-n 0s
+ *   lo >>=n
+ *   hi s>>=n
+ *   hi |= to_lo
+ * else if (n < 64)
+ *   lo = hi s>> n-32
+ *   hi = (lo[msb] ? -1 : 0)
+ */
+u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 t0   = REG_LO(JIT_REG_TMP);
+	const u8 B_lo = REG_LO(rd);
+	const u8 B_hi = REG_HI(rd);
+	const u8 n    = (u8)imm;
+	u8 len = 0;
+
+	if (n == 0) {
+		return 0;
+	} else if (n <= 31) {
+		len  = arc_asli_r(buf, t0, B_hi, 32 - n);
+		len += arc_lsri_r(BUF(buf, len), B_lo, B_lo, n);
+		len += arc_asri_r(BUF(buf, len), B_hi, B_hi, n);
+		len += arc_or_r(BUF(buf, len), B_lo, B_lo, t0);
+	} else if (n <= 63) {
+		len  = arc_asri_r(buf, B_lo, B_hi, n - 32);
+		len += arc_movi_r(BUF(buf, len), B_hi, -1);
+		len += arc_btst_i(BUF(buf, len), B_lo, 31);
+		len += arc_movu_cc_r(BUF(buf, len), CC_equal, B_hi, 0);
+	}
+	/* n >= 64 is undefined behaviour. */
+
+	return len;
+}
+
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
+{
+	u8 len = 0;
+#ifdef __BIG_ENDIAN
+	const u8 host_endian = BPF_FROM_BE;
+#else
+	const u8 host_endian = BPF_FROM_LE;
+#endif
+	if (host_endian != endian || force) {
+		switch (size) {
+		case 16:
+			/*
+			 * r = B4B3_B2B1 << 16 --> r = B2B1_0000
+			 * then, swape(r) would become the desired 0000_B1B2
+			 */
+			len = arc_asli_r(buf, REG_LO(rd), REG_LO(rd), 16);
+			fallthrough;
+		case 32:
+			len += arc_swape_r(BUF(buf, len), REG_LO(rd));
+			if (do_zext)
+				len += zext(BUF(buf, len), rd);
+			break;
+		case 64:
+			/*
+			 * swap "hi" and "lo":
+			 *   hi ^= lo;
+			 *   lo ^= hi;
+			 *   hi ^= lo;
+			 * and then swap the bytes in "hi" and "lo".
+			 */
+			len  = arc_xor_r(buf, REG_HI(rd), REG_LO(rd));
+			len += arc_xor_r(BUF(buf, len), REG_LO(rd), REG_HI(rd));
+			len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd));
+			len += arc_swape_r(BUF(buf, len), REG_LO(rd));
+			len += arc_swape_r(BUF(buf, len), REG_HI(rd));
+			break;
+		default:
+			/* The caller must have handled this. */
+		}
+	} else {
+		/*
+		 * If the same endianness, there's not much to do other
+		 * than zeroing out the upper bytes based on the "size".
+		 */
+		switch (size) {
+		case 16:
+			len = arc_and_i(buf, REG_LO(rd), 0xffff);
+			fallthrough;
+		case 32:
+			if (do_zext)
+				len += zext(BUF(buf, len), rd);
+			break;
+		case 64:
+			break;
+		default:
+			/* The caller must have handled this. */
+		}
+	}
+
+	return len;
+}
+
+/*
+ * To create a frame, all that is needed is:
+ *
+ *  push fp
+ *  mov  fp, sp
+ *  sub  sp, <frame_size>
+ *
+ * "push fp" is taken care of separately while saving the clobbered registers.
+ * All that remains is copying SP value to FP and shrinking SP's address space
+ * for any possible function call to come.
+ */
+static inline u8 frame_create(u8 *buf, u16 size)
+{
+	u8 len;
+
+	len = arc_mov_r(buf, ARC_R_FP, ARC_R_SP);
+	if (IN_U6_RANGE(size))
+		len += arc_subi_r(BUF(buf, len), ARC_R_SP, size);
+	else
+		len += arc_sub_i(BUF(buf, len), ARC_R_SP, size);
+	return len;
+}
+
+/*
+ * mov sp, fp
+ *
+ * The value of SP upon entering was copied to FP.
+ */
+static inline u8 frame_restore(u8 *buf)
+{
+	return arc_mov_r(buf, ARC_R_SP, ARC_R_FP);
+}
+
+/*
+ * Going from a JITed code to the native caller:
+ *
+ * mov ARC_ABI_RET_lo, BPF_REG_0_lo     # r0 <- r8
+ * mov ARC_ABI_RET_hi, BPF_REG_0_hi     # r1 <- r9
+ */
+static u8 bpf_to_arc_return(u8 *buf)
+{
+	u8 len;
+
+	len  = arc_mov_r(buf, ARC_R_0, REG_LO(BPF_REG_0));
+	len += arc_mov_r(BUF(buf, len), ARC_R_1, REG_HI(BPF_REG_0));
+	return len;
+}
+
+/*
+ * Coming back from an external (in-kernel) function to the JITed code:
+ *
+ * mov ARC_ABI_RET_lo, BPF_REG_0_lo     # r8 <- r0
+ * mov ARC_ABI_RET_hi, BPF_REG_0_hi     # r9 <- r1
+ */
+u8 arc_to_bpf_return(u8 *buf)
+{
+	u8 len;
+
+	len  = arc_mov_r(buf, REG_LO(BPF_REG_0), ARC_R_0);
+	len += arc_mov_r(BUF(buf, len), REG_HI(BPF_REG_0), ARC_R_1);
+	return len;
+}
+
+/*
+ * This translation leads to:
+ *
+ *   mov r10, addr                # always an 8-byte instruction
+ *   jl  [r10]
+ *
+ * The length of the "mov" must be fixed (8), otherwise it may diverge
+ * during the normal and extra passes:
+ *
+ *          normal pass                  extra pass
+ *
+ *   180:  mov     r10,0        |   180:  mov     r10,0x700578d8
+ *   184:  jl      [r10]        |   188:  jl      [r10]
+ *   188:  add.f   r16,r16,0x1  |   18c:  adc     r17,r17,0
+ *   18c:  adc     r17,r17,0    |
+ *
+ * In the above example, the change from "r10 <- 0" to "r10 <- 0x700578d8"
+ * has led to an increase in the length of the "mov" instruction.
+ * Inadvertently, that caused the loss of the "add.f" instruction.
+ */
+static u8 jump_and_link(u8 *buf, u32 addr)
+{
+	u8 len;
+
+	len  = arc_mov_i_fixed(buf, REG_LO(JIT_REG_TMP), addr);
+	len += arc_jl(BUF(buf, len), REG_LO(JIT_REG_TMP));
+	return len;
+}
+
+/*
+ * This function determines which ARC registers must be saved and restored.
+ * It does so by looking into:
+ *
+ * "bpf_reg": The clobbered (destination) BPF register
+ * "is_call": Indicator if the current instruction is a call
+ *
+ * When a register of interest is clobbered, its corresponding bit position
+ * in return value, "usage", is set to true.
+ */
+u32 mask_for_used_regs(u8 bpf_reg, bool is_call)
+{
+	u32 usage = 0;
+
+	/* BPF registers that must be saved. */
+	if (bpf_reg >= BPF_REG_6 && bpf_reg <= BPF_REG_9) {
+		usage |= BIT(REG_LO(bpf_reg));
+		usage |= BIT(REG_HI(bpf_reg));
+	/*
+	 * Using the frame pointer register implies that it should
+	 * be saved and reinitialised with the current frame data.
+	 */
+	} else if (bpf_reg == BPF_REG_FP) {
+		usage |= BIT(REG_LO(BPF_REG_FP));
+	/* Could there be some ARC registers that must to be saved? */
+	} else {
+		if (REG_LO(bpf_reg) >= ARC_CALLEE_SAVED_REG_FIRST &&
+		    REG_LO(bpf_reg) <= ARC_CALLEE_SAVED_REG_LAST)
+			usage |= BIT(REG_LO(bpf_reg));
+
+		if (REG_HI(bpf_reg) >= ARC_CALLEE_SAVED_REG_FIRST &&
+		    REG_HI(bpf_reg) <= ARC_CALLEE_SAVED_REG_LAST)
+			usage |= BIT(REG_HI(bpf_reg));
+	}
+
+	/* A "call" indicates that ARC's "blink" reg must be saved. */
+	usage |= is_call ? BIT(ARC_R_BLINK) : 0;
+
+	return usage;
+}
+
+/*
+ * push blink             # if blink is marked as clobbered
+ * push r[0-n]            # if r[i] is marked as clobbered
+ * push fp                # if fp is marked as clobbered
+ * mov  fp, sp            # if frame_size > 0 (clobbers fp)
+ * sub  sp, <frame_size>  # same as above
+ */
+u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size)
+{
+	u8 len = 0;
+	u32 gp_regs = 0;
+
+	/* Deal with blink first. */
+	if (usage & BIT(ARC_R_BLINK))
+		len += arc_push_r(BUF(buf, len), ARC_R_BLINK);
+
+	gp_regs = usage & ~(BIT(ARC_R_BLINK) | BIT(ARC_R_FP));
+	while (gp_regs) {
+		u8 reg = __builtin_ffs(gp_regs) - 1;
+
+		len += arc_push_r(BUF(buf, len), reg);
+		gp_regs &= ~BIT(reg);
+	}
+
+	/* Deal with fp last. */
+	if ((usage & BIT(ARC_R_FP)) || frame_size > 0)
+		len += arc_push_r(BUF(buf, len), ARC_R_FP);
+
+	if (frame_size > 0)
+		len += frame_create(BUF(buf, len), frame_size);
+
+#ifdef ARC_BPF_JIT_DEBUG
+	if ((usage & BIT(ARC_R_FP)) && frame_size == 0) {
+		pr_err("FP is being saved while there is no frame.");
+		BUG();
+	}
+#endif
+
+	return len;
+}
+
+/*
+ * mov  sp, fp            # if frame_size > 0
+ * pop  fp                # if fp is marked as clobbered
+ * pop  r[n-0]            # if r[i] is marked as clobbered
+ * pop  blink             # if blink is marked as clobbered
+ * mov  r0, r8            # always: ABI_return <- BPF_return
+ * mov  r1, r9            # continuation of above
+ * j    [blink]           # always
+ *
+ * "fp being marked as clobbered" and "frame_size > 0" are the two sides of
+ * the same coin.
+ */
+u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
+{
+	u32 len = 0;
+	u32 gp_regs = 0;
+
+#ifdef ARC_BPF_JIT_DEBUG
+	if ((usage & BIT(ARC_R_FP)) && frame_size == 0) {
+		pr_err("FP is being saved while there is no frame.");
+		BUG();
+	}
+#endif
+
+	if (frame_size > 0)
+		len += frame_restore(BUF(buf, len));
+
+	/* Deal with fp first. */
+	if ((usage & BIT(ARC_R_FP)) || frame_size > 0)
+		len += arc_pop_r(BUF(buf, len), ARC_R_FP);
+
+	gp_regs = usage & ~(BIT(ARC_R_BLINK) | BIT(ARC_R_FP));
+	while (gp_regs) {
+		/* "usage" is 32-bit, each bit indicating an ARC register. */
+		u8 reg = 31 - __builtin_clz(gp_regs);
+
+		len += arc_pop_r(BUF(buf, len), reg);
+		gp_regs &= ~BIT(reg);
+	}
+
+	/* Deal with blink last. */
+	if (usage & BIT(ARC_R_BLINK))
+		len += arc_pop_r(BUF(buf, len), ARC_R_BLINK);
+
+	/* Wrap up the return value and jump back to the caller. */
+	len += bpf_to_arc_return(BUF(buf, len));
+	len += arc_jmp_return(BUF(buf, len));
+
+	return len;
+}
+
+/*
+ * For details on the algorithm, see the comments of "gen_jcc_64()".
+ *
+ * This data structure is holding information for jump translations.
+ *
+ * jit_off: How many bytes into the current JIT address, "b"ranch insn. occurs
+ * cond: The condition that the ARC branch instruction must use
+ *
+ * e.g.:
+ *
+ * BPF_JGE  R1, R0, @target
+ * ------------------------
+ *            |
+ *            v
+ * 0x1000: cmp  r3, r1     # 0x1000 is the JIT address for "BPF_JGE ..." insn
+ * 0x1004: bhi  @target    # first jump (branch higher)
+ * 0x1008: blo  @end       # second jump acting as a skip (end is 0x1014)
+ * 0x100C: cmp  r2, r0     # the lower 32 bits are evaluated
+ * 0x1010: bhs  @target    # third jump (branch higher or same)
+ * 0x1014: ...
+ *
+ * The jit_off(set) of the "bhi" is 4 bytes.
+ * The cond(ition) for the "bhi" is "CC_great_u".
+ *
+ * The jit_off(set) is necessary for calculating the exact displacement
+ * to the "target" address:
+ *
+ * jit_address + jit_off(set) - @target
+ * 0x1000      + 4            - @target
+ */
+#define JCC64_NR_OF_JMPS 3	/* Number of jumps in jcc64 template. */
+#define JCC64_INSNS_TO_END 3	/* Number of insn. inclusive the 2nd jmp to end. */
+#define JCC64_SKIP_JMP 1	/* Index of the "skip" jump to "end". */
+const struct {
+	/*
+	 * "jit_off" is common between all "jmp[]" and is coupled with
+	 * "cond" of each "jmp[]" instance. e.g.:
+	 *
+	 * arcv2_64_jccs.jit_off[1]
+	 * arcv2_64_jccs.jmp[ARC_CC_UGT].cond[1]
+	 *
+	 * Are indicating that the second jump in JITed code of "UGT"
+	 * is at offset "jit_off[1]" while its condition is "cond[1]".
+	 */
+	u8 jit_off[JCC64_NR_OF_JMPS];
+
+	struct {
+		u8 cond[JCC64_NR_OF_JMPS];
+	} jmp[ARC_CC_SLE + 1];
+} arcv2_64_jccs = {
+	.jit_off = {
+		INSN_len_normal * 1,
+		INSN_len_normal * 2,
+		INSN_len_normal * 4
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   bhi  @target         # 1: u>
+	 *   blo  @end            # 2: u<
+	 *   cmp  rd_lo, rs_lo
+	 *   bhi  @target         # 3: u>
+	 * end:
+	 */
+	.jmp[ARC_CC_UGT] = {
+		.cond = {CC_great_u, CC_less_u, CC_great_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   bhi  @target         # 1: u>
+	 *   blo  @end            # 2: u<
+	 *   cmp  rd_lo, rs_lo
+	 *   bhs  @target         # 3: u>=
+	 * end:
+	 */
+	.jmp[ARC_CC_UGE] = {
+		.cond = {CC_great_u, CC_less_u, CC_great_eq_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   blo  @target         # 1: u<
+	 *   bhi  @end            # 2: u>
+	 *   cmp  rd_lo, rs_lo
+	 *   blo  @target         # 3: u<
+	 * end:
+	 */
+	.jmp[ARC_CC_ULT] = {
+		.cond = {CC_less_u, CC_great_u, CC_less_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   blo  @target         # 1: u<
+	 *   bhi  @end            # 2: u>
+	 *   cmp  rd_lo, rs_lo
+	 *   bls  @target         # 3: u<=
+	 * end:
+	 */
+	.jmp[ARC_CC_ULE] = {
+		.cond = {CC_less_u, CC_great_u, CC_less_eq_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   bgt  @target         # 1: s>
+	 *   blt  @end            # 2: s<
+	 *   cmp  rd_lo, rs_lo
+	 *   bhi  @target         # 3: u>
+	 * end:
+	 */
+	.jmp[ARC_CC_SGT] = {
+		.cond = {CC_great_s, CC_less_s, CC_great_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   bgt  @target         # 1: s>
+	 *   blt  @end            # 2: s<
+	 *   cmp  rd_lo, rs_lo
+	 *   bhs  @target         # 3: u>=
+	 * end:
+	 */
+	.jmp[ARC_CC_SGE] = {
+		.cond = {CC_great_s, CC_less_s, CC_great_eq_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   blt  @target         # 1: s<
+	 *   bgt  @end            # 2: s>
+	 *   cmp  rd_lo, rs_lo
+	 *   blo  @target         # 3: u<
+	 * end:
+	 */
+	.jmp[ARC_CC_SLT] = {
+		.cond = {CC_less_s, CC_great_s, CC_less_u}
+	},
+	/*
+	 *   cmp  rd_hi, rs_hi
+	 *   blt  @target         # 1: s<
+	 *   bgt  @end            # 2: s>
+	 *   cmp  rd_lo, rs_lo
+	 *   bls  @target         # 3: u<=
+	 * end:
+	 */
+	.jmp[ARC_CC_SLE] = {
+		.cond = {CC_less_s, CC_great_s, CC_less_eq_u}
+	}
+};
+
+/*
+ * The displacement (offset) for ARC's "b"ranch instruction is the distance
+ * from the aligned version of _current_ instruction (PCL) to the target
+ * instruction:
+ *
+ * DISP = TARGET - PCL          # PCL is the word aligned PC
+ */
+static inline s32 get_displacement(u32 curr_off, u32 targ_off)
+{
+	return (s32)(targ_off - (curr_off & ~3L));
+}
+
+/*
+ * "disp"lacement should be:
+ *
+ * 1. 16-bit aligned.
+ * 2. fit in S25, because no "condition code" is supposed to be encoded.
+ */
+static inline bool is_valid_far_disp(s32 disp)
+{
+	return (!(disp & 1) && IN_S25_RANGE(disp));
+}
+
+/*
+ * "disp"lacement should be:
+ *
+ * 1. 16-bit aligned.
+ * 2. fit in S21, because "condition code" is supposed to be encoded too.
+ */
+static inline bool is_valid_near_disp(s32 disp)
+{
+	return (!(disp & 1) && IN_S21_RANGE(disp));
+}
+
+/*
+ * cmp        rd_hi, rs_hi
+ * cmp.z      rd_lo, rs_lo
+ * b{eq,ne}   @target
+ *   |  |
+ *   |  `-->  "eq" param is false (JNE)
+ *   `----->  "eq" param is true  (JEQ)
+ */
+static int gen_j_eq_64(u8 *buf, u8 rd, u8 rs, bool eq,
+		       u32 curr_off, u32 targ_off)
+{
+	s32 disp;
+	u8 len = 0;
+
+	len += arc_cmp_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	len += arc_cmpz_r(BUF(buf, len), REG_LO(rd), REG_LO(rs));
+	disp = get_displacement(curr_off + len, targ_off);
+	len += arc_bcc(BUF(buf, len), eq ? CC_equal : CC_unequal, disp);
+
+	return len;
+}
+
+/*
+ * tst   rd_hi, rs_hi
+ * tst.z rd_lo, rs_lo
+ * bne   @target
+ */
+static u8 gen_jset_64(u8 *buf, u8 rd, u8 rs, u32 curr_off, u32 targ_off)
+{
+	u8 len = 0;
+	s32 disp;
+
+	len += arc_tst_r(BUF(buf, len), REG_HI(rd), REG_HI(rs));
+	len += arc_tstz_r(BUF(buf, len), REG_LO(rd), REG_LO(rs));
+	disp = get_displacement(curr_off + len, targ_off);
+	len += arc_bcc(BUF(buf, len), CC_unequal, disp);
+
+	return len;
+}
+
+/*
+ * Verify if all the jumps for a JITed jcc64 operation are valid,
+ * by consulting the data stored at "arcv2_64_jccs".
+ */
+static bool check_jcc_64(u32 curr_off, u32 targ_off, u8 cond)
+{
+	size_t i;
+
+	if (cond >= ARC_CC_LAST)
+		return false;
+
+	for (i = 0; i < JCC64_NR_OF_JMPS; i++) {
+		u32 from, to;
+
+		from = curr_off + arcv2_64_jccs.jit_off[i];
+		/* for the 2nd jump, we jump to the end of block. */
+		if (i != JCC64_SKIP_JMP)
+			to = targ_off;
+		else
+			to = from + (JCC64_INSNS_TO_END * INSN_len_normal);
+		/* There is a "cc" in the instruction, so a "near" jump. */
+		if (!is_valid_near_disp(get_displacement(from, to)))
+			return false;
+	}
+
+	return true;
+}
+
+/* Can the jump from "curr_off" to "targ_off" actually happen? */
+bool check_jmp_64(u32 curr_off, u32 targ_off, u8 cond)
+{
+	s32 disp;
+
+	switch (cond) {
+	case ARC_CC_UGT:
+	case ARC_CC_UGE:
+	case ARC_CC_ULT:
+	case ARC_CC_ULE:
+	case ARC_CC_SGT:
+	case ARC_CC_SGE:
+	case ARC_CC_SLT:
+	case ARC_CC_SLE:
+		return check_jcc_64(curr_off, targ_off, cond);
+	case ARC_CC_EQ:
+	case ARC_CC_NE:
+	case ARC_CC_SET:
+		/*
+		 * The "jump" for the JITed BPF_J{SET,EQ,NE} is actually the
+		 * 3rd instruction. See comments of "gen_j{set,_eq}_64()".
+		 */
+		curr_off += 2 * INSN_len_normal;
+		disp = get_displacement(curr_off, targ_off);
+		/* There is a "cc" field in the issued instruction. */
+		return is_valid_near_disp(disp);
+	case ARC_CC_AL:
+		disp = get_displacement(curr_off, targ_off);
+		return is_valid_far_disp(disp);
+	default:
+		return false;
+	}
+}
+
+/*
+ * The template for the 64-bit jumps with the following BPF conditions
+ *
+ * u< u<= u> u>= s< s<= s> s>=
+ *
+ * Looks like below:
+ *
+ *   cmp   rd_hi, rs_hi
+ *   b<c1> @target
+ *   b<c2> @end
+ *   cmp   rd_lo, rs_lo   # if execution reaches here, r{d,s}_hi are equal
+ *   b<c3> @target
+ * end:
+ *
+ * "c1" is the condition that JIT is handling minus the equality part.
+ * For instance if we have to translate an "unsigned greater or equal",
+ * then "c1" will be "unsigned greater". We won't know about equality
+ * until all 64-bits of data (higeher and lower registers) are processed.
+ *
+ * "c2" is the counter logic of "c1". For instance, if "c1" is originated
+ * from "s>", then "c2" would be "s<". Notice that equality doesn't play
+ * a role here either, because the lower 32 bits are not processed yet.
+ *
+ * "c3" is the unsigned version of "c1", no matter if the BPF condition
+ * was signed or unsigned. An unsigned version is necessary, because the
+ * MSB of the lower 32 bits does not reflect a sign in the whole 64-bit
+ * scheme. Otherwise, 64-bit comparisons like
+ * (0x0000_0000,0x8000_0000) s>= (0x0000_0000,0x0000_0000)
+ * would yield an incorrect result. Finally, if there is an equality
+ * check in the BPF condition, it will be reflected in "c3".
+ *
+ * You can find all the instances of this template where the
+ * "arcv2_64_jccs" is getting initialised.
+ */
+static u8 gen_jcc_64(u8 *buf, u8 rd, u8 rs, u8 cond,
+		     u32 curr_off, u32 targ_off)
+{
+	s32 disp;
+	u32 end_off;
+	const u8 *cc = arcv2_64_jccs.jmp[cond].cond;
+	u8 len = 0;
+
+	/* cmp rd_hi, rs_hi */
+	len += arc_cmp_r(buf, REG_HI(rd), REG_HI(rs));
+
+	/* b<c1> @target */
+	disp = get_displacement(curr_off + len, targ_off);
+	len += arc_bcc(BUF(buf, len), cc[0], disp);
+
+	/* b<c2> @end */
+	end_off = curr_off + len + (JCC64_INSNS_TO_END * INSN_len_normal);
+	disp = get_displacement(curr_off + len, end_off);
+	len += arc_bcc(BUF(buf, len), cc[1], disp);
+
+	/* cmp rd_lo, rs_lo */
+	len += arc_cmp_r(BUF(buf, len), REG_LO(rd), REG_LO(rs));
+
+	/* b<c3> @target */
+	disp = get_displacement(curr_off + len, targ_off);
+	len += arc_bcc(BUF(buf, len), cc[2], disp);
+
+	return len;
+}
+
+/*
+ * This function only applies the necessary logic to make the proper
+ * translations. All the sanity checks must have already been done
+ * by calling the check_jmp_64().
+ */
+u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
+{
+	u8 len = 0;
+	bool eq = false;
+	s32 disp;
+
+	switch (cond) {
+	case ARC_CC_AL:
+		disp = get_displacement(curr_off, targ_off);
+		len = arc_b(buf, disp);
+		break;
+	case ARC_CC_UGT:
+	case ARC_CC_UGE:
+	case ARC_CC_ULT:
+	case ARC_CC_ULE:
+	case ARC_CC_SGT:
+	case ARC_CC_SGE:
+	case ARC_CC_SLT:
+	case ARC_CC_SLE:
+		len = gen_jcc_64(buf, rd, rs, cond, curr_off, targ_off);
+		break;
+	case ARC_CC_EQ:
+		eq = true;
+		fallthrough;
+	case ARC_CC_NE:
+		len = gen_j_eq_64(buf, rd, rs, eq, curr_off, targ_off);
+		break;
+	case ARC_CC_SET:
+		len = gen_jset_64(buf, rd, rs, curr_off, targ_off);
+		break;
+	default:
+#ifdef ARC_BPF_JIT_DEBUG
+		pr_err("64-bit jump condition is not known.");
+		BUG();
+#endif
+	}
+	return len;
+}
+
+/*
+ * The condition codes to use when generating JIT instructions
+ * for 32-bit jumps.
+ *
+ * The "ARC_CC_AL" index is not really used by the code, but it
+ * is here for the sake of completeness.
+ *
+ * The "ARC_CC_SET" becomes "CC_unequal" because of the "tst"
+ * instruction that precedes the conditional branch.
+ */
+const u8 arcv2_32_jmps[ARC_CC_LAST] = {
+	[ARC_CC_UGT] = CC_great_u,
+	[ARC_CC_UGE] = CC_great_eq_u,
+	[ARC_CC_ULT] = CC_less_u,
+	[ARC_CC_ULE] = CC_less_eq_u,
+	[ARC_CC_SGT] = CC_great_s,
+	[ARC_CC_SGE] = CC_great_eq_s,
+	[ARC_CC_SLT] = CC_less_s,
+	[ARC_CC_SLE] = CC_less_eq_s,
+	[ARC_CC_AL]  = CC_always,
+	[ARC_CC_EQ]  = CC_equal,
+	[ARC_CC_NE]  = CC_unequal,
+	[ARC_CC_SET] = CC_unequal
+};
+
+/* Can the jump from "curr_off" to "targ_off" actually happen? */
+bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond)
+{
+	u8 addendum;
+	s32 disp;
+
+	if (cond >= ARC_CC_LAST)
+		return false;
+
+	/*
+	 * The unconditional jump happens immediately, while the rest
+	 * are either preceded by a "cmp" or "tst" instruction.
+	 */
+	addendum = (cond == ARC_CC_AL) ? 0 : INSN_len_normal;
+	disp = get_displacement(curr_off + addendum, targ_off);
+
+	if (ARC_CC_AL)
+		return is_valid_far_disp(disp);
+	else
+		return is_valid_near_disp(disp);
+}
+
+/*
+ * The JITed code for 32-bit (conditional) branches:
+ *
+ * ARC_CC_AL  @target
+ *   b @jit_targ_addr
+ *
+ * ARC_CC_SET rd, rs, @target
+ *   tst rd, rs
+ *   bnz @jit_targ_addr
+ *
+ * ARC_CC_xx  rd, rs, @target
+ *   cmp rd, rs
+ *   b<cc> @jit_targ_addr            # cc = arcv2_32_jmps[xx]
+ */
+u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
+{
+	s32 disp;
+	u8 len = 0;
+
+	/*
+	 * Although this must have already been checked by "check_jmp_32()",
+	 * we're not going to risk accessing "arcv2_32_jmps" array without
+	 * the boundary check.
+	 */
+	if (cond >= ARC_CC_LAST) {
+#ifdef ARC_BPF_JIT_DEBUG
+		pr_err("32-bit jump condition is not known.");
+		BUG();
+#endif
+		return 0;
+	}
+
+	/* If there is a "condition", issue the "cmp" or "tst" first. */
+	if (cond != ARC_CC_AL) {
+		if (cond == ARC_CC_SET)
+			len = tst_r32(buf, rd, rs);
+		else
+			len = cmp_r32(buf, rd, rs);
+		/*
+		 * The issued instruction affects the "disp"lacement as
+		 * it alters the "curr_off" by its "len"gth. The "curr_off"
+		 * should always point to the jump instruction.
+		 */
+		disp = get_displacement(curr_off + len, targ_off);
+		len += arc_bcc(BUF(buf, len), arcv2_32_jmps[cond], disp);
+	} else {
+		/* The straight forward unconditional jump. */
+		disp = get_displacement(curr_off, targ_off);
+		len = arc_b(buf, disp);
+	}
+
+	return len;
+}
+
+/*
+ * Generate code for functions calls. There can be two types of calls:
+ *
+ * - Calling another BPF function
+ * - Calling an in-kernel function which is compiled by ARC gcc
+ *
+ * In the later case, we must comply to ARCv2 ABI and handle arguments
+ * and return values accordingly.
+ */
+u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func)
+{
+	u8 len = 0;
+
+	/*
+	 * In case of an in-kernel function call, always push the 5th
+	 * argument onto the stack, because that's where the ABI dictates
+	 * it should be found. If the callee doesn't really use it, no harm
+	 * is done. The stack is readjusted either way after the call.
+	 */
+	if (external_func)
+		len += push_r64(BUF(buf, len), BPF_REG_5);
+
+	len += jump_and_link(BUF(buf, len), func_addr);
+
+	if (external_func)
+		len += arc_add_i(BUF(buf, len), ARC_R_SP, ARC_R_SP, ARG5_SIZE);
+
+	return len;
+}
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
new file mode 100644
index 000000000000..6f6b4ffccf2c
--- /dev/null
+++ b/arch/arc/net/bpf_jit_core.c
@@ -0,0 +1,1425 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The back-end-agnostic part of Just-In-Time compiler for eBPF bytecode.
+ *
+ * Copyright (c) 2024 Synopsys Inc.
+ * Author: Shahab Vahedi <shahab@synopsys.com>
+ */
+#include <linux/bug.h>
+#include "bpf_jit.h"
+
+/*
+ * Check for the return value. A pattern used often in this file.
+ * There must be a "ret" variable of type "int" in the scope.
+ */
+#define CHECK_RET(cmd)			\
+	do {				\
+		ret = (cmd);		\
+		if (ret < 0)		\
+			return ret;	\
+	} while (0)
+
+#ifdef ARC_BPF_JIT_DEBUG
+/* Dumps bytes in /var/log/messages at KERN_INFO level (4). */
+static void dump_bytes(const u8 *buf, u32 len, const char *header)
+{
+	u8 line[64];
+	size_t i, j;
+
+	pr_info("-----------------[ %s ]-----------------\n", header);
+
+	for (i = 0, j = 0; i < len; i++) {
+		/* Last input byte? */
+		if (i == len - 1) {
+			j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]);
+			pr_info("%s\n", line);
+			break;
+		}
+		/* End of line? */
+		else if (i % 8 == 7) {
+			j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]);
+			pr_info("%s\n", line);
+			j = 0;
+		} else {
+			j += scnprintf(line + j, 64 - j, "0x%02x, ", buf[i]);
+		}
+	}
+}
+#endif /* ARC_BPF_JIT_DEBUG */
+
+/********************* JIT context ***********************/
+
+/*
+ * buf:		Translated instructions end up here.
+ * len:		The length of whole block in bytes.
+ * index:	The offset at which the _next_ instruction may be put.
+ */
+struct jit_buffer {
+	u8	*buf;
+	u32	len;
+	u32	index;
+};
+
+/*
+ * This is a subset of "struct jit_context" that its information is deemed
+ * necessary for the next extra pass to come.
+ *
+ * bpf_header:	Needed to finally lock the region.
+ * bpf2insn:	Used to find the translation for instructions of interest.
+ *
+ * Things like "jit.buf" and "jit.len" can be retrieved respectively from
+ * "prog->bpf_func" and "prog->jited_len".
+ */
+struct arc_jit_data {
+	struct bpf_binary_header *bpf_header;
+	u32                      *bpf2insn;
+};
+
+/*
+ * The JIT pertinent context that is used by different functions.
+ *
+ * prog:		The current eBPF program being handled.
+ * orig_prog:		The original eBPF program before any possible change.
+ * jit:			The JIT buffer and its length.
+ * bpf_header:		The JITed program header. "jit.buf" points inside it.
+ * emit:		If set, opcodes are written to memory; else, a dry-run.
+ * do_zext:		If true, 32-bit sub-regs must be zero extended.
+ * bpf2insn:		Maps BPF insn indices to their counterparts in jit.buf.
+ * bpf2insn_valid:	Indicates if "bpf2ins" is populated with the mappings.
+ * jit_data:		A piece of memory to transfer data to the next pass.
+ * arc_regs_clobbered:	Each bit status determines if that arc reg is clobbered.
+ * save_blink:		Whether ARC's "blink" register needs to be saved.
+ * frame_size:		Derived from "prog->aux->stack_depth".
+ * epilogue_offset:	Used by early "return"s in the code to jump here.
+ * need_extra_pass:	A forecast if an "extra_pass" will occur.
+ * is_extra_pass:	Indicates if the current pass is an extra pass.
+ * user_bpf_prog:	True, if VM opcodes come from a real program.
+ * blinded:		True if "constant blinding" step returned a new "prog".
+ * success:		Indicates if the whole JIT went OK.
+ */
+struct jit_context {
+	struct bpf_prog			*prog;
+	struct bpf_prog			*orig_prog;
+	struct jit_buffer		jit;
+	struct bpf_binary_header	*bpf_header;
+	bool				emit;
+	bool				do_zext;
+	u32				*bpf2insn;
+	bool				bpf2insn_valid;
+	struct arc_jit_data		*jit_data;
+	u32				arc_regs_clobbered;
+	bool				save_blink;
+	u16				frame_size;
+	u32				epilogue_offset;
+	bool				need_extra_pass;
+	bool				is_extra_pass;
+	bool				user_bpf_prog;
+	bool				blinded;
+	bool				success;
+};
+
+/*
+ * If we're in ARC_BPF_JIT_DEBUG mode and the debug level is right, dump the
+ * input BPF stream. "bpf_jit_dump()" is not fully suited for this purpose.
+ */
+static void vm_dump(const struct bpf_prog *prog)
+{
+#ifdef ARC_BPF_JIT_DEBUG
+	if (bpf_jit_enable > 1)
+		dump_bytes((u8 *)prog->insns, 8 * prog->len, " VM  ");
+#endif
+}
+
+/*
+ * If the right level of debug is set, dump the bytes. There are 2 variants
+ * of this function:
+ *
+ * 1. Use the standard bpf_jit_dump() which is meant only for JITed code.
+ * 2. Use the dump_bytes() to match its "vm_dump()" instance.
+ */
+static void jit_dump(const struct jit_context *ctx)
+{
+#ifdef ARC_BPF_JIT_DEBUG
+	u8 header[8];
+#endif
+	const int pass = ctx->is_extra_pass ? 2 : 1;
+
+	if (bpf_jit_enable <= 1 || !ctx->prog->jited)
+		return;
+
+#ifdef ARC_BPF_JIT_DEBUG
+	scnprintf(header, sizeof(header), "JIT:%d", pass);
+	dump_bytes(ctx->jit.buf, ctx->jit.len, header);
+	pr_info("\n");
+#else
+	bpf_jit_dump(ctx->prog->len, ctx->jit.len, pass, ctx->jit.buf);
+#endif
+}
+
+/* Initialise the context so there's no garbage. */
+static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
+{
+	memset(ctx, 0, sizeof(ctx));
+
+	ctx->orig_prog = prog;
+
+	/* If constant blinding was requested but failed, scram. */
+	ctx->prog = bpf_jit_blind_constants(prog);
+	if (IS_ERR(ctx->prog))
+		return PTR_ERR(ctx->prog);
+	ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true);
+
+	/* If the verifier doesn't zero-extend, then we have to do it. */
+	ctx->do_zext = !ctx->prog->aux->verifier_zext;
+
+	ctx->is_extra_pass = ctx->prog->jited;
+	ctx->user_bpf_prog = ctx->prog->is_func;
+
+	return 0;
+}
+
+/*
+ * Only after the first iteration of normal pass (the dry-run),
+ * there are valid offsets in ctx->bpf2insn array.
+ */
+static inline bool offsets_available(const struct jit_context *ctx)
+{
+	return ctx->bpf2insn_valid;
+}
+
+/*
+ * "*mem" should be freed when there is no "extra pass" to come,
+ * or the compilation terminated abruptly. A few of such memory
+ * allocations are: ctx->jit_data and ctx->bpf2insn.
+ */
+static inline void maybe_free(struct jit_context *ctx, void **mem)
+{
+	if (*mem) {
+		if (!ctx->success || !ctx->need_extra_pass) {
+			kfree(*mem);
+			*mem = NULL;
+		}
+	}
+}
+
+/*
+ * Free memories based on the status of the context.
+ *
+ * A note about "bpf_header": On successful runs, "bpf_header" is
+ * not freed, because "jit.buf", a sub-array of it, is returned as
+ * the "bpf_func". However, "bpf_header" is lost and nothing points
+ * to it. This should not cause a leakage, because apparently
+ * "bpf_header" can be revived by "bpf_jit_binary_hdr()". This is
+ * how "bpf_jit_free()" in "kernel/bpf/core.c" releases the memory.
+ */
+static void jit_ctx_cleanup(struct jit_context *ctx)
+{
+	if (ctx->blinded) {
+		/* if all went well, release the orig_prog. */
+		if (ctx->success)
+			bpf_jit_prog_release_other(ctx->prog, ctx->orig_prog);
+		else
+			bpf_jit_prog_release_other(ctx->orig_prog, ctx->prog);
+	}
+
+	maybe_free(ctx, (void **)&ctx->bpf2insn);
+	maybe_free(ctx, (void **)&ctx->jit_data);
+
+	if (!ctx->bpf2insn)
+		ctx->bpf2insn_valid = false;
+
+	/* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */
+	if (!ctx->success && ctx->bpf_header) {
+		bpf_jit_binary_free(ctx->bpf_header);
+		ctx->bpf_header = NULL;
+		ctx->jit.buf    = NULL;
+		ctx->jit.index  = 0;
+		ctx->jit.len    = 0;
+	}
+
+	ctx->emit = false;
+	ctx->do_zext = false;
+}
+
+/*
+ * Analyse the register usage and record the frame size.
+ * The register usage is determined by consulting the back-end.
+ */
+static void analyze_reg_usage(struct jit_context *ctx)
+{
+	size_t i;
+	u32 usage = 0;
+	const struct bpf_insn *insn = ctx->prog->insnsi;
+
+	for (i = 0; i < ctx->prog->len; i++) {
+		u8 bpf_reg;
+		bool call;
+
+		bpf_reg = insn[i].dst_reg;
+		call = (insn[i].code == (BPF_JMP | BPF_CALL)) ? true : false;
+		usage |= mask_for_used_regs(bpf_reg, call);
+	}
+
+	ctx->arc_regs_clobbered = usage;
+	ctx->frame_size = ctx->prog->aux->stack_depth;
+}
+
+/* Verify that no instruction will be emitted when there is no buffer. */
+static inline int jit_buffer_check(const struct jit_context *ctx)
+{
+	if (ctx->emit) {
+		if (!ctx->jit.buf) {
+			pr_err("bpf-jit: inconsistence state; no "
+			       "buffer to emit instructions.\n");
+			return -EINVAL;
+		} else if (ctx->jit.index > ctx->jit.len) {
+			pr_err("bpf-jit: estimated JIT length is less "
+			       "than the emitted instructions.\n");
+			return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+/* On a dry-run (emit=false), "jit.len" is growing gradually. */
+static inline void jit_buffer_update(struct jit_context *ctx, u32 n)
+{
+	if (!ctx->emit)
+		ctx->jit.len += n;
+	else
+		ctx->jit.index += n;
+}
+
+/* Based on "emit", determine the address where instructions are emitted. */
+static inline u8 *effective_jit_buf(const struct jit_context *ctx)
+{
+	return ctx->emit ? (ctx->jit.buf + ctx->jit.index) : NULL;
+}
+
+/* Prologue based on context variables set by "analyze_reg_usage()". */
+static int handle_prologue(struct jit_context *ctx)
+{
+	int ret;
+	u8 *buf = effective_jit_buf(ctx);
+	u32 len = 0;
+
+	CHECK_RET(jit_buffer_check(ctx));
+
+	len = arc_prologue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
+	jit_buffer_update(ctx, len);
+
+	return 0;
+}
+
+/* The counter part for "handle_prologue()". */
+static int handle_epilogue(struct jit_context *ctx)
+{
+	int ret;
+	u8 *buf = effective_jit_buf(ctx);
+	u32 len = 0;
+
+	CHECK_RET(jit_buffer_check(ctx));
+
+	len = arc_epilogue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
+	jit_buffer_update(ctx, len);
+
+	return 0;
+}
+
+/* Tell which number of the BPF instruction we are dealing with. */
+static inline s32 get_index_for_insn(const struct jit_context *ctx,
+				     const struct bpf_insn *insn)
+{
+	return (insn - ctx->prog->insnsi);
+}
+
+/*
+ * In most of the cases, the "offset" is read from "insn->off". However,
+ * if it is an unconditional BPF_JMP32, then it comes from "insn->imm".
+ *
+ * (Courtesy of "cpu=v4" support)
+ */
+static inline s32 get_offset(const struct bpf_insn *insn)
+{
+	if ((BPF_CLASS(insn->code) == BPF_JMP32) &&
+	    (BPF_OP(insn->code) == BPF_JA))
+		return insn->imm;
+	else
+		return insn->off;
+}
+
+/*
+ * Determine to which number of the BPF instruction we're jumping to.
+ *
+ * The "offset" is interpreted as the "number" of BPF instructions
+ * from the _next_ BPF instruction. e.g.:
+ *
+ *  4 means 4 instructions after  the next insn
+ *  0 means 0 instructions after  the next insn -> fallthrough.
+ * -1 means 1 instruction  before the next insn -> jmp to current insn.
+ *
+ *  Another way to look at this, "offset" is the number of instructions
+ *  that exist between the current instruction and the target instruction.
+ *
+ *  It is worth noting that a "mov r,i64", which is 16-byte long, is
+ *  treated as two instructions long, therefore "offset" needn't be
+ *  treated specially for those. Everything is uniform.
+ */
+static inline s32 get_target_index_for_insn(const struct jit_context *ctx,
+					    const struct bpf_insn *insn)
+{
+	return (get_index_for_insn(ctx, insn) + 1) + get_offset(insn);
+}
+
+/* Is there an immediate operand encoded in the "insn"? */
+static inline bool has_imm(const struct bpf_insn *insn)
+{
+	return BPF_SRC(insn->code) == BPF_K;
+}
+
+/* Is the last BPF instruction? */
+static inline bool is_last_insn(const struct bpf_prog *prog, u32 idx)
+{
+	return idx == (prog->len - 1);
+}
+
+/*
+ * Invocation of this function, conditionally signals the need for
+ * an extra pass. The conditions that must be met are:
+ *
+ * 1. The current pass itself shouldn't be an extra pass.
+ * 2. The stream of bytes being JITed must come from a user program.
+ */
+static inline void set_need_for_extra_pass(struct jit_context *ctx)
+{
+	if (!ctx->is_extra_pass)
+		ctx->need_extra_pass = ctx->user_bpf_prog;
+}
+
+/*
+ * Check if the "size" is valid and then transfer the control to
+ * the back-end for the swap.
+ */
+static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
+		       bool force, bool do_zext, u8 *len)
+{
+	/* Sanity check on the size. */
+	switch (size) {
+	case 16:
+	case 32:
+	case 64:
+		break;
+	default:
+		pr_err("bpf-jit: invalid size for swap.\n");
+		return -EINVAL;
+	}
+
+	*len = gen_swap(buf, rd, size, endian, force, do_zext);
+
+	return 0;
+}
+
+/* Checks if the (instruction) index is in valid range. */
+static inline bool check_insn_idx_valid(const struct jit_context *ctx,
+					const s32 idx)
+{
+	return (idx >= 0 && idx < ctx->prog->len);
+}
+
+/*
+ * Decouple the back-end from BPF by converting BPF conditions
+ * to internal enum. ARC_CC_* start from 0 and are used as index
+ * to an array. BPF_J* usage must end after this conversion.
+ */
+static int bpf_cond_to_arc(const u8 op, u8 *arc_cc)
+{
+	switch (op) {
+	case BPF_JA:
+		*arc_cc = ARC_CC_AL;
+		break;
+	case BPF_JEQ:
+		*arc_cc = ARC_CC_EQ;
+		break;
+	case BPF_JGT:
+		*arc_cc = ARC_CC_UGT;
+		break;
+	case BPF_JGE:
+		*arc_cc = ARC_CC_UGE;
+		break;
+	case BPF_JSET:
+		*arc_cc = ARC_CC_SET;
+		break;
+	case BPF_JNE:
+		*arc_cc = ARC_CC_NE;
+		break;
+	case BPF_JSGT:
+		*arc_cc = ARC_CC_SGT;
+		break;
+	case BPF_JSGE:
+		*arc_cc = ARC_CC_SGE;
+		break;
+	case BPF_JLT:
+		*arc_cc = ARC_CC_ULT;
+		break;
+	case BPF_JLE:
+		*arc_cc = ARC_CC_ULE;
+		break;
+	case BPF_JSLT:
+		*arc_cc = ARC_CC_SLT;
+		break;
+	case BPF_JSLE:
+		*arc_cc = ARC_CC_SLE;
+		break;
+	default:
+		pr_err("bpf-jit: can't handle condition 0x%02X\n", op);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Check a few things for a supposedly "jump" instruction:
+ *
+ * 0. "insn" is a "jump" instruction, but not the "call/exit" variant.
+ * 1. The current "insn" index is in valid range.
+ * 2. The index of target instruction is in valid range.
+ */
+static int check_bpf_jump(const struct jit_context *ctx,
+			  const struct bpf_insn *insn)
+{
+	const u8 class = BPF_CLASS(insn->code);
+	const u8 op = BPF_OP(insn->code);
+
+	/* Must be a jmp(32) instruction that is not a "call/exit". */
+	if ((class != BPF_JMP && class != BPF_JMP32) ||
+	    (op == BPF_CALL || op == BPF_EXIT)) {
+		pr_err("bpf-jit: not a jump instruction.\n");
+		return -EINVAL;
+	}
+
+	if (!check_insn_idx_valid(ctx, get_index_for_insn(ctx, insn))) {
+		pr_err("bpf-jit: the bpf jump insn is not in prog.\n");
+		return -EINVAL;
+	}
+
+	if (!check_insn_idx_valid(ctx, get_target_index_for_insn(ctx, insn))) {
+		pr_err("bpf-jit: bpf jump label is out of range.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Based on input "insn", consult "ctx->bpf2insn" to get the
+ * related index (offset) of the translation in JIT stream.
+ */
+static u32 get_curr_jit_off(const struct jit_context *ctx,
+			    const struct bpf_insn *insn)
+{
+	const s32 idx = get_index_for_insn(ctx, insn);
+#ifdef ARC_BPF_JIT_DEBUG
+	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, idx));
+#endif
+	return ctx->bpf2insn[idx];
+}
+
+/*
+ * The input "insn" must be a jump instruction.
+ *
+ * Based on input "insn", consult "ctx->bpf2insn" to get the
+ * related JIT index (offset) of "target instruction" that
+ * "insn" would jump to.
+ */
+static u32 get_targ_jit_off(const struct jit_context *ctx,
+			    const struct bpf_insn *insn)
+{
+	const s32 tidx = get_target_index_for_insn(ctx, insn);
+#ifdef ARC_BPF_JIT_DEBUG
+	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, tidx));
+#endif
+	return ctx->bpf2insn[tidx];
+}
+
+/*
+ * This function will return 0 for a feasible jump.
+ *
+ * Consult the back-end to check if it finds it feasible to emit
+ * the necessary instructions based on "cond" and the displacement
+ * between the "from_off" and the "to_off".
+ */
+static int feasible_jit_jump(u32 from_off, u32 to_off, u8 cond, bool j32)
+{
+	int ret = 0;
+
+	if (j32) {
+		if (!check_jmp_32(from_off, to_off, cond))
+			ret = -EFAULT;
+	} else {
+		if (!check_jmp_64(from_off, to_off, cond))
+			ret = -EFAULT;
+	}
+
+	if (ret != 0)
+		pr_err("bpf-jit: the JIT displacement is not OK.\n");
+
+	return ret;
+}
+
+/*
+ * This jump handler performs the following steps:
+ *
+ * 1. Compute ARC's internal condition code from BPF's
+ * 2. Determine the bitness of the operation (32 vs. 64)
+ * 3. Sanity check on BPF stream
+ * 4. Sanity check on what is supposed to be JIT's displacement
+ * 5. And finally, emit the necessary instructions
+ *
+ * The last two steps are performed through the back-end.
+ * The value of steps 1 and 2 are necessary inputs for the back-end.
+ */
+static int handle_jumps(const struct jit_context *ctx,
+			const struct bpf_insn *insn,
+			u8 *len)
+{
+	u8 cond;
+	int ret = 0;
+	u8 *buf = effective_jit_buf(ctx);
+	const bool j32 = (BPF_CLASS(insn->code) == BPF_JMP32) ? true : false;
+	const u8 rd = insn->dst_reg;
+	u8 rs = insn->src_reg;
+	u32 curr_off = 0, targ_off = 0;
+
+	*len = 0;
+
+	/* Map the BPF condition to internal enum. */
+	CHECK_RET(bpf_cond_to_arc(BPF_OP(insn->code), &cond));
+
+	/* Sanity check on the BPF byte stream. */
+	CHECK_RET(check_bpf_jump(ctx, insn));
+
+	/*
+	 * Move the immediate into a temporary register _now_ for 2 reasons:
+	 *
+	 * 1. "gen_jmp_{32,64}()" deal with operands in registers.
+	 *
+	 * 2. The "len" parameter will grow so that the current jit offset
+	 *    (curr_off) will have increased to a point where the necessary
+	 *    instructions can be inserted by "gen_jmp_{32,64}()".
+	 */
+	if (has_imm(insn) && cond != ARC_CC_AL) {
+		if (j32) {
+			*len += mov_r32_i32(BUF(buf, *len), JIT_REG_TMP,
+					    insn->imm);
+		} else {
+			*len += mov_r64_i32(BUF(buf, *len), JIT_REG_TMP,
+					    insn->imm);
+		}
+		rs = JIT_REG_TMP;
+	}
+
+	/* If the offsets are known, check if the branch can occur. */
+	if (offsets_available(ctx)) {
+		curr_off = get_curr_jit_off(ctx, insn) + *len;
+		targ_off = get_targ_jit_off(ctx, insn);
+
+		/* Sanity check on the back-end side. */
+		CHECK_RET(feasible_jit_jump(curr_off, targ_off, cond, j32));
+	}
+
+	if (j32) {
+		*len += gen_jmp_32(BUF(buf, *len), rd, rs, cond,
+				   curr_off, targ_off);
+	} else {
+		*len += gen_jmp_64(BUF(buf, *len), rd, rs, cond,
+				   curr_off, targ_off);
+	}
+
+	return ret;
+}
+
+/* Jump to translated epilogue address. */
+static int handle_jmp_epilogue(struct jit_context *ctx,
+			       const struct bpf_insn *insn, u8 *len)
+{
+	u8 *buf = effective_jit_buf(ctx);
+	u32 curr_off = 0, epi_off = 0;
+
+	/* Check the offset only if the data is available. */
+	if (offsets_available(ctx)) {
+		curr_off = get_curr_jit_off(ctx, insn);
+		epi_off = ctx->epilogue_offset;
+
+		if (!check_jmp_64(curr_off, epi_off, ARC_CC_AL)) {
+			pr_err("bpf-jit: epilogue offset is not valid.\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Jump to "epilogue offset" (rd and rs don't matter). */
+	*len = gen_jmp_64(buf, 0, 0, ARC_CC_AL, curr_off, epi_off);
+
+	return 0;
+}
+
+/* Try to get the resolved address and generate the instructions. */
+static int handle_call(struct jit_context *ctx,
+		       const struct bpf_insn *insn,
+		       u8 *len)
+{
+	int  ret;
+	bool in_kernel_func, fixed = false;
+	u64  addr = 0;
+	u8  *buf = effective_jit_buf(ctx);
+
+	ret = bpf_jit_get_func_addr(ctx->prog, insn, ctx->is_extra_pass,
+				    &addr, &fixed);
+	if (ret < 0) {
+		pr_err("bpf-jit: can't get the address for call.\n");
+		return ret;
+	}
+	in_kernel_func = (fixed ? true : false);
+
+	/* No valuable address retrieved (yet). */
+	if (!fixed && !addr)
+		set_need_for_extra_pass(ctx);
+
+	*len = gen_func_call(buf, (ARC_ADDR)addr, in_kernel_func);
+
+	if (insn->src_reg != BPF_PSEUDO_CALL) {
+		/* Assigning ABI's return reg to JIT's return reg. */
+		*len += arc_to_bpf_return(BUF(buf, *len));
+	}
+
+	return 0;
+}
+
+/*
+ * Try to generate instructions for loading a 64-bit immediate.
+ * These sort of instructions are usually associated with the 64-bit
+ * relocations: R_BPF_64_64. Therefore, signal the need for an extra
+ * pass if the circumstances are right.
+ */
+static int handle_ld_imm64(struct jit_context *ctx,
+			   const struct bpf_insn *insn,
+			   u8 *len)
+{
+	const s32 idx = get_index_for_insn(ctx, insn);
+	u8 *buf = effective_jit_buf(ctx);
+
+	/* We're about to consume 2 VM instructions. */
+	if (is_last_insn(ctx->prog, idx)) {
+		pr_err("bpf-jit: need more data for 64-bit immediate.\n");
+		return -EINVAL;
+	}
+
+	*len = mov_r64_i64(buf, insn->dst_reg, insn->imm, (insn + 1)->imm);
+
+	if (bpf_pseudo_func(insn))
+		set_need_for_extra_pass(ctx);
+
+	return 0;
+}
+
+/*
+ * Handles one eBPF instruction at a time. To make this function faster,
+ * it does not call "jit_buffer_check()". Else, it would call it for every
+ * instruction. As a result, it should not be invoked directly. Only
+ * "handle_body()", that has already executed the "check", may call this
+ * function.
+ *
+ * If the "ret" value is negative, something has went wrong. Else,
+ * it mostly holds the value 0 and rarely 1. Number 1 signals
+ * the loop in "handle_body()" to skip the next instruction, because
+ * it has been consumed as part of a 64-bit immediate value.
+ */
+static int handle_insn(struct jit_context *ctx, u32 idx)
+{
+	const struct bpf_insn *insn = &ctx->prog->insnsi[idx];
+	const u8  code = insn->code;
+	const u8  dst  = insn->dst_reg;
+	const u8  src  = insn->src_reg;
+	const s16 off  = insn->off;
+	const s32 imm  = insn->imm;
+	u8 *buf = effective_jit_buf(ctx);
+	u8  len = 0;
+	int ret = 0;
+
+	switch (code) {
+	/* dst += src (32-bit) */
+	case BPF_ALU | BPF_ADD | BPF_X:
+		len = add_r32(buf, dst, src);
+		break;
+	/* dst += imm (32-bit) */
+	case BPF_ALU | BPF_ADD | BPF_K:
+		len = add_r32_i32(buf, dst, imm);
+		break;
+	/* dst -= src (32-bit) */
+	case BPF_ALU | BPF_SUB | BPF_X:
+		len = sub_r32(buf, dst, src);
+		break;
+	/* dst -= imm (32-bit) */
+	case BPF_ALU | BPF_SUB | BPF_K:
+		len = sub_r32_i32(buf, dst, imm);
+		break;
+	/* dst = -dst (32-bit) */
+	case BPF_ALU | BPF_NEG:
+		len = neg_r32(buf, dst);
+		break;
+	/* dst *= src (32-bit) */
+	case BPF_ALU | BPF_MUL | BPF_X:
+		len = mul_r32(buf, dst, src);
+		break;
+	/* dst *= imm (32-bit) */
+	case BPF_ALU | BPF_MUL | BPF_K:
+		len = mul_r32_i32(buf, dst, imm);
+		break;
+	/* dst /= src (32-bit) */
+	case BPF_ALU | BPF_DIV | BPF_X:
+		len = div_r32(buf, dst, src, off == 1);
+		break;
+	/* dst /= imm (32-bit) */
+	case BPF_ALU | BPF_DIV | BPF_K:
+		len = div_r32_i32(buf, dst, imm, off == 1);
+		break;
+	/* dst %= src (32-bit) */
+	case BPF_ALU | BPF_MOD | BPF_X:
+		len = mod_r32(buf, dst, src, off == 1);
+		break;
+	/* dst %= imm (32-bit) */
+	case BPF_ALU | BPF_MOD | BPF_K:
+		len = mod_r32_i32(buf, dst, imm, off == 1);
+		break;
+	/* dst &= src (32-bit) */
+	case BPF_ALU | BPF_AND | BPF_X:
+		len = and_r32(buf, dst, src);
+		break;
+	/* dst &= imm (32-bit) */
+	case BPF_ALU | BPF_AND | BPF_K:
+		len = and_r32_i32(buf, dst, imm);
+		break;
+	/* dst |= src (32-bit) */
+	case BPF_ALU | BPF_OR | BPF_X:
+		len = or_r32(buf, dst, src);
+		break;
+	/* dst |= imm (32-bit) */
+	case BPF_ALU | BPF_OR | BPF_K:
+		len = or_r32_i32(buf, dst, imm);
+		break;
+	/* dst ^= src (32-bit) */
+	case BPF_ALU | BPF_XOR | BPF_X:
+		len = xor_r32(buf, dst, src);
+		break;
+	/* dst ^= imm (32-bit) */
+	case BPF_ALU | BPF_XOR | BPF_K:
+		len = xor_r32_i32(buf, dst, imm);
+		break;
+	/* dst <<= src (32-bit) */
+	case BPF_ALU | BPF_LSH | BPF_X:
+		len = lsh_r32(buf, dst, src);
+		break;
+	/* dst <<= imm (32-bit) */
+	case BPF_ALU | BPF_LSH | BPF_K:
+		len = lsh_r32_i32(buf, dst, imm);
+		break;
+	/* dst >>= src (32-bit) [unsigned] */
+	case BPF_ALU | BPF_RSH | BPF_X:
+		len = rsh_r32(buf, dst, src);
+		break;
+	/* dst >>= imm (32-bit) [unsigned] */
+	case BPF_ALU | BPF_RSH | BPF_K:
+		len = rsh_r32_i32(buf, dst, imm);
+		break;
+	/* dst >>= src (32-bit) [signed] */
+	case BPF_ALU | BPF_ARSH | BPF_X:
+		len = arsh_r32(buf, dst, src);
+		break;
+	/* dst >>= imm (32-bit) [signed] */
+	case BPF_ALU | BPF_ARSH | BPF_K:
+		len = arsh_r32_i32(buf, dst, imm);
+		break;
+	/* dst = src (32-bit) */
+	case BPF_ALU | BPF_MOV | BPF_X:
+		len = mov_r32(buf, dst, src, (u8)off);
+		break;
+	/* dst = imm32 (32-bit) */
+	case BPF_ALU | BPF_MOV | BPF_K:
+		len = mov_r32_i32(buf, dst, imm);
+		break;
+	/* dst = swap(dst) */
+	case BPF_ALU   | BPF_END | BPF_FROM_LE:
+	case BPF_ALU   | BPF_END | BPF_FROM_BE:
+	case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
+		CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
+				      BPF_CLASS(code) == BPF_ALU64,
+				      ctx->do_zext, &len));
+		break;
+	}
+	/* dst += src (64-bit) */
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+		len = add_r64(buf, dst, src);
+		break;
+	/* dst += imm32 (64-bit) */
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+		len = add_r64_i32(buf, dst, imm);
+		break;
+	/* dst -= src (64-bit) */
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+		len = sub_r64(buf, dst, src);
+		break;
+	/* dst -= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+		len = sub_r64_i32(buf, dst, imm);
+		break;
+	/* dst = -dst (64-bit) */
+	case BPF_ALU64 | BPF_NEG:
+		len = neg_r64(buf, dst);
+		break;
+	/* dst *= src (64-bit) */
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+		len = mul_r64(buf, dst, src);
+		break;
+	/* dst *= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		len = mul_r64_i32(buf, dst, imm);
+		break;
+	/* dst &= src (64-bit) */
+	case BPF_ALU64 | BPF_AND | BPF_X:
+		len = and_r64(buf, dst, src);
+		break;
+	/* dst &= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_AND | BPF_K:
+		len = and_r64_i32(buf, dst, imm);
+		break;
+	/* dst |= src (64-bit) */
+	case BPF_ALU64 | BPF_OR | BPF_X:
+		len = or_r64(buf, dst, src);
+		break;
+	/* dst |= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_OR | BPF_K:
+		len = or_r64_i32(buf, dst, imm);
+		break;
+	/* dst ^= src (64-bit) */
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		len = xor_r64(buf, dst, src);
+		break;
+	/* dst ^= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+		len = xor_r64_i32(buf, dst, imm);
+		break;
+	/* dst <<= src (64-bit) */
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		len = lsh_r64(buf, dst, src);
+		break;
+	/* dst <<= imm32 (64-bit) */
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		len = lsh_r64_i32(buf, dst, imm);
+		break;
+	/* dst >>= src (64-bit) [unsigned] */
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		len = rsh_r64(buf, dst, src);
+		break;
+	/* dst >>= imm32 (64-bit) [unsigned] */
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		len = rsh_r64_i32(buf, dst, imm);
+		break;
+	/* dst >>= src (64-bit) [signed] */
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		len = arsh_r64(buf, dst, src);
+		break;
+	/* dst >>= imm32 (64-bit) [signed] */
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		len = arsh_r64_i32(buf, dst, imm);
+		break;
+	/* dst = src (64-bit) */
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		len = mov_r64(buf, dst, src, (u8)off);
+		break;
+	/* dst = imm32 (sign extend to 64-bit) */
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		len = mov_r64_i32(buf, dst, imm);
+		break;
+	/* dst = imm64 */
+	case BPF_LD | BPF_DW | BPF_IMM:
+		CHECK_RET(handle_ld_imm64(ctx, insn, &len));
+		/* Tell the loop to skip the next instruction. */
+		ret = 1;
+		break;
+	/* dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+		len = load_r(buf, dst, src, off, BPF_SIZE(code), false);
+		break;
+	case BPF_LDX | BPF_MEMSX | BPF_W:
+	case BPF_LDX | BPF_MEMSX | BPF_H:
+	case BPF_LDX | BPF_MEMSX | BPF_B:
+		len = load_r(buf, dst, src, off, BPF_SIZE(code), true);
+		break;
+	/* *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_DW:
+		len = store_r(buf, src, dst, off, BPF_SIZE(code));
+		break;
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		len = store_i(buf, imm, dst, off, BPF_SIZE(code));
+		break;
+	case BPF_JMP   | BPF_JA:
+	case BPF_JMP   | BPF_JEQ  | BPF_X:
+	case BPF_JMP   | BPF_JEQ  | BPF_K:
+	case BPF_JMP   | BPF_JNE  | BPF_X:
+	case BPF_JMP   | BPF_JNE  | BPF_K:
+	case BPF_JMP   | BPF_JSET | BPF_X:
+	case BPF_JMP   | BPF_JSET | BPF_K:
+	case BPF_JMP   | BPF_JGT  | BPF_X:
+	case BPF_JMP   | BPF_JGT  | BPF_K:
+	case BPF_JMP   | BPF_JGE  | BPF_X:
+	case BPF_JMP   | BPF_JGE  | BPF_K:
+	case BPF_JMP   | BPF_JSGT | BPF_X:
+	case BPF_JMP   | BPF_JSGT | BPF_K:
+	case BPF_JMP   | BPF_JSGE | BPF_X:
+	case BPF_JMP   | BPF_JSGE | BPF_K:
+	case BPF_JMP   | BPF_JLT  | BPF_X:
+	case BPF_JMP   | BPF_JLT  | BPF_K:
+	case BPF_JMP   | BPF_JLE  | BPF_X:
+	case BPF_JMP   | BPF_JLE  | BPF_K:
+	case BPF_JMP   | BPF_JSLT | BPF_X:
+	case BPF_JMP   | BPF_JSLT | BPF_K:
+	case BPF_JMP   | BPF_JSLE | BPF_X:
+	case BPF_JMP   | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JA:
+	case BPF_JMP32 | BPF_JEQ  | BPF_X:
+	case BPF_JMP32 | BPF_JEQ  | BPF_K:
+	case BPF_JMP32 | BPF_JNE  | BPF_X:
+	case BPF_JMP32 | BPF_JNE  | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JGT  | BPF_X:
+	case BPF_JMP32 | BPF_JGT  | BPF_K:
+	case BPF_JMP32 | BPF_JGE  | BPF_X:
+	case BPF_JMP32 | BPF_JGE  | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JLT  | BPF_X:
+	case BPF_JMP32 | BPF_JLT  | BPF_K:
+	case BPF_JMP32 | BPF_JLE  | BPF_X:
+	case BPF_JMP32 | BPF_JLE  | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+		CHECK_RET(handle_jumps(ctx, insn, &len));
+		break;
+	case BPF_JMP | BPF_CALL:
+		CHECK_RET(handle_call(ctx, insn, &len));
+		break;
+
+	case BPF_JMP | BPF_EXIT:
+		/* If this is the last instruction, epilogue will follow. */
+		if (is_last_insn(ctx->prog, idx))
+			break;
+		CHECK_RET(handle_jmp_epilogue(ctx, insn, &len));
+		break;
+	default:
+		pr_err("bpf-jit: can't handle instruction code 0x%02X\n", code);
+		return -EOPNOTSUPP;
+	}
+
+	if (BPF_CLASS(code) == BPF_ALU) {
+		/*
+		 * Skip the "swap" instructions. Even 64-bit swaps are of type
+		 * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
+		 * has to look at the "size" of the operations rather than the
+		 * ALU type. "gen_swap()" specifically takes care of that.
+		 */
+		if (BPF_OP(code) != BPF_END && ctx->do_zext)
+			len += zext(BUF(buf, len), dst);
+	}
+
+	jit_buffer_update(ctx, len);
+
+	return ret;
+}
+
+static int handle_body(struct jit_context *ctx)
+{
+	int ret;
+	bool populate_bpf2insn = false;
+	const struct bpf_prog *prog = ctx->prog;
+
+	CHECK_RET(jit_buffer_check(ctx));
+
+	/*
+	 * Record the mapping for the instructions during the dry-run.
+	 * Doing it this way allows us to have the mapping ready for
+	 * the jump instructions during the real compilation phase.
+	 */
+	if (!ctx->emit)
+		populate_bpf2insn = true;
+
+	for (u32 i = 0; i < prog->len; i++) {
+		/* During the dry-run, jit.len grows gradually per BPF insn. */
+		if (populate_bpf2insn)
+			ctx->bpf2insn[i] = ctx->jit.len;
+
+		CHECK_RET(handle_insn(ctx, i));
+		if (ret > 0) {
+			/* "ret" is 1 if two (64-bit) chunks were consumed. */
+			ctx->bpf2insn[i + 1] = ctx->bpf2insn[i];
+			i++;
+		}
+	}
+
+	/* If bpf2insn had to be populated, then it is done at this point. */
+	if (populate_bpf2insn)
+		ctx->bpf2insn_valid = true;
+
+	return 0;
+}
+
+/*
+ * Initialize the memory with "unimp_s" which is the mnemonic for
+ * "unimplemented" instruction and always raises an exception.
+ *
+ * The instruction is 2 bytes. If "size" is odd, there is not much
+ * that can be done about the last byte in "area". Because, the
+ * CPU always fetches instructions in two bytes. Therefore, the
+ * byte beyond the last one is going to accompany it during a
+ * possible fetch. In the most likely case of a little endian
+ * system, that beyond-byte will become the major opcode and
+ * we have no control over its initialisation.
+ */
+static void fill_ill_insn(void *area, unsigned int size)
+{
+	const u16 unimp_s = 0x79e0;
+
+	if (size & 1) {
+		*((u8 *)area + (size - 1)) = 0xff;
+		size -= 1;
+	}
+
+	memset16(area, unimp_s, size >> 1);
+}
+
+/* Piece of memory that can be allocated at the beginning of jit_prepare(). */
+static int jit_prepare_early_mem_alloc(struct jit_context *ctx)
+{
+	ctx->bpf2insn = kcalloc(ctx->prog->len, sizeof(ctx->jit.len),
+				GFP_KERNEL);
+
+	if (!ctx->bpf2insn) {
+		pr_err("bpf-jit: could not allocate memory for "
+		       "mapping of the instructions.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Memory allocations that rely on parameters known at the end of
+ * jit_prepare().
+ */
+static int jit_prepare_final_mem_alloc(struct jit_context *ctx)
+{
+	const size_t alignment = sizeof(u32);
+
+	ctx->bpf_header = bpf_jit_binary_alloc(ctx->jit.len, &ctx->jit.buf,
+					       alignment, fill_ill_insn);
+	if (!ctx->bpf_header) {
+		pr_err("bpf-jit: could not allocate memory for translation.\n");
+		return -ENOMEM;
+	}
+
+	if (ctx->need_extra_pass) {
+		ctx->jit_data = kzalloc(sizeof(*ctx->jit_data), GFP_KERNEL);
+		if (!ctx->jit_data)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * The first phase of the translation without actually emitting any
+ * instruction. It helps in getting a forecast on some aspects, such
+ * as the length of the whole program or where the epilogue starts.
+ *
+ * Whenever the necessary parameters are known, memories are allocated.
+ */
+static int jit_prepare(struct jit_context *ctx)
+{
+	int ret;
+
+	/* Dry run. */
+	ctx->emit = false;
+
+	CHECK_RET(jit_prepare_early_mem_alloc(ctx));
+
+	/* Get the length of prologue section after some register analysis. */
+	analyze_reg_usage(ctx);
+	CHECK_RET(handle_prologue(ctx));
+
+	CHECK_RET(handle_body(ctx));
+
+	/* Record at which offset epilogue begins. */
+	ctx->epilogue_offset = ctx->jit.len;
+
+	/* Process the epilogue section now. */
+	CHECK_RET(handle_epilogue(ctx));
+
+	CHECK_RET(jit_prepare_final_mem_alloc(ctx));
+
+	return 0;
+}
+
+/*
+ * All the "handle_*()" functions have been called before by the
+ * "jit_prepare()". If there was an error, we would know by now.
+ * Therefore, no extra error checking at this point, other than
+ * a sanity check at the end that expects the calculated length
+ * (jit.len) to be equal to the length of generated instructions
+ * (jit.index).
+ */
+static int jit_compile(struct jit_context *ctx)
+{
+	int ret;
+
+	/* Let there be code. */
+	ctx->emit = true;
+
+	CHECK_RET(handle_prologue(ctx));
+
+	CHECK_RET(handle_body(ctx));
+
+	CHECK_RET(handle_epilogue(ctx));
+
+	if (ctx->jit.index != ctx->jit.len) {
+		pr_err("bpf-jit: divergence between the phases; "
+		       "%u vs. %u (bytes).\n",
+		       ctx->jit.len, ctx->jit.index);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Calling this function implies a successful JIT. A successful
+ * translation is signaled by setting the right parameters:
+ *
+ * prog->jited=1, prog->jited_len=..., prog->bpf_func=...
+ */
+static int jit_finalize(struct jit_context *ctx)
+{
+	struct bpf_prog *prog = ctx->prog;
+
+	/* We're going to need this information for the "do_extra_pass()". */
+	if (ctx->need_extra_pass) {
+		ctx->jit_data->bpf_header = ctx->bpf_header;
+		ctx->jit_data->bpf2insn = ctx->bpf2insn;
+		prog->aux->jit_data = (void *)ctx->jit_data;
+	} else {
+		/*
+		 * If things seem finalised, then mark the JITed memory
+		 * as R-X and flush it.
+		 */
+		if (bpf_jit_binary_lock_ro(ctx->bpf_header)) {
+			pr_err("bpf-jit: Could not lock the JIT memory.\n");
+			return -EFAULT;
+		}
+		flush_icache_range((unsigned long)ctx->bpf_header,
+				   (unsigned long)
+				   BUF(ctx->jit.buf, ctx->jit.len));
+		prog->aux->jit_data = NULL;
+		bpf_prog_fill_jited_linfo(prog, ctx->bpf2insn);
+	}
+
+	ctx->success = true;
+	prog->bpf_func = (void *)ctx->jit.buf;
+	prog->jited_len = ctx->jit.len;
+	prog->jited = 1;
+
+	jit_ctx_cleanup(ctx);
+	jit_dump(ctx);
+
+	return 0;
+}
+
+/*
+ * A lenient verification for the existence of JIT context in "prog".
+ * Apparently the JIT internals, namely jit_subprogs() in bpf/verifier.c,
+ * may request for a second compilation although nothing needs to be done.
+ */
+static inline int check_jit_context(const struct bpf_prog *prog)
+{
+	if (!prog->aux->jit_data) {
+		pr_notice("bpf-jit: no jit data for the extra pass.\n");
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* Reuse the previous pass's data. */
+static int jit_resume_context(struct jit_context *ctx)
+{
+	struct arc_jit_data *jdata =
+		(struct arc_jit_data *)ctx->prog->aux->jit_data;
+
+	if (!jdata) {
+		pr_err("bpf-jit: no jit data for the extra pass.\n");
+		return -EINVAL;
+	}
+
+	ctx->jit.buf = (u8 *)ctx->prog->bpf_func;
+	ctx->jit.len = ctx->prog->jited_len;
+	ctx->bpf_header = jdata->bpf_header;
+	ctx->bpf2insn = (u32 *)jdata->bpf2insn;
+	ctx->bpf2insn_valid = ctx->bpf2insn ? true : false;
+	ctx->jit_data = jdata;
+
+	return 0;
+}
+
+/*
+ * Patch in the new addresses. The instructions of interest are:
+ *
+ * - call
+ * - ld r64, imm64
+ *
+ * For "call"s, it resolves the addresses one more time through the
+ * handle_call().
+ *
+ * For 64-bit immediate loads, it just retranslates them, because the BPF
+ * core in kernel might have changed the value since the normal pass.
+ */
+static int jit_patch_relocations(struct jit_context *ctx)
+{
+	const u8 bpf_opc_call = BPF_JMP | BPF_CALL;
+	const u8 bpf_opc_ldi64 = BPF_LD | BPF_DW | BPF_IMM;
+	const struct bpf_prog *prog = ctx->prog;
+	int ret;
+
+	ctx->emit = true;
+	for (u32 i = 0; i < prog->len; i++) {
+		const struct bpf_insn *insn = &prog->insnsi[i];
+		u8 dummy;
+		/*
+		 * Adjust "ctx.jit.index", so "gen_*()" functions below
+		 * can use it for their output addresses.
+		 */
+		ctx->jit.index = ctx->bpf2insn[i];
+
+		if (insn->code == bpf_opc_call) {
+			CHECK_RET(handle_call(ctx, insn, &dummy));
+		} else if (insn->code == bpf_opc_ldi64) {
+			CHECK_RET(handle_ld_imm64(ctx, insn, &dummy));
+			/* Skip the next instruction. */
+			++i;
+		}
+	}
+	return 0;
+}
+
+/*
+ * A normal pass that involves a "dry-run" phase, jit_prepare(),
+ * to get the necessary data for the real compilation phase,
+ * jit_compile().
+ */
+static struct bpf_prog *do_normal_pass(struct bpf_prog *prog)
+{
+	struct jit_context ctx;
+
+	/* Bail out if JIT is disabled. */
+	if (!prog->jit_requested)
+		return prog;
+
+	if (jit_ctx_init(&ctx, prog)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	/* Get the lengths and allocate buffer. */
+	if (jit_prepare(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	if (jit_compile(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	if (jit_finalize(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	return ctx.prog;
+}
+
+/*
+ * If there are multi-function BPF programs that call each other,
+ * their translated addresses are not known all at once. Therefore,
+ * an extra pass is needed to consult the bpf_jit_get_func_addr()
+ * again to get the newly translated addresses in order to resolve
+ * the "call"s.
+ */
+static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
+{
+	struct jit_context ctx;
+
+	/* Skip if there's no context to resume from. */
+	if (check_jit_context(prog))
+		return prog;
+
+	if (jit_ctx_init(&ctx, prog)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	if (jit_resume_context(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	if (jit_patch_relocations(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	if (jit_finalize(&ctx)) {
+		jit_ctx_cleanup(&ctx);
+		return prog;
+	}
+
+	return ctx.prog;
+}
+
+/*
+ * This function may be invoked twice for the same stream of BPF
+ * instructions. The "extra pass" happens, when there are "call"s
+ * involved that their addresses are not known during the first
+ * invocation.
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	vm_dump(prog);
+
+	/* Was this program already translated? */
+	if (!prog->jited)
+		return do_normal_pass(prog);
+	else
+		return do_extra_pass(prog);
+
+	return prog;
+}
-- 
cgit v1.2.3-73-gaa49b


From b56035101e1cdd9c4420ea5da17f09f87fb69285 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@nvidia.com>
Date: Fri, 10 May 2024 23:19:26 +0300
Subject: netdev: Add queue stats for TX stop and wake

TX queue stop and wake are counted by some drivers.
Support reporting these via netdev-genl queue stats.

Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://lore.kernel.org/r/20240510201927.1821109-2-danielj@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/netdev.yaml | 14 ++++++++++++++
 include/net/netdev_queues.h             |  3 +++
 include/uapi/linux/netdev.h             |  2 ++
 net/core/netdev-genl.c                  |  4 +++-
 tools/include/uapi/linux/netdev.h       |  2 ++
 5 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 2be4b3714d17..11a32373365a 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -439,6 +439,20 @@ attribute-sets:
           Number of the packets dropped by the device due to the transmit
           packets bitrate exceeding the device rate limit.
         type: uint
+      -
+        name: tx-stop
+        doc: |
+          Number of times driver paused accepting new tx packets
+          from the stack to this queue, because the queue was full.
+          Note that if BQL is supported and enabled on the device
+          the networking stack will avoid queuing a lot of data at once.
+        type: uint
+      -
+        name: tx-wake
+        doc: |
+          Number of times driver re-started accepting send
+          requests to this queue from the stack.
+        type: uint
 
 operations:
   list:
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index e7b84f018cee..a8a7e48dfa6c 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -41,6 +41,9 @@ struct netdev_queue_stats_tx {
 	u64 hw_gso_wire_bytes;
 
 	u64 hw_drop_ratelimits;
+
+	u64 stop;
+	u64 wake;
 };
 
 /**
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index cf24f1d9adf8..a8188202413e 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -165,6 +165,8 @@ enum {
 	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
 	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
 	NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
+	NETDEV_A_QSTATS_TX_STOP,
+	NETDEV_A_QSTATS_TX_WAKE,
 
 	__NETDEV_A_QSTATS_MAX,
 	NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 4b5054087309..1f6ae6379e0f 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -517,7 +517,9 @@ netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx)
 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) ||
 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) ||
 	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) ||
-	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits))
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) ||
+	    netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake))
 		return -EMSGSIZE;
 	return 0;
 }
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index cf24f1d9adf8..a8188202413e 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -165,6 +165,8 @@ enum {
 	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
 	NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
 	NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
+	NETDEV_A_QSTATS_TX_STOP,
+	NETDEV_A_QSTATS_TX_WAKE,
 
 	__NETDEV_A_QSTATS_MAX,
 	NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
-- 
cgit v1.2.3-73-gaa49b


From ab5588703981e5560cce47988187aa15005c766f Mon Sep 17 00:00:00 2001
From: Clément Léger <clement.leger@bootlin.com>
Date: Mon, 13 May 2024 09:25:12 +0200
Subject: dt-bindings: net: renesas,rzn1-gmac: Document RZ/N1 GMAC support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The RZ/N1 series of MPUs feature up to two Gigabit Ethernet controllers.
These controllers are based on Synopsys IPs. They can be connected to
RZ/N1 RGMII/RMII converters.

Add a binding that describes these GMAC devices.

Signed-off-by: Clément Léger <clement.leger@bootlin.com>
[rgantois: commit log]
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Romain Gantois <romain.gantois@bootlin.com>
Link: https://lore.kernel.org/r/20240513-rzn1-gmac1-v7-1-6acf58b5440d@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/renesas,rzn1-gmac.yaml | 66 ++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
new file mode 100644
index 000000000000..d9a8d586e260
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/renesas,rzn1-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas GMAC
+
+maintainers:
+  - Romain Gantois <romain.gantois@bootlin.com>
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - renesas,r9a06g032-gmac
+          - renesas,rzn1-gmac
+  required:
+    - compatible
+
+allOf:
+  - $ref: snps,dwmac.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r9a06g032-gmac
+      - const: renesas,rzn1-gmac
+      - const: snps,dwmac
+
+  pcs-handle:
+    description:
+      phandle pointing to a PCS sub-node compatible with
+      renesas,rzn1-miic.yaml#
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r9a06g032-sysctrl.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    ethernet@44000000 {
+      compatible = "renesas,r9a06g032-gmac", "renesas,rzn1-gmac", "snps,dwmac";
+      reg = <0x44000000 0x2000>;
+      interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+      clock-names = "stmmaceth";
+      clocks = <&sysctrl R9A06G032_HCLK_GMAC0>;
+      power-domains = <&sysctrl>;
+      snps,multicast-filter-bins = <256>;
+      snps,perfect-filter-entries = <128>;
+      tx-fifo-depth = <2048>;
+      rx-fifo-depth = <4096>;
+      pcs-handle = <&mii_conv1>;
+      phy-mode = "mii";
+    };
+
+...
-- 
cgit v1.2.3-73-gaa49b


From 5c1672705a1a2389f5ad78e0fea6f08ed32d6f18 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 13 May 2024 08:41:55 -0700
Subject: net: revert partially applied PHY topology series

The series is causing issues with PHY drivers built as modules.
Since it was only partially applied and the merge window has
opened let's revert and try again for v6.11.

Revert 6916e461e793 ("net: phy: Introduce ethernet link topology representation")
Revert 0ec5ed6c130e ("net: sfp: pass the phy_device when disconnecting an sfp module's PHY")
Revert e75e4e074c44 ("net: phy: add helpers to handle sfp phy connect/disconnect")
Revert fdd353965b52 ("net: sfp: Add helper to return the SFP bus name")
Revert 841942bc6212 ("net: ethtool: Allow passing a phy index for some commands")

Link: https://lore.kernel.org/all/171242462917.4000.9759453824684907063.git-patchwork-notify@kernel.org/
Link: https://lore.kernel.org/all/20240507102822.2023826-1-maxime.chevallier@bootlin.com/
Link: https://lore.kernel.org/r/20240513154156.104281-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst |   7 --
 MAINTAINERS                                  |   2 -
 drivers/net/phy/Makefile                     |   2 +-
 drivers/net/phy/marvell-88x2222.c            |   2 -
 drivers/net/phy/marvell.c                    |   2 -
 drivers/net/phy/marvell10g.c                 |   2 -
 drivers/net/phy/phy_device.c                 |  55 --------------
 drivers/net/phy/phy_link_topology.c          | 105 ---------------------------
 drivers/net/phy/phylink.c                    |   3 +-
 drivers/net/phy/qcom/at803x.c                |   2 -
 drivers/net/phy/qcom/qca807x.c               |   2 -
 drivers/net/phy/sfp-bus.c                    |  15 +---
 include/linux/netdevice.h                    |   4 +-
 include/linux/phy.h                          |   6 --
 include/linux/phy_link_topology.h            |  72 ------------------
 include/linux/phy_link_topology_core.h       |  25 -------
 include/linux/sfp.h                          |   8 +-
 include/uapi/linux/ethtool.h                 |  16 ----
 include/uapi/linux/ethtool_netlink.h         |   1 -
 net/core/dev.c                               |   9 ---
 net/ethtool/netlink.c                        |  48 +-----------
 net/ethtool/netlink.h                        |   5 --
 22 files changed, 8 insertions(+), 385 deletions(-)
 delete mode 100644 drivers/net/phy/phy_link_topology.c
 delete mode 100644 include/linux/phy_link_topology.h
 delete mode 100644 include/linux/phy_link_topology_core.h

(limited to 'Documentation')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 8bc71f249448..160bfb0ae8ba 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -57,7 +57,6 @@ Structure of this header is
   ``ETHTOOL_A_HEADER_DEV_INDEX``  u32     device ifindex
   ``ETHTOOL_A_HEADER_DEV_NAME``   string  device name
   ``ETHTOOL_A_HEADER_FLAGS``      u32     flags common for all requests
-  ``ETHTOOL_A_HEADER_PHY_INDEX``  u32     phy device index
   ==============================  ======  =============================
 
 ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the
@@ -82,12 +81,6 @@ the behaviour is backward compatible, i.e. requests from old clients not aware
 of the flag should be interpreted the way the client expects. A client must
 not set flags it does not understand.
 
-``ETHTOOL_A_HEADER_PHY_INDEX`` identifies the Ethernet PHY the message relates to.
-As there are numerous commands that are related to PHY configuration, and because
-there may be more than one PHY on the link, the PHY index can be passed in the
-request for the commands that needs it. It is, however, not mandatory, and if it
-is not passed for commands that target a PHY, the net_device.phydev pointer
-is used.
 
 Bit sets
 ========
diff --git a/MAINTAINERS b/MAINTAINERS
index 40eb26e6c60c..b4ed6480e919 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8027,8 +8027,6 @@ F:	include/linux/mii.h
 F:	include/linux/of_net.h
 F:	include/linux/phy.h
 F:	include/linux/phy_fixed.h
-F:	include/linux/phy_link_topology.h
-F:	include/linux/phy_link_topology_core.h
 F:	include/linux/phylib_stubs.h
 F:	include/linux/platform_data/mdio-bcm-unimac.h
 F:	include/linux/platform_data/mdio-gpio.h
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 1d8be374915f..202ed7f450da 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Linux PHY drivers
 
 libphy-y			:= phy.o phy-c45.o phy-core.o phy_device.o \
-				   linkmode.o phy_link_topology.o
+				   linkmode.o
 mdio-bus-y			+= mdio_bus.o mdio_device.o
 
 ifdef CONFIG_MDIO_DEVICE
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index 0b777cdd7078..b88398e6872b 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -553,8 +553,6 @@ static const struct sfp_upstream_ops sfp_phy_ops = {
 	.link_down = mv2222_sfp_link_down,
 	.attach = phy_sfp_attach,
 	.detach = phy_sfp_detach,
-	.connect_phy = phy_sfp_connect_phy,
-	.disconnect_phy = phy_sfp_disconnect_phy,
 };
 
 static int mv2222_probe(struct phy_device *phydev)
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 9964bf3dea2f..b89fbffa6a93 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -3613,8 +3613,6 @@ static const struct sfp_upstream_ops m88e1510_sfp_ops = {
 	.module_remove = m88e1510_sfp_remove,
 	.attach = phy_sfp_attach,
 	.detach = phy_sfp_detach,
-	.connect_phy = phy_sfp_connect_phy,
-	.disconnect_phy = phy_sfp_disconnect_phy,
 };
 
 static int m88e1510_probe(struct phy_device *phydev)
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 6642eb642d4b..ad43e280930c 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -503,8 +503,6 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
 static const struct sfp_upstream_ops mv3310_sfp_ops = {
 	.attach = phy_sfp_attach,
 	.detach = phy_sfp_detach,
-	.connect_phy = phy_sfp_connect_phy,
-	.disconnect_phy = phy_sfp_disconnect_phy,
 	.module_insert = mv3310_sfp_insert,
 };
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 616bd7ba46cb..6c6ec9475709 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -29,7 +29,6 @@
 #include <linux/phy.h>
 #include <linux/phylib_stubs.h>
 #include <linux/phy_led_triggers.h>
-#include <linux/phy_link_topology.h>
 #include <linux/pse-pd/pse.h>
 #include <linux/property.h>
 #include <linux/rtnetlink.h>
@@ -277,14 +276,6 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev)
 
 static struct phy_driver genphy_driver;
 
-static struct phy_link_topology *phy_get_link_topology(struct phy_device *phydev)
-{
-	if (phydev->attached_dev)
-		return phydev->attached_dev->link_topo;
-
-	return NULL;
-}
-
 static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
@@ -1378,46 +1369,6 @@ phy_standalone_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(phy_standalone);
 
-/**
- * phy_sfp_connect_phy - Connect the SFP module's PHY to the upstream PHY
- * @upstream: pointer to the upstream phy device
- * @phy: pointer to the SFP module's phy device
- *
- * This helper allows keeping track of PHY devices on the link. It adds the
- * SFP module's phy to the phy namespace of the upstream phy
- */
-int phy_sfp_connect_phy(void *upstream, struct phy_device *phy)
-{
-	struct phy_device *phydev = upstream;
-	struct phy_link_topology *topo = phy_get_link_topology(phydev);
-
-	if (topo)
-		return phy_link_topo_add_phy(topo, phy, PHY_UPSTREAM_PHY, phydev);
-
-	return 0;
-}
-EXPORT_SYMBOL(phy_sfp_connect_phy);
-
-/**
- * phy_sfp_disconnect_phy - Disconnect the SFP module's PHY from the upstream PHY
- * @upstream: pointer to the upstream phy device
- * @phy: pointer to the SFP module's phy device
- *
- * This helper allows keeping track of PHY devices on the link. It removes the
- * SFP module's phy to the phy namespace of the upstream phy. As the module phy
- * will be destroyed, re-inserting the same module will add a new phy with a
- * new index.
- */
-void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy)
-{
-	struct phy_device *phydev = upstream;
-	struct phy_link_topology *topo = phy_get_link_topology(phydev);
-
-	if (topo)
-		phy_link_topo_del_phy(topo, phy);
-}
-EXPORT_SYMBOL(phy_sfp_disconnect_phy);
-
 /**
  * phy_sfp_attach - attach the SFP bus to the PHY upstream network device
  * @upstream: pointer to the phy device
@@ -1560,11 +1511,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 
 		if (phydev->sfp_bus_attached)
 			dev->sfp_bus = phydev->sfp_bus;
-
-		err = phy_link_topo_add_phy(dev->link_topo, phydev,
-					    PHY_UPSTREAM_MAC, dev);
-		if (err)
-			goto error;
 	}
 
 	/* Some Ethernet drivers try to connect to a PHY device before
@@ -1992,7 +1938,6 @@ void phy_detach(struct phy_device *phydev)
 	if (dev) {
 		phydev->attached_dev->phydev = NULL;
 		phydev->attached_dev = NULL;
-		phy_link_topo_del_phy(dev->link_topo, phydev);
 	}
 	phydev->phylink = NULL;
 
diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c
deleted file mode 100644
index 985941c5c558..000000000000
--- a/drivers/net/phy/phy_link_topology.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Infrastructure to handle all PHY devices connected to a given netdev,
- * either directly or indirectly attached.
- *
- * Copyright (c) 2023 Maxime Chevallier<maxime.chevallier@bootlin.com>
- */
-
-#include <linux/phy_link_topology.h>
-#include <linux/netdevice.h>
-#include <linux/phy.h>
-#include <linux/rtnetlink.h>
-#include <linux/xarray.h>
-
-struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
-{
-	struct phy_link_topology *topo;
-
-	topo = kzalloc(sizeof(*topo), GFP_KERNEL);
-	if (!topo)
-		return ERR_PTR(-ENOMEM);
-
-	xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1);
-	topo->next_phy_index = 1;
-
-	return topo;
-}
-
-void phy_link_topo_destroy(struct phy_link_topology *topo)
-{
-	if (!topo)
-		return;
-
-	xa_destroy(&topo->phys);
-	kfree(topo);
-}
-
-int phy_link_topo_add_phy(struct phy_link_topology *topo,
-			  struct phy_device *phy,
-			  enum phy_upstream upt, void *upstream)
-{
-	struct phy_device_node *pdn;
-	int ret;
-
-	pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
-	if (!pdn)
-		return -ENOMEM;
-
-	pdn->phy = phy;
-	switch (upt) {
-	case PHY_UPSTREAM_MAC:
-		pdn->upstream.netdev = (struct net_device *)upstream;
-		if (phy_on_sfp(phy))
-			pdn->parent_sfp_bus = pdn->upstream.netdev->sfp_bus;
-		break;
-	case PHY_UPSTREAM_PHY:
-		pdn->upstream.phydev = (struct phy_device *)upstream;
-		if (phy_on_sfp(phy))
-			pdn->parent_sfp_bus = pdn->upstream.phydev->sfp_bus;
-		break;
-	default:
-		ret = -EINVAL;
-		goto err;
-	}
-	pdn->upstream_type = upt;
-
-	/* Attempt to re-use a previously allocated phy_index */
-	if (phy->phyindex) {
-		ret = xa_insert(&topo->phys, phy->phyindex, pdn, GFP_KERNEL);
-
-		/* Errors could be either -ENOMEM or -EBUSY. If the phy has an
-		 * index, and there's another entry at the same index, this is
-		 * unexpected and we still error-out
-		 */
-		if (ret)
-			goto err;
-		return 0;
-	}
-
-	ret = xa_alloc_cyclic(&topo->phys, &phy->phyindex, pdn, xa_limit_32b,
-			      &topo->next_phy_index, GFP_KERNEL);
-	if (ret)
-		goto err;
-
-	return 0;
-
-err:
-	kfree(pdn);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(phy_link_topo_add_phy);
-
-void phy_link_topo_del_phy(struct phy_link_topology *topo,
-			   struct phy_device *phy)
-{
-	struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex);
-
-	/* We delete the PHY from the topology, however we don't re-set the
-	 * phy->phyindex field. If the PHY isn't gone, we can re-assign it the
-	 * same index next time it's added back to the topology
-	 */
-
-	kfree(pdn);
-}
-EXPORT_SYMBOL_GPL(phy_link_topo_del_phy);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index b7e5c669dc8e..994471fad833 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -3411,8 +3411,7 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
 	return ret;
 }
 
-static void phylink_sfp_disconnect_phy(void *upstream,
-				       struct phy_device *phydev)
+static void phylink_sfp_disconnect_phy(void *upstream)
 {
 	phylink_disconnect_phy(upstream);
 }
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 105602581a03..c8f83e5f78ab 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -770,8 +770,6 @@ static const struct sfp_upstream_ops at8031_sfp_ops = {
 	.attach = phy_sfp_attach,
 	.detach = phy_sfp_detach,
 	.module_insert = at8031_sfp_insert,
-	.connect_phy = phy_sfp_connect_phy,
-	.disconnect_phy = phy_sfp_disconnect_phy,
 };
 
 static int at8031_parse_dt(struct phy_device *phydev)
diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c
index 5eb0ab1cb70e..672c6929119a 100644
--- a/drivers/net/phy/qcom/qca807x.c
+++ b/drivers/net/phy/qcom/qca807x.c
@@ -699,8 +699,6 @@ static const struct sfp_upstream_ops qca807x_sfp_ops = {
 	.detach = phy_sfp_detach,
 	.module_insert = qca807x_sfp_insert,
 	.module_remove = qca807x_sfp_remove,
-	.connect_phy = phy_sfp_connect_phy,
-	.disconnect_phy = phy_sfp_disconnect_phy,
 };
 
 static int qca807x_probe(struct phy_device *phydev)
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 37c85f1e6534..2f44fc51848f 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -487,7 +487,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus)
 			bus->socket_ops->stop(bus->sfp);
 		bus->socket_ops->detach(bus->sfp);
 		if (bus->phydev && ops && ops->disconnect_phy)
-			ops->disconnect_phy(bus->upstream, bus->phydev);
+			ops->disconnect_phy(bus->upstream);
 	}
 	bus->registered = false;
 }
@@ -743,7 +743,7 @@ void sfp_remove_phy(struct sfp_bus *bus)
 	const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
 
 	if (ops && ops->disconnect_phy)
-		ops->disconnect_phy(bus->upstream, bus->phydev);
+		ops->disconnect_phy(bus->upstream);
 	bus->phydev = NULL;
 }
 EXPORT_SYMBOL_GPL(sfp_remove_phy);
@@ -860,14 +860,3 @@ void sfp_unregister_socket(struct sfp_bus *bus)
 	sfp_bus_put(bus);
 }
 EXPORT_SYMBOL_GPL(sfp_unregister_socket);
-
-const char *sfp_get_name(struct sfp_bus *bus)
-{
-	ASSERT_RTNL();
-
-	if (bus->sfp_dev)
-		return dev_name(bus->sfp_dev);
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(sfp_get_name);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf261fb89d73..d20c6c99eb88 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -40,6 +40,7 @@
 #include <net/dcbnl.h>
 #endif
 #include <net/netprio_cgroup.h>
+
 #include <linux/netdev_features.h>
 #include <linux/neighbour.h>
 #include <uapi/linux/netdevice.h>
@@ -51,7 +52,6 @@
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
-#include <linux/phy_link_topology_core.h>
 
 struct netpoll_info;
 struct device;
@@ -1975,7 +1975,6 @@ enum netdev_reg_state {
  *	@fcoe_ddp_xid:	Max exchange id for FCoE LRO by ddp
  *
  *	@priomap:	XXX: need comments on this one
- *	@link_topo:	Physical link topology tracking attached PHYs
  *	@phydev:	Physical device may attach itself
  *			for hardware timestamping
  *	@sfp_bus:	attached &struct sfp_bus structure.
@@ -2368,7 +2367,6 @@ struct net_device {
 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 	struct netprio_map __rcu *priomap;
 #endif
-	struct phy_link_topology	*link_topo;
 	struct phy_device	*phydev;
 	struct sfp_bus		*sfp_bus;
 	struct lock_class_key	*qdisc_tx_busylock;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3ddfe7fe781a..e6e83304558e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -550,9 +550,6 @@ struct macsec_ops;
  * @drv: Pointer to the driver for this PHY instance
  * @devlink: Create a link between phy dev and mac dev, if the external phy
  *           used by current mac interface is managed by another mac interface.
- * @phyindex: Unique id across the phy's parent tree of phys to address the PHY
- *	      from userspace, similar to ifindex. A zero index means the PHY
- *	      wasn't assigned an id yet.
  * @phy_id: UID for this device found during discovery
  * @c45_ids: 802.3-c45 Device Identifiers if is_c45.
  * @is_c45:  Set to true if this PHY uses clause 45 addressing.
@@ -653,7 +650,6 @@ struct phy_device {
 
 	struct device_link *devlink;
 
-	u32 phyindex;
 	u32 phy_id;
 
 	struct phy_c45_device_ids c45_ids;
@@ -1758,8 +1754,6 @@ int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
 int __phy_resume(struct phy_device *phydev);
 int phy_loopback(struct phy_device *phydev, bool enable);
-int phy_sfp_connect_phy(void *upstream, struct phy_device *phy);
-void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy);
 void phy_sfp_attach(void *upstream, struct sfp_bus *bus);
 void phy_sfp_detach(void *upstream, struct sfp_bus *bus);
 int phy_sfp_probe(struct phy_device *phydev,
diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
deleted file mode 100644
index 6b79feb607e7..000000000000
--- a/include/linux/phy_link_topology.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * PHY device list allow maintaining a list of PHY devices that are
- * part of a netdevice's link topology. PHYs can for example be chained,
- * as is the case when using a PHY that exposes an SFP module, on which an
- * SFP transceiver that embeds a PHY is connected.
- *
- * This list can then be used by userspace to leverage individual PHY
- * capabilities.
- */
-#ifndef __PHY_LINK_TOPOLOGY_H
-#define __PHY_LINK_TOPOLOGY_H
-
-#include <linux/ethtool.h>
-#include <linux/phy_link_topology_core.h>
-
-struct xarray;
-struct phy_device;
-struct net_device;
-struct sfp_bus;
-
-struct phy_device_node {
-	enum phy_upstream upstream_type;
-
-	union {
-		struct net_device	*netdev;
-		struct phy_device	*phydev;
-	} upstream;
-
-	struct sfp_bus *parent_sfp_bus;
-
-	struct phy_device *phy;
-};
-
-struct phy_link_topology {
-	struct xarray phys;
-	u32 next_phy_index;
-};
-
-static inline struct phy_device *
-phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex)
-{
-	struct phy_device_node *pdn = xa_load(&topo->phys, phyindex);
-
-	if (pdn)
-		return pdn->phy;
-
-	return NULL;
-}
-
-#if IS_REACHABLE(CONFIG_PHYLIB)
-int phy_link_topo_add_phy(struct phy_link_topology *topo,
-			  struct phy_device *phy,
-			  enum phy_upstream upt, void *upstream);
-
-void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy);
-
-#else
-static inline int phy_link_topo_add_phy(struct phy_link_topology *topo,
-					struct phy_device *phy,
-					enum phy_upstream upt, void *upstream)
-{
-	return 0;
-}
-
-static inline void phy_link_topo_del_phy(struct phy_link_topology *topo,
-					 struct phy_device *phy)
-{
-}
-#endif
-
-#endif /* __PHY_LINK_TOPOLOGY_H */
diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h
deleted file mode 100644
index 0a6479055745..000000000000
--- a/include/linux/phy_link_topology_core.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PHY_LINK_TOPOLOGY_CORE_H
-#define __PHY_LINK_TOPOLOGY_CORE_H
-
-struct phy_link_topology;
-
-#if IS_REACHABLE(CONFIG_PHYLIB)
-
-struct phy_link_topology *phy_link_topo_create(struct net_device *dev);
-void phy_link_topo_destroy(struct phy_link_topology *topo);
-
-#else
-
-static inline struct phy_link_topology *phy_link_topo_create(struct net_device *dev)
-{
-	return NULL;
-}
-
-static inline void phy_link_topo_destroy(struct phy_link_topology *topo)
-{
-}
-
-#endif
-
-#endif /* __PHY_LINK_TOPOLOGY_CORE_H */
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index 5ebc57f78c95..a45da7eef9a2 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -544,7 +544,7 @@ struct sfp_upstream_ops {
 	void (*link_down)(void *priv);
 	void (*link_up)(void *priv);
 	int (*connect_phy)(void *priv, struct phy_device *);
-	void (*disconnect_phy)(void *priv, struct phy_device *);
+	void (*disconnect_phy)(void *priv);
 };
 
 #if IS_ENABLED(CONFIG_SFP)
@@ -570,7 +570,6 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode);
 int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
 			 const struct sfp_upstream_ops *ops);
 void sfp_bus_del_upstream(struct sfp_bus *bus);
-const char *sfp_get_name(struct sfp_bus *bus);
 #else
 static inline int sfp_parse_port(struct sfp_bus *bus,
 				 const struct sfp_eeprom_id *id,
@@ -649,11 +648,6 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
 static inline void sfp_bus_del_upstream(struct sfp_bus *bus)
 {
 }
-
-static inline const char *sfp_get_name(struct sfp_bus *bus)
-{
-	return NULL;
-}
 #endif
 
 #endif
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 041e09c3515d..8733a3117902 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2323,20 +2323,4 @@ struct ethtool_link_settings {
 	 * __u32 map_lp_advertising[link_mode_masks_nwords];
 	 */
 };
-
-/**
- * enum phy_upstream - Represents the upstream component a given PHY device
- * is connected to, as in what is on the other end of the MII bus. Most PHYs
- * will be attached to an Ethernet MAC controller, but in some cases, there's
- * an intermediate PHY used as a media-converter, which will driver another
- * MII interface as its output.
- * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port,
- *		      or ethernet controller)
- * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter)
- */
-enum phy_upstream {
-	PHY_UPSTREAM_MAC,
-	PHY_UPSTREAM_PHY,
-};
-
 #endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index f17dbe54bf5e..b49b804b9495 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -132,7 +132,6 @@ enum {
 	ETHTOOL_A_HEADER_DEV_INDEX,		/* u32 */
 	ETHTOOL_A_HEADER_DEV_NAME,		/* string */
 	ETHTOOL_A_HEADER_FLAGS,			/* u32 - ETHTOOL_FLAG_* */
-	ETHTOOL_A_HEADER_PHY_INDEX,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_HEADER_CNT,
diff --git a/net/core/dev.c b/net/core/dev.c
index d2ce91a334c1..e1bb6d7856d9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,7 +158,6 @@
 #include <net/page_pool/types.h>
 #include <net/page_pool/helpers.h>
 #include <net/rps.h>
-#include <linux/phy_link_topology_core.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -10998,12 +10997,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 #ifdef CONFIG_NET_SCHED
 	hash_init(dev->qdisc_hash);
 #endif
-	dev->link_topo = phy_link_topo_create(dev);
-	if (IS_ERR(dev->link_topo)) {
-		dev->link_topo = NULL;
-		goto free_all;
-	}
-
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
@@ -11092,8 +11085,6 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->xdp_bulkq);
 	dev->xdp_bulkq = NULL;
 
-	phy_link_topo_destroy(dev->link_topo);
-
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED ||
 	    dev->reg_state == NETREG_DUMMY) {
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 563e94e0cbd8..bd04f28d5cf4 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -4,7 +4,6 @@
 #include <linux/ethtool_netlink.h>
 #include <linux/pm_runtime.h>
 #include "netlink.h"
-#include <linux/phy_link_topology.h>
 
 static struct genl_family ethtool_genl_family;
 
@@ -31,24 +30,6 @@ const struct nla_policy ethnl_header_policy_stats[] = {
 							  ETHTOOL_FLAGS_STATS),
 };
 
-const struct nla_policy ethnl_header_policy_phy[] = {
-	[ETHTOOL_A_HEADER_DEV_INDEX]	= { .type = NLA_U32 },
-	[ETHTOOL_A_HEADER_DEV_NAME]	= { .type = NLA_NUL_STRING,
-					    .len = ALTIFNAMSIZ - 1 },
-	[ETHTOOL_A_HEADER_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
-							  ETHTOOL_FLAGS_BASIC),
-	[ETHTOOL_A_HEADER_PHY_INDEX]		= NLA_POLICY_MIN(NLA_U32, 1),
-};
-
-const struct nla_policy ethnl_header_policy_phy_stats[] = {
-	[ETHTOOL_A_HEADER_DEV_INDEX]	= { .type = NLA_U32 },
-	[ETHTOOL_A_HEADER_DEV_NAME]	= { .type = NLA_NUL_STRING,
-					    .len = ALTIFNAMSIZ - 1 },
-	[ETHTOOL_A_HEADER_FLAGS]	= NLA_POLICY_MASK(NLA_U32,
-							  ETHTOOL_FLAGS_STATS),
-	[ETHTOOL_A_HEADER_PHY_INDEX]		= NLA_POLICY_MIN(NLA_U32, 1),
-};
-
 int ethnl_ops_begin(struct net_device *dev)
 {
 	int ret;
@@ -108,9 +89,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 			       const struct nlattr *header, struct net *net,
 			       struct netlink_ext_ack *extack, bool require_dev)
 {
-	struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
+	struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
 	const struct nlattr *devname_attr;
-	struct phy_device *phydev = NULL;
 	struct net_device *dev = NULL;
 	u32 flags = 0;
 	int ret;
@@ -124,7 +104,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 	/* No validation here, command policy should have a nested policy set
 	 * for the header, therefore validation should have already been done.
 	 */
-	ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
+	ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
 			       NULL, extack);
 	if (ret < 0)
 		return ret;
@@ -165,30 +145,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
 		return -EINVAL;
 	}
 
-	if (dev) {
-		if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
-			struct nlattr *phy_id;
-
-			phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX];
-			phydev = phy_link_topo_get_phy(dev->link_topo,
-						       nla_get_u32(phy_id));
-			if (!phydev) {
-				NL_SET_BAD_ATTR(extack, phy_id);
-				return -ENODEV;
-			}
-		} else {
-			/* If we need a PHY but no phy index is specified, fallback
-			 * to dev->phydev
-			 */
-			phydev = dev->phydev;
-		}
-	} else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
-		NL_SET_ERR_MSG_ATTR(extack, header,
-				    "can't target a PHY without a netdev");
-		return -EINVAL;
-	}
-
-	req_info->phydev = phydev;
 	req_info->dev = dev;
 	req_info->flags = flags;
 	return 0;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index d57a890b5d9e..9a333a8d04c1 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -250,7 +250,6 @@ static inline unsigned int ethnl_reply_header_size(void)
  * @dev:   network device the request is for (may be null)
  * @dev_tracker: refcount tracker for @dev reference
  * @flags: request flags common for all request types
- * @phydev: phy_device connected to @dev this request is for (may be null)
  *
  * This is a common base for request specific structures holding data from
  * parsed userspace request. These always embed struct ethnl_req_info at
@@ -260,7 +259,6 @@ struct ethnl_req_info {
 	struct net_device	*dev;
 	netdevice_tracker	dev_tracker;
 	u32			flags;
-	struct phy_device	*phydev;
 };
 
 static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -397,12 +395,9 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
 extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
 extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
 extern const struct ethnl_request_ops ethnl_mm_request_ops;
-extern const struct ethnl_request_ops ethnl_phy_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
-extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
-extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
 extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
 extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
 extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
-- 
cgit v1.2.3-73-gaa49b


From b33a0d297d3d2068e310616d074d88ab81560236 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Tue, 30 Apr 2024 03:08:42 +0200
Subject: dt-bindings: net: broadcom-bluetooth: Add CYW43439 DT binding

CYW43439 is a Wi-Fi + Bluetooth combo device from Infineon.
The Bluetooth part is capable of Bluetooth 5.2 BR/EDR/LE .
This chip is present e.g. on muRata 1YN module.

Extend the binding with its DT compatible using fallback
compatible string to "brcm,bcm4329-bt" which seems to be
the oldest compatible device. This should also prevent the
growth of compatible string tables in drivers. The existing
block of compatible strings is retained.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../bindings/net/broadcom-bluetooth.yaml           | 33 +++++++++++++---------
 1 file changed, 19 insertions(+), 14 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
index cc70b00c6ce5..4a1bfc2b3584 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
@@ -14,20 +14,25 @@ description:
 
 properties:
   compatible:
-    enum:
-      - brcm,bcm20702a1
-      - brcm,bcm4329-bt
-      - brcm,bcm4330-bt
-      - brcm,bcm4334-bt
-      - brcm,bcm43430a0-bt
-      - brcm,bcm43430a1-bt
-      - brcm,bcm43438-bt
-      - brcm,bcm4345c5
-      - brcm,bcm43540-bt
-      - brcm,bcm4335a0
-      - brcm,bcm4349-bt
-      - cypress,cyw4373a0-bt
-      - infineon,cyw55572-bt
+    oneOf:
+      - items:
+          - enum:
+              - infineon,cyw43439-bt
+          - const: brcm,bcm4329-bt
+      - enum:
+          - brcm,bcm20702a1
+          - brcm,bcm4329-bt
+          - brcm,bcm4330-bt
+          - brcm,bcm4334-bt
+          - brcm,bcm43430a0-bt
+          - brcm,bcm43430a1-bt
+          - brcm,bcm43438-bt
+          - brcm,bcm4345c5
+          - brcm,bcm43540-bt
+          - brcm,bcm4335a0
+          - brcm,bcm4349-bt
+          - cypress,cyw4373a0-bt
+          - infineon,cyw55572-bt
 
   shutdown-gpios:
     maxItems: 1
-- 
cgit v1.2.3-73-gaa49b


From defa9cca02fd5904ff26f6b8ce65c72002570f69 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Fri, 12 Apr 2024 15:30:42 +0800
Subject: dt-bindings: net: bluetooth: Add MediaTek MT7921S SDIO Bluetooth

The MediaTek MT7921S is a WiFi/Bluetooth combo chip that works over
SDIO. WiFi and Bluetooth are separate SDIO functions within the chip.
While the Bluetooth SDIO function is fully discoverable, the chip has
a pin that can reset just the Bluetooth core, as opposed to the full
chip. This should be described in the device tree.

Add a device tree binding for the Bluetooth SDIO function of the MT7921S
specifically to document the reset line. This binding is based on the MMC
controller binding, which specifies one device node per SDIO function.

Cc: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../net/bluetooth/mediatek,mt7921s-bluetooth.yaml  | 55 ++++++++++++++++++++++
 MAINTAINERS                                        |  1 +
 2 files changed, 56 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml
new file mode 100644
index 000000000000..67ff7caad599
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/bluetooth/mediatek,mt7921s-bluetooth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek MT7921S Bluetooth
+
+maintainers:
+  - Sean Wang <sean.wang@mediatek.com>
+
+description:
+  MT7921S is an SDIO-attached dual-radio WiFi+Bluetooth Combo chip; each
+  function is its own SDIO function on a shared SDIO interface. The chip
+  has two dedicated reset lines, one for each function core.
+  This binding only covers the Bluetooth SDIO function, with one device
+  node describing only this SDIO function.
+
+allOf:
+  - $ref: bluetooth-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7921s-bluetooth
+
+  reg:
+    const: 2
+
+  reset-gpios:
+    maxItems: 1
+    description:
+      An active-low reset line for the Bluetooth core; on typical M.2
+      key E modules this is the W_DISABLE2# pin.
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    mmc {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        bluetooth@2 {
+            compatible = "mediatek,mt7921s-bluetooth";
+            reg = <2>;
+            reset-gpios = <&pio 8 GPIO_ACTIVE_LOW>;
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index b4ed6480e919..c4c6ffbc6d10 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13763,6 +13763,7 @@ M:	Sean Wang <sean.wang@mediatek.com>
 L:	linux-bluetooth@vger.kernel.org
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
+F:	Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml
 F:	Documentation/devicetree/bindings/net/mediatek-bluetooth.txt
 F:	drivers/bluetooth/btmtkuart.c
 
-- 
cgit v1.2.3-73-gaa49b