-rw-r--r--  Documentation/networking/rxrpc.rst | 13
-rw-r--r--  drivers/block/drbd/drbd_int.h | 28
-rw-r--r--  drivers/block/drbd/drbd_main.c | 2
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 13
-rw-r--r--  drivers/block/drbd/drbd_worker.c | 6
-rw-r--r--  drivers/infiniband/sw/siw/siw_cm.c | 42
-rw-r--r--  drivers/net/dsa/sja1105/sja1105.h | 2
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 76
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_main.c | 100
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_spi.c | 6
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_static_config.h | 15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 85
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 350
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 30
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 96
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c | 170
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c | 235
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c | 160
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h | 26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 279
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c | 322
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h | 17
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h | 29
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 559
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 41
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 401
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 25
-rw-r--r--  drivers/nvme/host/tcp.c | 53
-rw-r--r--  drivers/nvme/target/tcp.c | 54
-rw-r--r--  drivers/target/iscsi/Kconfig | 2
-rw-r--r--  drivers/target/iscsi/iscsi_target_login.c | 35
-rw-r--r--  fs/afs/rxrpc.c | 6
-rw-r--r--  fs/cifs/connect.c | 10
-rw-r--r--  fs/cifs/transport.c | 8
-rw-r--r--  fs/dlm/lowcomms.c | 158
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 42
-rw-r--r--  include/linux/net.h | 2
-rw-r--r--  include/linux/tcp.h | 9
-rw-r--r--  include/net/af_rxrpc.h | 2
-rw-r--r--  include/net/ip.h | 6
-rw-r--r--  include/net/ipv6.h | 92
-rw-r--r--  include/net/sock.h | 10
-rw-r--r--  include/net/tcp.h | 1
-rw-r--r--  net/ceph/messenger.c | 11
-rw-r--r--  net/core/sock.c | 181
-rw-r--r--  net/ipv4/ip_sockglue.c | 65
-rw-r--r--  net/ipv4/tcp.c | 222
-rw-r--r--  net/ipv4/tcp_ipv4.c | 3
-rw-r--r--  net/ipv4/udp_tunnel.c | 4
-rw-r--r--  net/ipv6/ip6_udp_tunnel.c | 9
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 59
-rw-r--r--  net/ipv6/tcp_ipv6.c | 9
-rw-r--r--  net/rds/tcp.c | 11
-rw-r--r--  net/rds/tcp.h | 4
-rw-r--r--  net/rds/tcp_connect.c | 2
-rw-r--r--  net/rds/tcp_listen.c | 47
-rw-r--r--  net/rds/tcp_send.c | 9
-rw-r--r--  net/rxrpc/af_rxrpc.c | 13
-rw-r--r--  net/rxrpc/local_object.c | 34
-rw-r--r--  net/rxrpc/output.c | 14
-rw-r--r--  net/socket.c | 34
-rw-r--r--  net/sunrpc/svcsock.c | 29
-rw-r--r--  net/sunrpc/xprtsock.c | 40
-rw-r--r--  net/tipc/socket.c | 18
-rw-r--r--  net/tipc/socket.h | 2
-rw-r--r--  net/tipc/topsrv.c | 6
-rwxr-xr-x  tools/testing/selftests/net/fib_nexthops.sh | 115
78 files changed, 2901 insertions, 1809 deletions
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index 5ad35113d0f4..68552b92dc44 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -477,7 +477,7 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
Encrypted checksum plus packet padded and first eight bytes of packet
encrypted - which includes the actual packet length.
- (c) RXRPC_SECURITY_ENCRYPTED
+ (c) RXRPC_SECURITY_ENCRYPT
Encrypted checksum plus entire packet padded and encrypted, including
actual packet length.
@@ -578,7 +578,7 @@ A client would issue an operation by:
This issues a request_key() to get the key representing the security
context. The minimum security level can be set::
- unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+ unsigned int sec = RXRPC_SECURITY_ENCRYPT;
setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
&sec, sizeof(sec));
@@ -1090,6 +1090,15 @@ The kernel interface functions are as follows:
jiffies). In the event of the timeout occurring, the call will be
aborted and -ETIME or -ETIMEDOUT will be returned.
+ (#) Apply the RXRPC_MIN_SECURITY_LEVEL sockopt to a socket from within the
+ kernel::
+
+ int rxrpc_sock_set_min_security_level(struct sock *sk,
+ unsigned int val);
+
+ This specifies the minimum security level required for calls on this
+ socket.
+
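+ For illustration (an editor's sketch, not part of the original patch), a
+ kernel service holding an AF_RXRPC socket "sock" could apply it as
+ follows::
+
+	int ret;
+
+	ret = rxrpc_sock_set_min_security_level(sock->sk,
+						RXRPC_SECURITY_ENCRYPT);
+	if (ret < 0)
+		return ret;
+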
Configurable Parameters
=======================
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index aae99a2d7bd4..14345a87c7cc 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,34 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
extern int drbd_connected(struct drbd_peer_device *);
-static inline void drbd_tcp_cork(struct socket *sock)
-{
- int val = 1;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
- (char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_uncork(struct socket *sock)
-{
- int val = 0;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
- (char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_nodelay(struct socket *sock)
-{
- int val = 1;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_quickack(struct socket *sock)
-{
- int val = 2;
- (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
- (char*)&val, sizeof(val));
-}
-
/* sets the number of 512 byte sectors of our virtual device */
void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
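[Editor's note -- a minimal sketch of the correspondence between the removed
DRBD wrappers and the new in-kernel TCP setters used below, assuming a
connected "struct socket *sock"; illustrative only:

	tcp_sock_set_cork(sock->sk, true);	/* was drbd_tcp_cork()     */
	tcp_sock_set_cork(sock->sk, false);	/* was drbd_tcp_uncork()   */
	tcp_sock_set_nodelay(sock->sk);		/* was drbd_tcp_nodelay()  */
	tcp_sock_set_quickack(sock->sk, 2);	/* was drbd_tcp_quickack() */

Unlike kernel_setsockopt(), these setters return void, so the (void) casts
and error handling go away.]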
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c094c3c2c5d4..45fbd526c453 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr,
/* DRBD protocol "pings" are latency critical.
* This is supposed to trigger tcp_push_pending_frames() */
if (!err && (cmd == P_PING || cmd == P_PING_ACK))
- drbd_tcp_nodelay(sock->socket);
+ tcp_sock_set_nodelay(sock->socket->sk);
return err;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c15e7083b13a..3a3f2b6a821f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1051,8 +1051,8 @@ randomize:
/* we don't want delays.
* we use TCP_CORK where appropriate, though */
- drbd_tcp_nodelay(sock.socket);
- drbd_tcp_nodelay(msock.socket);
+ tcp_sock_set_nodelay(sock.socket->sk);
+ tcp_sock_set_nodelay(msock.socket->sk);
connection->data.socket = sock.socket;
connection->meta.socket = msock.socket;
@@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, str
* quickly as possible, and let remote TCP know what we have
* received so far. */
if (err == -EAGAIN) {
- drbd_tcp_quickack(connection->data.socket);
+ tcp_sock_set_quickack(connection->data.socket->sk, 2);
drbd_unplug_all_devices(connection);
}
if (err > 0) {
@@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection *connection, struct packe
{
/* Make sure we've acked all the TCP data associated
* with the data requests being unplugged */
- drbd_tcp_quickack(connection->data.socket);
-
+ tcp_sock_set_quickack(connection->data.socket->sk, 2);
return 0;
}
@@ -6162,7 +6161,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
rcu_read_unlock();
if (tcp_cork)
- drbd_tcp_cork(connection->meta.socket);
+ tcp_sock_set_cork(connection->meta.socket->sk, true);
err = drbd_finish_peer_reqs(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -6175,7 +6174,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
}
if (tcp_cork)
- drbd_tcp_uncork(connection->meta.socket);
+ tcp_sock_set_cork(connection->meta.socket->sk, false);
return;
}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0dc019da1f8d..2b89c9f2ca70 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -2098,7 +2098,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
if (uncork) {
mutex_lock(&connection->data.mutex);
if (connection->data.socket)
- drbd_tcp_uncork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, false);
mutex_unlock(&connection->data.mutex);
}
@@ -2153,9 +2153,9 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
mutex_lock(&connection->data.mutex);
if (connection->data.socket) {
if (cork)
- drbd_tcp_cork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, true);
else if (!uncork)
- drbd_tcp_uncork(connection->data.socket);
+ tcp_sock_set_cork(connection->data.socket->sk, false);
}
mutex_unlock(&connection->data.mutex);
}
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 559e5fd3bad8..1662216be66d 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_get(new_cep);
new_s->sk->sk_user_data = new_cep;
- if (siw_tcp_nagle == false) {
- int val = 1;
-
- rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- if (rv) {
- siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
- goto error;
- }
- }
+ if (siw_tcp_nagle == false)
+ tcp_sock_set_nodelay(new_s->sk);
new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
@@ -1312,17 +1304,14 @@ static void siw_cm_llp_state_change(struct sock *sk)
static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
struct sockaddr *raddr)
{
- int rv, flags = 0, s_val = 1;
+ int rv, flags = 0;
size_t size = laddr->sa_family == AF_INET ?
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
/*
* Make address available again asap.
*/
- rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
- sizeof(s_val));
- if (rv < 0)
- return rv;
+ sock_set_reuseaddr(s->sk);
rv = s->ops->bind(s, laddr, size);
if (rv < 0)
@@ -1389,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
goto error;
}
- if (siw_tcp_nagle == false) {
- int val = 1;
-
- rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
- sizeof(val));
- if (rv) {
- siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
- goto error;
- }
- }
+ if (siw_tcp_nagle == false)
+ tcp_sock_set_nodelay(s->sk);
cep = siw_cep_alloc(sdev);
if (!cep) {
rv = -ENOMEM;
@@ -1781,7 +1762,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
struct siw_cep *cep = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
int addr_family = id->local_addr.ss_family;
- int rv = 0, s_val;
+ int rv = 0;
if (addr_family != AF_INET && addr_family != AF_INET6)
return -EAFNOSUPPORT;
@@ -1793,13 +1774,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
/*
* Allow binding local port when still in TIME_WAIT from last close.
*/
- s_val = 1;
- rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
- sizeof(s_val));
- if (rv) {
- siw_dbg(id->device, "setsockopt error: %d\n", rv);
- goto error;
- }
+ sock_set_reuseaddr(s->sk);
+
if (addr_family == AF_INET) {
struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
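[Editor's note -- the resulting pattern in kernel_bindconnect() above, shown
as an illustrative sketch: sock_set_reuseaddr() and tcp_sock_set_nodelay()
return void, so the kernel_setsockopt() error paths are simply dropped:

	sock_set_reuseaddr(s->sk);
	rv = s->ops->bind(s, laddr, size);
	if (rv < 0)
		return rv;]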
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 198d2a7d7f95..cb3c81a49fbc 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -84,6 +84,7 @@ struct sja1105_info {
* the egress timestamps.
*/
int ptpegr_ts_bytes;
+ int num_cbs_shapers;
const struct sja1105_dynamic_table_ops *dyn_ops;
const struct sja1105_table_ops *static_ops;
const struct sja1105_regs *regs;
@@ -218,6 +219,7 @@ struct sja1105_private {
struct mutex mgmt_lock;
bool expect_dsa_8021q;
enum sja1105_vlan_state vlan_state;
+ struct sja1105_cbs_entry *cbs;
struct sja1105_tagger_data tagger_data;
struct sja1105_ptp_data ptp_data;
struct sja1105_tas_data tas_data;
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 2a8fbd7fdedc..7516f2ffdd4e 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -136,6 +136,12 @@
#define SJA1105_SIZE_RETAGGING_DYN_CMD \
(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_RETAGGING_ENTRY)
+#define SJA1105ET_SIZE_CBS_DYN_CMD \
+ (SJA1105_SIZE_DYN_CMD + SJA1105ET_SIZE_CBS_ENTRY)
+
+#define SJA1105PQRS_SIZE_CBS_DYN_CMD \
+ (SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_CBS_ENTRY)
+
#define SJA1105_MAX_DYN_CMD_SIZE \
SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD
@@ -542,6 +548,60 @@ sja1105_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
sja1105_packing(p, &cmd->index, 5, 0, size, op);
}
+static void sja1105et_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+ enum packing_op op)
+{
+ u8 *p = buf + SJA1105ET_SIZE_CBS_ENTRY;
+ const int size = SJA1105_SIZE_DYN_CMD;
+
+ sja1105_packing(p, &cmd->valid, 31, 31, size, op);
+ sja1105_packing(p, &cmd->index, 19, 16, size, op);
+}
+
+static size_t sja1105et_cbs_entry_packing(void *buf, void *entry_ptr,
+ enum packing_op op)
+{
+ const size_t size = SJA1105ET_SIZE_CBS_ENTRY;
+ struct sja1105_cbs_entry *entry = entry_ptr;
+ u8 *cmd = buf + size;
+ u32 *p = buf;
+
+ sja1105_packing(cmd, &entry->port, 5, 3, SJA1105_SIZE_DYN_CMD, op);
+ sja1105_packing(cmd, &entry->prio, 2, 0, SJA1105_SIZE_DYN_CMD, op);
+ sja1105_packing(p + 3, &entry->credit_lo, 31, 0, size, op);
+ sja1105_packing(p + 2, &entry->credit_hi, 31, 0, size, op);
+ sja1105_packing(p + 1, &entry->send_slope, 31, 0, size, op);
+ sja1105_packing(p + 0, &entry->idle_slope, 31, 0, size, op);
+ return size;
+}
+
+static void sja1105pqrs_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+ enum packing_op op)
+{
+ u8 *p = buf + SJA1105PQRS_SIZE_CBS_ENTRY;
+ const int size = SJA1105_SIZE_DYN_CMD;
+
+ sja1105_packing(p, &cmd->valid, 31, 31, size, op);
+ sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+ sja1105_packing(p, &cmd->errors, 29, 29, size, op);
+ sja1105_packing(p, &cmd->index, 3, 0, size, op);
+}
+
+static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
+ enum packing_op op)
+{
+ const size_t size = SJA1105PQRS_SIZE_CBS_ENTRY;
+ struct sja1105_cbs_entry *entry = entry_ptr;
+
+ sja1105_packing(buf, &entry->port, 159, 157, size, op);
+ sja1105_packing(buf, &entry->prio, 156, 154, size, op);
+ sja1105_packing(buf, &entry->credit_lo, 153, 122, size, op);
+ sja1105_packing(buf, &entry->credit_hi, 121, 90, size, op);
+ sja1105_packing(buf, &entry->send_slope, 89, 58, size, op);
+ sja1105_packing(buf, &entry->idle_slope, 57, 26, size, op);
+ return size;
+}
+
#define OP_READ BIT(0)
#define OP_WRITE BIT(1)
#define OP_DEL BIT(2)
@@ -631,6 +691,14 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
.addr = 0x31,
},
+ [BLK_IDX_CBS] = {
+ .entry_packing = sja1105et_cbs_entry_packing,
+ .cmd_packing = sja1105et_cbs_cmd_packing,
+ .max_entry_count = SJA1105ET_MAX_CBS_COUNT,
+ .access = OP_WRITE,
+ .packed_size = SJA1105ET_SIZE_CBS_DYN_CMD,
+ .addr = 0x2c,
+ },
[BLK_IDX_XMII_PARAMS] = {0},
};
@@ -725,6 +793,14 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
.addr = 0x38,
},
+ [BLK_IDX_CBS] = {
+ .entry_packing = sja1105pqrs_cbs_entry_packing,
+ .cmd_packing = sja1105pqrs_cbs_cmd_packing,
+ .max_entry_count = SJA1105PQRS_MAX_CBS_COUNT,
+ .access = OP_WRITE,
+ .packed_size = SJA1105PQRS_SIZE_CBS_DYN_CMD,
+ .addr = 0x32,
+ },
[BLK_IDX_XMII_PARAMS] = {0},
};
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 44ce7882dfb1..36ab527449e6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1640,6 +1640,92 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
sja1105_bridge_member(ds, port, br, false);
}
+#define BYTES_PER_KBIT (1000LL / 8)
+
+static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->info->num_cbs_shapers; i++)
+ if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
+ return i;
+
+ return -1;
+}
+
+static int sja1105_delete_cbs_shaper(struct sja1105_private *priv, int port,
+ int prio)
+{
+ int i;
+
+ for (i = 0; i < priv->info->num_cbs_shapers; i++) {
+ struct sja1105_cbs_entry *cbs = &priv->cbs[i];
+
+ if (cbs->port == port && cbs->prio == prio) {
+ memset(cbs, 0, sizeof(*cbs));
+ return sja1105_dynamic_config_write(priv, BLK_IDX_CBS,
+ i, cbs, true);
+ }
+ }
+
+ return 0;
+}
+
+static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+ struct tc_cbs_qopt_offload *offload)
+{
+ struct sja1105_private *priv = ds->priv;
+ struct sja1105_cbs_entry *cbs;
+ int index;
+
+ if (!offload->enable)
+ return sja1105_delete_cbs_shaper(priv, port, offload->queue);
+
+ index = sja1105_find_unused_cbs_shaper(priv);
+ if (index < 0)
+ return -ENOSPC;
+
+ cbs = &priv->cbs[index];
+ cbs->port = port;
+ cbs->prio = offload->queue;
+ /* locredit and sendslope are negative by definition. In hardware,
+ * positive values must be provided, and the negative sign is implicit.
+ */
+ cbs->credit_hi = offload->hicredit;
+ cbs->credit_lo = abs(offload->locredit);
+ /* User space is in kbits/sec, hardware in bytes/sec */
+ cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
+ cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
+ /* Convert the negative values from 64-bit 2's complement
+ * to 32-bit 2's complement (for the case of 0x80000000 whose
+ * negative is still negative).
+ */
+ cbs->credit_lo &= GENMASK_ULL(31, 0);
+ cbs->send_slope &= GENMASK_ULL(31, 0);
+
+ return sja1105_dynamic_config_write(priv, BLK_IDX_CBS, index, cbs,
+ true);
+}
+
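+	/* Editor's note -- worked example of the conversions above, not part
+	 * of the original patch. For hypothetical tc-cbs parameters on a
+	 * 1 Gbit/s port, with BYTES_PER_KBIT = 1000 / 8 = 125:
+	 *   idleslope 20000 kbit/s -> 20000 * 125 = 2500000 bytes/s
+	 *   sendslope = idleslope - port rate = 20000 - 1000000
+	 *             = -980000 kbit/s -> abs(-980000 * 125) = 122500000 bytes/s
+	 *   hicredit/locredit arrive in bytes; locredit's sign is dropped and
+	 *   the negative-derived values are truncated to 32 bits by the
+	 *   GENMASK_ULL(31, 0) masks.
+	 */
+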
+static int sja1105_reload_cbs(struct sja1105_private *priv)
+{
+ int rc = 0, i;
+
+ for (i = 0; i < priv->info->num_cbs_shapers; i++) {
+ struct sja1105_cbs_entry *cbs = &priv->cbs[i];
+
+ if (!cbs->idle_slope && !cbs->send_slope)
+ continue;
+
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_CBS, i, cbs,
+ true);
+ if (rc)
+ break;
+ }
+
+ return rc;
+}
+
static const char * const sja1105_reset_reasons[] = {
[SJA1105_VLAN_FILTERING] = "VLAN filtering",
[SJA1105_RX_HWTSTAMPING] = "RX timestamping",
@@ -1754,6 +1840,10 @@ out_unlock_ptp:
sja1105_sgmii_pcs_force_speed(priv, speed);
}
}
+
+ rc = sja1105_reload_cbs(priv);
+ if (rc < 0)
+ goto out;
out:
mutex_unlock(&priv->mgmt_lock);
@@ -3131,6 +3221,8 @@ static int sja1105_port_setup_tc(struct dsa_switch *ds, int port,
switch (type) {
case TC_SETUP_QDISC_TAPRIO:
return sja1105_setup_tc_taprio(ds, port, type_data);
+ case TC_SETUP_QDISC_CBS:
+ return sja1105_setup_tc_cbs(ds, port, type_data);
default:
return -EOPNOTSUPP;
}
@@ -3408,6 +3500,14 @@ static int sja1105_probe(struct spi_device *spi)
if (rc)
return rc;
+ if (IS_ENABLED(CONFIG_NET_SCH_CBS)) {
+ priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
+ sizeof(struct sja1105_cbs_entry),
+ GFP_KERNEL);
+ if (!priv->cbs)
+ return -ENOMEM;
+ }
+
/* Connections between dsa_port and sja1105_port */
for (port = 0; port < SJA1105_NUM_PORTS; port++) {
struct sja1105_port *sp = &priv->ports[port];
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index a0dacae803cc..bb52b9c841b2 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -515,6 +515,7 @@ struct sja1105_info sja1105e_info = {
.qinq_tpid = ETH_P_8021Q,
.ptp_ts_bits = 24,
.ptpegr_ts_bytes = 4,
+ .num_cbs_shapers = SJA1105ET_MAX_CBS_COUNT,
.reset_cmd = sja1105et_reset_cmd,
.fdb_add_cmd = sja1105et_fdb_add,
.fdb_del_cmd = sja1105et_fdb_del,
@@ -530,6 +531,7 @@ struct sja1105_info sja1105t_info = {
.qinq_tpid = ETH_P_8021Q,
.ptp_ts_bits = 24,
.ptpegr_ts_bytes = 4,
+ .num_cbs_shapers = SJA1105ET_MAX_CBS_COUNT,
.reset_cmd = sja1105et_reset_cmd,
.fdb_add_cmd = sja1105et_fdb_add,
.fdb_del_cmd = sja1105et_fdb_del,
@@ -545,6 +547,7 @@ struct sja1105_info sja1105p_info = {
.qinq_tpid = ETH_P_8021AD,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
+ .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT,
.setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay,
.reset_cmd = sja1105pqrs_reset_cmd,
.fdb_add_cmd = sja1105pqrs_fdb_add,
@@ -561,6 +564,7 @@ struct sja1105_info sja1105q_info = {
.qinq_tpid = ETH_P_8021AD,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
+ .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT,
.setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay,
.reset_cmd = sja1105pqrs_reset_cmd,
.fdb_add_cmd = sja1105pqrs_fdb_add,
@@ -577,6 +581,7 @@ struct sja1105_info sja1105r_info = {
.qinq_tpid = ETH_P_8021AD,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
+ .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT,
.setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay,
.reset_cmd = sja1105pqrs_reset_cmd,
.fdb_add_cmd = sja1105pqrs_fdb_add,
@@ -594,6 +599,7 @@ struct sja1105_info sja1105s_info = {
.qinq_tpid = ETH_P_8021AD,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
+ .num_cbs_shapers = SJA1105PQRS_MAX_CBS_COUNT,
.setup_rgmii_delay = sja1105pqrs_setup_rgmii_delay,
.reset_cmd = sja1105pqrs_reset_cmd,
.fdb_add_cmd = sja1105pqrs_fdb_add,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 5946847bb5b9..9b62b9b5549d 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -30,11 +30,13 @@
#define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_ENTRY 4
#define SJA1105ET_SIZE_GENERAL_PARAMS_ENTRY 40
#define SJA1105ET_SIZE_AVB_PARAMS_ENTRY 12
+#define SJA1105ET_SIZE_CBS_ENTRY 16
#define SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY 20
#define SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY 32
#define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY 16
#define SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY 44
#define SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY 16
+#define SJA1105PQRS_SIZE_CBS_ENTRY 20
/* UM10944.pdf Page 11, Table 2. Configuration Blocks */
enum {
@@ -56,6 +58,7 @@ enum {
BLKID_AVB_PARAMS = 0x10,
BLKID_GENERAL_PARAMS = 0x11,
BLKID_RETAGGING = 0x12,
+ BLKID_CBS = 0x13,
BLKID_XMII_PARAMS = 0x4E,
};
@@ -78,6 +81,7 @@ enum sja1105_blk_idx {
BLK_IDX_AVB_PARAMS,
BLK_IDX_GENERAL_PARAMS,
BLK_IDX_RETAGGING,
+ BLK_IDX_CBS,
BLK_IDX_XMII_PARAMS,
BLK_IDX_MAX,
/* Fake block indices that are only valid for dynamic access */
@@ -105,6 +109,8 @@ enum sja1105_blk_idx {
#define SJA1105_MAX_RETAGGING_COUNT 32
#define SJA1105_MAX_XMII_PARAMS_COUNT 1
#define SJA1105_MAX_AVB_PARAMS_COUNT 1
+#define SJA1105ET_MAX_CBS_COUNT 10
+#define SJA1105PQRS_MAX_CBS_COUNT 16
#define SJA1105_MAX_FRAME_MEMORY 929
#define SJA1105_MAX_FRAME_MEMORY_RETAGGING 910
@@ -289,6 +295,15 @@ struct sja1105_retagging_entry {
u64 destports;
};
+struct sja1105_cbs_entry {
+ u64 port;
+ u64 prio;
+ u64 credit_hi;
+ u64 credit_lo;
+ u64 send_slope;
+ u64 idle_slope;
+};
+
struct sja1105_xmii_params_entry {
u64 phy_mac[5];
u64 xmii_mode[5];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index e5ee9103fefb..b61e47bc16e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -34,7 +34,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
+ en_rep.o en/rep/bond.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
en/mapping.o esw/chains.o en/tc_tun.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
@@ -46,6 +47,10 @@ mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
+ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 8ecac81a385d..a700f3c86899 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -76,58 +76,59 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p,
.v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
MASK_VAL_L2(u16, ethertype, ethertype);
+ MASK_VAL_L2(u8, ip_version, ip_version);
PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
PRINT_MASKED_VAL(ethertype, p, "%04x");
- if (ethertype.m == 0xffff) {
- if (ethertype.v == ETH_P_IP) {
+ if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) ||
+ (ip_version.m == 0xf && ip_version.v == 4)) {
#define MASK_VAL_L2_BE(type, name, fld) \
MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
- MASK_VAL_L2_BE(u32, src_ipv4,
- src_ipv4_src_ipv6.ipv4_layout.ipv4);
- MASK_VAL_L2_BE(u32, dst_ipv4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
- PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
- "%pI4");
- PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
- "%pI4");
- } else if (ethertype.v == ETH_P_IPV6) {
- static const struct in6_addr full_ones = {
- .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff),
- __constant_htonl(0xffffffff)},
- };
- DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
- DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) ||
+ (ip_version.m == 0xf && ip_version.v == 6)) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
- memcpy(src_ipv6.m.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(src_ipv6.m));
- memcpy(dst_ipv6.m.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(dst_ipv6.m));
- memcpy(src_ipv6.v.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(src_ipv6.v));
- memcpy(dst_ipv6.v.in6_u.u6_addr8,
- MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(dst_ipv6.v));
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
- if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
- trace_seq_printf(p, "src_ipv6=%pI6 ",
- &src_ipv6.v);
- if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
- trace_seq_printf(p, "dst_ipv6=%pI6 ",
- &dst_ipv6.v);
- }
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
}
#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..bdb71332cbf2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+ struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct list_head metadata_list;
+};
+
+struct mlx5e_rep_bond_slave_entry {
+ struct list_head list;
+ struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+ struct list_head list; /* link to global list of rep_bond_metadata */
+ struct mlx5_eswitch *esw;
+ /* private of uplink holding rep bond metadata list */
+ struct net_device *lag_dev;
+ u32 metadata_reg_c_0;
+
+ struct list_head slaves_list; /* slaves list */
+ int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_metadata *found = NULL;
+ struct mlx5e_rep_bond_metadata *cur;
+
+ list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+ if (cur->lag_dev == lag_dev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+ const struct net_device *netdev)
+{
+ struct mlx5e_rep_bond_slave_entry *found = NULL;
+ struct mlx5e_rep_bond_slave_entry *cur;
+
+ list_for_each_entry(cur, &mdata->slaves_list, list) {
+ if (cur->netdev == netdev) {
+ found = cur;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+ netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ list_del(&mdata->list);
+ mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
+ WARN_ON(!list_empty(&mdata->slaves_list));
+ kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+ int err;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata) {
+ /* First netdev becomes a slave; no metadata represents the lag_dev yet. Create one */
+ mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+ if (!mdata)
+ return -ENOMEM;
+
+ mdata->lag_dev = lag_dev;
+ mdata->esw = esw;
+ INIT_LIST_HEAD(&mdata->slaves_list);
+ mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+ if (!mdata->metadata_reg_c_0) {
+ kfree(mdata);
+ return -ENOSPC;
+ }
+ list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+ netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+ mdata->metadata_reg_c_0);
+ }
+
+ s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+ if (!s_entry) {
+ err = -ENOMEM;
+ goto entry_alloc_err;
+ }
+
+ s_entry->netdev = netdev;
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+ mdata->metadata_reg_c_0);
+ if (err)
+ goto ingress_err;
+
+ mdata->slaves++;
+ list_add_tail(&s_entry->list, &mdata->slaves_list);
+ netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ return 0;
+
+ingress_err:
+ kfree(s_entry);
+entry_alloc_err:
+ if (!mdata->slaves)
+ mlx5e_rep_bond_metadata_release(mdata);
+ return err;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev)
+{
+ struct mlx5e_rep_bond_slave_entry *s_entry;
+ struct mlx5e_rep_bond_metadata *mdata;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *priv;
+
+ ASSERT_RTNL();
+
+ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+ if (!mdata)
+ return;
+
+ s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+ if (!s_entry)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+
+ /* Reset bond_metadata to zero first, then reset all ingress/egress
+ * acls and rx rules of the unslaved representor's vport
+ */
+ mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+ mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
+ mlx5e_rep_bond_update(priv, false);
+
+ list_del(&s_entry->list);
+
+ netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+ rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
+ if (--mdata->slaves == 0)
+ mlx5e_rep_bond_metadata_release(mdata);
+ kfree(s_entry);
+}
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ /* The given netdev is either not a representor or not a slave in a LAG configuration */
+ if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+ return false;
+
+ /* Egress acl forward-to-vport is supported only for non-uplink representors */
+ return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct netdev_lag_lower_state_info *lag_info;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+ struct list_head *iter;
+ struct net_device *dev;
+ u16 acl_vport_num;
+ u16 fwd_vport_num;
+ int err;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ info = ptr;
+ lag_info = info->lower_state_info;
+ /* This is not an event of a representor becoming the active slave */
+ if (!lag_info->tx_enabled)
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ fwd_vport_num = rpriv->rep->vport;
+ lag_dev = netdev_master_upper_dev_get(netdev);
+
+ netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+ lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+ /* Point everyone's egress acl to the vport of the active representor */
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ priv = netdev_priv(dev);
+ rpriv = priv->ppriv;
+ acl_vport_num = rpriv->rep->vport;
+ if (acl_vport_num != fwd_vport_num) {
+ /* Only single rx_rule for unique bond_metadata should be
+ * present, delete it if it's saved as passive vport's
+ * rx_rule with destination as passive vport's root_ft
+ */
+ mlx5e_rep_bond_update(priv, true);
+ err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+ fwd_vport_num,
+ acl_vport_num);
+ if (err)
+ netdev_warn(dev,
+ "configure slave vport(%d) egress fwd, err(%d)",
+ acl_vport_num, err);
+ }
+ }
+
+ /* Insert new rx_rule for unique bond_metadata, save it as active vport's
+ * rx_rule with new destination as active vport's root_ft
+ */
+ err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+ if (err)
+ netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+ fwd_vport_num, err);
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *lag_dev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return;
+
+ priv = netdev_priv(netdev);
+ rpriv = priv->ppriv;
+ lag_dev = info->upper_dev;
+
+ netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+ info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+ if (info->linking)
+ mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+ else
+ mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+}
+
+/* Bond device of representors and netdev events are used here in a specific
+ * way to support eswitch vports bonding and to perform failover of an eswitch
+ * vport by modifying the vport's egress acl of lower dev representors. Thus
+ * this also changes the traditional behavior of a lower dev under a bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ mlx5e_rep_changelowerstate_event(netdev, ptr);
+ break;
+ case NETDEV_CHANGEUPPER:
+ mlx5e_rep_changeupper_event(netdev, ptr);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+/* If HW supports eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv;
+ int ret = 0;
+
+ priv = netdev_priv(netdev);
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+ goto out;
+
+ uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+ if (!uplink_priv->bond) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
+ uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+ ret = register_netdevice_notifier_dev_net(netdev,
+ &uplink_priv->bond->nb,
+ &uplink_priv->bond->nn);
+ if (ret) {
+ netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+ kvfree(uplink_priv->bond);
+ uplink_priv->bond = NULL;
+ }
+
+out:
+ return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+ !rpriv->uplink_priv.bond)
+ return;
+
+ unregister_netdevice_notifier_dev_net(rpriv->netdev,
+ &rpriv->uplink_priv.bond->nb,
+ &rpriv->uplink_priv.bond->nn);
+ kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 995b2ef1fb3b..afc19dca1f5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -119,7 +119,7 @@ mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
}
static int
-mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
+mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
struct flow_rule *rule)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -134,10 +134,8 @@ mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
flow_rule_match_basic(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
- ntohs(match.mask->n_proto));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ntohs(match.key->n_proto));
+ mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
+ headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -533,7 +531,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->counter = entry->counter;
attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
- mlx5_tc_ct_set_tuple_match(spec, flow_rule);
+ mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
entry->zone & MLX5_CT_ZONE_MASK,
MLX5_CT_ZONE_MASK);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index e99382f58807..7cce85faa16f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -512,6 +512,13 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_basic key_basic = {};
+ struct flow_dissector_key_basic mask_basic = {
+ .n_proto = htons(0xFFFF),
+ };
+ struct flow_match_basic match_basic = {
+ .key = &key_basic, .mask = &mask_basic,
+ };
struct flow_match_control match;
u16 addr_type;
@@ -537,10 +544,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.key->dst));
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IP);
+ key_basic.n_proto = htons(ETH_P_IP);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
struct flow_match_ipv6_addrs match;
@@ -563,10 +569,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
&match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
- ethertype);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ETH_P_IPV6);
+ key_basic.n_proto = htons(ETH_P_IPV6);
+ mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+ headers_c, headers_v);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4e13e37a9ecd..af89a4803c7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -854,6 +854,24 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
return 0;
}
+static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (!rpriv->vport_rx_rule)
+ return;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rpriv->vport_rx_rule = NULL;
+}
+
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+{
+ rep_vport_rx_rule_destroy(priv);
+
+ return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv);
+}
+
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -918,9 +936,7 @@ err_close_drop_rq:
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
@@ -959,16 +975,18 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+ mlx5e_rep_bond_init(rpriv);
err = mlx5e_rep_tc_netdevice_event_register(rpriv);
if (err) {
mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
err);
- goto tc_rep_cleanup;
+ goto err_event_reg;
}
return 0;
-tc_rep_cleanup:
+err_event_reg:
+ mlx5e_rep_bond_cleanup(rpriv);
mlx5e_rep_tc_cleanup(rpriv);
return err;
}
@@ -1001,7 +1019,7 @@ static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
{
mlx5e_rep_tc_netdevice_event_unregister(rpriv);
mlx5e_rep_indr_clean_block_privs(rpriv);
-
+ mlx5e_rep_bond_cleanup(rpriv);
mlx5e_rep_tc_cleanup(rpriv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 1c4af8522467..da9f1686d525 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -56,6 +56,7 @@ struct mlx5e_neigh_update_table {
};
struct mlx5_tc_ct_priv;
+struct mlx5e_rep_bond;
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
* the uplink's VFs
@@ -89,6 +90,9 @@ struct mlx5_rep_uplink_priv {
struct mapping_ctx *tunnel_enc_opts_mapping;
struct mlx5_tc_ct_priv *ct_priv;
+
+ /* support eswitch vports bonding */
+ struct mlx5e_rep_bond *bond;
};
struct mlx5e_rep_priv {
@@ -211,6 +215,15 @@ struct mlx5e_rep_sq {
void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+ struct net_device *lag_dev);
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+ const struct net_device *netdev,
+ const struct net_device *lag_dev);
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
+
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 571da14809fe..0f119c08b835 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -50,6 +50,7 @@
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
+#include <net/bonding.h>
#include "en.h"
#include "en_rep.h"
#include "en/rep/tc.h"
@@ -145,6 +146,7 @@ struct mlx5e_tc_flow {
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
+ struct net_device *orig_dev; /* netdev adding flow first */
int tmp_efi_index;
struct list_head tmp_list; /* temporary flow list used by neigh update */
refcount_t refcnt;
@@ -2018,6 +2020,32 @@ u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
return flow->tunnel_id;
}
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v)
+{
+ bool ip_version_cap;
+
+ ip_version_cap = outer ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+
+ if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
+ (match->key->n_proto == htons(ETH_P_IP) ||
+ match->key->n_proto == htons(ETH_P_IPV6))) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
+ match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(match->mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(match->key->n_proto));
+ }
+}
+
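+/* Editor's illustration, not part of the original patch: on hardware with
+ * the outer_ip_version capability, a matcher with mask->n_proto ==
+ * htons(0xFFFF) and key->n_proto == htons(ETH_P_IP) now produces
+ *   ip_version: mask 0xf, value 4
+ * instead of
+ *   ethertype: mask 0xffff, value 0x0800
+ * which is why print_lyr_2_4_hdrs() and modify_header_match_supported()
+ * elsewhere in this patch also learn to recognize IP traffic matched by
+ * ip_version alone.
+ */
+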
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
@@ -2239,10 +2267,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_match_basic match;
flow_rule_match_basic(rule, &match);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
- ntohs(match.mask->n_proto));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
- ntohs(match.key->n_proto));
+ mlx5e_tc_set_ethertype(priv->mdev, &match,
+ match_level == outer_match_level,
+ headers_c, headers_v);
if (match.mask->n_proto)
*match_level = MLX5_MATCH_L2;
@@ -3118,16 +3145,19 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
{
const struct flow_action_entry *act;
bool modify_ip_header;
+ void *headers_c;
void *headers_v;
u16 ethertype;
u8 ip_proto;
int i, err;
+ headers_c = get_match_headers_criteria(actions, spec);
headers_v = get_match_headers_value(actions, spec);
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */
- if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+ if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
+ ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
goto out_ok;
modify_ip_header = false;
@@ -3758,6 +3788,28 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
return 0;
}
+static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
+ struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
static int add_vlan_push_action(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr *attr,
struct net_device **out_dev,
@@ -4073,7 +4125,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
} else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
- struct net_device *uplink_upper;
if (is_duplicated_output_device(priv->netdev,
out_dev,
@@ -4085,14 +4136,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
ifindexes[if_count] = out_dev->ifindex;
if_count++;
- rcu_read_lock();
- uplink_upper =
- netdev_master_upper_dev_get_rcu(uplink_dev);
- if (uplink_upper &&
- netif_is_lag_master(uplink_upper) &&
- uplink_upper == out_dev)
- out_dev = uplink_dev;
- rcu_read_unlock();
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
if (is_vlan_dev(out_dev)) {
err = add_vlan_push_action(priv, attr,
@@ -4624,11 +4670,21 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
return err;
}
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
+ struct mlx5e_rep_priv *rpriv)
+{
+ /* Offloaded flow rule is allowed to duplicate on non-uplink representor
+ * sharing tc block with other slaves of a lag device.
+ */
+ return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags)
{
struct netlink_ext_ack *extack = f->common.extack;
struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_tc_flow *flow;
int err = 0;
@@ -4636,6 +4692,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
rcu_read_unlock();
if (flow) {
+ /* Same flow rule offloaded to non-uplink representor sharing tc block,
+ * just return 0.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
+ goto out;
+
NL_SET_ERR_MSG_MOD(extack,
"flow cookie already exists, ignoring");
netdev_warn_once(priv->netdev,
@@ -4650,6 +4712,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
if (err)
goto out;
+ /* Flow rule offloaded to non-uplink representor sharing tc block,
+ * set the flow's owner dev.
+ */
+ if (is_flow_rule_duplicate_allowed(dev, rpriv))
+ flow->orig_dev = dev;
+
err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
if (err)
goto err_free;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 037aa73bf9ab..5c330b0cae21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -170,6 +170,10 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
struct mlx5e_tc_flow;
u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+ struct flow_match_basic *match, bool outer,
+ void *headers_c, void *headers_v);
+
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
new file mode 100644
index 000000000000..d46f8b225ebe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
+ mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
+ vport->egress.legacy.drop_rule = NULL;
+ }
+}
+
+static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *drop_grp;
+ u32 *flow_group_in;
+ int err = 0;
+
+ err = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (err)
+ return err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(drop_grp)) {
+ err = PTR_ERR(drop_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_grp_err;
+ }
+
+ vport->egress.legacy.drop_grp = drop_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_grp_err:
+ kvfree(flow_group_in);
+alloc_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+ return err;
+}
+
+static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) {
+ mlx5_destroy_flow_group(vport->egress.legacy.drop_grp);
+ vport->egress.legacy.drop_grp = NULL;
+ }
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_fc *drop_counter = NULL;
+ struct mlx5_flow_act flow_act = {};
+ /* The egress acl table contains 2 rules:
+ * 1)Allow traffic with vlan_tag=vst_vlan_id
+ * 2)Drop all other traffic.
+ */
+ int table_size = 2;
+ int dest_num = 0;
+ int err = 0;
+
+ if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
+ drop_counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(drop_counter))
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule counter err(%ld)\n",
+ vport->vport, PTR_ERR(drop_counter));
+ vport->egress.legacy.drop_counter = drop_counter;
+ }
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+
+ if (!vport->info.vlan && !vport->info.qos) {
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!IS_ERR_OR_NULL(vport->egress.acl))
+ return 0;
+
+ vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ table_size);
+ if (IS_ERR_OR_NULL(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ goto out;
+ }
+
+ err = esw_acl_egress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+
+ esw_debug(esw->dev,
+ "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ /* Allowed vlan rule */
+ err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+ if (err)
+ goto out;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach egress drop flow counter */
+ if (!IS_ERR_OR_NULL(drop_counter)) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->egress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->egress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->egress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.legacy.drop_rule = NULL;
+ goto out;
+ }
+
+ return err;
+
+out:
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ return err;
+}
+
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+ esw_acl_egress_lgcy_rules_destroy(vport);
+ esw_acl_egress_lgcy_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+
+clean_drop_counter:
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) {
+ mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
+ vport->egress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
new file mode 100644
index 000000000000..07b2acd7e6b3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.fwd_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.fwd_rule);
+ vport->egress.offloads.fwd_rule = NULL;
+}
+
+static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+
+ esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n",
+ vport->vport, fwd_dest->vport.num);
+
+ /* Delete the old egress forward-to-vport rule if any */
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
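+	/* No match spec is passed, so this FTE matches all egress traffic
+	 * and steers it to fwd_dest.
+	 */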
+ vport->egress.offloads.fwd_rule =
+ mlx5_add_flow_rules(vport->egress.acl, NULL,
+ &flow_act, fwd_dest, 1);
+ if (IS_ERR(vport->egress.offloads.fwd_rule)) {
+ err = PTR_ERR(vport->egress.offloads.fwd_rule);
+ esw_warn(esw->dev,
+ "vport(%d) failed to add fwd2vport acl rule err(%d)\n",
+ vport->vport, err);
+ vport->egress.offloads.fwd_rule = NULL;
+ }
+
+ return err;
+}
+
+static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest)
+{
+ int err = 0;
+ int action;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+		/* For prio tag mode, there is only one FTE:
+		 * 1) prio tag packets - pop the prio tag VLAN, then allow.
+		 * Unmatched traffic is allowed by default.
+		 */
+ esw_debug(esw->dev,
+ "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+ action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ /* prio tag vlan rule - pop it so vport receives untagged packets */
+ err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action);
+ if (err)
+ goto prio_err;
+ }
+
+ if (fwd_dest) {
+ err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest);
+ if (err)
+ goto fwd_err;
+ }
+
+ return 0;
+
+fwd_err:
+ esw_acl_egress_vlan_destroy(vport);
+prio_err:
+ return err;
+}
+
+static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
+{
+ esw_acl_egress_vlan_destroy(vport);
+ esw_acl_egress_ofld_fwd2vport_destroy(vport);
+}
+
+static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fwd_grp;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+ ret = esw_acl_egress_vlan_grp_create(esw, vport);
+ if (ret)
+ return ret;
+
+ flow_index++;
+ }
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ goto out;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in) {
+ ret = -ENOMEM;
+ goto fwd_grp_err;
+ }
+
+	/* This group holds a single FTE that forwards all packets to the
+	 * other vport when vport bonding is supported.
+	 */
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+ fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(fwd_grp)) {
+ ret = PTR_ERR(fwd_grp);
+ esw_warn(esw->dev,
+ "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n",
+ vport->vport, ret);
+ kvfree(flow_group_in);
+ goto fwd_grp_err;
+ }
+ vport->egress.offloads.fwd_grp = fwd_grp;
+ kvfree(flow_group_in);
+ return 0;
+
+fwd_grp_err:
+ esw_acl_egress_vlan_grp_destroy(vport);
+out:
+ return ret;
+}
+
+static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
+ vport->egress.offloads.fwd_grp = NULL;
+ }
+ esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int table_size = 0;
+ int err;
+
+ if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) &&
+ !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ return 0;
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+
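+	/* Size the table for exactly the FTEs that may be installed: an
+	 * optional fwd2vport rule and an optional prio-tag pop rule.
+	 */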
+ if (mlx5_esw_acl_egress_fwd2vport_supported(esw))
+ table_size++;
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
+ table_size++;
+ vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
+ if (IS_ERR_OR_NULL(vport->egress.acl)) {
+ err = PTR_ERR(vport->egress.acl);
+ vport->egress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_egress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport);
+
+ err = esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_egress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_egress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport)
+{
+ esw_acl_egress_ofld_rules_destroy(vport);
+ esw_acl_egress_ofld_groups_destroy(vport);
+ esw_acl_egress_table_destroy(vport);
+}
+
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num)
+{
+ struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num);
+ struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num);
+ struct mlx5_flow_destination fwd_dest = {};
+
+ if (IS_ERR(active_vport))
+ return PTR_ERR(active_vport);
+ if (IS_ERR(passive_vport))
+ return PTR_ERR(passive_vport);
+
+ /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */
+ esw_acl_egress_ofld_rules_destroy(active_vport);
+ esw_acl_egress_ofld_rules_create(esw, active_vport, NULL);
+
+ /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */
+ esw_acl_egress_ofld_rules_destroy(passive_vport);
+ fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ fwd_dest.vport.num = active_vport_num;
+ fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest);
+}
+
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ esw_acl_egress_ofld_rules_destroy(vport);
+ return esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
new file mode 100644
index 000000000000..22f4c1c28006
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ int acl_supported;
+ int vport_index;
+ int err;
+
+ acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
+ MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) :
+ MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support);
+
+ if (!acl_supported)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
+
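+	/* ACL namespaces are instantiated per vport; translate the vport
+	 * number to the eswitch vport index the steering core expects.
+	 */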
+ vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
+ vport_num);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ acl = mlx5_create_vport_flow_table(root_ns, 0, size, 0, vport_num);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num,
+ ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err);
+ }
+ return acl;
+}
+
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ if (vport->egress.allowed_vlan)
+ return -EEXIST;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
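+	/* Match on the presence of a customer VLAN tag and on the VLAN ID
+	 * itself, so only traffic tagged with vlan_id hits this rule.
+	 */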
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = flow_action;
+ vport->egress.allowed_vlan =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, fwd_dest, 0);
+ if (IS_ERR(vport->egress.allowed_vlan)) {
+ err = PTR_ERR(vport->egress.allowed_vlan);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress vlan rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.allowed_vlan = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+ mlx5_del_flow_rules(vport->egress.allowed_vlan);
+ vport->egress.allowed_vlan = NULL;
+ }
+}
+
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *vlan_grp;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+ if (IS_ERR(vlan_grp)) {
+ ret = PTR_ERR(vlan_grp);
+ esw_warn(esw->dev,
+ "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n",
+ vport->vport, ret);
+ goto out;
+ }
+ vport->egress.vlan_grp = vlan_grp;
+
+out:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) {
+ mlx5_destroy_flow_group(vport->egress.vlan_grp);
+ vport->egress.vlan_grp = NULL;
+ }
+}
+
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ return;
+
+ mlx5_destroy_flow_table(vport->egress.acl);
+ vport->egress.acl = NULL;
+}
+
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.acl)
+ return;
+
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+}
+
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->ingress.allow_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.allow_rule);
+ vport->ingress.allow_rule = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
new file mode 100644
index 000000000000..8dc4cab66a71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_HELPER_H__
+#define __MLX5_ESWITCH_ACL_HELPER_H__
+
+#include "eswitch.h"
+
+/* General acl helper functions */
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size);
+
+/* Egress acl helper functions */
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ struct mlx5_flow_destination *fwd_dest,
+ u16 vlan_id, u32 flow_action);
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport);
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport);
+
+/* Ingress acl helper functions */
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport);
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
new file mode 100644
index 000000000000..9bda4fe2eafa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.drop_rule) {
+ mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
+ vport->ingress.legacy.drop_rule = NULL;
+ }
+ esw_acl_ingress_allow_rule_destroy(vport);
+}
+
+static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
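+	/* The four groups below are laid out in order of precedence, one FTE
+	 * each: untagged+spoofchk, untagged only, spoofchk only, and a final
+	 * catch-all drop group.
+	 */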
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto spoof_err;
+ }
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, err);
+ goto untagged_err;
+ }
+ vport->ingress.legacy.allow_untagged_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto allow_spoof_err;
+ }
+ vport->ingress.legacy.allow_spoofchk_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto drop_err;
+ }
+ vport->ingress.legacy.drop_grp = g;
+ kvfree(flow_group_in);
+ return 0;
+
+drop_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+allow_spoof_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+untagged_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+spoof_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.legacy.allow_spoofchk_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+ vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_only_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+ vport->ingress.legacy.allow_untagged_only_grp = NULL;
+ }
+ if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+ vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+ }
+ if (vport->ingress.legacy.drop_grp) {
+ mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
+ vport->ingress.legacy.drop_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_destination drop_ctr_dst = {};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec = NULL;
+ struct mlx5_fc *counter = NULL;
+	/* The ingress acl table contains 4 groups
+	 * (2 active rules at the same time -
+	 * 1 allow rule from one of the first 3 groups and
+	 * 1 drop rule from the last group):
+	 * 1) Allow untagged traffic with smac=original mac.
+	 * 2) Allow untagged traffic.
+	 * 3) Allow traffic with smac=original mac.
+	 * 4) Drop all other traffic.
+	 */
+ int table_size = 4;
+ int dest_num = 0;
+ int err = 0;
+ u8 *smac_v;
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+
+ if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
+ counter = mlx5_fc_create(esw->dev, false);
+ if (IS_ERR(counter))
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule counter failed\n",
+ vport->vport);
+ vport->ingress.legacy.drop_counter = counter;
+ }
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ return 0;
+ }
+
+ if (!vport->ingress.acl) {
+ vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ table_size);
+ if (IS_ERR_OR_NULL(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_lgcy_groups_create(esw, vport);
+ if (err)
+ goto out;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (vport->info.vlan || vport->info.qos)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+
+ if (vport->info.spoofchk) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.smac_15_0);
+ smac_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+	/* Create ingress allow rule using the vlan/spoofchk match built above */
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ goto out;
+ }
+
+ memset(&flow_act, 0, sizeof(flow_act));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ /* Attach drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->ingress.legacy.drop_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, NULL,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->ingress.legacy.drop_rule)) {
+ err = PTR_ERR(vport->ingress.legacy.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.legacy.drop_rule = NULL;
+ goto out;
+ }
+ kvfree(spec);
+ return 0;
+
+out:
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
+ kvfree(spec);
+ return err;
+}
+
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->ingress.acl))
+ goto clean_drop_counter;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+ esw_acl_ingress_lgcy_rules_destroy(vport);
+ esw_acl_ingress_lgcy_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+
+clean_drop_counter:
+ if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) {
+ mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
+ vport->ingress.legacy.drop_counter = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
new file mode 100644
index 000000000000..4e55d7225a26
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static bool
+esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
+ const struct mlx5_vport *vport)
+{
+ return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ mlx5_eswitch_is_vf_vport(esw, vport->vport));
+}
+
+static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+	/* For prio tag mode, there is only one FTE:
+	 * 1) Untagged packets - push the prio tag VLAN and modify metadata if
+	 *    required, then allow.
+	 * Unmatched traffic is allowed by default.
+	 */
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Untagged packets - push prio tag VLAN, allow */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.vlan[0].ethtype = ETH_P_8021Q;
+ flow_act.vlan[0].vid = 0;
+ flow_act.vlan[0].prio = 0;
+
+ if (vport->ingress.offloads.modify_metadata_rule) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ }
+
+ vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress untagged allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+ struct mlx5_flow_act flow_act = {};
+ int err = 0;
+ u32 key;
+
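+	/* The match value for this vport is already shifted into the upper
+	 * bits of reg_c_0; shift it back down so the raw value can be
+	 * programmed at ESW_SOURCE_PORT_METADATA_OFFSET below.
+	 */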
+ key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
+ key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
+
+ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, action, field,
+ MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+ MLX5_SET(set_action_in, action, data, key);
+ MLX5_SET(set_action_in, action, offset,
+ ESW_SOURCE_PORT_METADATA_OFFSET);
+ MLX5_SET(set_action_in, action, length,
+ ESW_SOURCE_PORT_METADATA_BITS);
+
+ vport->ingress.offloads.modify_metadata =
+ mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ 1, action);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata);
+ esw_warn(esw->dev,
+ "failed to alloc modify header for vport %d ingress acl (%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+ vport->ingress.offloads.modify_metadata_rule =
+ mlx5_add_flow_rules(vport->ingress.acl,
+ NULL, &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
+ err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
+ esw_warn(esw->dev,
+ "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+ vport->vport, err);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+ }
+ return err;
+}
+
+static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!vport->ingress.offloads.modify_metadata_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
+ mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+ vport->ingress.offloads.modify_metadata_rule = NULL;
+}
+
+static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int err;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ err = esw_acl_ingress_mod_metadata_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress modify metadata, err(%d)\n",
+ vport->vport, err);
+ return err;
+ }
+ }
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+ err = esw_acl_ingress_prio_tag_create(esw, vport);
+ if (err) {
+ esw_warn(esw->dev,
+ "vport(%d) create ingress prio tag rule, err(%d)\n",
+ vport->vport, err);
+ goto prio_tag_err;
+ }
+ }
+
+ return 0;
+
+prio_tag_err:
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+ return err;
+}
+
+static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_allow_rule_destroy(vport);
+ esw_acl_ingress_mod_metadata_destroy(esw, vport);
+}
+
+static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ u32 flow_index = 0;
+ int ret = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+		/* This group holds the FTE that matches untagged packets when
+		 * prio_tag is enabled.
+		 */
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ flow_group_in, match_criteria);
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+ vport->vport, ret);
+ goto prio_tag_err;
+ }
+ vport->ingress.offloads.metadata_prio_tag_grp = g;
+ flow_index++;
+ }
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		/* This group holds an FTE with no match criteria that adds
+		 * metadata for tagged packets when prio-tag is enabled (as a
+		 * fall-through), or for all traffic when prio-tag is disabled.
+		 */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+ g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+ if (IS_ERR(g)) {
+ ret = PTR_ERR(g);
+ esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+ vport->vport, ret);
+ goto metadata_err;
+ }
+ vport->ingress.offloads.metadata_allmatch_grp = g;
+ }
+
+ kvfree(flow_group_in);
+ return 0;
+
+metadata_err:
+ if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+prio_tag_err:
+ kvfree(flow_group_in);
+ return ret;
+}
+
+static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+ if (vport->ingress.offloads.metadata_allmatch_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
+ vport->ingress.offloads.metadata_allmatch_grp = NULL;
+ }
+
+ if (vport->ingress.offloads.metadata_prio_tag_grp) {
+ mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+ vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+ }
+}
+
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int num_ftes = 0;
+ int err;
+
+ if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ !esw_acl_ingress_prio_tag_enabled(esw, vport))
+ return 0;
+
+ esw_acl_ingress_allow_rule_destroy(vport);
+
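+	/* Size the table for exactly the FTEs that may be installed: an
+	 * optional metadata set rule and an optional prio-tag rule.
+	 */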
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+ num_ftes++;
+ if (esw_acl_ingress_prio_tag_enabled(esw, vport))
+ num_ftes++;
+
+ vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ num_ftes);
+ if (IS_ERR_OR_NULL(vport->ingress.acl)) {
+ err = PTR_ERR(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ return err;
+ }
+
+ err = esw_acl_ingress_ofld_groups_create(esw, vport);
+ if (err)
+ goto group_err;
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules\n", vport->vport);
+
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto rules_err;
+
+ return 0;
+
+rules_err:
+ esw_acl_ingress_ofld_groups_destroy(vport);
+group_err:
+ esw_acl_ingress_table_destroy(vport);
+ return err;
+}
+
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+ esw_acl_ingress_ofld_groups_destroy(vport);
+ esw_acl_ingress_table_destroy(vport);
+}
+
+/* Caller must hold rtnl_lock */
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata)
+{
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+ int err;
+
+ if (WARN_ON_ONCE(IS_ERR(vport))) {
+ esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+ err = PTR_ERR(vport);
+ goto out;
+ }
+
+ esw_acl_ingress_ofld_rules_destroy(esw, vport);
+
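+	/* A metadata value of 0 means "restore this vport's own default
+	 * metadata".
+	 */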
+ vport->metadata = metadata ? metadata : vport->default_metadata;
+
+ /* Recreate ingress acl rules with vport->metadata */
+ err = esw_acl_ingress_ofld_rules_create(esw, vport);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ vport->metadata = vport->default_metadata;
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
new file mode 100644
index 000000000000..44c152da3d83
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_LGCY_H__
+#define __MLX5_ESWITCH_ACL_LGCY_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
new file mode 100644
index 000000000000..c57869b93d60
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_OFLD_H__
+#define __MLX5_ESWITCH_ACL_OFLD_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+ u16 passive_vport_num);
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
+
+static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw)
+{
+ return esw && esw->mode == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport);
+}
+
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+ u32 metadata);
+
+#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ac79b7c9aeb3..1116ab9bea6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include "esw/acl/lgcy.h"
#include "mlx5_core.h"
#include "lib/eq.h"
#include "eswitch.h"
@@ -936,512 +937,6 @@ static void esw_vport_change_handler(struct work_struct *work)
mutex_unlock(&esw->state_lock);
}
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_group *vlan_grp = NULL;
- struct mlx5_flow_group *drop_grp = NULL;
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *acl;
- void *match_criteria;
- u32 *flow_group_in;
- /* The egress acl table contains 2 rules:
- * 1)Allow traffic with vlan_tag=vst_vlan_id
- * 2)Drop all other traffic.
- */
- int table_size = 2;
- int err = 0;
-
- if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- if (!IS_ERR_OR_NULL(vport->egress.acl))
- return 0;
-
- esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
- vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
-
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- mlx5_eswitch_vport_num_to_index(esw, vport->vport));
- if (!root_ns) {
- esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
- return -EOPNOTSUPP;
- }
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
- if (IS_ERR(acl)) {
- err = PTR_ERR(acl);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
- vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
- if (IS_ERR(vlan_grp)) {
- err = PTR_ERR(vlan_grp);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
- drop_grp = mlx5_create_flow_group(acl, flow_group_in);
- if (IS_ERR(drop_grp)) {
- err = PTR_ERR(drop_grp);
- esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
- vport->vport, err);
- goto out;
- }
-
- vport->egress.acl = acl;
- vport->egress.drop_grp = drop_grp;
- vport->egress.allowed_vlans_grp = vlan_grp;
-out:
- kvfree(flow_group_in);
- if (err && !IS_ERR_OR_NULL(vlan_grp))
- mlx5_destroy_flow_group(vlan_grp);
- if (err && !IS_ERR_OR_NULL(acl))
- mlx5_destroy_flow_table(acl);
- return err;
-}
-
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
- mlx5_del_flow_rules(vport->egress.allowed_vlan);
- vport->egress.allowed_vlan = NULL;
- }
-
- if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
- mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
- vport->egress.legacy.drop_rule = NULL;
- }
-}
-
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (IS_ERR_OR_NULL(vport->egress.acl))
- return;
-
- esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
-
- esw_vport_cleanup_egress_rules(esw, vport);
- mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
- mlx5_destroy_flow_group(vport->egress.drop_grp);
- mlx5_destroy_flow_table(vport->egress.acl);
- vport->egress.allowed_vlans_grp = NULL;
- vport->egress.drop_grp = NULL;
- vport->egress.acl = NULL;
-}
-
-static int
-esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_group *g;
- void *match_criteria;
- u32 *flow_group_in;
- int err;
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
- vport->vport, err);
- goto spoof_err;
- }
- vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
- vport->vport, err);
- goto untagged_err;
- }
- vport->ingress.legacy.allow_untagged_only_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
- vport->vport, err);
- goto allow_spoof_err;
- }
- vport->ingress.legacy.allow_spoofchk_only_grp = g;
-
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- err = PTR_ERR(g);
- esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
- vport->vport, err);
- goto drop_err;
- }
- vport->ingress.legacy.drop_grp = g;
- kvfree(flow_group_in);
- return 0;
-
-drop_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
- vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
- }
-allow_spoof_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
- vport->ingress.legacy.allow_untagged_only_grp = NULL;
- }
-untagged_err:
- if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
- vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
- }
-spoof_err:
- kvfree(flow_group_in);
- return err;
-}
-
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport, int table_size)
-{
- struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
- struct mlx5_flow_table *acl;
- int vport_index;
- int err;
-
- if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
- return -EOPNOTSUPP;
-
- esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
- vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
-
- vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport);
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- vport_index);
- if (!root_ns) {
- esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n",
- vport->vport);
- return -EOPNOTSUPP;
- }
-
- acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
- if (IS_ERR(acl)) {
- err = PTR_ERR(acl);
- esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n",
- vport->vport, err);
- return err;
- }
- vport->ingress.acl = acl;
- return 0;
-}
-
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport)
-{
- if (!vport->ingress.acl)
- return;
-
- mlx5_destroy_flow_table(vport->ingress.acl);
- vport->ingress.acl = NULL;
-}
-
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (vport->ingress.legacy.drop_rule) {
- mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
- vport->ingress.legacy.drop_rule = NULL;
- }
-
- if (vport->ingress.allow_rule) {
- mlx5_del_flow_rules(vport->ingress.allow_rule);
- vport->ingress.allow_rule = NULL;
- }
-}
-
-static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (!vport->ingress.acl)
- return;
-
- esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
-
- esw_vport_cleanup_ingress_rules(esw, vport);
- if (vport->ingress.legacy.allow_spoofchk_only_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
- vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
- }
- if (vport->ingress.legacy.allow_untagged_only_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
- vport->ingress.legacy.allow_untagged_only_grp = NULL;
- }
- if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
- vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
- }
- if (vport->ingress.legacy.drop_grp) {
- mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
- vport->ingress.legacy.drop_grp = NULL;
- }
- esw_vport_destroy_ingress_acl_table(vport);
-}
-
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- struct mlx5_fc *counter = vport->ingress.legacy.drop_counter;
- struct mlx5_flow_destination drop_ctr_dst = {0};
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec = NULL;
- int dest_num = 0;
- int err = 0;
- u8 *smac_v;
-
- /* The ingress acl table contains 4 groups
- * (2 active rules at the same time -
- * 1 allow rule from one of the first 3 groups.
- * 1 drop rule from the last group):
- * 1)Allow untagged traffic with smac=original mac.
- * 2)Allow untagged traffic.
- * 3)Allow traffic with smac=original mac.
- * 4)Drop all other traffic.
- */
- int table_size = 4;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- return 0;
- }
-
- if (!vport->ingress.acl) {
- err = esw_vport_create_ingress_acl_table(esw, vport, table_size);
- if (err) {
- esw_warn(esw->dev,
- "vport[%d] enable ingress acl err (%d)\n",
- err, vport->vport);
- return err;
- }
-
- err = esw_vport_create_legacy_ingress_acl_groups(esw, vport);
- if (err)
- goto out;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->info.vlan, vport->info.qos);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec) {
- err = -ENOMEM;
- goto out;
- }
-
- if (vport->info.vlan || vport->info.qos)
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
-
- if (vport->info.spoofchk) {
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
- smac_v = MLX5_ADDR_OF(fte_match_param,
- spec->match_value,
- outer_headers.smac_47_16);
- ether_addr_copy(smac_v, vport->info.mac);
- }
-
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- vport->ingress.allow_rule =
- mlx5_add_flow_rules(vport->ingress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.allow_rule)) {
- err = PTR_ERR(vport->ingress.allow_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress allow rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.allow_rule = NULL;
- goto out;
- }
-
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /* Attach drop flow counter */
- if (counter) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter_id = mlx5_fc_id(counter);
- dst = &drop_ctr_dst;
- dest_num++;
- }
- vport->ingress.legacy.drop_rule =
- mlx5_add_flow_rules(vport->ingress.acl, NULL,
- &flow_act, dst, dest_num);
- if (IS_ERR(vport->ingress.legacy.drop_rule)) {
- err = PTR_ERR(vport->ingress.legacy.drop_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress drop rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.legacy.drop_rule = NULL;
- goto out;
- }
- kvfree(spec);
- return 0;
-
-out:
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- kvfree(spec);
- return err;
-}
-
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u16 vlan_id, u32 flow_action)
-{
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- int err = 0;
-
- if (vport->egress.allowed_vlan)
- return -EEXIST;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return -ENOMEM;
-
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
-
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = flow_action;
- vport->egress.allowed_vlan =
- mlx5_add_flow_rules(vport->egress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->egress.allowed_vlan)) {
- err = PTR_ERR(vport->egress.allowed_vlan);
- esw_warn(esw->dev,
- "vport[%d] configure egress vlan rule failed, err(%d)\n",
- vport->vport, err);
- vport->egress.allowed_vlan = NULL;
- }
-
- kvfree(spec);
- return err;
-}
-
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- struct mlx5_fc *counter = vport->egress.legacy.drop_counter;
- struct mlx5_flow_destination drop_ctr_dst = {0};
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_flow_act flow_act = {0};
- int dest_num = 0;
- int err = 0;
-
- esw_vport_cleanup_egress_rules(esw, vport);
-
- if (!vport->info.vlan && !vport->info.qos) {
- esw_vport_disable_egress_acl(esw, vport);
- return 0;
- }
-
- err = esw_vport_enable_egress_acl(esw, vport);
- if (err) {
- mlx5_core_warn(esw->dev,
- "failed to enable egress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- esw_debug(esw->dev,
- "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->info.vlan, vport->info.qos);
-
- /* Allowed vlan rule */
- err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan,
- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
- if (err)
- return err;
-
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /* Attach egress drop flow counter */
- if (counter) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter_id = mlx5_fc_id(counter);
- dst = &drop_ctr_dst;
- dest_num++;
- }
- vport->egress.legacy.drop_rule =
- mlx5_add_flow_rules(vport->egress.acl, NULL,
- &flow_act, dst, dest_num);
- if (IS_ERR(vport->egress.legacy.drop_rule)) {
- err = PTR_ERR(vport->egress.legacy.drop_rule);
- esw_warn(esw->dev,
- "vport[%d] configure egress drop rule failed, err(%d)\n",
- vport->vport, err);
- vport->egress.legacy.drop_rule = NULL;
- }
-
- return err;
-}
-
static bool element_type_supported(struct mlx5_eswitch *esw, int type)
{
const struct mlx5_core_dev *dev = esw->dev;
@@ -1653,44 +1148,19 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
if (mlx5_esw_is_manager_vport(esw, vport->vport))
return 0;
- if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
- vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
- if (IS_ERR(vport->ingress.legacy.drop_counter)) {
- esw_warn(esw->dev,
- "vport[%d] configure ingress drop rule counter failed\n",
- vport->vport);
- vport->ingress.legacy.drop_counter = NULL;
- }
- }
-
- ret = esw_vport_ingress_config(esw, vport);
+ ret = esw_acl_ingress_lgcy_setup(esw, vport);
if (ret)
goto ingress_err;
- if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
- vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
- if (IS_ERR(vport->egress.legacy.drop_counter)) {
- esw_warn(esw->dev,
- "vport[%d] configure egress drop rule counter failed\n",
- vport->vport);
- vport->egress.legacy.drop_counter = NULL;
- }
- }
-
- ret = esw_vport_egress_config(esw, vport);
+ ret = esw_acl_egress_lgcy_setup(esw, vport);
if (ret)
goto egress_err;
return 0;
egress_err:
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
- vport->egress.legacy.drop_counter = NULL;
-
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
ingress_err:
- mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
- vport->ingress.legacy.drop_counter = NULL;
return ret;
}
@@ -1710,13 +1180,8 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw,
if (mlx5_esw_is_manager_vport(esw, vport->vport))
return;
- esw_vport_disable_egress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
- vport->egress.legacy.drop_counter = NULL;
-
- esw_vport_disable_legacy_ingress_acl(esw, vport);
- mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
- vport->ingress.legacy.drop_counter = NULL;
+ esw_acl_egress_lgcy_cleanup(esw, vport);
+ esw_acl_ingress_lgcy_cleanup(esw, vport);
}
static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
@@ -2265,6 +1730,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->offloads.decap_tbl_lock);
hash_init(esw->offloads.decap_tbl);
atomic64_set(&esw->offloads.num_flows, 0);
+ ida_init(&esw->offloads.vport_metadata_ida);
mutex_init(&esw->state_lock);
mutex_init(&esw->mode_lock);
@@ -2303,6 +1769,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_offloads_cleanup_reps(esw);
mutex_destroy(&esw->mode_lock);
mutex_destroy(&esw->state_lock);
+ ida_destroy(&esw->offloads.vport_metadata_ida);
mutex_destroy(&esw->offloads.mod_hdr.lock);
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
@@ -2348,7 +1815,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
ether_addr_copy(evport->info.mac, mac);
evport->info.node_guid = node_guid;
if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
- err = esw_vport_ingress_config(esw, evport);
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
unlock:
mutex_unlock(&esw->state_lock);
@@ -2430,10 +1897,10 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
evport->info.vlan = vlan;
evport->info.qos = qos;
if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
- err = esw_vport_ingress_config(esw, evport);
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
if (err)
return err;
- err = esw_vport_egress_config(esw, evport);
+ err = esw_acl_egress_lgcy_setup(esw, evport);
}
return err;
@@ -2475,7 +1942,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
"Spoofchk in set while MAC is invalid, vport(%d)\n",
evport->vport);
if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
- err = esw_vport_ingress_config(esw, evport);
+ err = esw_acl_ingress_lgcy_setup(esw, evport);
if (err)
evport->info.spoofchk = pschk;
mutex_unlock(&esw->state_lock);
@@ -2734,7 +2201,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
if (!vport->enabled)
goto unlock;
- if (vport->egress.legacy.drop_counter)
+ if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
&stats->rx_dropped, &bytes);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ccbbea3e0505..a5175e98c0b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -99,13 +99,19 @@ struct vport_ingress {
struct vport_egress {
struct mlx5_flow_table *acl;
- struct mlx5_flow_group *allowed_vlans_grp;
- struct mlx5_flow_group *drop_grp;
struct mlx5_flow_handle *allowed_vlan;
- struct {
- struct mlx5_flow_handle *drop_rule;
- struct mlx5_fc *drop_counter;
- } legacy;
+ struct mlx5_flow_group *vlan_grp;
+ union {
+ struct {
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+ } legacy;
+ struct {
+ struct mlx5_flow_group *fwd_grp;
+ struct mlx5_flow_handle *fwd_rule;
+ } offloads;
+ };
};
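
Editor's note: the legacy and offloads egress state now share storage through this union, since a vport is only ever in one e-switch mode at a time. A minimal access sketch follows; the helper function is hypothetical, not part of the patch, and would live in code that already includes eswitch.h:

/* Hedged sketch (not from the patch): the union members are only valid
 * for the e-switch mode the vport is currently in, so any code touching
 * them must branch on esw->mode first.
 */
static void example_egress_teardown(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport)
{
	if (esw->mode == MLX5_ESWITCH_LEGACY) {
		if (vport->egress.legacy.drop_rule)
			mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
	} else if (vport->egress.offloads.fwd_rule) {
		mlx5_del_flow_rules(vport->egress.offloads.fwd_rule);
	}
}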
struct mlx5_vport_drop_stats {
@@ -143,6 +149,8 @@ struct mlx5_vport {
struct vport_ingress ingress;
struct vport_egress egress;
+ u32 default_metadata;
+ u32 metadata;
struct mlx5_vport_info info;
@@ -218,6 +226,7 @@ struct mlx5_esw_offload {
u8 inline_mode;
atomic64_t num_flows;
enum devlink_eswitch_encap_mode encap;
+ struct ida vport_metadata_ida;
};
/* E-Switch MC FDB table hash node */
@@ -285,18 +294,10 @@ void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- int table_size);
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport);
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport);
+
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
+
int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
u32 rate_mbps);
@@ -458,10 +459,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u16 vlan_id, u32 flow_action);
-
static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw)
{
return esw->qos.enabled;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 554fc64d8ef6..060354bb211a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -31,12 +31,14 @@
*/
#include <linux/etherdevice.h>
+#include <linux/idr.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
+#include "esw/acl/ofld.h"
#include "esw/chains.h"
#include "rdma.h"
#include "en.h"
@@ -234,13 +236,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
return &esw->offloads.vport_reps[idx];
}
-static bool
-esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
- const struct mlx5_vport *vport)
-{
- return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
- mlx5_eswitch_is_vf_vport(esw, vport->vport));
-}
static void
mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
@@ -1851,279 +1846,6 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
-static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec;
- int err = 0;
-
- /* For prio tag mode, there is only 1 FTEs:
- * 1) Untagged packets - push prio tag VLAN and modify metadata if
- * required, allow
- * Unmatched traffic is allowed by default
- */
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return -ENOMEM;
-
- /* Untagged packets - push prio tag VLAN, allow */
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
- MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- flow_act.vlan[0].ethtype = ETH_P_8021Q;
- flow_act.vlan[0].vid = 0;
- flow_act.vlan[0].prio = 0;
-
- if (vport->ingress.offloads.modify_metadata_rule) {
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
- }
-
- vport->ingress.allow_rule =
- mlx5_add_flow_rules(vport->ingress.acl, spec,
- &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.allow_rule)) {
- err = PTR_ERR(vport->ingress.allow_rule);
- esw_warn(esw->dev,
- "vport[%d] configure ingress untagged allow rule, err(%d)\n",
- vport->vport, err);
- vport->ingress.allow_rule = NULL;
- }
-
- kvfree(spec);
- return err;
-}
-
-static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
- struct mlx5_flow_act flow_act = {};
- int err = 0;
- u32 key;
-
- key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
- key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
-
- MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
- MLX5_SET(set_action_in, action, field,
- MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
- MLX5_SET(set_action_in, action, data, key);
- MLX5_SET(set_action_in, action, offset,
- ESW_SOURCE_PORT_METADATA_OFFSET);
- MLX5_SET(set_action_in, action, length,
- ESW_SOURCE_PORT_METADATA_BITS);
-
- vport->ingress.offloads.modify_metadata =
- mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
- 1, action);
- if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
- err = PTR_ERR(vport->ingress.offloads.modify_metadata);
- esw_warn(esw->dev,
- "failed to alloc modify header for vport %d ingress acl (%d)\n",
- vport->vport, err);
- return err;
- }
-
- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
- vport->ingress.offloads.modify_metadata_rule =
- mlx5_add_flow_rules(vport->ingress.acl,
- NULL, &flow_act, NULL, 0);
- if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
- err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
- esw_warn(esw->dev,
- "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
- vport->vport, err);
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
- vport->ingress.offloads.modify_metadata_rule = NULL;
- }
- return err;
-}
-
-static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- if (vport->ingress.offloads.modify_metadata_rule) {
- mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
- mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
-
- vport->ingress.offloads.modify_metadata_rule = NULL;
- }
-}
-
-static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5_flow_group *g;
- void *match_criteria;
- u32 *flow_group_in;
- u32 flow_index = 0;
- int ret = 0;
-
- flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
- return -ENOMEM;
-
- if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
- /* This group is to hold FTE to match untagged packets when prio_tag
- * is enabled.
- */
- memset(flow_group_in, 0, inlen);
-
- match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- flow_group_in, match_criteria);
- MLX5_SET(create_flow_group_in, flow_group_in,
- match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- ret = PTR_ERR(g);
- esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
- vport->vport, ret);
- goto prio_tag_err;
- }
- vport->ingress.offloads.metadata_prio_tag_grp = g;
- flow_index++;
- }
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- /* This group holds an FTE with no matches for add metadata for
- * tagged packets, if prio-tag is enabled (as a fallthrough),
- * or all traffic in case prio-tag is disabled.
- */
- memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
- g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
- if (IS_ERR(g)) {
- ret = PTR_ERR(g);
- esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
- vport->vport, ret);
- goto metadata_err;
- }
- vport->ingress.offloads.metadata_allmatch_grp = g;
- }
-
- kvfree(flow_group_in);
- return 0;
-
-metadata_err:
- if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
- vport->ingress.offloads.metadata_prio_tag_grp = NULL;
- }
-prio_tag_err:
- kvfree(flow_group_in);
- return ret;
-}
-
-static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport)
-{
- if (vport->ingress.offloads.metadata_allmatch_grp) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
- vport->ingress.offloads.metadata_allmatch_grp = NULL;
- }
-
- if (vport->ingress.offloads.metadata_prio_tag_grp) {
- mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
- vport->ingress.offloads.metadata_prio_tag_grp = NULL;
- }
-}
-
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int num_ftes = 0;
- int err;
-
- if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
- !esw_check_ingress_prio_tag_enabled(esw, vport))
- return 0;
-
- esw_vport_cleanup_ingress_rules(esw, vport);
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw))
- num_ftes++;
- if (esw_check_ingress_prio_tag_enabled(esw, vport))
- num_ftes++;
-
- err = esw_vport_create_ingress_acl_table(esw, vport, num_ftes);
- if (err) {
- esw_warn(esw->dev,
- "failed to enable ingress acl (%d) on vport[%d]\n",
- err, vport->vport);
- return err;
- }
-
- err = esw_vport_create_ingress_acl_group(esw, vport);
- if (err)
- goto group_err;
-
- esw_debug(esw->dev,
- "vport[%d] configure ingress rules\n", vport->vport);
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
- if (err)
- goto metadata_err;
- }
-
- if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
- err = esw_vport_ingress_prio_tag_config(esw, vport);
- if (err)
- goto prio_tag_err;
- }
- return 0;
-
-prio_tag_err:
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
-metadata_err:
- esw_vport_destroy_ingress_acl_group(vport);
-group_err:
- esw_vport_destroy_ingress_acl_table(vport);
- return err;
-}
-
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int err;
-
- if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
- return 0;
-
- esw_vport_cleanup_egress_rules(esw, vport);
-
- err = esw_vport_enable_egress_acl(esw, vport);
- if (err)
- return err;
-
- /* For prio tag mode, there is only 1 FTEs:
- * 1) prio tag packets - pop the prio tag VLAN, allow
- * Unmatched traffic is allowed by default
- */
- esw_debug(esw->dev,
- "vport[%d] configure prio tag egress rules\n", vport->vport);
-
- /* prio tag vlan rule - pop it so VF receives untagged packets */
- err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0,
- MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
- MLX5_FLOW_CONTEXT_ACTION_ALLOW);
- if (err)
- esw_vport_disable_egress_acl(esw, vport);
-
- return err;
-}
-
static bool
esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
{
@@ -2156,25 +1878,83 @@ static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw)
esw_check_vport_match_metadata_supported(esw);
}
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
+{
+ u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1;
+ u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
+ u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+ u32 start;
+ u32 end;
+ int id;
+
+ /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
+ WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
+
+ /* Trim vhca_id to ESW_VHCA_ID_BITS */
+ vhca_id &= vhca_id_mask;
+
+ start = (vhca_id << ESW_VPORT_BITS);
+ end = start + num_vports;
+ if (!vhca_id)
+ start += 1; /* zero is reserved/invalid metadata */
+ id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL);
+
+ return (id < 0) ? 0 : id;
+}
+
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
+{
+ ida_free(&esw->offloads.vport_metadata_ida, metadata);
+}
+
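
Editor's note: the allocator carves the metadata space into per-VHCA windows. The trimmed vhca_id forms the high bits, the IDA hands out unique per-vport values inside that window, and 0 is held back as the invalid value, which is also why a failed ida_alloc_range() is reported as 0. A standalone arithmetic sketch, assuming the 8-bit vhca_id / 8-bit vport reg_c_0 split in the tree at this point (ESW_VHCA_ID_BITS = ESW_VPORT_BITS = 8, low 16 bits of reg_c_0 left for the chain tag); mlx5_eswitch_get_vport_metadata_for_match() then shifts the allocated value into the upper 16 bits:

#include <stdio.h>

/* Userspace sketch of the range computation above; the bit widths are
 * an assumption stated in the note, not taken from this hunk.
 */
#define ESW_VHCA_ID_BITS 8
#define ESW_VPORT_BITS 8
#define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

int main(void)
{
	unsigned int num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1; /* 254 */
	unsigned int vhca_ids[] = { 0, 5 };

	for (int i = 0; i < 2; i++) {
		unsigned int vhca_id = vhca_ids[i] & GENMASK(ESW_VHCA_ID_BITS - 1, 0);
		unsigned int start = vhca_id << ESW_VPORT_BITS;
		unsigned int end = start + num_vports;

		if (!vhca_id)
			start += 1; /* zero is reserved/invalid metadata */
		/* prints [1, 254] for vhca_id 0, [1280, 1534] for vhca_id 5 */
		printf("vhca_id %u -> IDA range [%u, %u]\n",
		       vhca_ids[i], start, end);
	}
	return 0;
}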
+static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->vport == MLX5_VPORT_UPLINK)
+ return 0;
+
+ vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
+ vport->metadata = vport->default_metadata;
+ return vport->metadata ? 0 : -ENOSPC;
+}
+
+static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata)
+ return;
+
+ WARN_ON(vport->metadata != vport->default_metadata);
+ mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+}
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
int err;
- err = esw_vport_ingress_config(esw, vport);
+ err = esw_offloads_vport_metadata_setup(esw, vport);
if (err)
- return err;
+ goto metadata_err;
+
+ err = esw_acl_ingress_ofld_setup(esw, vport);
+ if (err)
+ goto ingress_err;
if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
- err = esw_vport_egress_config(esw, vport);
- if (err) {
- esw_vport_cleanup_ingress_rules(esw, vport);
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
- esw_vport_destroy_ingress_acl_group(vport);
- esw_vport_destroy_ingress_acl_table(vport);
- }
+ err = esw_acl_egress_ofld_setup(esw, vport);
+ if (err)
+ goto egress_err;
}
+
+ return 0;
+
+egress_err:
+ esw_acl_ingress_ofld_cleanup(esw, vport);
+ingress_err:
+ esw_offloads_vport_metadata_cleanup(esw, vport);
+metadata_err:
return err;
}
@@ -2182,11 +1962,9 @@ void
esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
{
- esw_vport_disable_egress_acl(esw, vport);
- esw_vport_cleanup_ingress_rules(esw, vport);
- esw_vport_del_ingress_acl_modify_metadata(esw, vport);
- esw_vport_destroy_ingress_acl_group(vport);
- esw_vport_destroy_ingress_acl_table(vport);
+ esw_acl_egress_ofld_cleanup(vport);
+ esw_acl_ingress_ofld_cleanup(esw, vport);
+ esw_offloads_vport_metadata_cleanup(esw, vport);
}
static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
@@ -2852,38 +2630,11 @@ EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num)
{
- u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0);
- u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
- u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
- u32 val;
+ struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
- /* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
- WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
-
- /* Trim vhca_id to ESW_VHCA_ID_BITS */
- vhca_id &= vhca_id_mask;
-
- /* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they
- * don't overlap with VF numbers, and themselves, after trimming.
- */
- WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) <
- vport_num_mask - 1);
- WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) <
- vport_num_mask - 1);
- WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) ==
- (MLX5_VPORT_ECPF & vport_num_mask));
-
- /* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't
- * overlap with pf and ecpf.
- */
- if (vport_num != MLX5_VPORT_UPLINK &&
- vport_num != MLX5_VPORT_ECPF)
- WARN_ON_ONCE(vport_num >= vport_num_mask - 1);
-
- /* We can now trim vport_num to ESW_VPORT_BITS */
- vport_num &= vport_num_mask;
+ if (WARN_ON_ONCE(IS_ERR(vport)))
+ return 0;
- val = (vhca_id << ESW_VPORT_BITS) | vport_num;
- return val << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+ return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
}
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 92f2395dd31a..30de3bf35c6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1272,7 +1272,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mlx5_debugfs_root);
if (!priv->dbg_root) {
dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n");
- return -ENOMEM;
+ goto err_dbg_root;
}
err = mlx5_health_init(dev);
@@ -1289,15 +1289,27 @@ err_pagealloc_init:
mlx5_health_cleanup(dev);
err_health_init:
debugfs_remove(dev->priv.dbg_root);
-
+err_dbg_root:
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
return err;
}
static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
+ struct mlx5_priv *priv = &dev->priv;
+
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
debugfs_remove_recursive(dev->priv.dbg_root);
+ mutex_destroy(&priv->pgdir_mutex);
+ mutex_destroy(&priv->alloc_mutex);
+ mutex_destroy(&priv->bfregs.wc_head.lock);
+ mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->intf_state_mutex);
}
#define MLX5_IB_MOD "mlx5_ib"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 48b6358b6845..890767a2a7cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -297,7 +297,8 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
dmn->mdev = mdev;
dmn->type = type;
refcount_set(&dmn->refcount, 1);
- mutex_init(&dmn->mutex);
+ mutex_init(&dmn->info.rx.mutex);
+ mutex_init(&dmn->info.tx.mutex);
if (dr_domain_caps_init(mdev, dmn)) {
mlx5dr_err(dmn, "Failed init domain, no caps\n");
@@ -345,9 +346,9 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
int ret = 0;
if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
- mutex_lock(&dmn->mutex);
+ mlx5dr_domain_lock(dmn);
ret = mlx5dr_send_ring_force_drain(dmn);
- mutex_unlock(&dmn->mutex);
+ mlx5dr_domain_unlock(dmn);
if (ret) {
mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
flags, ret);
@@ -371,7 +372,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
dr_domain_uninit_cache(dmn);
dr_domain_uninit_resources(dmn);
dr_domain_caps_uninit(dmn);
- mutex_destroy(&dmn->mutex);
+ mutex_destroy(&dmn->info.tx.mutex);
+ mutex_destroy(&dmn->info.rx.mutex);
kfree(dmn);
return 0;
}
@@ -379,7 +381,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn)
{
- mutex_lock(&dmn->mutex);
+ mlx5dr_domain_lock(dmn);
if (dmn->peer_dmn)
refcount_dec(&dmn->peer_dmn->refcount);
@@ -389,5 +391,5 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
if (dmn->peer_dmn)
refcount_inc(&dmn->peer_dmn->refcount);
- mutex_unlock(&dmn->mutex);
+ mlx5dr_domain_unlock(dmn);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index a95938874798..31abcbb95ca2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -690,7 +690,7 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
refcount_set(&matcher->refcount, 1);
INIT_LIST_HEAD(&matcher->matcher_list);
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
ret = dr_matcher_init(matcher, mask);
if (ret)
@@ -700,14 +700,14 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
if (ret)
goto matcher_uninit;
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return matcher;
matcher_uninit:
dr_matcher_uninit(matcher);
free_matcher:
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
kfree(matcher);
dec_ref:
refcount_dec(&tbl->refcount);
@@ -791,13 +791,13 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
if (refcount_read(&matcher->refcount) > 1)
return -EBUSY;
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
dr_matcher_remove_from_tbl(matcher);
dr_matcher_uninit(matcher);
refcount_dec(&matcher->tbl->refcount);
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
kfree(matcher);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index cce3ee7a6614..cd708dcc2e3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -938,7 +938,10 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
struct mlx5dr_rule_rx_tx *nic_rule)
{
+ mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
dr_rule_clean_rule_members(rule, nic_rule);
+ mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
+
return 0;
}
@@ -1039,18 +1042,18 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
return 0;
+ hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+ if (!hw_ste_arr)
+ return -ENOMEM;
+
+ mlx5dr_domain_nic_lock(nic_dmn);
+
ret = mlx5dr_matcher_select_builders(matcher,
nic_matcher,
dr_rule_get_ipv(&param->outer),
dr_rule_get_ipv(&param->inner));
if (ret)
- goto out_err;
-
- hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
- if (!hw_ste_arr) {
- ret = -ENOMEM;
- goto out_err;
- }
+ goto free_hw_ste;
/* Set the tag values inside the ste array */
ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
@@ -1115,6 +1118,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
if (htbl)
mlx5dr_htbl_put(htbl);
+ mlx5dr_domain_nic_unlock(nic_dmn);
+
kfree(hw_ste_arr);
return 0;
@@ -1129,8 +1134,8 @@ free_rule:
kfree(ste_info);
}
free_hw_ste:
+ mlx5dr_domain_nic_unlock(nic_dmn);
kfree(hw_ste_arr);
-out_err:
return ret;
}
@@ -1232,31 +1237,23 @@ struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
{
struct mlx5dr_rule *rule;
- mutex_lock(&matcher->tbl->dmn->mutex);
refcount_inc(&matcher->refcount);
rule = dr_rule_create_rule(matcher, value, num_actions, actions);
if (!rule)
refcount_dec(&matcher->refcount);
- mutex_unlock(&matcher->tbl->dmn->mutex);
-
return rule;
}
int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
{
struct mlx5dr_matcher *matcher = rule->matcher;
- struct mlx5dr_table *tbl = rule->matcher->tbl;
int ret;
- mutex_lock(&tbl->dmn->mutex);
-
ret = dr_rule_destroy_rule(rule);
-
- mutex_unlock(&tbl->dmn->mutex);
-
if (!ret)
refcount_dec(&matcher->refcount);
+
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index b8d97d44be7b..f421013b0b54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -357,9 +357,11 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
u32 buff_offset;
int ret;
+ spin_lock(&send_ring->lock);
+
ret = dr_handle_pending_wc(dmn, send_ring);
if (ret)
- return ret;
+ goto out_unlock;
if (send_info->write.length > dmn->info.max_inline_size) {
buff_offset = (send_ring->tx_head &
@@ -377,7 +379,9 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
dr_fill_data_segs(send_ring, send_info);
dr_post_send(send_ring->qp, send_info);
- return 0;
+out_unlock:
+ spin_unlock(&send_ring->lock);
+ return ret;
}
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
@@ -563,9 +567,7 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
send_info.remote_addr = action->rewrite.chunk->mr_addr;
send_info.rkey = action->rewrite.chunk->rkey;
- mutex_lock(&dmn->mutex);
ret = dr_postsend_icm_data(dmn, &send_info);
- mutex_unlock(&dmn->mutex);
return ret;
}
@@ -886,6 +888,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
init_attr.pdn = dmn->pdn;
init_attr.uar = dmn->uar;
init_attr.max_send_wr = QUEUE_SIZE;
+ spin_lock_init(&dmn->send_ring->lock);
dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
if (!dmn->send_ring->qp) {
@@ -990,7 +993,9 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
return ret;
}
+ spin_lock(&send_ring->lock);
ret = dr_handle_pending_wc(dmn, send_ring);
+ spin_unlock(&send_ring->lock);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index c2fe48d7b75a..b599b6beb5b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -14,7 +14,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
if (action && action->action_type != DR_ACTION_TYP_FT)
return -EOPNOTSUPP;
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
if (!list_empty(&tbl->matcher_list))
last_matcher = list_last_entry(&tbl->matcher_list,
@@ -78,7 +78,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
refcount_inc(&action->refcount);
out:
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return ret;
}
@@ -95,7 +95,7 @@ static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
static void dr_table_uninit(struct mlx5dr_table *tbl)
{
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
switch (tbl->dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -112,7 +112,7 @@ static void dr_table_uninit(struct mlx5dr_table *tbl)
break;
}
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
}
static int dr_table_init_nic(struct mlx5dr_domain *dmn,
@@ -177,7 +177,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
INIT_LIST_HEAD(&tbl->matcher_list);
- mutex_lock(&tbl->dmn->mutex);
+ mlx5dr_domain_lock(tbl->dmn);
switch (tbl->dmn->type) {
case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -201,7 +201,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
break;
}
- mutex_unlock(&tbl->dmn->mutex);
+ mlx5dr_domain_unlock(tbl->dmn);
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 984783238baa..c6d5a81d138b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -636,6 +636,7 @@ struct mlx5dr_domain_rx_tx {
u64 drop_icm_addr;
u64 default_icm_addr;
enum mlx5dr_ste_entry_type ste_type;
+ struct mutex mutex; /* protect rx/tx domain */
};
struct mlx5dr_domain_info {
@@ -660,7 +661,6 @@ struct mlx5dr_domain {
struct mlx5_uars_page *uar;
enum mlx5dr_domain_type type;
refcount_t refcount;
- struct mutex mutex; /* protect domain */
struct mlx5dr_icm_pool *ste_icm_pool;
struct mlx5dr_icm_pool *action_icm_pool;
struct mlx5dr_send_ring *send_ring;
@@ -814,6 +814,28 @@ struct mlx5dr_icm_chunk {
struct list_head *miss_list;
};
+static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ mutex_lock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+ mutex_unlock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_domain_nic_lock(&dmn->info.rx);
+ mlx5dr_domain_nic_lock(&dmn->info.tx);
+}
+
+static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
+{
+ mlx5dr_domain_nic_unlock(&dmn->info.tx);
+ mlx5dr_domain_nic_unlock(&dmn->info.rx);
+}
+
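Editor's note: replacing the single domain mutex with per-direction RX/TX mutexes lets RX and TX rule work proceed in parallel. The whole-domain lock becomes the ordered pair (RX first, then TX, released in reverse), and the shared send ring gets its own spinlock so either side can post ICM writes without holding the other's mutex; this is also why dr_rule_create_rule_nic() above now allocates its STE scratch array before taking the nic lock, keeping the GFP_KERNEL allocation outside the critical section. A minimal usage sketch; the caller function is illustrative only:

/* Illustrative caller, not from the patch; assumes "dr_types.h". */
static void example_domain_locking(struct mlx5dr_domain *dmn)
{
	/* Whole-domain control-path work: both sides, rx then tx. */
	mlx5dr_domain_lock(dmn);
	/* ... e.g. attach a matcher to a table ... */
	mlx5dr_domain_unlock(dmn);

	/* Direction-scoped work only takes its own side's mutex. */
	mlx5dr_domain_nic_lock(&dmn->info.rx);
	/* ... e.g. insert or delete STEs for an RX rule ... */
	mlx5dr_domain_nic_unlock(&dmn->info.rx);
}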
static inline int
mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps)
{
@@ -1043,6 +1065,7 @@ struct mlx5dr_send_ring {
struct ib_wc wc[MAX_SEND_CQE];
u8 sync_buff[MIN_READ_SYNC];
struct mlx5dr_mr *sync_mr;
+ spinlock_t lock; /* Protect the data path of the send ring */
};
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f..4c972d8abf31 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,8 +1313,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
- struct linger sol = { .l_onoff = 1, .l_linger = 0 };
- int ret, opt, rcv_pdu_size;
+ int ret, rcv_pdu_size;
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
@@ -1337,60 +1336,24 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
/* Single syn retry */
- opt = 1;
- ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
- (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set TCP_SYNCNT sock opt %d\n", ret);
- goto err_sock;
- }
+ tcp_sock_set_syncnt(queue->sock->sk, 1);
/* Set TCP no delay */
- opt = 1;
- ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
- TCP_NODELAY, (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set TCP_NODELAY sock opt %d\n", ret);
- goto err_sock;
- }
+ tcp_sock_set_nodelay(queue->sock->sk);
/*
* Cleanup whatever is sitting in the TCP transmit queue on socket
* close. This is done to prevent stale data from being sent should
* the network connection be restored before TCP times out.
*/
- ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
- (char *)&sol, sizeof(sol));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set SO_LINGER sock opt %d\n", ret);
- goto err_sock;
- }
+ sock_no_linger(queue->sock->sk);
- if (so_priority > 0) {
- ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret) {
- dev_err(ctrl->ctrl.device,
- "failed to set SO_PRIORITY sock opt, ret %d\n",
- ret);
- goto err_sock;
- }
- }
+ if (so_priority > 0)
+ sock_set_priority(queue->sock->sk, so_priority);
/* Set socket type of service */
- if (nctrl->opts->tos >= 0) {
- opt = nctrl->opts->tos;
- ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
- (char *)&opt, sizeof(opt));
- if (ret) {
- dev_err(nctrl->device,
- "failed to set IP_TOS sock opt %d\n", ret);
- goto err_sock;
- }
- }
+ if (nctrl->opts->tos >= 0)
+ ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
queue->sock->sk->sk_allocation = GFP_ATOMIC;
nvme_tcp_set_queue_io_cpu(queue);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f4..4546049a96b3 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1429,7 +1429,6 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
{
struct socket *sock = queue->sock;
struct inet_sock *inet = inet_sk(sock->sk);
- struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret;
ret = kernel_getsockname(sock,
@@ -1447,27 +1446,14 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
* close. This is done to prevent stale data from being sent should
* the network connection be restored before TCP times out.
*/
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&sol, sizeof(sol));
- if (ret)
- return ret;
+ sock_no_linger(sock->sk);
- if (so_priority > 0) {
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret)
- return ret;
- }
+ if (so_priority > 0)
+ sock_set_priority(sock->sk, so_priority);
/* Set socket type of service */
- if (inet->rcv_tos > 0) {
- int tos = inet->rcv_tos;
-
- ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
- (char *)&tos, sizeof(tos));
- if (ret)
- return ret;
- }
+ if (inet->rcv_tos > 0)
+ ip_sock_set_tos(sock->sk, inet->rcv_tos);
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = queue;
@@ -1588,7 +1574,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
{
struct nvmet_tcp_port *port;
__kernel_sa_family_t af;
- int opt, ret;
+ int ret;
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port)
@@ -1632,30 +1618,10 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
port->sock->sk->sk_user_data = port;
port->data_ready = port->sock->sk->sk_data_ready;
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
-
- opt = 1;
- ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
- TCP_NODELAY, (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
- goto err_sock;
- }
-
- ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- if (ret) {
- pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
- goto err_sock;
- }
-
- if (so_priority > 0) {
- ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
- (char *)&so_priority, sizeof(so_priority));
- if (ret) {
- pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
- goto err_sock;
- }
- }
+ sock_set_reuseaddr(port->sock->sk);
+ tcp_sock_set_nodelay(port->sock->sk);
+ if (so_priority > 0)
+ sock_set_priority(port->sock->sk, so_priority);
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 1f93ea381353..922484ea4e30 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config ISCSI_TARGET
tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
- depends on NET
+ depends on INET
select CRYPTO
select CRYPTO_CRC32C
select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 731ee67fe914..85748e338858 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <linux/idr.h>
#include <linux/tcp.h> /* TCP_NODELAY */
+#include <net/ip.h>
#include <net/ipv6.h> /* ipv6_addr_v4mapped() */
#include <scsi/iscsi_proto.h>
#include <target/target_core_base.h>
@@ -855,7 +856,7 @@ int iscsit_setup_np(
struct sockaddr_storage *sockaddr)
{
struct socket *sock = NULL;
- int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
+ int backlog = ISCSIT_TCP_BACKLOG, ret, len;
switch (np->np_network_transport) {
case ISCSI_TCP:
@@ -897,34 +898,10 @@ int iscsit_setup_np(
/*
* Set SO_REUSEADDR, and disable Nagle's algorithm with TCP_NODELAY.
*/
- /* FIXME: Someone please explain why this is endian-safe */
- opt = 1;
- if (np->np_network_transport == ISCSI_TCP) {
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for TCP_NODELAY"
- " failed: %d\n", ret);
- goto fail;
- }
- }
-
- /* FIXME: Someone please explain why this is endian-safe */
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for SO_REUSEADDR"
- " failed\n");
- goto fail;
- }
-
- ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- pr_err("kernel_setsockopt() for IP_FREEBIND"
- " failed\n");
- goto fail;
- }
+ if (np->np_network_transport == ISCSI_TCP)
+ tcp_sock_set_nodelay(sock->sk);
+ sock_set_reuseaddr(sock->sk);
+ ip_sock_set_freebind(sock->sk);
ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
if (ret < 0) {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1ecc67da6c1a..e313dae01674 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -37,7 +37,6 @@ int afs_open_socket(struct afs_net *net)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
- unsigned int min_level;
int ret;
_enter("");
@@ -57,9 +56,8 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family = AF_INET6;
srx.transport.sin6.sin6_port = htons(AFS_CM_PORT);
- min_level = RXRPC_SECURITY_ENCRYPT;
- ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
- (void *)&min_level, sizeof(min_level));
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
if (ret < 0)
goto error_2;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 28268ed461b8..ad8fb53b3682 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket->sk->sk_rcvbuf = 140 * 1024;
}
- if (server->tcp_nodelay) {
- int val = 1;
- rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
- (char *)&val, sizeof(val));
- if (rc)
- cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n",
- rc);
- }
+ if (server->tcp_nodelay)
+ tcp_sock_set_nodelay(socket->sk);
cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
socket->sk->sk_sndbuf,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c97570eb2c18..99760063e000 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -325,7 +325,6 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
struct msghdr smb_msg;
- int val = 1;
__be32 rfc1002_marker;
if (cifs_rdma_enabled(server)) {
@@ -345,8 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
}
/* cork the socket */
- kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
- (char *)&val, sizeof(val));
+ tcp_sock_set_cork(ssocket->sk, true);
for (j = 0; j < num_rqst; j++)
send_length += smb_rqst_len(server, &rqst[j]);
@@ -435,9 +433,7 @@ unmask:
}
/* uncork it */
- val = 0;
- kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
- (char *)&val, sizeof(val));
+ tcp_sock_set_cork(ssocket->sk, false);
if ((total_len > 0) && (total_len != send_length)) {
cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdfaf4f0e11a..69333728d871 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -724,7 +724,7 @@ out_close:
}
/* Listening socket is busy, accept a connection */
-static int tcp_accept_from_sock(struct connection *con)
+static int accept_from_sock(struct connection *con)
{
int result;
struct sockaddr_storage peeraddr;
@@ -852,123 +852,6 @@ accept_err:
return result;
}
-static int sctp_accept_from_sock(struct connection *con)
-{
- /* Check that the new node is in the lockspace */
- struct sctp_prim prim;
- int nodeid;
- int prim_len, ret;
- int addr_len;
- struct connection *newcon;
- struct connection *addcon;
- struct socket *newsock;
-
- mutex_lock(&connections_lock);
- if (!dlm_allow_conn) {
- mutex_unlock(&connections_lock);
- return -1;
- }
- mutex_unlock(&connections_lock);
-
- mutex_lock_nested(&con->sock_mutex, 0);
-
- ret = kernel_accept(con->sock, &newsock, O_NONBLOCK);
- if (ret < 0)
- goto accept_err;
-
- memset(&prim, 0, sizeof(struct sctp_prim));
- prim_len = sizeof(struct sctp_prim);
-
- ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
- (char *)&prim, &prim_len);
- if (ret < 0) {
- log_print("getsockopt/sctp_primary_addr failed: %d", ret);
- goto accept_err;
- }
-
- make_sockaddr(&prim.ssp_addr, 0, &addr_len);
- ret = addr_to_nodeid(&prim.ssp_addr, &nodeid);
- if (ret) {
- unsigned char *b = (unsigned char *)&prim.ssp_addr;
-
- log_print("reject connect from unknown addr");
- print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
- b, sizeof(struct sockaddr_storage));
- goto accept_err;
- }
-
- newcon = nodeid2con(nodeid, GFP_NOFS);
- if (!newcon) {
- ret = -ENOMEM;
- goto accept_err;
- }
-
- mutex_lock_nested(&newcon->sock_mutex, 1);
-
- if (newcon->sock) {
- struct connection *othercon = newcon->othercon;
-
- if (!othercon) {
- othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
- if (!othercon) {
- log_print("failed to allocate incoming socket");
- mutex_unlock(&newcon->sock_mutex);
- ret = -ENOMEM;
- goto accept_err;
- }
- othercon->nodeid = nodeid;
- othercon->rx_action = receive_from_sock;
- mutex_init(&othercon->sock_mutex);
- INIT_LIST_HEAD(&othercon->writequeue);
- spin_lock_init(&othercon->writequeue_lock);
- INIT_WORK(&othercon->swork, process_send_sockets);
- INIT_WORK(&othercon->rwork, process_recv_sockets);
- set_bit(CF_IS_OTHERCON, &othercon->flags);
- }
- mutex_lock_nested(&othercon->sock_mutex, 2);
- if (!othercon->sock) {
- newcon->othercon = othercon;
- add_sock(newsock, othercon);
- addcon = othercon;
- mutex_unlock(&othercon->sock_mutex);
- } else {
- printk("Extra connection from node %d attempted\n", nodeid);
- ret = -EAGAIN;
- mutex_unlock(&othercon->sock_mutex);
- mutex_unlock(&newcon->sock_mutex);
- goto accept_err;
- }
- } else {
- newcon->rx_action = receive_from_sock;
- add_sock(newsock, newcon);
- addcon = newcon;
- }
-
- log_print("connected to %d", nodeid);
-
- mutex_unlock(&newcon->sock_mutex);
-
- /*
- * Add it to the active queue in case we got data
- * between processing the accept adding the socket
- * to the read_sockets list
- */
- if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
- queue_work(recv_workqueue, &addcon->rwork);
- mutex_unlock(&con->sock_mutex);
-
- return 0;
-
-accept_err:
- mutex_unlock(&con->sock_mutex);
- if (newsock)
- sock_release(newsock);
- if (ret != -EAGAIN)
- log_print("error accepting connection from node: %d", ret);
-
- return ret;
-}
-
static void free_entry(struct writequeue_entry *e)
{
__free_page(e->page);
@@ -1035,7 +918,6 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
- struct __kernel_sock_timeval tv = { .tv_sec = 5, .tv_usec = 0 };
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1087,13 +969,10 @@ static void sctp_connect_to_sock(struct connection *con)
* since O_NONBLOCK argument in connect() function does not work here,
* then, we should restore the default value of this attribute.
*/
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 5);
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
0);
- memset(&tv, 0, sizeof(tv));
- kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+ sock_set_sndtimeo(sock->sk, 0);
if (result == -EINPROGRESS)
result = 0;
@@ -1132,7 +1011,6 @@ static void tcp_connect_to_sock(struct connection *con)
struct sockaddr_storage saddr, src_addr;
int addr_len;
struct socket *sock = NULL;
- int one = 1;
int result;
if (con->nodeid == 0) {
@@ -1181,8 +1059,7 @@ static void tcp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
O_NONBLOCK);
@@ -1224,7 +1101,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
{
struct socket *sock = NULL;
int result = 0;
- int one = 1;
int addr_len;
if (dlm_local_addr[0]->ss_family == AF_INET)
@@ -1241,19 +1117,14 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
}
/* Turn off Nagle's algorithm */
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
- sizeof(one));
+ tcp_sock_set_nodelay(sock->sk);
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&one, sizeof(one));
+ sock_set_reuseaddr(sock->sk);
- if (result < 0) {
- log_print("Failed to set SO_REUSEADDR on socket: %d", result);
- }
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = con;
save_listen_callbacks(sock);
- con->rx_action = tcp_accept_from_sock;
+ con->rx_action = accept_from_sock;
con->connect_action = tcp_connect_to_sock;
write_unlock_bh(&sock->sk->sk_callback_lock);
@@ -1267,11 +1138,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
con->sock = NULL;
goto create_out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&one, sizeof(one));
- if (result < 0) {
- log_print("Set keepalive failed: %d", result);
- }
+ sock_set_keepalive(sock->sk);
result = sock->ops->listen(sock, 5);
if (result < 0) {
@@ -1309,7 +1176,6 @@ static int sctp_listen_for_all(void)
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
- int bufsize = NEEDED_RMEM;
int one = 1;
if (!con)
@@ -1324,11 +1190,7 @@ static int sctp_listen_for_all(void)
goto out;
}
- result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
- (char *)&bufsize, sizeof(bufsize));
- if (result)
- log_print("Error increasing buffer space on socket %d", result);
-
+ sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
if (result < 0)
@@ -1340,7 +1202,7 @@ static int sctp_listen_for_all(void)
save_listen_callbacks(sock);
con->sock = sock;
con->sock->sk->sk_data_ready = lowcomms_data_ready;
- con->rx_action = sctp_accept_from_sock;
+ con->rx_action = accept_from_sock;
con->connect_action = sctp_connect_to_sock;
write_unlock_bh(&sock->sk->sk_callback_lock);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2c512b40a940..79a231719460 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,22 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
sc_put(sc);
}
-static int o2net_set_nodelay(struct socket *sock)
-{
- int val = 1;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (void *)&val, sizeof(val));
-}
-
-static int o2net_set_usertimeout(struct socket *sock)
-{
- int user_timeout = O2NET_TCP_USER_TIMEOUT;
-
- return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (void *)&user_timeout, sizeof(user_timeout));
-}
-
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1636,17 +1620,8 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
- ret = o2net_set_nodelay(sc->sc_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
-
- ret = o2net_set_usertimeout(sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(sc->sc_sock->sk);
+ tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT);
o2net_register_callbacks(sc->sc_sock->sk, sc);
@@ -1832,17 +1807,8 @@ static int o2net_accept_one(struct socket *sock, int *more)
*more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
- ret = o2net_set_nodelay(new_sock);
- if (ret) {
- mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
- goto out;
- }
-
- ret = o2net_set_usertimeout(new_sock);
- if (ret) {
- mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
- goto out;
- }
+ tcp_sock_set_nodelay(new_sock->sk);
+ tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
diff --git a/include/linux/net.h b/include/linux/net.h
index 6451425e828f..74ef5d7315f7 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
-int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
- int *optlen);
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
unsigned int optlen);
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bf44e85d709d..9aac824c523c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -497,4 +497,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
int shiftlen);
+void tcp_sock_set_cork(struct sock *sk, bool on);
+int tcp_sock_set_keepcnt(struct sock *sk, int val);
+int tcp_sock_set_keepidle(struct sock *sk, int val);
+int tcp_sock_set_keepintvl(struct sock *sk, int val);
+void tcp_sock_set_nodelay(struct sock *sk);
+void tcp_sock_set_quickack(struct sock *sk, int val);
+int tcp_sock_set_syncnt(struct sock *sk, int val);
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+
#endif /* _LINUX_TCP_H */
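
Editor's note: these wrappers replace open-coded kernel_setsockopt(..., IPPROTO_TCP, ...) calls for in-kernel users. Each takes the struct sock directly, locks it internally, and applies the option exactly as the TCP_* setsockopt path would; the setters that validate a range (syncnt and the keepalive knobs) return int and can fail with -EINVAL, the rest are void. A minimal sketch of a caller, assuming an already-created kernel TCP socket; the specific values are illustrative:

#include <linux/net.h>
#include <linux/tcp.h>
#include <net/sock.h>

/* Illustrative: configure a fresh TCP socket the way the converted
 * drivers above do, without kernel_setsockopt().
 */
static int example_setup_tcp_sock(struct socket *sock)
{
	int ret;

	tcp_sock_set_nodelay(sock->sk);		/* disable Nagle */
	tcp_sock_set_user_timeout(sock->sk, 30000); /* ms before aborting */

	ret = tcp_sock_set_syncnt(sock->sk, 1);	/* single SYN retry */
	if (ret)
		return ret;
	return tcp_sock_set_keepidle(sock->sk, 60); /* -EINVAL if out of range */
}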
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ab988940bf04..91eacbdcf33d 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -72,4 +72,6 @@ bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+
#endif /* _NET_RXRPC_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 5b317c9f4470..04ebe7bf54c6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,4 +765,10 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
return likely(mtu >= IPV4_MIN_MTU);
}
+void ip_sock_set_freebind(struct sock *sk);
+int ip_sock_set_mtu_discover(struct sock *sk, int val);
+void ip_sock_set_pktinfo(struct sock *sk);
+void ip_sock_set_recverr(struct sock *sk);
+void ip_sock_set_tos(struct sock *sk, int val);
+
#endif /* _IP_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 39a00d3ef5e2..5e65bf2fd32d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1177,4 +1177,96 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
const struct in6_addr *addr, unsigned int mode);
int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+
+static inline int ip6_sock_set_v6only(struct sock *sk)
+{
+ if (inet_sk(sk)->inet_num)
+ return -EINVAL;
+ lock_sock(sk);
+ sk->sk_ipv6only = true;
+ release_sock(sk);
+ return 0;
+}
+
+static inline void ip6_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+
+static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+ unsigned int pref = 0;
+ unsigned int prefmask = ~0;
+
+ /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+ switch (val & (IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP |
+ IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+ case IPV6_PREFER_SRC_PUBLIC:
+ pref |= IPV6_PREFER_SRC_PUBLIC;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_TMP:
+ pref |= IPV6_PREFER_SRC_TMP;
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+ prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check HOME/COA conflicts */
+ switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) {
+ case IPV6_PREFER_SRC_HOME:
+ prefmask &= ~IPV6_PREFER_SRC_COA;
+ break;
+ case IPV6_PREFER_SRC_COA:
+ pref |= IPV6_PREFER_SRC_COA;
+ break;
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* check CGA/NONCGA conflicts */
+ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+ case IPV6_PREFER_SRC_CGA:
+ case IPV6_PREFER_SRC_NONCGA:
+ case 0:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ return 0;
+}
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = __ip6_sock_set_addr_preferences(sk, val);
+ release_sock(sk);
+ return ret;
+}
+
+static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet6_sk(sk)->rxopt.bits.rxinfo = true;
+ release_sock(sk);
+}
+
#endif /* _NET_IPV6_H */
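
Editor's note: ip6_sock_set_addr_preferences() mirrors the IPV6_ADDR_PREFERENCES setsockopt. Conflicting flags within each group (public/tmp, home/coa, cga/noncga) yield -EINVAL, and srcprefs is updated mask-then-set so unrelated preference bits survive; the flags argument is an int bitmask, since a bool would truncate the flag values to 0/1. A brief sketch of a kernel caller, with the flag choice purely illustrative:

#include <net/ipv6.h>

/* Illustrative: prefer temporary (privacy) source addresses on sk.
 * Passing IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC instead would be
 * rejected with -EINVAL by the conflict checks above.
 */
static int example_prefer_tmp_src(struct sock *sk)
{
	return ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_TMP);
}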
diff --git a/include/net/sock.h b/include/net/sock.h
index 3e8c6d4b4b59..d994daa418ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,4 +2688,14 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
+int sock_bindtoindex(struct sock *sk, int ifindex);
+void sock_enable_timestamps(struct sock *sk);
+void sock_no_linger(struct sock *sk);
+void sock_set_keepalive(struct sock *sk);
+void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_rcvbuf(struct sock *sk, int val);
+void sock_set_reuseaddr(struct sock *sk);
+void sock_set_reuseport(struct sock *sk);
+void sock_set_sndtimeo(struct sock *sk, s64 secs);
+
#endif /* _SOCK_H */
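
Editor's note: the sock-level helpers all follow the same pattern as their SO_* counterparts in sock_setsockopt(): lock_sock(), set the field or flag, release_sock(), minus the user-copy and capability plumbing in-kernel callers don't need (sock_set_rcvbuf(), for instance, behaves like SO_RCVBUFFORCE and skips the sysctl_rmem_max clamp). A hedged sketch of a listener setup built only from these helpers; the values are illustrative:

#include <net/sock.h>

/* Illustrative: the listener configuration pattern used by the
 * converted callers above (dlm, nvmet), expressed with the new helpers.
 */
static void example_configure_listener(struct sock *sk)
{
	sock_set_reuseaddr(sk);			/* SO_REUSEADDR */
	sock_set_keepalive(sk);			/* SO_KEEPALIVE */
	sock_set_priority(sk, 4);		/* SO_PRIORITY, value illustrative */
	sock_no_linger(sk);			/* SO_LINGER, l_onoff=1 l_linger=0 */
	sock_set_rcvbuf(sk, 1024 * 1024);	/* forced, like SO_RCVBUFFORCE */
}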
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b681338a8320..66e4b8331850 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -437,6 +437,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
struct request_sock *req,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f8ca5edc5f2c..27d6ab11f9ee 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
- if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
- int optval = 1;
-
- ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)&optval, sizeof(optval));
- if (ret)
- pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
- ret);
- }
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
+ tcp_sock_set_nodelay(sock->sk);
con->sock = sock;
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index fd85e651ce28..2ca3425b519c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -594,6 +594,18 @@ out:
return ret;
}
+int sock_bindtoindex(struct sock *sk, int ifindex)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = sock_bindtoindex_locked(sk, ifindex);
+ release_sock(sk);
+
+ return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
static int sock_setbindtodevice(struct sock *sk, char __user *optval,
int optlen)
{
@@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- lock_sock(sk);
- ret = sock_setbindtodevice_locked(sk, index);
- release_sock(sk);
-
+ return sock_bindtoindex(sk, index);
out:
#endif
@@ -712,6 +721,111 @@ bool sk_mc_loop(struct sock *sk)
}
EXPORT_SYMBOL(sk_mc_loop);
+void sock_set_reuseaddr(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuse = SK_CAN_REUSE;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuseport = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_lingertime = 0;
+ sock_set_flag(sk, SOCK_LINGER);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+ lock_sock(sk);
+ sk->sk_priority = priority;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+ lock_sock(sk);
+ if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+ sk->sk_sndtimeo = secs * HZ;
+ else
+ sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+ if (val) {
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+ sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+ sock_set_flag(sk, SOCK_RCVTSTAMP);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+ } else {
+ sock_reset_flag(sk, SOCK_RCVTSTAMP);
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+ sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+ }
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+ lock_sock(sk);
+ __sock_set_timestamps(sk, true, false, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
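For reference, the (new, ns) arguments map onto the four socket options handled in the setsockopt switch below, so sock_enable_timestamps() matches SO_TIMESTAMPNS_OLD semantics:

/* __sock_set_timestamps(sk, val, new, ns) argument mapping:
 *   SO_TIMESTAMP_OLD    -> new = false, ns = false
 *   SO_TIMESTAMP_NEW    -> new = true,  ns = false
 *   SO_TIMESTAMPNS_OLD  -> new = false, ns = true
 *   SO_TIMESTAMPNS_NEW  -> new = true,  ns = true
 */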
+
+void sock_set_keepalive(struct sock *sk)
+{
+ lock_sock(sk);
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, true);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+ /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+ * as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+ /* We double it on the way in to account for "struct sk_buff" etc.
+ * overhead. Applications assume that the SO_RCVBUF setting they make
+ * will allow that much actual data to be received on that socket.
+ *
+ * Applications are unaware that "struct sk_buff" and other overheads
+ * allocate from the receive buffer during socket buffer allocation.
+ *
+ * And after considering the possible alternatives, returning the value
+ * we actually used in getsockopt is the most desirable behavior.
+ */
+ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __sock_set_rcvbuf(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
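A worked example of the doubling and clamping, assuming SOCK_MIN_RCVBUF is below the result:

/* sock_set_rcvbuf(sk, 4096):
 *   val = min(4096, INT_MAX / 2) = 4096
 *   sk->sk_rcvbuf = max(4096 * 2, SOCK_MIN_RCVBUF) = 8192
 * getsockopt(SO_RCVBUF) then reports the doubled value, 8192.
 */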
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -808,30 +922,7 @@ set_sndbuf:
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
- /* Ensure val * 2 fits into an int, to prevent max_t()
- * from treating it as a negative value.
- */
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- WRITE_ONCE(sk->sk_rcvbuf,
- max_t(int, val * 2, SOCK_MIN_RCVBUF));
+ __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
break;
case SO_RCVBUFFORCE:
@@ -843,9 +934,8 @@ set_rcvbuf:
/* No negative values (to prevent underflow, as val will be
* multiplied by 2).
*/
- if (val < 0)
- val = 0;
- goto set_rcvbuf;
+ __sock_set_rcvbuf(sk, max(val, 0));
+ break;
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
@@ -903,28 +993,17 @@ set_rcvbuf:
break;
case SO_TIMESTAMP_OLD:
+ __sock_set_timestamps(sk, valbool, false, false);
+ break;
case SO_TIMESTAMP_NEW:
+ __sock_set_timestamps(sk, valbool, true, false);
+ break;
case SO_TIMESTAMPNS_OLD:
+ __sock_set_timestamps(sk, valbool, false, true);
+ break;
case SO_TIMESTAMPNS_NEW:
- if (valbool) {
- if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- else
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
- if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- else
- sock_set_flag(sk, SOCK_RCVTSTAMPNS);
- sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- } else {
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
- }
+ __sock_set_timestamps(sk, valbool, true, true);
break;
-
case SO_TIMESTAMPING_NEW:
sock_set_flag(sk, SOCK_TSTAMP_NEW);
/* fall through */
@@ -1180,7 +1259,7 @@ set_rcvbuf:
break;
case SO_BINDTOIFINDEX:
- ret = sock_setbindtodevice_locked(sk, val);
+ ret = sock_bindtoindex_locked(sk, val);
break;
default:
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f43d5f12aa86..84ec3703c909 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -560,6 +560,61 @@ out:
return err;
}
+static void __ip_sock_set_tos(struct sock *sk, int val)
+{
+ if (sk->sk_type == SOCK_STREAM) {
+ val &= ~INET_ECN_MASK;
+ val |= inet_sk(sk)->tos & INET_ECN_MASK;
+ }
+ if (inet_sk(sk)->tos != val) {
+ inet_sk(sk)->tos = val;
+ sk->sk_priority = rt_tos2priority(val);
+ sk_dst_reset(sk);
+ }
+}
+
+void ip_sock_set_tos(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __ip_sock_set_tos(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_tos);
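A short worked example of the ECN preservation on stream sockets (values are illustrative; INET_ECN_MASK is 0x3):

/* SOCK_STREAM socket with inet->tos == 0x02 (ECT(0) set):
 *   ip_sock_set_tos(sk, 0xb8)        -- DSCP EF
 *   val = (0xb8 & ~0x3) | (0x02 & 0x3) = 0xba
 * The caller's DSCP bits are taken, the socket's ECN bits are kept.
 */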
+
+void ip_sock_set_freebind(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->freebind = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_freebind);
+
+void ip_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_recverr);
+
+int ip_sock_set_mtu_discover(struct sock *sk, int val)
+{
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
+ return -EINVAL;
+ lock_sock(sk);
+ inet_sk(sk)->pmtudisc = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(ip_sock_set_mtu_discover);
+
+void ip_sock_set_pktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_pktinfo);
/*
* Socket option code for IP. This is the end of the line after any
@@ -823,15 +878,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS: /* This sets both TOS and Precedence */
- if (sk->sk_type == SOCK_STREAM) {
- val &= ~INET_ECN_MASK;
- val |= inet->tos & INET_ECN_MASK;
- }
- if (inet->tos != val) {
- inet->tos = val;
- sk->sk_priority = rt_tos2priority(val);
- sk_dst_reset(sk);
- }
+ __ip_sock_set_tos(sk, val);
break;
case IP_TTL:
if (optlen < 1)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 970064996377..15d47d5e7951 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2801,6 +2801,163 @@ static void tcp_enable_tx_delay(void)
}
}
+/* When set, this indicates that non-full frames should always be queued.
+ * Later the user clears this option and we transmit any pending partial
+ * frames in the queue. This is meant to be used alongside sendfile() to get
+ * properly filled frames when the user (for example) must write out headers
+ * with a write() call first and then use sendfile to send out the data parts.
+ *
+ * TCP_CORK can be set together with TCP_NODELAY and is stronger than
+ * TCP_NODELAY.
+ */
+static void __tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (on) {
+ tp->nonagle |= TCP_NAGLE_CORK;
+ } else {
+ tp->nonagle &= ~TCP_NAGLE_CORK;
+ if (tp->nonagle & TCP_NAGLE_OFF)
+ tp->nonagle |= TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ }
+}
+
+void tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ lock_sock(sk);
+ __tcp_sock_set_cork(sk, on);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_cork);
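As a sketch of the intended usage pattern (mirroring the rds_tcp conversion later in this patch; the function below is hypothetical):

/* Hypothetical sender: cork around a header + payload pair. */
static int example_corked_send(struct socket *sock, struct kvec *hdr,
			       struct page *page, size_t len)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	int ret;

	tcp_sock_set_cork(sock->sk, true);
	ret = kernel_sendmsg(sock, &msg, hdr, 1, hdr->iov_len);
	if (ret >= 0)
		ret = kernel_sendpage(sock, page, 0, len, MSG_NOSIGNAL);
	tcp_sock_set_cork(sock->sk, false);	/* flush any partial frame */
	return ret;
}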
+
+/* TCP_NODELAY is weaker than TCP_CORK, so setting this option on a corked
+ * socket is remembered, but it is not activated until the cork is cleared.
+ *
+ * However, when TCP_NODELAY is set we make an explicit push, which overrides
+ * even TCP_CORK for currently queued segments.
+ */
+static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
+{
+ if (on) {
+ tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ } else {
+ tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
+ }
+}
+
+void tcp_sock_set_nodelay(struct sock *sk)
+{
+ lock_sock(sk);
+ __tcp_sock_set_nodelay(sk, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_nodelay);
+
+static void __tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ if (!val) {
+ inet_csk_enter_pingpong_mode(sk);
+ return;
+ }
+
+ inet_csk_exit_pingpong_mode(sk);
+ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+ inet_csk_ack_scheduled(sk)) {
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
+ tcp_cleanup_rbuf(sk, 1);
+ if (!(val & 1))
+ inet_csk_enter_pingpong_mode(sk);
+ }
+}
+
+void tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __tcp_sock_set_quickack(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_quickack);
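The val semantics carried over from TCP_QUICKACK, for reference:

/* val == 0: enter pingpong (delayed-ACK) mode, i.e. quickack off.
 * val != 0: leave pingpong mode and push out any ACK already
 *           scheduled; if bit 0 is clear, re-enter pingpong mode so
 *           the quickack behavior is one-shot rather than sticky.
 */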
+
+int tcp_sock_set_syncnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ inet_csk(sk)->icsk_syn_retries = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_syncnt);
+
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+{
+ lock_sock(sk);
+ inet_csk(sk)->icsk_user_timeout = val;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_user_timeout);
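As with the TCP_USER_TIMEOUT option it wraps (RFC 5482), val is a timeout in milliseconds, with 0 restoring the system default; for example:

/* Abort the connection if transmitted data stays unacknowledged
 * for longer than 30 seconds:
 */
tcp_sock_set_user_timeout(sock->sk, 30 * MSEC_PER_SEC);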
+
+static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+ tp->keepalive_time = val * HZ;
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+
+ if (tp->keepalive_time > elapsed)
+ elapsed = tp->keepalive_time - elapsed;
+ else
+ elapsed = 0;
+ inet_csk_reset_keepalive_timer(sk, elapsed);
+ }
+
+ return 0;
+}
+
+int tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ int err;
+
+ lock_sock(sk);
+ err = __tcp_sock_set_keepidle(sk, val);
+ release_sock(sk);
+ return err;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepidle);
+
+int tcp_sock_set_keepintvl(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_intvl = val * HZ;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+
+int tcp_sock_set_keepcnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_probes = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepcnt);
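Taken together, the keepalive helpers replace four kernel_setsockopt() calls; a minimal sketch in the style of the rds and sunrpc conversions below (values illustrative, function hypothetical):

static void example_tune_keepalive(struct socket *sock)
{
	sock_set_keepalive(sock->sk);		/* SO_KEEPALIVE on */
	tcp_sock_set_keepidle(sock->sk, 5);	/* first probe after 5s idle */
	tcp_sock_set_keepintvl(sock->sk, 5);	/* 5s between probes */
	tcp_sock_set_keepcnt(sock->sk, 5);	/* drop after 5 lost probes */
}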
+
/*
* Socket option code for TCP.
*/
@@ -2898,20 +3055,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_NODELAY:
- if (val) {
- /* TCP_NODELAY is weaker than TCP_CORK, so that
- * this option on corked socket is remembered, but
- * it is not activated until cork is cleared.
- *
- * However, when TCP_NODELAY is set we make
- * an explicit push, which overrides even TCP_CORK
- * for currently queued segments.
- */
- tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- } else {
- tp->nonagle &= ~TCP_NAGLE_OFF;
- }
+ __tcp_sock_set_nodelay(sk, val);
break;
case TCP_THIN_LINEAR_TIMEOUTS:
@@ -2979,43 +3123,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_CORK:
- /* When set indicates to always queue non-full frames.
- * Later the user clears this option and we transmit
- * any pending partial frames in the queue. This is
- * meant to be used alongside sendfile() to get properly
- * filled frames when the user (for example) must write
- * out headers with a write() call first and then use
- * sendfile to send out the data parts.
- *
- * TCP_CORK can be set together with TCP_NODELAY and it is
- * stronger than TCP_NODELAY.
- */
- if (val) {
- tp->nonagle |= TCP_NAGLE_CORK;
- } else {
- tp->nonagle &= ~TCP_NAGLE_CORK;
- if (tp->nonagle&TCP_NAGLE_OFF)
- tp->nonagle |= TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- }
+ __tcp_sock_set_cork(sk, val);
break;
case TCP_KEEPIDLE:
- if (val < 1 || val > MAX_TCP_KEEPIDLE)
- err = -EINVAL;
- else {
- tp->keepalive_time = val * HZ;
- if (sock_flag(sk, SOCK_KEEPOPEN) &&
- !((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN))) {
- u32 elapsed = keepalive_time_elapsed(tp);
- if (tp->keepalive_time > elapsed)
- elapsed = tp->keepalive_time - elapsed;
- else
- elapsed = 0;
- inet_csk_reset_keepalive_timer(sk, elapsed);
- }
- }
+ err = __tcp_sock_set_keepidle(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
@@ -3072,19 +3184,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_QUICKACK:
- if (!val) {
- inet_csk_enter_pingpong_mode(sk);
- } else {
- inet_csk_exit_pingpong_mode(sk);
- if ((1 << sk->sk_state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
- inet_csk_ack_scheduled(sk)) {
- icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
- tcp_cleanup_rbuf(sk, 1);
- if (!(val & 1))
- inet_csk_enter_pingpong_mode(sk);
- }
- }
+ __tcp_sock_set_quickack(sk, val);
break;
#ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4eef5b84fff1..ad6435ba6d72 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -404,7 +404,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
EXPORT_SYMBOL(tcp_req_err);
/* TCP-LD (RFC 6069) logic */
-static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -441,6 +441,7 @@ static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
tcp_retransmit_timer(sk);
}
}
+EXPORT_SYMBOL(tcp_ld_RTO_revert);
/*
* This routine is called by the ICMP module when it gets some
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 150e6f0fdbf5..2158e8bddf41 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 58956a6b66a2..2e0ad1bc84a8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -25,17 +25,12 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->ipv6_v6only) {
- int val = 1;
-
- err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
- (char *) &val, sizeof(val));
+ err = ip6_sock_set_v6only(sock->sk);
if (err < 0)
goto error;
}
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e10258c2210e..adbfed6adf11 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -845,67 +845,10 @@ done:
break;
case IPV6_ADDR_PREFERENCES:
- {
- unsigned int pref = 0;
- unsigned int prefmask = ~0;
-
if (optlen < sizeof(int))
goto e_inval;
-
- retv = -EINVAL;
-
- /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
- switch (val & (IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP|
- IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
- case IPV6_PREFER_SRC_PUBLIC:
- pref |= IPV6_PREFER_SRC_PUBLIC;
- break;
- case IPV6_PREFER_SRC_TMP:
- pref |= IPV6_PREFER_SRC_TMP;
- break;
- case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
- break;
- case 0:
- goto pref_skip_pubtmp;
- default:
- goto e_inval;
- }
-
- prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP);
-pref_skip_pubtmp:
-
- /* check HOME/COA conflicts */
- switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
- case IPV6_PREFER_SRC_HOME:
- break;
- case IPV6_PREFER_SRC_COA:
- pref |= IPV6_PREFER_SRC_COA;
- case 0:
- goto pref_skip_coa;
- default:
- goto e_inval;
- }
-
- prefmask &= ~IPV6_PREFER_SRC_COA;
-pref_skip_coa:
-
- /* check CGA/NONCGA conflicts */
- switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
- case IPV6_PREFER_SRC_CGA:
- case IPV6_PREFER_SRC_NONCGA:
- case 0:
- break;
- default:
- goto e_inval;
- }
-
- np->srcprefs = (np->srcprefs & prefmask) | pref;
- retv = 0;
-
+ retv = __ip6_sock_set_addr_preferences(sk, val);
break;
- }
case IPV6_MINHOPCOUNT:
if (optlen < sizeof(int))
goto e_inval;
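The helper keeps the removed conflict checks; for example (derived from the deleted code above):

/* __ip6_sock_set_addr_preferences() results:
 *   IPV6_PREFER_SRC_PUBLIC                       -> 0, prefer public
 *   IPV6_PREFER_SRC_PUBLIC | IPV6_PREFER_SRC_TMP -> -EINVAL (conflict)
 *   IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA   -> -EINVAL (conflict)
 */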
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 01a6f5111a77..b7415ca75c2d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -473,6 +473,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else
sk->sk_err_soft = err;
goto out;
+ case TCP_LISTEN:
+ break;
+ default:
+		/* Check whether this ICMP message allows reverting the RTO
+		 * backoff (see RFC 6069).
+ */
+ if (!fastopen && type == ICMPV6_DEST_UNREACH &&
+ code == ICMPV6_NOROUTE)
+ tcp_ld_RTO_revert(sk, seq);
}
if (!sock_owned_by_user(sk) && np->recverr) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 46782fac4c16..43db0eca911f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
{ }
};
-/* doing it this way avoids calling tcp_sk() */
-void rds_tcp_nonagle(struct socket *sock)
-{
- int val = 1;
-
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val,
- sizeof(val));
-}
-
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
/* seq# of the last byte of data in tcp send buffer */
@@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock)
struct net *net = sock_net(sk);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361d21c7..bad9cf49d565 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -50,7 +50,6 @@ struct rds_tcp_statistics {
/* tcp.c */
void rds_tcp_tune(struct socket *sock);
-void rds_tcp_nonagle(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
@@ -71,9 +70,8 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
-int rds_tcp_keepalive(struct socket *sock);
+void rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
-void rds_tcp_set_linger(struct socket *sock);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 008f50fb25dd..4e64598176b0 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
if (sock) {
if (rds_destroy_pending(cp->cp_conn))
- rds_tcp_set_linger(sock);
+ sock_no_linger(sock->sk);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 810a3a49e947..101cf14215a0 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -38,36 +38,19 @@
#include "rds.h"
#include "tcp.h"
-int rds_tcp_keepalive(struct socket *sock)
+void rds_tcp_keepalive(struct socket *sock)
{
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
- int keepalive = 1;
- int ret = 0;
-
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&keepalive, sizeof(keepalive));
- if (ret < 0)
- goto bail;
-
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
- if (ret < 0)
- goto bail;
-
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- if (ret < 0)
- goto bail;
+ sock_set_keepalive(sock->sk);
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
+ tcp_sock_set_keepidle(sock->sk, keepidle);
/* KEEPINTVL is the interval between successive probes. We follow
* the model in xs_tcp_finish_connecting() and re-use keepidle.
*/
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
-bail:
- return ret;
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
}
/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
@@ -111,17 +94,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
return NULL;
}
-void rds_tcp_set_linger(struct socket *sock)
-{
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
-}
-
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -160,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock)
new_sock->ops = sock->ops;
__module_get(new_sock->ops->owner);
- ret = rds_tcp_keepalive(new_sock);
- if (ret < 0)
- goto out;
-
+ rds_tcp_keepalive(new_sock);
rds_tcp_tune(new_sock);
inet = inet_sk(new_sock->sk);
@@ -241,7 +210,7 @@ rst_nsk:
* be pending on it. By setting linger, we achieve the side-effect
* of avoiding TIME_WAIT state on new_sock.
*/
- rds_tcp_set_linger(new_sock);
+ sock_no_linger(new_sock->sk);
kernel_sock_shutdown(new_sock, SHUT_RDWR);
ret = 0;
out:
@@ -303,7 +272,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
}
sock->sk->sk_reuse = SK_CAN_REUSE;
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = sock->sk->sk_data_ready;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 78a2554a4497..8c4d1d6e9249 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -38,23 +38,18 @@
#include "rds.h"
#include "tcp.h"
-static void rds_tcp_cork(struct socket *sock, int val)
-{
- kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val));
-}
-
void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 1);
+ tcp_sock_set_cork(tc->t_sock->sk, true);
}
void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 0);
+ tcp_sock_set_cork(tc->t_sock->sk, false);
}
/* the core send_sem serializes this with other xmit and shutdown */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 15ee92d79581..394189b81849 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -571,6 +571,19 @@ out:
return ret;
}
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+ if (sk->sk_state != RXRPC_UNBOUND)
+ return -EISCONN;
+ if (val > RXRPC_SECURITY_MAX)
+ return -EINVAL;
+ lock_sock(sk);
+ rxrpc_sk(sk)->min_sec_level = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
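A sketch of a kernel caller requiring encrypted calls before the socket is bound (fs/afs is the in-tree user converted elsewhere in this series; the wrapper below is hypothetical):

static int example_require_encryption(struct socket *socket)
{
	/* Must run while the socket is still RXRPC_UNBOUND. */
	return rxrpc_sock_set_min_security_level(socket->sk,
						 RXRPC_SECURITY_ENCRYPT);
}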
+
/*
* set RxRPC socket options
*/
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 01135e54d95d..c8b2097f499c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
struct sock *usk;
- int ret, opt;
+ int ret;
_enter("%p{%d,%d}",
local, local->srx.transport_type, local->srx.transport.family);
@@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
switch (local->srx.transport.family) {
case AF_INET6:
/* we want to receive ICMPv6 errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip6_sock_set_recverr(local->socket->sk);
/* Fall through and set IPv4 options too otherwise we don't get
* errors from IPv4 packets sent through the IPv6 socket.
@@ -171,31 +165,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
/* Fall through */
case AF_INET:
/* we want to receive ICMP errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_recverr(local->socket->sk);
/* we want to set the don't fragment bit */
- opt = IP_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
/* We want receive timestamps. */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ sock_enable_timestamps(local->socket->sk);
break;
default:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f8b632a5c619..1ba43c3df4ad 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
- int ret, opt;
+ int ret;
_enter(",{%d}", skb->len);
@@ -473,18 +473,14 @@ send_fragmentable:
switch (conn->params.local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- opt = IP_PMTUDISC_DONT;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DONT);
ret = kernel_sendmsg(conn->params.local->socket, &msg,
iov, 2, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
- opt = IP_PMTUDISC_DO;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DO);
break;
default:
diff --git a/net/socket.c b/net/socket.c
index 80422fc3c836..81a98b6cbd08 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3625,40 +3625,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
EXPORT_SYMBOL(kernel_getpeername);
/**
- * kernel_getsockopt - get a socket option (kernel space)
- * @sock: socket
- * @level: API level (SOL_SOCKET, ...)
- * @optname: option tag
- * @optval: option value
- * @optlen: option length
- *
- * Assigns the option length to @optlen.
- * Returns 0 or an error.
- */
-
-int kernel_getsockopt(struct socket *sock, int level, int optname,
- char *optval, int *optlen)
-{
- mm_segment_t oldfs = get_fs();
- char __user *uoptval;
- int __user *uoptlen;
- int err;
-
- uoptval = (char __user __force *) optval;
- uoptlen = (int __user __force *) optlen;
-
- set_fs(KERNEL_DS);
- if (level == SOL_SOCKET)
- err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
- else
- err = sock->ops->getsockopt(sock, level, optname, uoptval,
- uoptlen);
- set_fs(oldfs);
- return err;
-}
-EXPORT_SYMBOL(kernel_getsockopt);
-
-/**
* kernel_setsockopt - set a socket option (kernel space)
* @sock: socket
* @level: API level (SOL_SOCKET, ...)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..e7a0037d9b56 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
{
- struct svc_sock *svsk;
- struct socket *sock;
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- svsk = container_of(xprt, struct svc_sock, sk_xprt);
- sock = svsk->sk_sock;
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
+ sock_no_linger(svsk->sk_sock->sk);
}
/*
@@ -603,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = {
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
- int err, level, optname, one = 1;
-
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -624,19 +614,14 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
/* make sure we get destination address info */
switch (svsk->sk_sk->sk_family) {
case AF_INET:
- level = SOL_IP;
- optname = IP_PKTINFO;
+ ip_sock_set_pktinfo(svsk->sk_sock->sk);
break;
case AF_INET6:
- level = SOL_IPV6;
- optname = IPV6_RECVPKTINFO;
+ ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
break;
default:
BUG();
}
- err = kernel_setsockopt(svsk->sk_sock, level, optname,
- (char *)&one, sizeof(one));
- dprintk("svc: kernel_setsockopt returned %d\n", err);
}
/*
@@ -1337,7 +1322,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
- int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1373,11 +1357,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
* getting requests from IPv4 remotes. Those should
* be shunted to a PF_INET listener via rpcbind.
*/
- val = 1;
if (family == PF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
+ ip6_sock_set_v6only(sock->sk);
if (type == SOCK_STREAM)
sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..3a143e250b9a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1594,21 +1594,6 @@ static int xs_get_random_port(void)
return rand + min;
}
-/**
- * xs_set_reuseaddr_port - set the socket's port and address reuse options
- * @sock: socket
- *
- * Note that this function has to be called on all sockets that share the
- * same port, and it must be called before binding.
- */
-static void xs_sock_set_reuseport(struct socket *sock)
-{
- int opt = 1;
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
- (char *)&opt, sizeof(opt));
-}
-
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
@@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
xs_reclassify_socket(family, sock);
if (reuseport)
- xs_sock_set_reuseport(sock);
+ sock_set_reuseport(sock->sk);
err = xs_bind(transport, sock);
if (err) {
@@ -2110,7 +2095,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
unsigned int keepidle;
unsigned int keepcnt;
- unsigned int opt_on = 1;
unsigned int timeo;
spin_lock(&xprt->transport_lock);
@@ -2122,18 +2106,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
spin_unlock(&xprt->transport_lock);
/* TCP Keepalive options */
- kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&opt_on, sizeof(opt_on));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
+ sock_set_keepalive(sock->sk);
+ tcp_sock_set_keepidle(sock->sk, keepidle);
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
/* TCP user timeout (see RFC5482) */
- kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (char *)&timeo, sizeof(timeo));
+ tcp_sock_set_user_timeout(sock->sk, timeo);
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@@ -2171,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
- unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
/* Avoid temporary address, they are bad for long-lived
* connections such as NFS mounts.
@@ -2180,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
* knowledge about the normal duration of connections,
* MAY override this as appropriate.
*/
- kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
- (char *)&addr_pref, sizeof(addr_pref));
+ if (xs_addr(xprt)->sa_family == PF_INET6) {
+ ip6_sock_set_addr_preferences(sk,
+ IPV6_PREFER_SRC_PUBLIC);
+ }
xs_tcp_set_socket_timeouts(xprt, sock);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d6b67d07d22e..3734cdbedc9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -196,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk)
return msg_importance(&tsk->phdr);
}
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
{
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&tsk->phdr, (u32)imp);
- return 0;
+ return container_of(sk, struct tipc_sock, sk);
}
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
{
- return container_of(sk, struct tipc_sock, sk);
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+ return 0;
}
static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -2721,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
/* Connect new socket to its peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tsk_set_importance(new_tsock, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
new_tsock->conn_type = msg_nametype(msg);
new_tsock->conn_instance = msg_nameinst(msg);
@@ -3139,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tsk_set_importance(tsk, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee..b11575afc66f 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk);
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tsk_set_importance(struct sock *sk, int imp);
+
#endif
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 446af7bbd13e..1489cfb941d8 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
{
- int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
- rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&imp, sizeof(imp));
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
if (rc < 0)
goto err;
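Unlike the sock_*()/tcp_sock_*() helpers above, the newly exported tsk_set_importance() does not take the socket lock itself, so callers must wrap it, as the topsrv hunk above does; a hypothetical wrapper:

static int example_set_critical_importance(struct sock *sk)
{
	int rc;

	lock_sock(sk);
	rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
	release_sock(sk);
	return rc;
}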
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 1e2f61262e4e..dee567f7576a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
TESTS="${ALL_TESTS}"
@@ -767,6 +767,62 @@ ipv6_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv6_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 2001:db8:91::2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv6_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv6_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 torture test"
+}
+
+
ipv4_fcnal()
{
local rc
@@ -1313,6 +1369,61 @@ ipv4_compat_mode()
sysctl_nexthop_compat_mode_set 1 "IPv4"
}
+ipv4_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 172.16.1.2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv4_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv4_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 torture test"
+}
+
basic()
{
echo