From b18cba09e374637a0a3759d856a6bca94c133952 Mon Sep 17 00:00:00 2001
From: minoura makoto <minoura@valinux.co.jp>
Date: Tue, 13 Dec 2022 13:14:31 +0900
Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall

Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid
but different gss service") introduced `auth` argument to
__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
since it (and auth->service) was not (yet) determined.

When multiple upcalls with the same uid and different service are
ongoing, it could happen that __gss_find_upcall(), which returns the
first match found in the pipe->in_downcall list, could not find the
correct gss_msg corresponding to the downcall we are looking for.
Moreover, it might return a msg which is not sent to rpc.gssd yet.

We could see mount.nfs process hung in D state with multiple mount.nfs
are executed in parallel.  The call trace below is of CentOS 7.9
kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
elrepo kernel-ml-6.0.7-1.el7.

PID: 71258  TASK: ffff91ebd4be0000  CPU: 36  COMMAND: "mount.nfs"
 #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
 #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
 #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
 #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
[sunrpc]
 #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
 #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
 #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
 #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
 #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
 #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]

The scenario is like this. Let's say there are two upcalls for
services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.

When rpc.gssd reads pipe to get the upcall msg corresponding to
service B from pipe->pipe and then writes the response, in
gss_pipe_downcall the msg corresponding to service A will be picked
because only uid is used to find the msg and it is before the one for
B in pipe->in_downcall.  And the process waiting for the msg
corresponding to service A will be woken up.

Actual scheduing of that process might be after rpc.gssd processes the
next msg.  In rpc_pipe_generic_upcall it clears msg->errno (for A).
The process is scheduled to see gss_msg->ctx == NULL and
gss_msg->msg.errno == 0, therefore it cannot break the loop in
gss_create_upcall and is never woken up after that.

This patch adds a simple check to ensure that a msg which is not
sent to rpc.gssd yet is not chosen as the matching upcall upon
receiving a downcall.

Signed-off-by: minoura makoto <minoura@valinux.co.jp>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Cc: Trond Myklebust <trondmy@hammerspace.com>
Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index cd188a527d16..3b35b6f6533a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
 				       char __user *, size_t);
 extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
 
+/* returns true if the msg is in-flight, i.e., already eaten by the peer */
+static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
+	return (msg->copied != 0 && list_empty(&msg->list));
+}
+
 struct rpc_clnt;
 extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
 extern int rpc_remove_client_dir(struct rpc_clnt *);
-- 
cgit 


From 1f0ae22ab470946143485a02cc1cd7e05c0f9120 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 12 Dec 2022 10:42:15 +0200
Subject: net/mlx5: E-Switch, properly handle ingress tagged packets on VST

Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already
present in the frame. Previous VST mode behavior was to drop packets or
override existing tag, depending on the device version.

In this patch we fix this behavior by correctly building the HW steering
rule with a push vlan action, or for older devices we ask the FW to stack
the vlan when a vlan is already present.

Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes")
Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/esw/acl/egress_lgcy.c       |  7 ++++-
 .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c      | 33 ++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 30 ++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  6 ++++
 include/linux/mlx5/device.h                        |  5 ++++
 include/linux/mlx5/mlx5_ifc.h                      |  3 +-
 6 files changed, 68 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 60a73990017c..6b4c9ffad95b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
 int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 			      struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	struct mlx5_flow_destination drop_ctr_dst = {};
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_fc *drop_counter = NULL;
@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 	 */
 	int table_size = 2;
 	int dest_num = 0;
+	int actions_flag;
 	int err = 0;
 
 	if (vport->egress.legacy.drop_counter) {
@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 		  vport->vport, vport->info.vlan, vport->info.qos);
 
 	/* Allowed vlan rule */
+	actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	if (vst_mode_steering)
+		actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
 	err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
-					 MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+					 actions_flag);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index b1a5199260f6..093ed86a0acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
 int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 			       struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	struct mlx5_flow_destination drop_ctr_dst = {};
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_spec *spec = NULL;
 	struct mlx5_fc *counter = NULL;
+	bool vst_check_cvlan = false;
+	bool vst_push_cvlan = false;
 	/* The ingress acl table contains 4 groups
 	 * (2 active rules at the same time -
 	 *      1 allow rule from one of the first 3 groups.
@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		goto out;
 	}
 
-	if (vport->info.vlan || vport->info.qos)
+	if ((vport->info.vlan || vport->info.qos)) {
+		if (vst_mode_steering)
+			vst_push_cvlan = true;
+		else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
+			vst_check_cvlan = true;
+	}
+
+	if (vst_check_cvlan || vport->info.spoofchk)
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+	/* Create ingress allow rule */
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	if (vst_push_cvlan) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+		flow_act.vlan[0].prio = vport->info.qos;
+		flow_act.vlan[0].vid = vport->info.vlan;
+		flow_act.vlan[0].ethtype = ETH_P_8021Q;
+	}
+
+	if (vst_check_cvlan)
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 
@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		ether_addr_copy(smac_v, vport->info.mac);
 	}
 
-	/* Create ingress allow rule */
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
 							&flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.allow_rule)) {
@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		goto out;
 	}
 
+	if (!vst_check_cvlan && !vport->info.spoofchk)
+		goto out;
+
 	memset(&flow_act, 0, sizeof(flow_act));
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	/* Attach drop flow counter */
@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 	return 0;
 
 out:
-	esw_acl_ingress_lgcy_cleanup(esw, vport);
+	if (err)
+		esw_acl_ingress_lgcy_cleanup(esw, vport);
 	kvfree(spec);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 527e4bffda8d..0dfd5742c6fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 			 esw_vport_context.vport_cvlan_strip, 1);
 
 	if (set_flags & SET_VLAN_INSERT) {
-		/* insert only if no vlan in packet */
-		MLX5_SET(modify_esw_vport_context_in, in,
-			 esw_vport_context.vport_cvlan_insert, 1);
-
+		if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
+			/* insert either if vlan exist in packet or not */
+			MLX5_SET(modify_esw_vport_context_in, in,
+				 esw_vport_context.vport_cvlan_insert,
+				 MLX5_VPORT_CVLAN_INSERT_ALWAYS);
+		} else {
+			/* insert only if no vlan in packet */
+			MLX5_SET(modify_esw_vport_context_in, in,
+				 esw_vport_context.vport_cvlan_insert,
+				 MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
+		}
 		MLX5_SET(modify_esw_vport_context_in, in,
 			 esw_vport_context.cvlan_pcp, qos);
 		MLX5_SET(modify_esw_vport_context_in, in,
@@ -809,6 +816,7 @@ out_free:
 
 static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	u16 vport_num = vport->vport;
 	int flags;
 	int err;
@@ -839,8 +847,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 
 	flags = (vport->info.vlan || vport->info.qos) ?
 		SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
-	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
-			       vport->info.qos, flags);
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
+		modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+				       vport->info.qos, flags);
 
 	return 0;
 
@@ -1848,6 +1857,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  u16 vport, u16 vlan, u8 qos, u8 set_flags)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	int err = 0;
 
 	if (IS_ERR(evport))
@@ -1855,9 +1865,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 	if (vlan > 4095 || qos > 7)
 		return -EINVAL;
 
-	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
-	if (err)
-		return err;
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
+		err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+		if (err)
+			return err;
+	}
 
 	evport->info.vlan = vlan;
 	evport->info.qos = qos;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5a85a5d32be7..92644fbb5081 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -527,6 +527,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  u16 vport, u16 vlan, u8 qos, u8 set_flags);
 
+static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
+{
+	return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
+		MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
+}
+
 static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
 						       u8 vlan_depth)
 {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5fe5d198b57a..29d4b201c7b2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1090,6 +1090,11 @@ enum {
 	MLX5_VPORT_ADMIN_STATE_AUTO  = 0x2,
 };
 
+enum {
+	MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN  = 0x1,
+	MLX5_VPORT_CVLAN_INSERT_ALWAYS         = 0x3,
+};
+
 enum {
 	MLX5_L3_PROT_TYPE_IPV4		= 0,
 	MLX5_L3_PROT_TYPE_IPV6		= 1,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3d1c62c98dd..a9ee7bc59c90 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -913,7 +913,8 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_at_5[0x2];
+	u8         reserved_at_5[0x1];
+	u8         vport_cvlan_insert_always[0x1];
 	u8         esw_shared_ingress_acl[0x1];
 	u8         esw_uplink_ingress_acl[0x1];
 	u8         root_ft_on_other_esw[0x1];
-- 
cgit 


From d9dba91be71f03cc75bcf39fc0d5d99ff33f1ae0 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Thu, 29 Dec 2022 17:33:33 +0100
Subject: net: dsa: tag_qca: fix wrong MGMT_DATA2 size

It was discovered that MGMT_DATA2 can contain up to 28 bytes of data
instead of the 12 bytes written in the Documentation by accounting the
limit of 16 bytes declared in Documentation subtracting the first 4 byte
in the packet header.

Update the define with the real world value.

Tested-by: Ronald Wahl <ronald.wahl@raritan.com>
Fixes: c2ee8181fddb ("net: dsa: tag_qca: add define for handling mgmt Ethernet packet")
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Cc: stable@vger.kernel.org # v5.18+
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/tag_qca.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index b1b5720d89a5..ee657452f122 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -45,8 +45,8 @@ struct sk_buff;
 					QCA_HDR_MGMT_COMMAND_LEN + \
 					QCA_HDR_MGMT_DATA1_LEN)
 
-#define QCA_HDR_MGMT_DATA2_LEN		12 /* Other 12 byte for the mdio data */
-#define QCA_HDR_MGMT_PADDING_LEN	34 /* Padding to reach the min Ethernet packet */
+#define QCA_HDR_MGMT_DATA2_LEN		28 /* Other 28 byte for the mdio data */
+#define QCA_HDR_MGMT_PADDING_LEN	18 /* Padding to reach the min Ethernet packet */
 
 #define QCA_HDR_MGMT_PKT_LEN		(QCA_HDR_MGMT_HEADER_LEN + \
 					QCA_HDR_LEN + \
-- 
cgit 


From 6d4cfcf97986cc67635630a2bc1f8d5c92ecdbba Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Thu, 29 Dec 2022 15:21:20 -0500
Subject: net: phy: Update documentation for get_rate_matching

Now that phylink no longer calls phy_get_rate_matching with
PHY_INTERFACE_MODE_NA, phys no longer need to support it. Remove the
documentation mandating support.

Fixes: 7642cc28fd37 ("net: phylink: fix PHY validation with rate adaption")
Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 71eeb4e3b1fd..6378c997ded5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -826,10 +826,7 @@ struct phy_driver {
 	 * whether to advertise lower-speed modes for that interface. It is
 	 * assumed that if a rate matching mode is supported on an interface,
 	 * then that interface's rate can be adapted to all slower link speeds
-	 * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy
-	 * supports any kind of rate matching for any interface, then it must
-	 * return that rate matching mode (preferring %RATE_MATCH_PAUSE to
-	 * %RATE_MATCH_CRS). If the interface is not supported, this should
+	 * supported by the phy. If the interface is not supported, this should
 	 * return %RATE_MATCH_NONE.
 	 */
 	int (*get_rate_matching)(struct phy_device *phydev,
-- 
cgit 


From d19ab1f785d0b6b9f709799f0938658903821ba1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 Dec 2022 15:13:34 +0100
Subject: mtd: cfi: allow building spi-intel standalone

When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver
fails to build, as it includes the shared CFI header:

include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp]
   62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work.

linux/mtd/spi-nor.h does not actually need to include cfi.h, so
remove the inclusion here to fix the warning. This uncovers a
missing #include in spi-nor/core.c so add that there to
prevent a different build issue.

Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Tokunori Ikegami <ikegami.t@gmail.com>
Acked-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org
---
 drivers/mtd/spi-nor/core.c  | 1 +
 include/linux/mtd/spi-nor.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index d8703d7dfd0a..d67c926bca8b 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/math64.h>
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 25765556223a..a3f8cdca90c8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -7,7 +7,6 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 #include <linux/bitops.h>
-#include <linux/mtd/cfi.h>
 #include <linux/mtd/mtd.h>
 #include <linux/spi/spi-mem.h>
 
-- 
cgit 


From 8e1858710d9a71d88acd922f2e95d1eddb90eea0 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Thu, 17 Nov 2022 10:57:53 +0800
Subject: ceph: avoid use-after-free in ceph_fl_release_lock()

When ceph releasing the file_lock it will try to get the inode pointer
from the fl->fl_file, which the memory could already be released by
another thread in filp_close(). Because in VFS layer the fl->fl_file
doesn't increase the file's reference counter.

Will switch to use ceph dedicate lock info to track the inode.

And in ceph_fl_release_lock() we should skip all the operations if the
fl->fl_u.ceph.inode is not set, which should come from the request
file_lock. And we will set fl->fl_u.ceph.inode when inserting it to the
inode lock list, which is when copying the lock.

Link: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c    | 20 ++++++++++++++++++--
 include/linux/fs.h |  3 +++
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 6b3b8c299c17..9c8dc8a55e7e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
 	struct inode *inode = file_inode(dst->fl_file);
 	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	dst->fl_u.ceph.inode = igrab(inode);
 }
 
+/*
+ * Do not use the 'fl->fl_file' in release function, which
+ * is possibly already released by another thread.
+ */
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
-	struct inode *inode = file_inode(fl->fl_file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct inode *inode = fl->fl_u.ceph.inode;
+	struct ceph_inode_info *ci;
+
+	/*
+	 * If inode is NULL it should be a request file_lock,
+	 * nothing we can do.
+	 */
+	if (!inode)
+		return;
+
+	ci = ceph_inode(inode);
 	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
 		/* clear error when all locks are released */
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
 		spin_unlock(&ci->i_ceph_lock);
 	}
+	fl->fl_u.ceph.inode = NULL;
+	iput(inode);
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 066555ad1bf8..c1769a2c5d70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1119,6 +1119,9 @@ struct file_lock {
 			int state;		/* state of grant or error if -ve */
 			unsigned int	debug_id;
 		} afs;
+		struct {
+			struct inode *inode;
+		} ceph;
 	} fl_u;
 } __randomize_layout;
 
-- 
cgit 


From 5e29dc36bd5e2166b834ceb19990d9e68a734d7d Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Fri, 30 Dec 2022 13:24:38 +0100
Subject: netfilter: ipset: Rework long task execution when adding/deleting
 entries

When adding/deleting large number of elements in one step in ipset, it can
take a reasonable amount of time and can result in soft lockup errors. The
patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of
consecutive elements to add/delete") tried to fix it by limiting the max
elements to process at all. However it was not enough, it is still possible
that we get hung tasks. Lowering the limit is not reasonable, so the
approach in this patch is as follows: rely on the method used at resizing
sets and save the state when we reach a smaller internal batch limit,
unlock/lock and proceed from the saved state. Thus we can avoid long
continuous tasks and at the same time removed the limit to add/delete large
number of elements in one step.

The nfnl mutex is held during the whole operation which prevents one to
issue other ipset commands in parallel.

Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h      |  2 +-
 net/netfilter/ipset/ip_set_core.c           |  7 ++++---
 net/netfilter/ipset/ip_set_hash_ip.c        | 14 +++++++-------
 net/netfilter/ipset/ip_set_hash_ipmark.c    | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipport.c    | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 ++++++++-----
 net/netfilter/ipset/ip_set_hash_net.c       | 17 +++++++----------
 net/netfilter/ipset/ip_set_hash_netiface.c  | 15 ++++++---------
 net/netfilter/ipset/ip_set_hash_netnet.c    | 23 +++++++----------------
 net/netfilter/ipset/ip_set_hash_netport.c   | 19 +++++++------------
 11 files changed, 68 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ab934ad951a8..e8c350a3ade1 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -197,7 +197,7 @@ struct ip_set_region {
 };
 
 /* Max range where every element is added/deleted in one step */
-#define IPSET_MAX_RANGE		(1<<20)
+#define IPSET_MAX_RANGE		(1<<14)
 
 /* The max revision number supported by any set type + 1 */
 #define IPSET_REVISION_MAX	9
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
 		ip_set_unlock(set);
 		retried = true;
-	} while (ret == -EAGAIN &&
-		 set->variant->resize &&
-		 (ret = set->variant->resize(set, retried)) == 0);
+	} while (ret == -ERANGE ||
+		 (ret == -EAGAIN &&
+		  set->variant->resize &&
+		  (ret = set->variant->resize(set, retried)) == 0));
 
 	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
 		return 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
 hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ip4 *h = set->data;
+	struct hash_ip4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = { 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, hosts;
+	u32 ip = 0, ip_to = 0, hosts, i = 0;
 	int ret = 0;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
 
-	/* 64bit division is not allowed on 32bit */
-	if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; ip <= ip_to;) {
+	for (; ip <= ip_to; i++) {
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_ip4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
 hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipmark4 *h = set->data;
+	struct hash_ipmark4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipmark4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0;
+	u32 ip, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ip_set_mask_from_to(ip, ip_to, cidr);
 	}
 
-	if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; ip <= ip_to; ip++) {
+	for (; ip <= ip_to; ip++, i++) {
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_ipmark4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
 hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipport4 *h = set->data;
+	struct hash_ipport4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { .ip = 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
 	bool with_ports = false;
 	int ret;
 
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.ip = htonl(ip);
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_ipport4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
 hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipportip4 *h = set->data;
+	struct hash_ipportip4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { .ip = 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
 	bool with_ports = false;
 	int ret;
 
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.ip = htonl(ip);
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_ipportip4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
 hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipportnet4 *h = set->data;
+	struct hash_ipportnet4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
-	u32 ip2_from = 0, ip2_to = 0, ip2;
+	u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	ip2_to = ip2_from;
 	if (tb[IPSET_ATTR_IP2_TO]) {
 		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		for (; p <= port_to; p++) {
 			e.port = htons(p);
 			do {
+				i++;
 				e.ip2 = htonl(ip2);
 				ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
 				e.cidr = cidr - 1;
+				if (i > IPSET_MAX_RANGE) {
+					hash_ipportnet4_data_next(&h->next,
+								  &e);
+					return -ERANGE;
+				}
 				ret = adtfn(set, &e, &ext, &ext, flags);
 
 				if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
 hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_net4 *h = set->data;
+	struct hash_net4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = { .cidr = HOST_MASK };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, ipn, n = 0;
+	u32 ip = 0, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	do {
+		i++;
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_net4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, ipn, n = 0;
+	u32 ip = 0, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	do {
+		i++;
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_netiface4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
 
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
 hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_netnet4 *h = set->data;
+	struct hash_netnet4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0;
-	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
-	u64 n = 0, m = 0;
+	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
-		n++;
-	} while (ipn++ < ip_to);
-	ipn = ip2_from;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
-		m++;
-	} while (ipn++ < ip2_to);
-
-	if (n*m > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried) {
 		ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.ip[0] = htonl(ip);
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		do {
+			i++;
 			e.ip[1] = htonl(ip2);
+			if (i > IPSET_MAX_RANGE) {
+				hash_netnet4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
 hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_netport4 *h = set->data;
+	struct hash_netport4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
-	u64 n = 0;
+	u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried) {
 		ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.ip = htonl(ip);
 		ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
 		e.cidr = cidr - 1;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_netport4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-- 
cgit 


From 59b745bb4e0bd445366c45b8df6b51b69134f4f5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 13:49:54 -0700
Subject: io_uring: move 'poll_multi_queue' bool in io_ring_ctx

The cacheline section holding this variable has two gaps, where one is
caused by this bool not packing well with structs. This causes it to
blow into the next cacheline. Move the variable, shrinking io_ring_ctx
by a full cacheline in size.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dcd8a563ab52..128a67a40065 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -292,6 +292,8 @@ struct io_ring_ctx {
 	struct {
 		spinlock_t		completion_lock;
 
+		bool			poll_multi_queue;
+
 		/*
 		 * ->iopoll_list is protected by the ctx->uring_lock for
 		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -300,7 +302,6 @@ struct io_ring_ctx {
 		 */
 		struct io_wq_work_list	iopoll_list;
 		struct io_hash_table	cancel_table;
-		bool			poll_multi_queue;
 
 		struct llist_head	work_llist;
 
-- 
cgit 


From ee4b4e2248565babfba807d82c0f3e00c392a4c0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:43:27 -0700
Subject: Revert "block: bio_copy_data_iter"

This reverts commit db1c7d77976775483a8ef240b4c705f113e13ea1.

We're reinstating the pktcdvd driver, which needs this API.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 37 ++++++++++++++++++++++---------------
 include/linux/bio.h |  2 ++
 2 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 5f96fcae3f75..ab59a491a883 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1401,6 +1401,27 @@ void __bio_advance(struct bio *bio, unsigned bytes)
 }
 EXPORT_SYMBOL(__bio_advance);
 
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			struct bio *src, struct bvec_iter *src_iter)
+{
+	while (src_iter->bi_size && dst_iter->bi_size) {
+		struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+		struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+		void *src_buf = bvec_kmap_local(&src_bv);
+		void *dst_buf = bvec_kmap_local(&dst_bv);
+
+		memcpy(dst_buf, src_buf, bytes);
+
+		kunmap_local(dst_buf);
+		kunmap_local(src_buf);
+
+		bio_advance_iter_single(src, src_iter, bytes);
+		bio_advance_iter_single(dst, dst_iter, bytes);
+	}
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
+
 /**
  * bio_copy_data - copy contents of data buffers from one bio to another
  * @src: source bio
@@ -1414,21 +1435,7 @@ void bio_copy_data(struct bio *dst, struct bio *src)
 	struct bvec_iter src_iter = src->bi_iter;
 	struct bvec_iter dst_iter = dst->bi_iter;
 
-	while (src_iter.bi_size && dst_iter.bi_size) {
-		struct bio_vec src_bv = bio_iter_iovec(src, src_iter);
-		struct bio_vec dst_bv = bio_iter_iovec(dst, dst_iter);
-		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
-		void *src_buf = bvec_kmap_local(&src_bv);
-		void *dst_buf = bvec_kmap_local(&dst_bv);
-
-		memcpy(dst_buf, src_buf, bytes);
-
-		kunmap_local(dst_buf);
-		kunmap_local(src_buf);
-
-		bio_advance_iter_single(src, &src_iter, bytes);
-		bio_advance_iter_single(dst, &dst_iter, bytes);
-	}
+	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
 }
 EXPORT_SYMBOL(bio_copy_data);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 22078a28d7cb..c1da63f6c808 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -475,6 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			       struct bio *src, struct bvec_iter *src_iter);
 extern void bio_copy_data(struct bio *dst, struct bio *src);
 extern void bio_free_pages(struct bio *bio);
 void guard_bio_eod(struct bio *bio);
-- 
cgit 


From 050a4f341f35bf51db321c7f68700f9e0b1a7552 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:44:02 -0700
Subject: Revert "block: remove devnode callback from struct
 block_device_operations"

This reverts commit 85d6ce58e493ac8b7122e2fbe3f41b94d6ebdc11.

We're reinstating the pktcdvd driver, which needs this API.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c          | 11 +++++++++++
 include/linux/blkdev.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 08f76135a637..14329dc278b2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1201,10 +1201,21 @@ struct class block_class = {
 	.dev_uevent	= block_uevent,
 };
 
+static char *block_devnode(struct device *dev, umode_t *mode,
+			   kuid_t *uid, kgid_t *gid)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (disk->fops->devnode)
+		return disk->fops->devnode(disk, mode);
+	return NULL;
+}
+
 const struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
+	.devnode	= block_devnode,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 301cf1cf4f2f..43d4e073b111 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1395,6 +1395,7 @@ struct block_device_operations {
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
+	char *(*devnode)(struct gendisk *disk, umode_t *mode);
 	/* returns the length of the identifier or a negative errno: */
 	int (*get_unique_id)(struct gendisk *disk, u8 id[16],
 			enum blk_unique_id id_type);
-- 
cgit 


From 4b83e99ee7092df37a5cf292fde976ebc475ea63 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:44:13 -0700
Subject: Revert "pktcdvd: remove driver."
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit f40eb99897af665f11858dd7b56edcb62c3f3c67.

There are apparently still users out there of this driver. While we'd
love to remove it to ease the maintenance burden, let's reinstate it
for now until better (userspace) solutions can be developed.

Link: https://lore.kernel.org/lkml/20230104190115.ceglfefco475ev6c@pali/
Reported-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/testing/debugfs-pktcdvd     |   18 +
 Documentation/ABI/testing/sysfs-class-pktcdvd |   97 +
 MAINTAINERS                                   |    7 +
 drivers/block/Kconfig                         |   43 +
 drivers/block/Makefile                        |    1 +
 drivers/block/pktcdvd.c                       | 2944 +++++++++++++++++++++++++
 include/linux/pktcdvd.h                       |  197 ++
 include/uapi/linux/pktcdvd.h                  |  112 +
 8 files changed, 3419 insertions(+)
 create mode 100644 Documentation/ABI/testing/debugfs-pktcdvd
 create mode 100644 Documentation/ABI/testing/sysfs-class-pktcdvd
 create mode 100644 drivers/block/pktcdvd.c
 create mode 100644 include/linux/pktcdvd.h
 create mode 100644 include/uapi/linux/pktcdvd.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
new file mode 100644
index 000000000000..f6f65a4faea0
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -0,0 +1,18 @@
+What:           /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
+Date:           Oct. 2006
+KernelVersion:  2.6.20
+Contact:        Thomas Maier <balagi@justmail.de>
+Description:
+
+The pktcdvd module (packet writing driver) creates
+these files in debugfs:
+
+/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
+
+    ====            ====== ====================================
+    info            0444   Lots of driver statistics and infos.
+    ====            ====== ====================================
+
+Example::
+
+    cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
new file mode 100644
index 000000000000..ba1ce626591d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-pktcdvd
@@ -0,0 +1,97 @@
+sysfs interface
+---------------
+The pktcdvd module (packet writing driver) creates the following files in the
+sysfs: (<devid> is in the format major:minor)
+
+What:		/sys/class/pktcdvd/add
+What:		/sys/class/pktcdvd/remove
+What:		/sys/class/pktcdvd/device_map
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+
+		==========	==============================================
+		add		(WO) Write a block device id (major:minor) to
+				create a new pktcdvd device and map it to the
+				block device.
+
+		remove		(WO) Write the pktcdvd device id (major:minor)
+				to remove the pktcdvd device.
+
+		device_map	(RO) Shows the device mapping in format:
+				pktcdvd[0-7] <pktdevid> <blkdevid>
+		==========	==============================================
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/dev
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/uevent
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		dev:	(RO) Device id
+
+		uevent:	(WO) To send a uevent
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		packets_started:	(RO) Number of started packets.
+
+		packets_finished:	(RO) Number of finished packets.
+
+		kb_written:		(RO) kBytes written.
+
+		kb_read:		(RO) kBytes read.
+
+		kb_read_gather:		(RO) kBytes read to fill write packets.
+
+		reset:			(WO) Write any value to it to reset
+					pktcdvd device statistic values, like
+					bytes read/written.
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		==============	================================================
+		size		(RO) Contains the size of the bio write queue.
+
+		congestion_off	(RW) If bio write queue size is below this mark,
+				accept new bio requests from the block layer.
+
+		congestion_on	(RW) If bio write queue size is higher as this
+				mark, do no longer accept bio write requests
+				from the block layer and wait till the pktcdvd
+				device has processed enough bio's so that bio
+				write queue size is below congestion off mark.
+				A value of <= 0 disables congestion control.
+		==============	================================================
+
+
+Example:
+--------
+To use the pktcdvd sysfs interface directly, you can do::
+
+    # create a new pktcdvd device mapped to /dev/hdc
+    echo "22:0" >/sys/class/pktcdvd/add
+    cat /sys/class/pktcdvd/device_map
+    # assuming device pktcdvd0 was created, look at stat's
+    cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
+    # print the device id of the mapped block device
+    fgrep pktcdvd0 /sys/class/pktcdvd/device_map
+    # remove device, using pktcdvd0 device id   253:0
+    echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/MAINTAINERS b/MAINTAINERS
index d53b3a6cdc67..3ef137fea4f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16520,6 +16520,13 @@ S:	Supported
 F:	Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
 F:	drivers/input/keyboard/pinephone-keyboard.c
 
+PKTCDVD DRIVER
+M:	linux-block@vger.kernel.org
+S:	Orphan
+F:	drivers/block/pktcdvd.c
+F:	include/linux/pktcdvd.h
+F:	include/uapi/linux/pktcdvd.h
+
 PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
 M:	Tomasz Duszynski <tduszyns@gmail.com>
 S:	Maintained
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a2184b428493..a41145d52de9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE
 	  The default value is 4096 kilobytes. Only change this if you know
 	  what you are doing.
 
+config CDROM_PKTCDVD
+	tristate "Packet writing on CD/DVD media (DEPRECATED)"
+	depends on !UML
+	depends on SCSI
+	select CDROM
+	help
+	  Note: This driver is deprecated and will be removed from the
+	  kernel in the near future!
+
+	  If you have a CDROM/DVD drive that supports packet writing, say
+	  Y to include support. It should work with any MMC/Mt Fuji
+	  compliant ATAPI or SCSI drive, which is just about any newer
+	  DVD/CD writer.
+
+	  Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
+	  is possible.
+	  DVD-RW disks must be in restricted overwrite mode.
+
+	  See the file <file:Documentation/cdrom/packet-writing.rst>
+	  for further information on the use of this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+	int "Free buffers for data gathering"
+	depends on CDROM_PKTCDVD
+	default "8"
+	help
+	  This controls the maximum number of active concurrent packets. More
+	  concurrent packets can increase write performance, but also require
+	  more memory. Each concurrent packet will require approximately 64Kb
+	  of non-swappable kernel memory, memory which will be allocated when
+	  a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+	bool "Enable write caching"
+	depends on CDROM_PKTCDVD
+	help
+	  If enabled, write caching will be set for the CD-R/W device. For now
+	  this option is dangerous unless the CD-RW media is known good, as we
+	  don't do deferred write error handling yet.
+
 config ATA_OVER_ETH
 	tristate "ATA over Ethernet support"
 	depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 962ee65d8ca3..101612cba303 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM)	+= z2ram.o
 obj-$(CONFIG_N64CART)		+= n64cart.o
 obj-$(CONFIG_BLK_DEV_RAM)	+= brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)	+= loop.o
+obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
 
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
new file mode 100644
index 000000000000..4cea3b08087e
--- /dev/null
+++ b/drivers/block/pktcdvd.c
@@ -0,0 +1,2944 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ * Copyright (C) 2006 Thomas Maier <balagi@justmail.de>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and
+ * DVD-RAM devices.
+ *
+ * Theory of operation:
+ *
+ * At the lowest level, there is the standard driver for the CD/DVD device,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
+ * but it doesn't know anything about the special restrictions that apply to
+ * packet writing. One restriction is that write requests must be aligned to
+ * packet boundaries on the physical media, and the size of a write request
+ * must be equal to the packet size. Another restriction is that a
+ * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read
+ * command, if the previous command was a write.
+ *
+ * The purpose of the packet writing driver is to hide these restrictions from
+ * higher layers, such as file systems, and present a block device that can be
+ * randomly read and written using 2kB-sized blocks.
+ *
+ * The lowest layer in the packet writing driver is the packet I/O scheduler.
+ * Its data is defined by the struct packet_iosched and includes two bio
+ * queues with pending read and write requests. These queues are processed
+ * by the pkt_iosched_process_queue() function. The write requests in this
+ * queue are already properly aligned and sized. This layer is responsible for
+ * issuing the flush cache commands and scheduling the I/O in a good order.
+ *
+ * The next layer transforms unaligned write requests to aligned writes. This
+ * transformation requires reading missing pieces of data from the underlying
+ * block device, assembling the pieces to full packets and queuing them to the
+ * packet I/O scheduler.
+ *
+ * At the top layer there is a custom ->submit_bio function that forwards
+ * read requests directly to the iosched queue and puts write requests in the
+ * unaligned write queue. A kernel thread performs the necessary read
+ * gathering to convert the unaligned writes to aligned writes and then feeds
+ * them to the packet I/O scheduler.
+ *
+ *************************************************************************/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/pktcdvd.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/freezer.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/backing-dev.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/nospec.h>
+#include <linux/uaccess.h>
+
+#define DRIVER_NAME	"pktcdvd"
+
+#define pkt_err(pd, fmt, ...)						\
+	pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_notice(pd, fmt, ...)					\
+	pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_info(pd, fmt, ...)						\
+	pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
+
+#define pkt_dbg(level, pd, fmt, ...)					\
+do {									\
+	if (level == 2 && PACKET_DEBUG >= 2)				\
+		pr_notice("%s: %s():" fmt,				\
+			  pd->name, __func__, ##__VA_ARGS__);		\
+	else if (level == 1 && PACKET_DEBUG >= 1)			\
+		pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__);		\
+} while (0)
+
+#define MAX_SPEED 0xffff
+
+static DEFINE_MUTEX(pktcdvd_mutex);
+static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
+static struct proc_dir_entry *pkt_proc;
+static int pktdev_major;
+static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
+static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
+
+static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
+static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
+
+/* forward declaration */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
+static int pkt_remove_dev(dev_t pkt_dev);
+static int pkt_seq_show(struct seq_file *m, void *p);
+
+static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
+{
+	return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
+}
+
+/**********************************************************
+ * sysfs interface for pktcdvd
+ * by (C) 2006  Thomas Maier <balagi@justmail.de>
+ 
+  /sys/class/pktcdvd/pktcdvd[0-7]/
+                     stat/reset
+                     stat/packets_started
+                     stat/packets_finished
+                     stat/kb_written
+                     stat/kb_read
+                     stat/kb_read_gather
+                     write_queue/size
+                     write_queue/congestion_off
+                     write_queue/congestion_on
+ **********************************************************/
+
+static ssize_t packets_started_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started);
+}
+static DEVICE_ATTR_RO(packets_started);
+
+static ssize_t packets_finished_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended);
+}
+static DEVICE_ATTR_RO(packets_finished);
+
+static ssize_t kb_written_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1);
+}
+static DEVICE_ATTR_RO(kb_written);
+
+static ssize_t kb_read_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1);
+}
+static DEVICE_ATTR_RO(kb_read);
+
+static ssize_t kb_read_gather_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1);
+}
+static DEVICE_ATTR_RO(kb_read_gather);
+
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	if (len > 0) {
+		pd->stats.pkt_started = 0;
+		pd->stats.pkt_ended = 0;
+		pd->stats.secs_w = 0;
+		pd->stats.secs_rg = 0;
+		pd->stats.secs_r = 0;
+	}
+	return len;
+}
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *pkt_stat_attrs[] = {
+	&dev_attr_packets_finished.attr,
+	&dev_attr_packets_started.attr,
+	&dev_attr_kb_read.attr,
+	&dev_attr_kb_written.attr,
+	&dev_attr_kb_read_gather.attr,
+	&dev_attr_reset.attr,
+	NULL,
+};
+
+static const struct attribute_group pkt_stat_group = {
+	.name = "stat",
+	.attrs = pkt_stat_attrs,
+};
+
+static ssize_t size_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->bio_queue_size);
+	spin_unlock(&pd->lock);
+	return n;
+}
+static DEVICE_ATTR_RO(size);
+
+static void init_write_congestion_marks(int* lo, int* hi)
+{
+	if (*hi > 0) {
+		*hi = max(*hi, 500);
+		*hi = min(*hi, 1000000);
+		if (*lo <= 0)
+			*lo = *hi - 100;
+		else {
+			*lo = min(*lo, *hi - 100);
+			*lo = max(*lo, 100);
+		}
+	} else {
+		*hi = -1;
+		*lo = -1;
+	}
+}
+
+static ssize_t congestion_off_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->write_congestion_off);
+	spin_unlock(&pd->lock);
+	return n;
+}
+
+static ssize_t congestion_off_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_off = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
+	}
+	return len;
+}
+static DEVICE_ATTR_RW(congestion_off);
+
+static ssize_t congestion_on_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->write_congestion_on);
+	spin_unlock(&pd->lock);
+	return n;
+}
+
+static ssize_t congestion_on_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_on = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
+	}
+	return len;
+}
+static DEVICE_ATTR_RW(congestion_on);
+
+static struct attribute *pkt_wq_attrs[] = {
+	&dev_attr_congestion_on.attr,
+	&dev_attr_congestion_off.attr,
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static const struct attribute_group pkt_wq_group = {
+	.name = "write_queue",
+	.attrs = pkt_wq_attrs,
+};
+
+static const struct attribute_group *pkt_groups[] = {
+	&pkt_stat_group,
+	&pkt_wq_group,
+	NULL,
+};
+
+static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
+{
+	if (class_pktcdvd) {
+		pd->dev = device_create_with_groups(class_pktcdvd, NULL,
+						    MKDEV(0, 0), pd, pkt_groups,
+						    "%s", pd->name);
+		if (IS_ERR(pd->dev))
+			pd->dev = NULL;
+	}
+}
+
+static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
+{
+	if (class_pktcdvd)
+		device_unregister(pd->dev);
+}
+
+
+/********************************************************************
+  /sys/class/pktcdvd/
+                     add            map block device
+                     remove         unmap packet dev
+                     device_map     show mappings
+ *******************************************************************/
+
+static void class_pktcdvd_release(struct class *cls)
+{
+	kfree(cls);
+}
+
+static ssize_t device_map_show(struct class *c, struct class_attribute *attr,
+			       char *data)
+{
+	int n = 0;
+	int idx;
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	for (idx = 0; idx < MAX_WRITERS; idx++) {
+		struct pktcdvd_device *pd = pkt_devs[idx];
+		if (!pd)
+			continue;
+		n += sprintf(data+n, "%s %u:%u %u:%u\n",
+			pd->name,
+			MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
+			MAJOR(pd->bdev->bd_dev),
+			MINOR(pd->bdev->bd_dev));
+	}
+	mutex_unlock(&ctl_mutex);
+	return n;
+}
+static CLASS_ATTR_RO(device_map);
+
+static ssize_t add_store(struct class *c, struct class_attribute *attr,
+			 const char *buf, size_t count)
+{
+	unsigned int major, minor;
+
+	if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+		/* pkt_setup_dev() expects caller to hold reference to self */
+		if (!try_module_get(THIS_MODULE))
+			return -ENODEV;
+
+		pkt_setup_dev(MKDEV(major, minor), NULL);
+
+		module_put(THIS_MODULE);
+
+		return count;
+	}
+
+	return -EINVAL;
+}
+static CLASS_ATTR_WO(add);
+
+static ssize_t remove_store(struct class *c, struct class_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned int major, minor;
+	if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+		pkt_remove_dev(MKDEV(major, minor));
+		return count;
+	}
+	return -EINVAL;
+}
+static CLASS_ATTR_WO(remove);
+
+static struct attribute *class_pktcdvd_attrs[] = {
+	&class_attr_add.attr,
+	&class_attr_remove.attr,
+	&class_attr_device_map.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(class_pktcdvd);
+
+static int pkt_sysfs_init(void)
+{
+	int ret = 0;
+
+	/*
+	 * create control files in sysfs
+	 * /sys/class/pktcdvd/...
+	 */
+	class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL);
+	if (!class_pktcdvd)
+		return -ENOMEM;
+	class_pktcdvd->name = DRIVER_NAME;
+	class_pktcdvd->owner = THIS_MODULE;
+	class_pktcdvd->class_release = class_pktcdvd_release;
+	class_pktcdvd->class_groups = class_pktcdvd_groups;
+	ret = class_register(class_pktcdvd);
+	if (ret) {
+		kfree(class_pktcdvd);
+		class_pktcdvd = NULL;
+		pr_err("failed to create class pktcdvd\n");
+		return ret;
+	}
+	return 0;
+}
+
+static void pkt_sysfs_cleanup(void)
+{
+	if (class_pktcdvd)
+		class_destroy(class_pktcdvd);
+	class_pktcdvd = NULL;
+}
+
+/********************************************************************
+  entries in debugfs
+
+  /sys/kernel/debug/pktcdvd[0-7]/
+			info
+
+ *******************************************************************/
+
+static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
+{
+	return pkt_seq_show(m, p);
+}
+
+static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pkt_debugfs_seq_show, inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+	.open		= pkt_debugfs_fops_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.owner		= THIS_MODULE,
+};
+
+static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
+{
+	if (!pkt_debugfs_root)
+		return;
+	pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
+	if (!pd->dfs_d_root)
+		return;
+
+	pd->dfs_f_info = debugfs_create_file("info", 0444,
+					     pd->dfs_d_root, pd, &debug_fops);
+}
+
+static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
+{
+	if (!pkt_debugfs_root)
+		return;
+	debugfs_remove(pd->dfs_f_info);
+	debugfs_remove(pd->dfs_d_root);
+	pd->dfs_f_info = NULL;
+	pd->dfs_d_root = NULL;
+}
+
+static void pkt_debugfs_init(void)
+{
+	pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
+}
+
+static void pkt_debugfs_cleanup(void)
+{
+	debugfs_remove(pkt_debugfs_root);
+	pkt_debugfs_root = NULL;
+}
+
+/* ----------------------------------------------------------*/
+
+
+static void pkt_bio_finished(struct pktcdvd_device *pd)
+{
+	BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
+	if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
+		pkt_dbg(2, pd, "queue empty\n");
+		atomic_set(&pd->iosched.attention, 1);
+		wake_up(&pd->wqueue);
+	}
+}
+
+/*
+ * Allocate a packet_data struct
+ */
+static struct packet_data *pkt_alloc_packet_data(int frames)
+{
+	int i;
+	struct packet_data *pkt;
+
+	pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL);
+	if (!pkt)
+		goto no_pkt;
+
+	pkt->frames = frames;
+	pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
+	if (!pkt->w_bio)
+		goto no_bio;
+
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
+		pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!pkt->pages[i])
+			goto no_page;
+	}
+
+	spin_lock_init(&pkt->lock);
+	bio_list_init(&pkt->orig_bios);
+
+	for (i = 0; i < frames; i++) {
+		pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+		if (!pkt->r_bios[i])
+			goto no_rd_bio;
+	}
+
+	return pkt;
+
+no_rd_bio:
+	for (i = 0; i < frames; i++)
+		kfree(pkt->r_bios[i]);
+no_page:
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
+		if (pkt->pages[i])
+			__free_page(pkt->pages[i]);
+	kfree(pkt->w_bio);
+no_bio:
+	kfree(pkt);
+no_pkt:
+	return NULL;
+}
+
+/*
+ * Free a packet_data struct
+ */
+static void pkt_free_packet_data(struct packet_data *pkt)
+{
+	int i;
+
+	for (i = 0; i < pkt->frames; i++)
+		kfree(pkt->r_bios[i]);
+	for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
+		__free_page(pkt->pages[i]);
+	kfree(pkt->w_bio);
+	kfree(pkt);
+}
+
+static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *next;
+
+	BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
+
+	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
+		pkt_free_packet_data(pkt);
+	}
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+}
+
+static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
+{
+	struct packet_data *pkt;
+
+	BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
+
+	while (nr_packets > 0) {
+		pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
+		if (!pkt) {
+			pkt_shrink_pktlist(pd);
+			return 0;
+		}
+		pkt->id = nr_packets;
+		pkt->pd = pd;
+		list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+		nr_packets--;
+	}
+	return 1;
+}
+
+static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
+{
+	struct rb_node *n = rb_next(&node->rb_node);
+	if (!n)
+		return NULL;
+	return rb_entry(n, struct pkt_rb_node, rb_node);
+}
+
+static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+	rb_erase(&node->rb_node, &pd->bio_queue);
+	mempool_free(node, &pd->rb_pool);
+	pd->bio_queue_size--;
+	BUG_ON(pd->bio_queue_size < 0);
+}
+
+/*
+ * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
+ */
+static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
+{
+	struct rb_node *n = pd->bio_queue.rb_node;
+	struct rb_node *next;
+	struct pkt_rb_node *tmp;
+
+	if (!n) {
+		BUG_ON(pd->bio_queue_size > 0);
+		return NULL;
+	}
+
+	for (;;) {
+		tmp = rb_entry(n, struct pkt_rb_node, rb_node);
+		if (s <= tmp->bio->bi_iter.bi_sector)
+			next = n->rb_left;
+		else
+			next = n->rb_right;
+		if (!next)
+			break;
+		n = next;
+	}
+
+	if (s > tmp->bio->bi_iter.bi_sector) {
+		tmp = pkt_rbtree_next(tmp);
+		if (!tmp)
+			return NULL;
+	}
+	BUG_ON(s > tmp->bio->bi_iter.bi_sector);
+	return tmp;
+}
+
+/*
+ * Insert a node into the pd->bio_queue rb tree.
+ */
+static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+	struct rb_node **p = &pd->bio_queue.rb_node;
+	struct rb_node *parent = NULL;
+	sector_t s = node->bio->bi_iter.bi_sector;
+	struct pkt_rb_node *tmp;
+
+	while (*p) {
+		parent = *p;
+		tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
+		if (s < tmp->bio->bi_iter.bi_sector)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->rb_node, parent, p);
+	rb_insert_color(&node->rb_node, &pd->bio_queue);
+	pd->bio_queue_size++;
+}
+
+/*
+ * Send a packet_command to the underlying block device and
+ * wait for completion.
+ */
+static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+	struct request_queue *q = bdev_get_queue(pd->bdev);
+	struct scsi_cmnd *scmd;
+	struct request *rq;
+	int ret = 0;
+
+	rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	scmd = blk_mq_rq_to_pdu(rq);
+
+	if (cgc->buflen) {
+		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+				      GFP_NOIO);
+		if (ret)
+			goto out;
+	}
+
+	scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+	memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE);
+
+	rq->timeout = 60*HZ;
+	if (cgc->quiet)
+		rq->rq_flags |= RQF_QUIET;
+
+	blk_execute_rq(rq, false);
+	if (scmd->result)
+		ret = -EIO;
+out:
+	blk_mq_free_request(rq);
+	return ret;
+}
+
+static const char *sense_key_string(__u8 index)
+{
+	static const char * const info[] = {
+		"No sense", "Recovered error", "Not ready",
+		"Medium error", "Hardware error", "Illegal request",
+		"Unit attention", "Data protect", "Blank check",
+	};
+
+	return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
+}
+
+/*
+ * A generic sense dump / resolve mechanism should be implemented across
+ * all ATAPI + SCSI devices.
+ */
+static void pkt_dump_sense(struct pktcdvd_device *pd,
+			   struct packet_command *cgc)
+{
+	struct scsi_sense_hdr *sshdr = cgc->sshdr;
+
+	if (sshdr)
+		pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+			CDROM_PACKET_SIZE, cgc->cmd,
+			sshdr->sense_key, sshdr->asc, sshdr->ascq,
+			sense_key_string(sshdr->sense_key));
+	else
+		pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
+}
+
+/*
+ * flush the drive cache to media
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+	cgc.quiet = 1;
+
+	/*
+	 * the IMMED bit -- we default to not setting it, although that
+	 * would allow a much faster close, this is safer
+	 */
+#if 0
+	cgc.cmd[1] = 1 << 1;
+#endif
+	return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ */
+static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
+				unsigned write_speed, unsigned read_speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	int ret;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_SET_SPEED;
+	cgc.cmd[2] = (read_speed >> 8) & 0xff;
+	cgc.cmd[3] = read_speed & 0xff;
+	cgc.cmd[4] = (write_speed >> 8) & 0xff;
+	cgc.cmd[5] = write_speed & 0xff;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		pkt_dump_sense(pd, &cgc);
+
+	return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
+{
+	spin_lock(&pd->iosched.lock);
+	if (bio_data_dir(bio) == READ)
+		bio_list_add(&pd->iosched.read_queue, bio);
+	else
+		bio_list_add(&pd->iosched.write_queue, bio);
+	spin_unlock(&pd->iosched.lock);
+
+	atomic_set(&pd->iosched.attention, 1);
+	wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests. This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ *   previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ *   than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ *   writes will never be interrupted by a read, but if the drive has to seek
+ *   before the next write, switch to reading instead if there are any pending
+ *   read requests.
+ * - Set the read speed according to current usage pattern. When only reading
+ *   from the device, it's best to use the highest possible read speed, but
+ *   when switching often between reading and writing, it's better to have the
+ *   same read and write speeds.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+
+	if (atomic_read(&pd->iosched.attention) == 0)
+		return;
+	atomic_set(&pd->iosched.attention, 0);
+
+	for (;;) {
+		struct bio *bio;
+		int reads_queued, writes_queued;
+
+		spin_lock(&pd->iosched.lock);
+		reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+		writes_queued = !bio_list_empty(&pd->iosched.write_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!reads_queued && !writes_queued)
+			break;
+
+		if (pd->iosched.writing) {
+			int need_write_seek = 1;
+			spin_lock(&pd->iosched.lock);
+			bio = bio_list_peek(&pd->iosched.write_queue);
+			spin_unlock(&pd->iosched.lock);
+			if (bio && (bio->bi_iter.bi_sector ==
+				    pd->iosched.last_write))
+				need_write_seek = 0;
+			if (need_write_seek && reads_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "write, waiting\n");
+					break;
+				}
+				pkt_flush_cache(pd);
+				pd->iosched.writing = 0;
+			}
+		} else {
+			if (!reads_queued && writes_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "read, waiting\n");
+					break;
+				}
+				pd->iosched.writing = 1;
+			}
+		}
+
+		spin_lock(&pd->iosched.lock);
+		if (pd->iosched.writing)
+			bio = bio_list_pop(&pd->iosched.write_queue);
+		else
+			bio = bio_list_pop(&pd->iosched.read_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!bio)
+			continue;
+
+		if (bio_data_dir(bio) == READ)
+			pd->iosched.successive_reads +=
+				bio->bi_iter.bi_size >> 10;
+		else {
+			pd->iosched.successive_reads = 0;
+			pd->iosched.last_write = bio_end_sector(bio);
+		}
+		if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+			if (pd->read_speed == pd->write_speed) {
+				pd->read_speed = MAX_SPEED;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		} else {
+			if (pd->read_speed != pd->write_speed) {
+				pd->read_speed = pd->write_speed;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		}
+
+		atomic_inc(&pd->cdrw.pending_bios);
+		submit_bio_noacct(bio);
+	}
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
+{
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_segments(q)) {
+		/*
+		 * The cdrom device can handle one segment/frame
+		 */
+		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_segments(q)) {
+		/*
+		 * We can handle this case at the expense of some extra memory
+		 * copies during write operations
+		 */
+		set_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else {
+		pkt_err(pd, "cdrom max_phys_segments too small\n");
+		return -EIO;
+	}
+}
+
+static void pkt_end_io_read(struct bio *bio)
+{
+	struct packet_data *pkt = bio->bi_private;
+	struct pktcdvd_device *pd = pkt->pd;
+	BUG_ON(!pd);
+
+	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
+		bio, (unsigned long long)pkt->sector,
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
+
+	if (bio->bi_status)
+		atomic_inc(&pkt->io_errors);
+	bio_uninit(bio);
+	if (atomic_dec_and_test(&pkt->io_wait)) {
+		atomic_inc(&pkt->run_sm);
+		wake_up(&pd->wqueue);
+	}
+	pkt_bio_finished(pd);
+}
+
+static void pkt_end_io_packet_write(struct bio *bio)
+{
+	struct packet_data *pkt = bio->bi_private;
+	struct pktcdvd_device *pd = pkt->pd;
+	BUG_ON(!pd);
+
+	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
+
+	pd->stats.pkt_ended++;
+
+	bio_uninit(bio);
+	pkt_bio_finished(pd);
+	atomic_dec(&pkt->io_wait);
+	atomic_inc(&pkt->run_sm);
+	wake_up(&pd->wqueue);
+}
+
+/*
+ * Schedule reads for the holes in a packet
+ */
+static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	int frames_read = 0;
+	struct bio *bio;
+	int f;
+	char written[PACKET_MAX_SIZE];
+
+	BUG_ON(bio_list_empty(&pkt->orig_bios));
+
+	atomic_set(&pkt->io_wait, 0);
+	atomic_set(&pkt->io_errors, 0);
+
+	/*
+	 * Figure out which frames we need to read before we can write.
+	 */
+	memset(written, 0, sizeof(written));
+	spin_lock(&pkt->lock);
+	bio_list_for_each(bio, &pkt->orig_bios) {
+		int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
+			(CD_FRAMESIZE >> 9);
+		int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
+		pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
+		BUG_ON(first_frame < 0);
+		BUG_ON(first_frame + num_frames > pkt->frames);
+		for (f = first_frame; f < first_frame + num_frames; f++)
+			written[f] = 1;
+	}
+	spin_unlock(&pkt->lock);
+
+	if (pkt->cache_valid) {
+		pkt_dbg(2, pd, "zone %llx cached\n",
+			(unsigned long long)pkt->sector);
+		goto out_account;
+	}
+
+	/*
+	 * Schedule reads for missing parts of the packet.
+	 */
+	for (f = 0; f < pkt->frames; f++) {
+		int p, offset;
+
+		if (written[f])
+			continue;
+
+		bio = pkt->r_bios[f];
+		bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
+		bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+		bio->bi_end_io = pkt_end_io_read;
+		bio->bi_private = pkt;
+
+		p = (f * CD_FRAMESIZE) / PAGE_SIZE;
+		offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+		pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
+			f, pkt->pages[p], offset);
+		if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
+			BUG();
+
+		atomic_inc(&pkt->io_wait);
+		pkt_queue_bio(pd, bio);
+		frames_read++;
+	}
+
+out_account:
+	pkt_dbg(2, pd, "need %d frames for zone %llx\n",
+		frames_read, (unsigned long long)pkt->sector);
+	pd->stats.pkt_started++;
+	pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
+}
+
+/*
+ * Find a packet matching zone, or the least recently used packet if
+ * there is no match.
+ */
+static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
+{
+	struct packet_data *pkt;
+
+	list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
+		if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
+			list_del_init(&pkt->list);
+			if (pkt->sector != zone)
+				pkt->cache_valid = 0;
+			return pkt;
+		}
+	}
+	BUG();
+	return NULL;
+}
+
+static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	if (pkt->cache_valid) {
+		list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+	} else {
+		list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
+	}
+}
+
+static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+{
+#if PACKET_DEBUG > 1
+	static const char *state_name[] = {
+		"IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
+	};
+	enum packet_data_state old_state = pkt->state;
+	pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
+		pkt->id, (unsigned long long)pkt->sector,
+		state_name[old_state], state_name[state]);
+#endif
+	pkt->state = state;
+}
+
+/*
+ * Scan the work queue to see if we can start a new packet.
+ * returns non-zero if any work was done.
+ */
+static int pkt_handle_queue(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *p;
+	struct bio *bio = NULL;
+	sector_t zone = 0; /* Suppress gcc warning */
+	struct pkt_rb_node *node, *first_node;
+	struct rb_node *n;
+
+	atomic_set(&pd->scan_queue, 0);
+
+	if (list_empty(&pd->cdrw.pkt_free_list)) {
+		pkt_dbg(2, pd, "no pkt\n");
+		return 0;
+	}
+
+	/*
+	 * Try to find a zone we are not already working on.
+	 */
+	spin_lock(&pd->lock);
+	first_node = pkt_rbtree_find(pd, pd->current_sector);
+	if (!first_node) {
+		n = rb_first(&pd->bio_queue);
+		if (n)
+			first_node = rb_entry(n, struct pkt_rb_node, rb_node);
+	}
+	node = first_node;
+	while (node) {
+		bio = node->bio;
+		zone = get_zone(bio->bi_iter.bi_sector, pd);
+		list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
+			if (p->sector == zone) {
+				bio = NULL;
+				goto try_next_bio;
+			}
+		}
+		break;
+try_next_bio:
+		node = pkt_rbtree_next(node);
+		if (!node) {
+			n = rb_first(&pd->bio_queue);
+			if (n)
+				node = rb_entry(n, struct pkt_rb_node, rb_node);
+		}
+		if (node == first_node)
+			node = NULL;
+	}
+	spin_unlock(&pd->lock);
+	if (!bio) {
+		pkt_dbg(2, pd, "no bio\n");
+		return 0;
+	}
+
+	pkt = pkt_get_packet_data(pd, zone);
+
+	pd->current_sector = zone + pd->settings.size;
+	pkt->sector = zone;
+	BUG_ON(pkt->frames != pd->settings.size >> 2);
+	pkt->write_size = 0;
+
+	/*
+	 * Scan work queue for bios in the same zone and link them
+	 * to this packet.
+	 */
+	spin_lock(&pd->lock);
+	pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
+	while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+		bio = node->bio;
+		pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
+			get_zone(bio->bi_iter.bi_sector, pd));
+		if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
+			break;
+		pkt_rbtree_erase(pd, node);
+		spin_lock(&pkt->lock);
+		bio_list_add(&pkt->orig_bios, bio);
+		pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
+		spin_unlock(&pkt->lock);
+	}
+	/* check write congestion marks, and if bio_queue_size is
+	 * below, wake up any waiters
+	 */
+	if (pd->congested &&
+	    pd->bio_queue_size <= pd->write_congestion_off) {
+		pd->congested = false;
+		wake_up_var(&pd->congested);
+	}
+	spin_unlock(&pd->lock);
+
+	pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
+	pkt_set_state(pkt, PACKET_WAITING_STATE);
+	atomic_set(&pkt->run_sm, 1);
+
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_add(&pkt->list, &pd->cdrw.pkt_active_list);
+	spin_unlock(&pd->cdrw.active_list_lock);
+
+	return 1;
+}
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+static void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
+
+	while (1) {
+		if (!src_iter.bi_size) {
+			src = src->bi_next;
+			if (!src)
+				break;
+
+			src_iter = src->bi_iter;
+		}
+
+		if (!dst_iter.bi_size) {
+			dst = dst->bi_next;
+			if (!dst)
+				break;
+
+			dst_iter = dst->bi_iter;
+		}
+
+		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+	}
+}
+
+/*
+ * Assemble a bio to write one packet and queue the bio for processing
+ * by the underlying block device.
+ */
+static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	int f;
+
+	bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+		 REQ_OP_WRITE);
+	pkt->w_bio->bi_iter.bi_sector = pkt->sector;
+	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+	pkt->w_bio->bi_private = pkt;
+
+	/* XXX: locking? */
+	for (f = 0; f < pkt->frames; f++) {
+		struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
+		unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+
+		if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset))
+			BUG();
+	}
+	pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+	/*
+	 * Fill-in bvec with data from orig_bios.
+	 */
+	spin_lock(&pkt->lock);
+	bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
+
+	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+	spin_unlock(&pkt->lock);
+
+	pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
+		pkt->write_size, (unsigned long long)pkt->sector);
+
+	if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames))
+		pkt->cache_valid = 1;
+	else
+		pkt->cache_valid = 0;
+
+	/* Start the write request */
+	atomic_set(&pkt->io_wait, 1);
+	pkt_queue_bio(pd, pkt->w_bio);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
+{
+	struct bio *bio;
+
+	if (status)
+		pkt->cache_valid = 0;
+
+	/* Finish all bios corresponding to this packet */
+	while ((bio = bio_list_pop(&pkt->orig_bios))) {
+		bio->bi_status = status;
+		bio_endio(bio);
+	}
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	pkt_dbg(2, pd, "pkt %d\n", pkt->id);
+
+	for (;;) {
+		switch (pkt->state) {
+		case PACKET_WAITING_STATE:
+			if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+				return;
+
+			pkt->sleep_time = 0;
+			pkt_gather_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+			break;
+
+		case PACKET_READ_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (atomic_read(&pkt->io_errors) > 0) {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			} else {
+				pkt_start_write(pd, pkt);
+			}
+			break;
+
+		case PACKET_WRITE_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (!pkt->w_bio->bi_status) {
+				pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			} else {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			}
+			break;
+
+		case PACKET_RECOVERY_STATE:
+			pkt_dbg(2, pd, "No recovery possible\n");
+			pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			break;
+
+		case PACKET_FINISHED_STATE:
+			pkt_finish_packet(pkt, pkt->w_bio->bi_status);
+			return;
+
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *next;
+
+	/*
+	 * Run state machine for active packets
+	 */
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		if (atomic_read(&pkt->run_sm) > 0) {
+			atomic_set(&pkt->run_sm, 0);
+			pkt_run_state_machine(pd, pkt);
+		}
+	}
+
+	/*
+	 * Move no longer active packets to the free list
+	 */
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+		if (pkt->state == PACKET_FINISHED_STATE) {
+			list_del(&pkt->list);
+			pkt_put_packet_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_IDLE_STATE);
+			atomic_set(&pd->scan_queue, 1);
+		}
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+	struct packet_data *pkt;
+	int i;
+
+	for (i = 0; i < PACKET_NUM_STATES; i++)
+		states[i] = 0;
+
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		states[pkt->state]++;
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+	struct pktcdvd_device *pd = foobar;
+	struct packet_data *pkt;
+	long min_sleep_time, residue;
+
+	set_user_nice(current, MIN_NICE);
+	set_freezable();
+
+	for (;;) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		/*
+		 * Wait until there is something to do
+		 */
+		add_wait_queue(&pd->wqueue, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			/* Check if we need to run pkt_handle_queue */
+			if (atomic_read(&pd->scan_queue) > 0)
+				goto work_to_do;
+
+			/* Check if we need to run the state machine for some packet */
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (atomic_read(&pkt->run_sm) > 0)
+					goto work_to_do;
+			}
+
+			/* Check if we need to process the iosched queues */
+			if (atomic_read(&pd->iosched.attention) != 0)
+				goto work_to_do;
+
+			/* Otherwise, go to sleep */
+			if (PACKET_DEBUG > 1) {
+				int states[PACKET_NUM_STATES];
+				pkt_count_states(pd, states);
+				pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+					states[0], states[1], states[2],
+					states[3], states[4], states[5]);
+			}
+
+			min_sleep_time = MAX_SCHEDULE_TIMEOUT;
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
+					min_sleep_time = pkt->sleep_time;
+			}
+
+			pkt_dbg(2, pd, "sleeping\n");
+			residue = schedule_timeout(min_sleep_time);
+			pkt_dbg(2, pd, "wake up\n");
+
+			/* make swsusp happy with our thread */
+			try_to_freeze();
+
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (!pkt->sleep_time)
+					continue;
+				pkt->sleep_time -= min_sleep_time - residue;
+				if (pkt->sleep_time <= 0) {
+					pkt->sleep_time = 0;
+					atomic_inc(&pkt->run_sm);
+				}
+			}
+
+			if (kthread_should_stop())
+				break;
+		}
+work_to_do:
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pd->wqueue, &wait);
+
+		if (kthread_should_stop())
+			break;
+
+		/*
+		 * if pkt_handle_queue returns true, we can queue
+		 * another request.
+		 */
+		while (pkt_handle_queue(pd))
+			;
+
+		/*
+		 * Handle packet state machine
+		 */
+		pkt_handle_packets(pd);
+
+		/*
+		 * Handle iosched queues
+		 */
+		pkt_iosched_process_queue(pd);
+	}
+
+	return 0;
+}
+
+static void pkt_print_settings(struct pktcdvd_device *pd)
+{
+	pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
+		 pd->settings.fp ? "Fixed" : "Variable",
+		 pd->settings.size >> 2,
+		 pd->settings.block_mode == 8 ? '1' : '2');
+}
+
+static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
+{
+	memset(cgc->cmd, 0, sizeof(cgc->cmd));
+
+	cgc->cmd[0] = GPCMD_MODE_SENSE_10;
+	cgc->cmd[2] = page_code | (page_control << 6);
+	cgc->cmd[7] = cgc->buflen >> 8;
+	cgc->cmd[8] = cgc->buflen & 0xff;
+	cgc->data_direction = CGC_DATA_READ;
+	return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+	memset(cgc->cmd, 0, sizeof(cgc->cmd));
+	memset(cgc->buffer, 0, 2);
+	cgc->cmd[0] = GPCMD_MODE_SELECT_10;
+	cgc->cmd[1] = 0x10;		/* PF */
+	cgc->cmd[7] = cgc->buflen >> 8;
+	cgc->cmd[8] = cgc->buflen & 0xff;
+	cgc->data_direction = CGC_DATA_WRITE;
+	return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
+{
+	struct packet_command cgc;
+	int ret;
+
+	/* set up command and get the disc info */
+	init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+	cgc.cmd[8] = cgc.buflen = 2;
+	cgc.quiet = 1;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		return ret;
+
+	/* not all drives have the same disc_info length, so requeue
+	 * packet with the length the drive tells us it can supply
+	 */
+	cgc.buflen = be16_to_cpu(di->disc_information_length) +
+		     sizeof(di->disc_information_length);
+
+	if (cgc.buflen > sizeof(disc_information))
+		cgc.buflen = sizeof(disc_information);
+
+	cgc.cmd[8] = cgc.buflen;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
+{
+	struct packet_command cgc;
+	int ret;
+
+	init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+	cgc.cmd[1] = type & 3;
+	cgc.cmd[4] = (track & 0xff00) >> 8;
+	cgc.cmd[5] = track & 0xff;
+	cgc.cmd[8] = 8;
+	cgc.quiet = 1;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		return ret;
+
+	cgc.buflen = be16_to_cpu(ti->track_information_length) +
+		     sizeof(ti->track_information_length);
+
+	if (cgc.buflen > sizeof(track_information))
+		cgc.buflen = sizeof(track_information);
+
+	cgc.cmd[8] = cgc.buflen;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
+						long *last_written)
+{
+	disc_information di;
+	track_information ti;
+	__u32 last_track;
+	int ret;
+
+	ret = pkt_get_disc_info(pd, &di);
+	if (ret)
+		return ret;
+
+	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+	ret = pkt_get_track_info(pd, last_track, 1, &ti);
+	if (ret)
+		return ret;
+
+	/* if this track is blank, try the previous. */
+	if (ti.blank) {
+		last_track--;
+		ret = pkt_get_track_info(pd, last_track, 1, &ti);
+		if (ret)
+			return ret;
+	}
+
+	/* if last recorded field is valid, return it. */
+	if (ti.lra_v) {
+		*last_written = be32_to_cpu(ti.last_rec_address);
+	} else {
+		/* make it up instead */
+		*last_written = be32_to_cpu(ti.track_start) +
+				be32_to_cpu(ti.track_size);
+		if (ti.free_blocks)
+			*last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+	}
+	return 0;
+}
+
+/*
+ * write mode select package based on pd->settings
+ */
+static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	write_param_page *wp;
+	char buffer[128];
+	int ret, size;
+
+	/* doesn't apply to DVD+RW or DVD-RAM */
+	if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12))
+		return 0;
+
+	memset(buffer, 0, sizeof(buffer));
+	init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
+	pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+	if (size > sizeof(buffer))
+		size = sizeof(buffer);
+
+	/*
+	 * now get it all
+	 */
+	init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	/*
+	 * write page is offset header + block descriptor length
+	 */
+	wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
+
+	wp->fp = pd->settings.fp;
+	wp->track_mode = pd->settings.track_mode;
+	wp->write_type = pd->settings.write_type;
+	wp->data_block_type = pd->settings.block_mode;
+
+	wp->multi_session = 0;
+
+#ifdef PACKET_USE_LS
+	wp->link_size = 7;
+	wp->ls_v = 1;
+#endif
+
+	if (wp->data_block_type == PACKET_BLOCK_MODE1) {
+		wp->session_format = 0;
+		wp->subhdr2 = 0x20;
+	} else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
+		wp->session_format = 0x20;
+		wp->subhdr2 = 8;
+#if 0
+		wp->mcn[0] = 0x80;
+		memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
+#endif
+	} else {
+		/*
+		 * paranoia
+		 */
+		pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
+		return 1;
+	}
+	wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
+
+	cgc.buflen = cgc.cmd[8] = size;
+	ret = pkt_mode_select(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	pkt_print_settings(pd);
+	return 0;
+}
+
+/*
+ * 1 -- we can write to this track, 0 -- we can't
+ */
+static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
+{
+	switch (pd->mmc3_profile) {
+		case 0x1a: /* DVD+RW */
+		case 0x12: /* DVD-RAM */
+			/* The track is always writable on DVD+RW/DVD-RAM */
+			return 1;
+		default:
+			break;
+	}
+
+	if (!ti->packet || !ti->fp)
+		return 0;
+
+	/*
+	 * "good" settings as per Mt Fuji.
+	 */
+	if (ti->rt == 0 && ti->blank == 0)
+		return 1;
+
+	if (ti->rt == 0 && ti->blank == 1)
+		return 1;
+
+	if (ti->rt == 1 && ti->blank == 0)
+		return 1;
+
+	pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+	return 0;
+}
+
+/*
+ * 1 -- we can write to this disc, 0 -- we can't
+ */
+static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
+{
+	switch (pd->mmc3_profile) {
+		case 0x0a: /* CD-RW */
+		case 0xffff: /* MMC3 not supported */
+			break;
+		case 0x1a: /* DVD+RW */
+		case 0x13: /* DVD-RW */
+		case 0x12: /* DVD-RAM */
+			return 1;
+		default:
+			pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
+				pd->mmc3_profile);
+			return 0;
+	}
+
+	/*
+	 * for disc type 0xff we should probably reserve a new track.
+	 * but i'm not sure, should we leave this to user apps? probably.
+	 */
+	if (di->disc_type == 0xff) {
+		pkt_notice(pd, "unknown disc - no track?\n");
+		return 0;
+	}
+
+	if (di->disc_type != 0x20 && di->disc_type != 0) {
+		pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
+		return 0;
+	}
+
+	if (di->erasable == 0) {
+		pkt_notice(pd, "disc not erasable\n");
+		return 0;
+	}
+
+	if (di->border_status == PACKET_SESSION_RESERVED) {
+		pkt_err(pd, "can't write to last track (reserved)\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	unsigned char buf[12];
+	disc_information di;
+	track_information ti;
+	int ret, track;
+
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
+	cgc.cmd[8] = 8;
+	ret = pkt_generic_packet(pd, &cgc);
+	pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+
+	memset(&di, 0, sizeof(disc_information));
+	memset(&ti, 0, sizeof(track_information));
+
+	ret = pkt_get_disc_info(pd, &di);
+	if (ret) {
+		pkt_err(pd, "failed get_disc\n");
+		return ret;
+	}
+
+	if (!pkt_writable_disc(pd, &di))
+		return -EROFS;
+
+	pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
+
+	track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
+	ret = pkt_get_track_info(pd, track, 1, &ti);
+	if (ret) {
+		pkt_err(pd, "failed get_track\n");
+		return ret;
+	}
+
+	if (!pkt_writable_track(pd, &ti)) {
+		pkt_err(pd, "can't write to this track\n");
+		return -EROFS;
+	}
+
+	/*
+	 * we keep packet size in 512 byte units, makes it easier to
+	 * deal with request calculations.
+	 */
+	pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
+	if (pd->settings.size == 0) {
+		pkt_notice(pd, "detected zero packet size!\n");
+		return -ENXIO;
+	}
+	if (pd->settings.size > PACKET_MAX_SECTORS) {
+		pkt_err(pd, "packet size is too big\n");
+		return -EROFS;
+	}
+	pd->settings.fp = ti.fp;
+	pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
+
+	if (ti.nwa_v) {
+		pd->nwa = be32_to_cpu(ti.next_writable);
+		set_bit(PACKET_NWA_VALID, &pd->flags);
+	}
+
+	/*
+	 * in theory we could use lra on -RW media as well and just zero
+	 * blocks that haven't been written yet, but in practice that
+	 * is just a no-go. we'll use that for -R, naturally.
+	 */
+	if (ti.lra_v) {
+		pd->lra = be32_to_cpu(ti.last_rec_address);
+		set_bit(PACKET_LRA_VALID, &pd->flags);
+	} else {
+		pd->lra = 0xffffffff;
+		set_bit(PACKET_LRA_VALID, &pd->flags);
+	}
+
+	/*
+	 * fine for now
+	 */
+	pd->settings.link_loss = 7;
+	pd->settings.write_type = 0;	/* packet */
+	pd->settings.track_mode = ti.track_mode;
+
+	/*
+	 * mode1 or mode2 disc
+	 */
+	switch (ti.data_mode) {
+		case PACKET_MODE1:
+			pd->settings.block_mode = PACKET_BLOCK_MODE1;
+			break;
+		case PACKET_MODE2:
+			pd->settings.block_mode = PACKET_BLOCK_MODE2;
+			break;
+		default:
+			pkt_err(pd, "unknown data mode\n");
+			return -EROFS;
+	}
+	return 0;
+}
+
+/*
+ * enable/disable write caching on drive
+ */
+static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
+						int set)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[64];
+	int ret;
+
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.buflen = pd->mode_offset + 12;
+
+	/*
+	 * caching mode page might not be there, so quiet this command
+	 */
+	cgc.quiet = 1;
+
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0);
+	if (ret)
+		return ret;
+
+	buf[pd->mode_offset + 10] |= (!!set << 2);
+
+	cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+	ret = pkt_mode_select(pd, &cgc);
+	if (ret) {
+		pkt_err(pd, "write caching control failed\n");
+		pkt_dump_sense(pd, &cgc);
+	} else if (!ret && set)
+		pkt_notice(pd, "enabled write caching\n");
+	return ret;
+}
+
+static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
+{
+	struct packet_command cgc;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+	cgc.cmd[4] = lockflag ? 1 : 0;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * Returns drive maximum write speed
+ */
+static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
+						unsigned *write_speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[256+18];
+	unsigned char *cap_buf;
+	int ret, offset;
+
+	cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
+	cgc.sshdr = &sshdr;
+
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+	if (ret) {
+		cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
+			     sizeof(struct mode_page_header);
+		ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+		if (ret) {
+			pkt_dump_sense(pd, &cgc);
+			return ret;
+		}
+	}
+
+	offset = 20;			    /* Obsoleted field, used by older drives */
+	if (cap_buf[1] >= 28)
+		offset = 28;		    /* Current write speed selected */
+	if (cap_buf[1] >= 30) {
+		/* If the drive reports at least one "Logical Unit Write
+		 * Speed Performance Descriptor Block", use the information
+		 * in the first block. (contains the highest speed)
+		 */
+		int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+		if (num_spdb > 0)
+			offset = 34;
+	}
+
+	*write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+	return 0;
+}
+
+/* These tables from cdrecord - I don't have orange book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (-10x) */
+static char hs_clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
+						unsigned *speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[64];
+	unsigned int size, st, sp;
+	int ret;
+
+	init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4; /* READ ATIP */
+	cgc.cmd[8] = 2;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+	size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+
+	init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4;
+	cgc.cmd[8] = size;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	if (!(buf[6] & 0x40)) {
+		pkt_notice(pd, "disc type is not CD-RW\n");
+		return 1;
+	}
+	if (!(buf[6] & 0x4)) {
+		pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
+		return 1;
+	}
+
+	st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+	sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+	/* Info from cdrecord */
+	switch (st) {
+		case 0: /* standard speed */
+			*speed = clv_to_speed[sp];
+			break;
+		case 1: /* high speed */
+			*speed = hs_clv_to_speed[sp];
+			break;
+		case 2: /* ultra high speed */
+			*speed = us_clv_to_speed[sp];
+			break;
+		default:
+			pkt_notice(pd, "unknown disc sub-type %d\n", st);
+			return 1;
+	}
+	if (*speed) {
+		pkt_info(pd, "maximum media speed: %d\n", *speed);
+		return 0;
+	} else {
+		pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
+		return 1;
+	}
+}
+
+static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	int ret;
+
+	pkt_dbg(2, pd, "Performing OPC\n");
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.sshdr = &sshdr;
+	cgc.timeout = 60*HZ;
+	cgc.cmd[0] = GPCMD_SEND_OPC;
+	cgc.cmd[1] = 1;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		pkt_dump_sense(pd, &cgc);
+	return ret;
+}
+
+static int pkt_open_write(struct pktcdvd_device *pd)
+{
+	int ret;
+	unsigned int write_speed, media_write_speed, read_speed;
+
+	ret = pkt_probe_settings(pd);
+	if (ret) {
+		pkt_dbg(2, pd, "failed probe\n");
+		return ret;
+	}
+
+	ret = pkt_set_write_settings(pd);
+	if (ret) {
+		pkt_dbg(1, pd, "failed saving write settings\n");
+		return -EIO;
+	}
+
+	pkt_write_caching(pd, USE_WCACHING);
+
+	ret = pkt_get_max_speed(pd, &write_speed);
+	if (ret)
+		write_speed = 16 * 177;
+	switch (pd->mmc3_profile) {
+		case 0x13: /* DVD-RW */
+		case 0x1a: /* DVD+RW */
+		case 0x12: /* DVD-RAM */
+			pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
+			break;
+		default:
+			ret = pkt_media_speed(pd, &media_write_speed);
+			if (ret)
+				media_write_speed = 16;
+			write_speed = min(write_speed, media_write_speed * 177);
+			pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
+			break;
+	}
+	read_speed = write_speed;
+
+	ret = pkt_set_speed(pd, write_speed, read_speed);
+	if (ret) {
+		pkt_dbg(1, pd, "couldn't set write speed\n");
+		return -EIO;
+	}
+	pd->write_speed = write_speed;
+	pd->read_speed = read_speed;
+
+	ret = pkt_perform_opc(pd);
+	if (ret) {
+		pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
+	}
+
+	return 0;
+}
+
+/*
+ * called at open time.
+ */
+static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
+{
+	int ret;
+	long lba;
+	struct request_queue *q;
+	struct block_device *bdev;
+
+	/*
+	 * We need to re-open the cdrom device without O_NONBLOCK to be able
+	 * to read/write from/to it. It is already opened in O_NONBLOCK mode
+	 * so open should not fail.
+	 */
+	bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
+	if (IS_ERR(bdev)) {
+		ret = PTR_ERR(bdev);
+		goto out;
+	}
+
+	ret = pkt_get_last_written(pd, &lba);
+	if (ret) {
+		pkt_err(pd, "pkt_get_last_written failed\n");
+		goto out_putdev;
+	}
+
+	set_capacity(pd->disk, lba << 2);
+	set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
+
+	q = bdev_get_queue(pd->bdev);
+	if (write) {
+		ret = pkt_open_write(pd);
+		if (ret)
+			goto out_putdev;
+		/*
+		 * Some CDRW drives can not handle writes larger than one packet,
+		 * even if the size is a multiple of the packet size.
+		 */
+		blk_queue_max_hw_sectors(q, pd->settings.size);
+		set_bit(PACKET_WRITABLE, &pd->flags);
+	} else {
+		pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+		clear_bit(PACKET_WRITABLE, &pd->flags);
+	}
+
+	ret = pkt_set_segment_merging(pd, q);
+	if (ret)
+		goto out_putdev;
+
+	if (write) {
+		if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+			pkt_err(pd, "not enough memory for buffers\n");
+			ret = -ENOMEM;
+			goto out_putdev;
+		}
+		pkt_info(pd, "%lukB available on disc\n", lba << 1);
+	}
+
+	return 0;
+
+out_putdev:
+	blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
+out:
+	return ret;
+}
+
+/*
+ * called when the device is closed. makes sure that the device flushes
+ * the internal cache before we close.
+ */
+static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
+{
+	if (flush && pkt_flush_cache(pd))
+		pkt_dbg(1, pd, "not flushing cache\n");
+
+	pkt_lock_door(pd, 0);
+
+	pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+	blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+
+	pkt_shrink_pktlist(pd);
+}
+
+static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
+{
+	if (dev_minor >= MAX_WRITERS)
+		return NULL;
+
+	dev_minor = array_index_nospec(dev_minor, MAX_WRITERS);
+	return pkt_devs[dev_minor];
+}
+
+static int pkt_open(struct block_device *bdev, fmode_t mode)
+{
+	struct pktcdvd_device *pd = NULL;
+	int ret;
+
+	mutex_lock(&pktcdvd_mutex);
+	mutex_lock(&ctl_mutex);
+	pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
+	if (!pd) {
+		ret = -ENODEV;
+		goto out;
+	}
+	BUG_ON(pd->refcnt < 0);
+
+	pd->refcnt++;
+	if (pd->refcnt > 1) {
+		if ((mode & FMODE_WRITE) &&
+		    !test_bit(PACKET_WRITABLE, &pd->flags)) {
+			ret = -EBUSY;
+			goto out_dec;
+		}
+	} else {
+		ret = pkt_open_dev(pd, mode & FMODE_WRITE);
+		if (ret)
+			goto out_dec;
+		/*
+		 * needed here as well, since ext2 (among others) may change
+		 * the blocksize at mount time
+		 */
+		set_blocksize(bdev, CD_FRAMESIZE);
+	}
+
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+	return 0;
+
+out_dec:
+	pd->refcnt--;
+out:
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+	return ret;
+}
+
+static void pkt_close(struct gendisk *disk, fmode_t mode)
+{
+	struct pktcdvd_device *pd = disk->private_data;
+
+	mutex_lock(&pktcdvd_mutex);
+	mutex_lock(&ctl_mutex);
+	pd->refcnt--;
+	BUG_ON(pd->refcnt < 0);
+	if (pd->refcnt == 0) {
+		int flush = test_bit(PACKET_WRITABLE, &pd->flags);
+		pkt_release_dev(pd, flush);
+	}
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+}
+
+
+static void pkt_end_io_read_cloned(struct bio *bio)
+{
+	struct packet_stacked_data *psd = bio->bi_private;
+	struct pktcdvd_device *pd = psd->pd;
+
+	psd->bio->bi_status = bio->bi_status;
+	bio_put(bio);
+	bio_endio(psd->bio);
+	mempool_free(psd, &psd_pool);
+	pkt_bio_finished(pd);
+}
+
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
+{
+	struct bio *cloned_bio =
+		bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
+	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
+
+	psd->pd = pd;
+	psd->bio = bio;
+	cloned_bio->bi_private = psd;
+	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+	pd->stats.secs_r += bio_sectors(bio);
+	pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd = q->queuedata;
+	sector_t zone;
+	struct packet_data *pkt;
+	int was_empty, blocked_bio;
+	struct pkt_rb_node *node;
+
+	zone = get_zone(bio->bi_iter.bi_sector, pd);
+
+	/*
+	 * If we find a matching packet in state WAITING or READ_WAIT, we can
+	 * just append this bio to that packet.
+	 */
+	spin_lock(&pd->cdrw.active_list_lock);
+	blocked_bio = 0;
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		if (pkt->sector == zone) {
+			spin_lock(&pkt->lock);
+			if ((pkt->state == PACKET_WAITING_STATE) ||
+			    (pkt->state == PACKET_READ_WAIT_STATE)) {
+				bio_list_add(&pkt->orig_bios, bio);
+				pkt->write_size +=
+					bio->bi_iter.bi_size / CD_FRAMESIZE;
+				if ((pkt->write_size >= pkt->frames) &&
+				    (pkt->state == PACKET_WAITING_STATE)) {
+					atomic_inc(&pkt->run_sm);
+					wake_up(&pd->wqueue);
+				}
+				spin_unlock(&pkt->lock);
+				spin_unlock(&pd->cdrw.active_list_lock);
+				return;
+			} else {
+				blocked_bio = 1;
+			}
+			spin_unlock(&pkt->lock);
+		}
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+
+	/*
+	 * Test if there is enough room left in the bio work queue
+	 * (queue size >= congestion on mark).
+	 * If not, wait till the work queue size is below the congestion off mark.
+	 */
+	spin_lock(&pd->lock);
+	if (pd->write_congestion_on > 0
+	    && pd->bio_queue_size >= pd->write_congestion_on) {
+		struct wait_bit_queue_entry wqe;
+
+		init_wait_var_entry(&wqe, &pd->congested, 0);
+		for (;;) {
+			prepare_to_wait_event(__var_waitqueue(&pd->congested),
+					      &wqe.wq_entry,
+					      TASK_UNINTERRUPTIBLE);
+			if (pd->bio_queue_size <= pd->write_congestion_off)
+				break;
+			pd->congested = true;
+			spin_unlock(&pd->lock);
+			schedule();
+			spin_lock(&pd->lock);
+		}
+	}
+	spin_unlock(&pd->lock);
+
+	/*
+	 * No matching packet found. Store the bio in the work queue.
+	 */
+	node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
+	node->bio = bio;
+	spin_lock(&pd->lock);
+	BUG_ON(pd->bio_queue_size < 0);
+	was_empty = (pd->bio_queue_size == 0);
+	pkt_rbtree_insert(pd, node);
+	spin_unlock(&pd->lock);
+
+	/*
+	 * Wake up the worker thread.
+	 */
+	atomic_set(&pd->scan_queue, 1);
+	if (was_empty) {
+		/* This wake_up is required for correct operation */
+		wake_up(&pd->wqueue);
+	} else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
+		/*
+		 * This wake up is not required for correct operation,
+		 * but improves performance in some cases.
+		 */
+		wake_up(&pd->wqueue);
+	}
+}
+
+static void pkt_submit_bio(struct bio *bio)
+{
+	struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
+	struct bio *split;
+
+	bio = bio_split_to_limits(bio);
+
+	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		(unsigned long long)bio_end_sector(bio));
+
+	/*
+	 * Clone READ bios so we can have our own bi_end_io callback.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		pkt_make_request_read(pd, bio);
+		return;
+	}
+
+	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+		pkt_notice(pd, "WRITE for ro device (%llu)\n",
+			   (unsigned long long)bio->bi_iter.bi_sector);
+		goto end_io;
+	}
+
+	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+		pkt_err(pd, "wrong bio size\n");
+		goto end_io;
+	}
+
+	do {
+		sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+		sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+		if (last_zone != zone) {
+			BUG_ON(last_zone != zone + pd->settings.size);
+
+			split = bio_split(bio, last_zone -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, &pkt_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
+	} while (split != bio);
+
+	return;
+end_io:
+	bio_io_error(bio);
+}
+
+static void pkt_init_queue(struct pktcdvd_device *pd)
+{
+	struct request_queue *q = pd->disk->queue;
+
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
+	blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
+	q->queuedata = pd;
+}
+
+static int pkt_seq_show(struct seq_file *m, void *p)
+{
+	struct pktcdvd_device *pd = m->private;
+	char *msg;
+	int states[PACKET_NUM_STATES];
+
+	seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
+
+	seq_printf(m, "\nSettings:\n");
+	seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+	if (pd->settings.write_type == 0)
+		msg = "Packet";
+	else
+		msg = "Unknown";
+	seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+	seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+	seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+	seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+	if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+		msg = "Mode 1";
+	else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+		msg = "Mode 2";
+	else
+		msg = "Unknown";
+	seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+	seq_printf(m, "\nStatistics:\n");
+	seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+	seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+	seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+	seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+	seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+	seq_printf(m, "\nMisc:\n");
+	seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+	seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+	seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+	seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+	seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+	seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+	seq_printf(m, "\nQueue state:\n");
+	seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+	seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+	seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
+
+	pkt_count_states(pd, states);
+	seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+		   states[0], states[1], states[2], states[3], states[4], states[5]);
+
+	seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
+			pd->write_congestion_off,
+			pd->write_congestion_on);
+	return 0;
+}
+
+static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
+{
+	int i;
+	struct block_device *bdev;
+	struct scsi_device *sdev;
+
+	if (pd->pkt_dev == dev) {
+		pkt_err(pd, "recursive setup not allowed\n");
+		return -EBUSY;
+	}
+	for (i = 0; i < MAX_WRITERS; i++) {
+		struct pktcdvd_device *pd2 = pkt_devs[i];
+		if (!pd2)
+			continue;
+		if (pd2->bdev->bd_dev == dev) {
+			pkt_err(pd, "%pg already setup\n", pd2->bdev);
+			return -EBUSY;
+		}
+		if (pd2->pkt_dev == dev) {
+			pkt_err(pd, "can't chain pktcdvd devices\n");
+			return -EBUSY;
+		}
+	}
+
+	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+	sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+	if (!sdev) {
+		blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+		return -EINVAL;
+	}
+	put_device(&sdev->sdev_gendev);
+
+	/* This is safe, since we have a reference from open(). */
+	__module_get(THIS_MODULE);
+
+	pd->bdev = bdev;
+	set_blocksize(bdev, CD_FRAMESIZE);
+
+	pkt_init_queue(pd);
+
+	atomic_set(&pd->cdrw.pending_bios, 0);
+	pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+	if (IS_ERR(pd->cdrw.thread)) {
+		pkt_err(pd, "can't start kernel thread\n");
+		goto out_mem;
+	}
+
+	proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
+	pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
+	return 0;
+
+out_mem:
+	blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
+	return -ENOMEM;
+}
+
+static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+	struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+	int ret;
+
+	pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
+		cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+	mutex_lock(&pktcdvd_mutex);
+	switch (cmd) {
+	case CDROMEJECT:
+		/*
+		 * The door gets locked when the device is opened, so we
+		 * have to unlock it or else the eject command fails.
+		 */
+		if (pd->refcnt == 1)
+			pkt_lock_door(pd, 0);
+		fallthrough;
+	/*
+	 * forward selected CDROM ioctls to CD-ROM, for UDF
+	 */
+	case CDROMMULTISESSION:
+	case CDROMREADTOCENTRY:
+	case CDROM_LAST_WRITTEN:
+	case CDROM_SEND_PACKET:
+	case SCSI_IOCTL_SEND_COMMAND:
+		if (!bdev->bd_disk->fops->ioctl)
+			ret = -ENOTTY;
+		else
+			ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
+		break;
+	default:
+		pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
+		ret = -ENOTTY;
+	}
+	mutex_unlock(&pktcdvd_mutex);
+
+	return ret;
+}
+
+static unsigned int pkt_check_events(struct gendisk *disk,
+				     unsigned int clearing)
+{
+	struct pktcdvd_device *pd = disk->private_data;
+	struct gendisk *attached_disk;
+
+	if (!pd)
+		return 0;
+	if (!pd->bdev)
+		return 0;
+	attached_disk = pd->bdev->bd_disk;
+	if (!attached_disk || !attached_disk->fops->check_events)
+		return 0;
+	return attached_disk->fops->check_events(attached_disk, clearing);
+}
+
+static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
+}
+
+static const struct block_device_operations pktcdvd_ops = {
+	.owner =		THIS_MODULE,
+	.submit_bio =		pkt_submit_bio,
+	.open =			pkt_open,
+	.release =		pkt_close,
+	.ioctl =		pkt_ioctl,
+	.compat_ioctl =		blkdev_compat_ptr_ioctl,
+	.check_events =		pkt_check_events,
+	.devnode =		pkt_devnode,
+};
+
+/*
+ * Set up mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
+{
+	int idx;
+	int ret = -ENOMEM;
+	struct pktcdvd_device *pd;
+	struct gendisk *disk;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	for (idx = 0; idx < MAX_WRITERS; idx++)
+		if (!pkt_devs[idx])
+			break;
+	if (idx == MAX_WRITERS) {
+		pr_err("max %d writers supported\n", MAX_WRITERS);
+		ret = -EBUSY;
+		goto out_mutex;
+	}
+
+	pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
+	if (!pd)
+		goto out_mutex;
+
+	ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+					sizeof(struct pkt_rb_node));
+	if (ret)
+		goto out_mem;
+
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+	INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+	spin_lock_init(&pd->cdrw.active_list_lock);
+
+	spin_lock_init(&pd->lock);
+	spin_lock_init(&pd->iosched.lock);
+	bio_list_init(&pd->iosched.read_queue);
+	bio_list_init(&pd->iosched.write_queue);
+	sprintf(pd->name, DRIVER_NAME"%d", idx);
+	init_waitqueue_head(&pd->wqueue);
+	pd->bio_queue = RB_ROOT;
+
+	pd->write_congestion_on  = write_congestion_on;
+	pd->write_congestion_off = write_congestion_off;
+
+	ret = -ENOMEM;
+	disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!disk)
+		goto out_mem;
+	pd->disk = disk;
+	disk->major = pktdev_major;
+	disk->first_minor = idx;
+	disk->minors = 1;
+	disk->fops = &pktcdvd_ops;
+	disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
+	strcpy(disk->disk_name, pd->name);
+	disk->private_data = pd;
+
+	pd->pkt_dev = MKDEV(pktdev_major, idx);
+	ret = pkt_new_dev(pd, dev);
+	if (ret)
+		goto out_mem2;
+
+	/* inherit events of the host device */
+	disk->events = pd->bdev->bd_disk->events;
+
+	ret = add_disk(disk);
+	if (ret)
+		goto out_mem2;
+
+	pkt_sysfs_dev_new(pd);
+	pkt_debugfs_dev_new(pd);
+
+	pkt_devs[idx] = pd;
+	if (pkt_dev)
+		*pkt_dev = pd->pkt_dev;
+
+	mutex_unlock(&ctl_mutex);
+	return 0;
+
+out_mem2:
+	put_disk(disk);
+out_mem:
+	mempool_exit(&pd->rb_pool);
+	kfree(pd);
+out_mutex:
+	mutex_unlock(&ctl_mutex);
+	pr_err("setup of pktcdvd device failed\n");
+	return ret;
+}
+
+/*
+ * Tear down mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_remove_dev(dev_t pkt_dev)
+{
+	struct pktcdvd_device *pd;
+	int idx;
+	int ret = 0;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	for (idx = 0; idx < MAX_WRITERS; idx++) {
+		pd = pkt_devs[idx];
+		if (pd && (pd->pkt_dev == pkt_dev))
+			break;
+	}
+	if (idx == MAX_WRITERS) {
+		pr_debug("dev not setup\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (pd->refcnt > 0) {
+		ret = -EBUSY;
+		goto out;
+	}
+	if (!IS_ERR(pd->cdrw.thread))
+		kthread_stop(pd->cdrw.thread);
+
+	pkt_devs[idx] = NULL;
+
+	pkt_debugfs_dev_remove(pd);
+	pkt_sysfs_dev_remove(pd);
+
+	blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
+
+	remove_proc_entry(pd->name, pkt_proc);
+	pkt_dbg(1, pd, "writer unmapped\n");
+
+	del_gendisk(pd->disk);
+	put_disk(pd->disk);
+
+	mempool_exit(&pd->rb_pool);
+	kfree(pd);
+
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
+
+out:
+	mutex_unlock(&ctl_mutex);
+	return ret;
+}
+
+static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
+{
+	struct pktcdvd_device *pd;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
+	if (pd) {
+		ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+		ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+	} else {
+		ctrl_cmd->dev = 0;
+		ctrl_cmd->pkt_dev = 0;
+	}
+	ctrl_cmd->num_devices = MAX_WRITERS;
+
+	mutex_unlock(&ctl_mutex);
+}
+
+static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct pkt_ctrl_command ctrl_cmd;
+	int ret = 0;
+	dev_t pkt_dev = 0;
+
+	if (cmd != PACKET_CTRL_CMD)
+		return -ENOTTY;
+
+	if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
+		return -EFAULT;
+
+	switch (ctrl_cmd.command) {
+	case PKT_CTRL_CMD_SETUP:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
+		ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
+		break;
+	case PKT_CTRL_CMD_TEARDOWN:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
+		break;
+	case PKT_CTRL_CMD_STATUS:
+		pkt_get_status(&ctrl_cmd);
+		break;
+	default:
+		return -ENOTTY;
+	}
+
+	if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
+		return -EFAULT;
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations pkt_ctl_fops = {
+	.open		= nonseekable_open,
+	.unlocked_ioctl	= pkt_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= pkt_ctl_compat_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice pkt_misc = {
+	.minor 		= MISC_DYNAMIC_MINOR,
+	.name  		= DRIVER_NAME,
+	.nodename	= "pktcdvd/control",
+	.fops  		= &pkt_ctl_fops
+};
+
+static int __init pkt_init(void)
+{
+	int ret;
+
+	mutex_init(&ctl_mutex);
+
+	ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+				    sizeof(struct packet_stacked_data));
+	if (ret)
+		return ret;
+	ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret) {
+		mempool_exit(&psd_pool);
+		return ret;
+	}
+
+	ret = register_blkdev(pktdev_major, DRIVER_NAME);
+	if (ret < 0) {
+		pr_err("unable to register block device\n");
+		goto out2;
+	}
+	if (!pktdev_major)
+		pktdev_major = ret;
+
+	ret = pkt_sysfs_init();
+	if (ret)
+		goto out;
+
+	pkt_debugfs_init();
+
+	ret = misc_register(&pkt_misc);
+	if (ret) {
+		pr_err("unable to register misc device\n");
+		goto out_misc;
+	}
+
+	pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL);
+
+	return 0;
+
+out_misc:
+	pkt_debugfs_cleanup();
+	pkt_sysfs_cleanup();
+out:
+	unregister_blkdev(pktdev_major, DRIVER_NAME);
+out2:
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
+	return ret;
+}
+
+static void __exit pkt_exit(void)
+{
+	remove_proc_entry("driver/"DRIVER_NAME, NULL);
+	misc_deregister(&pkt_misc);
+
+	pkt_debugfs_cleanup();
+	pkt_sysfs_cleanup();
+
+	unregister_blkdev(pktdev_major, DRIVER_NAME);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
+}
+
+MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
+MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
+MODULE_LICENSE("GPL");
+
+module_init(pkt_init);
+module_exit(pkt_exit);
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
new file mode 100644
index 000000000000..f9c5ac80d59b
--- /dev/null
+++ b/include/linux/pktcdvd.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef __PKTCDVD_H
+#define __PKTCDVD_H
+
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/cdrom.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/mempool.h>
+#include <uapi/linux/pktcdvd.h>
+
+/* default bio write queue congestion marks */
+#define PKT_WRITE_CONGESTION_ON    10000
+#define PKT_WRITE_CONGESTION_OFF   9000
+
+
+struct packet_settings
+{
+	__u32			size;		/* packet size in (512 byte) sectors */
+	__u8			fp;		/* fixed packets */
+	__u8			link_loss;	/* the rest is specified
+						 * as per Mt Fuji */
+	__u8			write_type;
+	__u8			track_mode;
+	__u8			block_mode;
+};
+
+/*
+ * Very crude stats for now
+ */
+struct packet_stats
+{
+	unsigned long		pkt_started;
+	unsigned long		pkt_ended;
+	unsigned long		secs_w;
+	unsigned long		secs_rg;
+	unsigned long		secs_r;
+};
+
+struct packet_cdrw
+{
+	struct list_head	pkt_free_list;
+	struct list_head	pkt_active_list;
+	spinlock_t		active_list_lock; /* Serialize access to pkt_active_list */
+	struct task_struct	*thread;
+	atomic_t		pending_bios;
+};
+
+/*
+ * Switch to high speed reading after reading this many kilobytes
+ * with no interspersed writes.
+ */
+#define HI_SPEED_SWITCH 512
+
+struct packet_iosched
+{
+	atomic_t		attention;	/* Set to non-zero when queue processing is needed */
+	int			writing;	/* Non-zero when writing, zero when reading */
+	spinlock_t		lock;		/* Protecting read/write queue manipulations */
+	struct bio_list		read_queue;
+	struct bio_list		write_queue;
+	sector_t		last_write;	/* The sector where the last write ended */
+	int			successive_reads;
+};
+
+/*
+ * 32 buffers of 2048 bytes
+ */
+#if (PAGE_SIZE % CD_FRAMESIZE) != 0
+#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
+#endif
+#define PACKET_MAX_SIZE		128
+#define FRAMES_PER_PAGE		(PAGE_SIZE / CD_FRAMESIZE)
+#define PACKET_MAX_SECTORS	(PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
+
+enum packet_data_state {
+	PACKET_IDLE_STATE,			/* Not used at the moment */
+	PACKET_WAITING_STATE,			/* Waiting for more bios to arrive, so */
+						/* we don't have to do as much */
+						/* data gathering */
+	PACKET_READ_WAIT_STATE,			/* Waiting for reads to fill in holes */
+	PACKET_WRITE_WAIT_STATE,		/* Waiting for the write to complete */
+	PACKET_RECOVERY_STATE,			/* Recover after read/write errors */
+	PACKET_FINISHED_STATE,			/* After write has finished */
+
+	PACKET_NUM_STATES			/* Number of possible states */
+};
+
+/*
+ * Information needed for writing a single packet
+ */
+struct pktcdvd_device;
+
+struct packet_data
+{
+	struct list_head	list;
+
+	spinlock_t		lock;		/* Lock protecting state transitions and */
+						/* orig_bios list */
+
+	struct bio_list		orig_bios;	/* Original bios passed to pkt_make_request */
+						/* that will be handled by this packet */
+	int			write_size;	/* Total size of all bios in the orig_bios */
+						/* list, measured in number of frames */
+
+	struct bio		*w_bio;		/* The bio we will send to the real CD */
+						/* device once we have all data for the */
+						/* packet we are going to write */
+	sector_t		sector;		/* First sector in this packet */
+	int			frames;		/* Number of frames in this packet */
+
+	enum packet_data_state	state;		/* Current state */
+	atomic_t		run_sm;		/* Incremented whenever the state */
+						/* machine needs to be run */
+	long			sleep_time;	/* Set this to non-zero to make the state */
+						/* machine run after this many jiffies. */
+
+	atomic_t		io_wait;	/* Number of pending IO operations */
+	atomic_t		io_errors;	/* Number of read/write errors during IO */
+
+	struct bio		*r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */
+	struct page		*pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE];
+
+	int			cache_valid;	/* If non-zero, the data for the zone defined */
+						/* by the sector variable is completely cached */
+						/* in the pages[] vector. */
+
+	int			id;		/* ID number for debugging */
+	struct pktcdvd_device	*pd;
+};
+
+struct pkt_rb_node {
+	struct rb_node		rb_node;
+	struct bio		*bio;
+};
+
+struct packet_stacked_data
+{
+	struct bio		*bio;		/* Original read request bio */
+	struct pktcdvd_device	*pd;
+};
+#define PSD_POOL_SIZE		64
+
+struct pktcdvd_device
+{
+	struct block_device	*bdev;		/* dev attached */
+	dev_t			pkt_dev;	/* our dev */
+	char			name[20];
+	struct packet_settings	settings;
+	struct packet_stats	stats;
+	int			refcnt;		/* Open count */
+	int			write_speed;	/* current write speed, kB/s */
+	int			read_speed;	/* current read speed, kB/s */
+	unsigned long		offset;		/* start offset */
+	__u8			mode_offset;	/* 0 / 8 */
+	__u8			type;
+	unsigned long		flags;
+	__u16			mmc3_profile;
+	__u32			nwa;		/* next writable address */
+	__u32			lra;		/* last recorded address */
+	struct packet_cdrw	cdrw;
+	wait_queue_head_t	wqueue;
+
+	spinlock_t		lock;		/* Serialize access to bio_queue */
+	struct rb_root		bio_queue;	/* Work queue of bios we need to handle */
+	int			bio_queue_size;	/* Number of nodes in bio_queue */
+	bool			congested;	/* Someone is waiting for bio_queue_size
+						 * to drop. */
+	sector_t		current_sector;	/* Keep track of where the elevator is */
+	atomic_t		scan_queue;	/* Set to non-zero when pkt_handle_queue */
+						/* needs to be run. */
+	mempool_t		rb_pool;	/* mempool for pkt_rb_node allocations */
+
+	struct packet_iosched   iosched;
+	struct gendisk		*disk;
+
+	int			write_congestion_off;
+	int			write_congestion_on;
+
+	struct device		*dev;		/* sysfs pktcdvd[0-7] dev */
+
+	struct dentry		*dfs_d_root;	/* debugfs: devname directory */
+	struct dentry		*dfs_f_info;	/* debugfs: info file */
+};
+
+#endif /* __PKTCDVD_H */
diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h
new file mode 100644
index 000000000000..9cbb55d21c94
--- /dev/null
+++ b/include/uapi/linux/pktcdvd.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef _UAPI__PKTCDVD_H
+#define _UAPI__PKTCDVD_H
+
+#include <linux/types.h>
+
+/*
+ * 1 for normal debug messages, 2 is very verbose. 0 to turn it off.
+ */
+#define PACKET_DEBUG		1
+
+#define	MAX_WRITERS		8
+
+#define PKT_RB_POOL_SIZE	512
+
+/*
+ * How long we should hold a non-full packet before starting data gathering.
+ */
+#define PACKET_WAIT_TIME	(HZ * 5 / 1000)
+
+/*
+ * use drive write caching -- we need deferred error handling to be
+ * able to successfully recover with this option (drive will return good
+ * status as soon as the cdb is validated).
+ */
+#if defined(CONFIG_CDROM_PKTCDVD_WCACHE)
+#define USE_WCACHING		1
+#else
+#define USE_WCACHING		0
+#endif
+
+/*
+ * No user-servicable parts beyond this point ->
+ */
+
+/*
+ * device types
+ */
+#define PACKET_CDR		1
+#define	PACKET_CDRW		2
+#define PACKET_DVDR		3
+#define PACKET_DVDRW		4
+
+/*
+ * flags
+ */
+#define PACKET_WRITABLE		1	/* pd is writable */
+#define PACKET_NWA_VALID	2	/* next writable address valid */
+#define PACKET_LRA_VALID	3	/* last recorded address valid */
+#define PACKET_MERGE_SEGS	4	/* perform segment merging to keep */
+					/* underlying cdrom device happy */
+
+/*
+ * Disc status -- from READ_DISC_INFO
+ */
+#define PACKET_DISC_EMPTY	0
+#define PACKET_DISC_INCOMPLETE	1
+#define PACKET_DISC_COMPLETE	2
+#define PACKET_DISC_OTHER	3
+
+/*
+ * write type, and corresponding data block type
+ */
+#define PACKET_MODE1		1
+#define PACKET_MODE2		2
+#define PACKET_BLOCK_MODE1	8
+#define PACKET_BLOCK_MODE2	10
+
+/*
+ * Last session/border status
+ */
+#define PACKET_SESSION_EMPTY		0
+#define PACKET_SESSION_INCOMPLETE	1
+#define PACKET_SESSION_RESERVED		2
+#define PACKET_SESSION_COMPLETE		3
+
+#define PACKET_MCN			"4a656e734178626f65323030300000"
+
+#undef PACKET_USE_LS
+
+#define PKT_CTRL_CMD_SETUP	0
+#define PKT_CTRL_CMD_TEARDOWN	1
+#define PKT_CTRL_CMD_STATUS	2
+
+struct pkt_ctrl_command {
+	__u32 command;				/* in: Setup, teardown, status */
+	__u32 dev_index;			/* in/out: Device index */
+	__u32 dev;				/* in/out: Device nr for cdrw device */
+	__u32 pkt_dev;				/* in/out: Device nr for packet device */
+	__u32 num_devices;			/* out: Largest device index + 1 */
+	__u32 padding;				/* Not used */
+};
+
+/*
+ * packet ioctls
+ */
+#define PACKET_IOCTL_MAGIC	('X')
+#define PACKET_CTRL_CMD		_IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command)
+
+
+#endif /* _UAPI__PKTCDVD_H */
-- 
cgit 


From 19e183b54528f11fafeca60fc6d0821e29ff281e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 22 Dec 2022 18:12:50 +0000
Subject: elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size}

A subsequent fix for arm64 will use this parameter to parse the vma
information from the snapshot created by dump_vma_snapshot() rather than
traversing the vma list without the mmap_lock.

Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file")
Cc: <stable@vger.kernel.org> # 5.18.x
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Seth Jenkins <sethjenkins@google.com>
Suggested-by: Seth Jenkins <sethjenkins@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/elfcore.c | 4 ++--
 arch/ia64/kernel/elfcore.c  | 4 ++--
 arch/x86/um/elfcore.c       | 4 ++--
 fs/binfmt_elf.c             | 4 ++--
 fs/binfmt_elf_fdpic.c       | 4 ++--
 include/linux/elfcore.h     | 8 ++++----
 6 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 45f0aba1d875..f25519e283a5 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -76,7 +76,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
 	return ret;
 }
 
-Elf_Half elf_core_extra_phdrs(void)
+Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
 	int vma_count = 0;
@@ -113,7 +113,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
 	size_t data_size = 0;
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 94680521fbf9..8895df121540 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -7,7 +7,7 @@
 #include <asm/elf.h>
 
 
-Elf64_Half elf_core_extra_phdrs(void)
+Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return GATE_EHDR->e_phnum;
 }
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 48a3eb09d951..650cdbbdaf45 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -7,7 +7,7 @@
 #include <asm/elf.h>
 
 
-Elf32_Half elf_core_extra_phdrs(void)
+Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
 }
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index de63572a9404..9a780fafc539 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	 * The number of segs are recored into ELF header as 16bit value.
 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
 	 */
-	segs = cprm->vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
 
 	/* for notes section */
 	segs++;
@@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	offset += cprm->vma_data_size;
-	offset += elf_core_extra_data_size();
+	offset += elf_core_extra_data_size(cprm);
 	e_shoff = offset;
 
 	if (e_phnum == PN_XNUM) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 096e3520a0b1..a05eafcacfb2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	tmp->next = thread_list;
 	thread_list = tmp;
 
-	segs = cprm->vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
 
 	/* for notes section */
 	segs++;
@@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	offset += cprm->vma_data_size;
-	offset += elf_core_extra_data_size();
+	offset += elf_core_extra_data_size(cprm);
 	e_shoff = offset;
 
 	if (e_phnum == PN_XNUM) {
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 9ec81290e3c8..bd5560542c79 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
  * Dumping its extra ELF program headers includes all the other information
  * a debugger needs to easily find how the gate DSO was being used.
  */
-extern Elf_Half elf_core_extra_phdrs(void);
+extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm);
 extern int
 elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
 elf_core_write_extra_data(struct coredump_params *cprm);
-extern size_t elf_core_extra_data_size(void);
+extern size_t elf_core_extra_data_size(struct coredump_params *cprm);
 #else
-static inline Elf_Half elf_core_extra_phdrs(void)
+static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return 0;
 }
@@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-static inline size_t elf_core_extra_data_size(void)
+static inline size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	return 0;
 }
-- 
cgit 


From e7895f017b79410bf4591396a733b876dc1e0e9d Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 6 Jan 2023 10:44:00 -0500
Subject: bpf: remove the do_idr_lock parameter from bpf_prog_free_id()

It was determined that the do_idr_lock parameter to
bpf_prog_free_id() was not necessary as it should always be true.

Suggested-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230106154400.74211-2-paul@paul-moore.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  |  2 +-
 kernel/bpf/syscall.c | 20 ++++++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3de24cfb7a3d..634d37a599fa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1832,7 +1832,7 @@ void bpf_prog_inc(struct bpf_prog *prog);
 struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 
-void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+void bpf_prog_free_id(struct bpf_prog *prog);
 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
 
 struct btf_field *btf_record_find(const struct btf_record *rec,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 61bb19e81b9c..ecca9366c7a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 	return id > 0 ? 0 : id;
 }
 
-void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog)
 {
 	unsigned long flags;
 
@@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 	if (!prog->aux->id)
 		return;
 
-	if (do_idr_lock)
-		spin_lock_irqsave(&prog_idr_lock, flags);
-	else
-		__acquire(&prog_idr_lock);
-
+	spin_lock_irqsave(&prog_idr_lock, flags);
 	idr_remove(&prog_idr, prog->aux->id);
 	prog->aux->id = 0;
-
-	if (do_idr_lock)
-		spin_unlock_irqrestore(&prog_idr_lock, flags);
-	else
-		__release(&prog_idr_lock);
+	spin_unlock_irqrestore(&prog_idr_lock, flags);
 }
 
 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
@@ -2067,11 +2059,11 @@ static void bpf_prog_put_deferred(struct work_struct *work)
 	prog = aux->prog;
 	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
 	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
-	bpf_prog_free_id(prog, true);
+	bpf_prog_free_id(prog);
 	__bpf_prog_put_noref(prog, true);
 }
 
-static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
+static void __bpf_prog_put(struct bpf_prog *prog)
 {
 	struct bpf_prog_aux *aux = prog->aux;
 
@@ -2087,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 
 void bpf_prog_put(struct bpf_prog *prog)
 {
-	__bpf_prog_put(prog, true);
+	__bpf_prog_put(prog);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_put);
 
-- 
cgit 


From da2e552b469a0cd130ff70a88ccc4139da428a65 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 28 Nov 2022 19:05:47 +0200
Subject: net/mlx5: Fix command stats access after free

Command may fail while driver is reloading and can't accept FW commands
till command interface is reinitialized. Such command failure is being
logged to command stats. This results in NULL pointer access as command
stats structure is being freed and reallocated during mlx5 devlink
reload (see kernel log below).

Fix it by making command stats statically allocated on driver probe.

Kernel log:
[ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0
[ 2394.810610] PGD 0 P4D 0
[ 2394.811811] Oops: 0002 [#1] SMP NOPTI
...
[ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0
...
[ 2394.829505] Call Trace:
[ 2394.830667]  _raw_spin_lock_irq+0x23/0x26
[ 2394.831858]  cmd_status_err+0x55/0x110 [mlx5_core]
[ 2394.833020]  mlx5_access_reg+0xe7/0x150 [mlx5_core]
[ 2394.834175]  mlx5_query_port_ptys+0x78/0xa0 [mlx5_core]
[ 2394.835337]  mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core]
[ 2394.836454]  ? kmem_cache_alloc_trace+0x140/0x1c0
[ 2394.837562]  __rh_call_get_link_ksettings+0x33/0x100
[ 2394.838663]  ? __rtnl_unlock+0x25/0x50
[ 2394.839755]  __ethtool_get_link_ksettings+0x72/0x150
[ 2394.840862]  duplex_show+0x6e/0xc0
[ 2394.841963]  dev_attr_show+0x1c/0x40
[ 2394.843048]  sysfs_kf_seq_show+0x9b/0x100
[ 2394.844123]  seq_read+0x153/0x410
[ 2394.845187]  vfs_read+0x91/0x140
[ 2394.846226]  ksys_read+0x4f/0xb0
[ 2394.847234]  do_syscall_64+0x5b/0x1a0
[ 2394.848228]  entry_SYSCALL_64_after_hwframe+0x65/0xca

Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 ++-----------
 include/linux/mlx5/driver.h                   |  2 +-
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d3ca745d107d..c837103a9ee3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		return -EINVAL;
 	}
 
-	cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL);
-	if (!cmd->stats)
-		return -ENOMEM;
-
 	cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
-	if (!cmd->pool) {
-		err = -ENOMEM;
-		goto dma_pool_err;
-	}
+	if (!cmd->pool)
+		return -ENOMEM;
 
 	err = alloc_cmd_page(dev, cmd);
 	if (err)
@@ -2268,8 +2262,6 @@ err_free_page:
 
 err_free_pool:
 	dma_pool_destroy(cmd->pool);
-dma_pool_err:
-	kvfree(cmd->stats);
 	return err;
 }
 
@@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
 	destroy_msg_cache(dev);
 	free_cmd_page(dev, cmd);
 	dma_pool_destroy(cmd->pool);
-	kvfree(cmd->stats);
 }
 
 void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d476255c9a3f..76ef2e4fde38 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -315,7 +315,7 @@ struct mlx5_cmd {
 	struct mlx5_cmd_debug dbg;
 	struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
 	int checksum_disabled;
-	struct mlx5_cmd_stats *stats;
+	struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
 };
 
 struct mlx5_cmd_mailbox {
-- 
cgit 


From 980a637d11fe8dfc734f508a422185c2de55e669 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 4 Jan 2023 09:35:09 +0100
Subject: ARM: omap1: fix !ARCH_OMAP1_ANY link failures

While compile-testing randconfig builds for the upcoming boardfile
removal, I noticed that an earlier patch of mine was completely
broken, and the introduction of CONFIG_ARCH_OMAP1_ANY only replaced
one set of build failures with another one, now resulting in
link failures like

ld: drivers/video/fbdev/omap/omapfb_main.o: in function `omapfb_do_probe':
drivers/video/fbdev/omap/omapfb_main.c:1703: undefined reference to `omap_set_dma_priority'
ld: drivers/dma/ti/omap-dma.o: in function `omap_dma_free_chan_resources':
drivers/dma/ti/omap-dma.c:777: undefined reference to `omap_free_dma'
drivers/dma/ti/omap-dma.c:1685: undefined reference to `omap_get_plat_info'
ld: drivers/usb/gadget/udc/omap_udc.o: in function `next_in_dma':
drivers/usb/gadget/udc/omap_udc.c:820: undefined reference to `omap_get_dma_active_status'

I tried reworking it, but the resulting patch ended up much bigger than
simply avoiding the original problem of unused-function warnings like

arch/arm/mach-omap1/mcbsp.c:76:30: error: unused variable 'omap1_mcbsp_ops' [-Werror,-Wunused-variable]

As a result, revert the previous fix, and rearrange the code that
produces warnings to hide them. For mcbsp, the #ifdef check can
simply be removed as the cpu_is_omapxxx() checks already achieve
the same result, while in the io.c the easiest solution appears to
be to merge the common map bits into each soc specific portion.
This gets cleaned in a nicer way after omap7xx support gets dropped,
as the remaining SoCs all have the exact same I/O map.

Fixes: 615dce5bf736 ("ARM: omap1: fix build with no SoC selected")
Cc: stable@vger.kernel.org
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-omap1/Kconfig     |  5 +----
 arch/arm/mach-omap1/Makefile    |  4 ----
 arch/arm/mach-omap1/io.c        | 32 +++++++++++++++-----------------
 arch/arm/mach-omap1/mcbsp.c     | 21 ---------------------
 arch/arm/mach-omap1/pm.h        |  7 -------
 include/linux/soc/ti/omap1-io.h |  4 ++--
 6 files changed, 18 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index 538a960257cc..7ec7ada287e0 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -4,6 +4,7 @@ menuconfig ARCH_OMAP1
 	depends on ARCH_MULTI_V4T || ARCH_MULTI_V5
 	depends on CPU_LITTLE_ENDIAN
 	depends on ATAGS
+	select ARCH_OMAP
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_OMAP
 	select CLKSRC_MMIO
@@ -45,10 +46,6 @@ config ARCH_OMAP16XX
 	select CPU_ARM926T
 	select OMAP_DM_TIMER
 
-config ARCH_OMAP1_ANY
-	select ARCH_OMAP
-	def_bool ARCH_OMAP730 || ARCH_OMAP850 || ARCH_OMAP15XX || ARCH_OMAP16XX
-
 config ARCH_OMAP
 	bool
 
diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile
index 506074b86333..0615cb0ba580 100644
--- a/arch/arm/mach-omap1/Makefile
+++ b/arch/arm/mach-omap1/Makefile
@@ -3,8 +3,6 @@
 # Makefile for the linux kernel.
 #
 
-ifdef CONFIG_ARCH_OMAP1_ANY
-
 # Common support
 obj-y := io.o id.o sram-init.o sram.o time.o irq.o mux.o flash.o \
 	 serial.o devices.o dma.o omap-dma.o fb.o
@@ -59,5 +57,3 @@ obj-$(CONFIG_ARCH_OMAP730)		+= gpio7xx.o
 obj-$(CONFIG_ARCH_OMAP850)		+= gpio7xx.o
 obj-$(CONFIG_ARCH_OMAP15XX)		+= gpio15xx.o
 obj-$(CONFIG_ARCH_OMAP16XX)		+= gpio16xx.o
-
-endif
diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c
index d2db9b8aed3f..0074b011a05a 100644
--- a/arch/arm/mach-omap1/io.c
+++ b/arch/arm/mach-omap1/io.c
@@ -22,17 +22,14 @@
  * The machine specific code may provide the extra mapping besides the
  * default mapping provided here.
  */
-static struct map_desc omap_io_desc[] __initdata = {
+#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
+static struct map_desc omap7xx_io_desc[] __initdata = {
 	{
 		.virtual	= OMAP1_IO_VIRT,
 		.pfn		= __phys_to_pfn(OMAP1_IO_PHYS),
 		.length		= OMAP1_IO_SIZE,
 		.type		= MT_DEVICE
-	}
-};
-
-#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
-static struct map_desc omap7xx_io_desc[] __initdata = {
+	},
 	{
 		.virtual	= OMAP7XX_DSP_BASE,
 		.pfn		= __phys_to_pfn(OMAP7XX_DSP_START),
@@ -49,6 +46,12 @@ static struct map_desc omap7xx_io_desc[] __initdata = {
 
 #ifdef CONFIG_ARCH_OMAP15XX
 static struct map_desc omap1510_io_desc[] __initdata = {
+	{
+		.virtual	= OMAP1_IO_VIRT,
+		.pfn		= __phys_to_pfn(OMAP1_IO_PHYS),
+		.length		= OMAP1_IO_SIZE,
+		.type		= MT_DEVICE
+	},
 	{
 		.virtual	= OMAP1510_DSP_BASE,
 		.pfn		= __phys_to_pfn(OMAP1510_DSP_START),
@@ -65,6 +68,12 @@ static struct map_desc omap1510_io_desc[] __initdata = {
 
 #if defined(CONFIG_ARCH_OMAP16XX)
 static struct map_desc omap16xx_io_desc[] __initdata = {
+	{
+		.virtual	= OMAP1_IO_VIRT,
+		.pfn		= __phys_to_pfn(OMAP1_IO_PHYS),
+		.length		= OMAP1_IO_SIZE,
+		.type		= MT_DEVICE
+	},
 	{
 		.virtual	= OMAP16XX_DSP_BASE,
 		.pfn		= __phys_to_pfn(OMAP16XX_DSP_START),
@@ -79,18 +88,9 @@ static struct map_desc omap16xx_io_desc[] __initdata = {
 };
 #endif
 
-/*
- * Maps common IO regions for omap1
- */
-static void __init omap1_map_common_io(void)
-{
-	iotable_init(omap_io_desc, ARRAY_SIZE(omap_io_desc));
-}
-
 #if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850)
 void __init omap7xx_map_io(void)
 {
-	omap1_map_common_io();
 	iotable_init(omap7xx_io_desc, ARRAY_SIZE(omap7xx_io_desc));
 }
 #endif
@@ -98,7 +98,6 @@ void __init omap7xx_map_io(void)
 #ifdef CONFIG_ARCH_OMAP15XX
 void __init omap15xx_map_io(void)
 {
-	omap1_map_common_io();
 	iotable_init(omap1510_io_desc, ARRAY_SIZE(omap1510_io_desc));
 }
 #endif
@@ -106,7 +105,6 @@ void __init omap15xx_map_io(void)
 #if defined(CONFIG_ARCH_OMAP16XX)
 void __init omap16xx_map_io(void)
 {
-	omap1_map_common_io();
 	iotable_init(omap16xx_io_desc, ARRAY_SIZE(omap16xx_io_desc));
 }
 #endif
diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c
index 05c25c432449..b1632cbe37e6 100644
--- a/arch/arm/mach-omap1/mcbsp.c
+++ b/arch/arm/mach-omap1/mcbsp.c
@@ -89,7 +89,6 @@ static struct omap_mcbsp_ops omap1_mcbsp_ops = {
 #define OMAP1610_MCBSP2_BASE	0xfffb1000
 #define OMAP1610_MCBSP3_BASE	0xe1017000
 
-#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850)
 struct resource omap7xx_mcbsp_res[][6] = {
 	{
 		{
@@ -159,14 +158,7 @@ static struct omap_mcbsp_platform_data omap7xx_mcbsp_pdata[] = {
 };
 #define OMAP7XX_MCBSP_RES_SZ		ARRAY_SIZE(omap7xx_mcbsp_res[1])
 #define OMAP7XX_MCBSP_COUNT		ARRAY_SIZE(omap7xx_mcbsp_res)
-#else
-#define omap7xx_mcbsp_res_0		NULL
-#define omap7xx_mcbsp_pdata		NULL
-#define OMAP7XX_MCBSP_RES_SZ		0
-#define OMAP7XX_MCBSP_COUNT		0
-#endif
 
-#ifdef CONFIG_ARCH_OMAP15XX
 struct resource omap15xx_mcbsp_res[][6] = {
 	{
 		{
@@ -266,14 +258,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = {
 };
 #define OMAP15XX_MCBSP_RES_SZ		ARRAY_SIZE(omap15xx_mcbsp_res[1])
 #define OMAP15XX_MCBSP_COUNT		ARRAY_SIZE(omap15xx_mcbsp_res)
-#else
-#define omap15xx_mcbsp_res_0		NULL
-#define omap15xx_mcbsp_pdata		NULL
-#define OMAP15XX_MCBSP_RES_SZ		0
-#define OMAP15XX_MCBSP_COUNT		0
-#endif
 
-#ifdef CONFIG_ARCH_OMAP16XX
 struct resource omap16xx_mcbsp_res[][6] = {
 	{
 		{
@@ -373,12 +358,6 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = {
 };
 #define OMAP16XX_MCBSP_RES_SZ		ARRAY_SIZE(omap16xx_mcbsp_res[1])
 #define OMAP16XX_MCBSP_COUNT		ARRAY_SIZE(omap16xx_mcbsp_res)
-#else
-#define omap16xx_mcbsp_res_0		NULL
-#define omap16xx_mcbsp_pdata		NULL
-#define OMAP16XX_MCBSP_RES_SZ		0
-#define OMAP16XX_MCBSP_COUNT		0
-#endif
 
 static void omap_mcbsp_register_board_cfg(struct resource *res, int res_count,
 			struct omap_mcbsp_platform_data *config, int size)
diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h
index d9165709c532..0d1f092821ff 100644
--- a/arch/arm/mach-omap1/pm.h
+++ b/arch/arm/mach-omap1/pm.h
@@ -106,13 +106,6 @@
 #define OMAP7XX_IDLECT3		0xfffece24
 #define OMAP7XX_IDLE_LOOP_REQUEST	0x0C00
 
-#if     !defined(CONFIG_ARCH_OMAP730) && \
-	!defined(CONFIG_ARCH_OMAP850) && \
-	!defined(CONFIG_ARCH_OMAP15XX) && \
-	!defined(CONFIG_ARCH_OMAP16XX)
-#warning "Power management for this processor not implemented yet"
-#endif
-
 #ifndef __ASSEMBLER__
 
 #include <linux/clk.h>
diff --git a/include/linux/soc/ti/omap1-io.h b/include/linux/soc/ti/omap1-io.h
index f7f12728d4a6..9a60f45899d3 100644
--- a/include/linux/soc/ti/omap1-io.h
+++ b/include/linux/soc/ti/omap1-io.h
@@ -5,7 +5,7 @@
 #ifndef __ASSEMBLER__
 #include <linux/types.h>
 
-#ifdef CONFIG_ARCH_OMAP1_ANY
+#ifdef CONFIG_ARCH_OMAP1
 /*
  * NOTE: Please use ioremap + __raw_read/write where possible instead of these
  */
@@ -15,7 +15,7 @@ extern u32 omap_readl(u32 pa);
 extern void omap_writeb(u8 v, u32 pa);
 extern void omap_writew(u16 v, u32 pa);
 extern void omap_writel(u32 v, u32 pa);
-#else
+#elif defined(CONFIG_COMPILE_TEST)
 static inline u8 omap_readb(u32 pa)  { return 0; }
 static inline u16 omap_readw(u32 pa) { return 0; }
 static inline u32 omap_readl(u32 pa) { return 0; }
-- 
cgit 


From a1193de562f54c7c9f60ca9f2db96e50a7608de1 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 4 Jan 2023 16:02:40 -0800
Subject: mm: fix vma->anon_name memory leak for anonymous shmem VMAs

free_anon_vma_name() is missing a check for anonymous shmem VMA which
leads to a memory leak due to refcount not being dropped.  Fix this by
calling anon_vma_name_put() unconditionally.  It will free vma->anon_name
whenever it's non-NULL.

Link: https://lkml.kernel.org/r/20230105000241.1450843-1-surenb@google.com
Fixes: d09e8ca6cb93 ("mm: anonymous shared memory naming")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reported-by: syzbot+91edf9178386a07d06a7@syzkaller.appspotmail.com
Cc: Hugh Dickins <hughd@google.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e8ed225d8f7c..ff3f3f23f649 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -413,8 +413,7 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
 	 * Not using anon_vma_name because it generates a warning if mmap_lock
 	 * is not held, which might be the case here.
 	 */
-	if (!vma->vm_file)
-		anon_vma_name_put(vma->anon_name);
+	anon_vma_name_put(vma->anon_name);
 }
 
 static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
-- 
cgit 


From 0411d6ee50e3b74a793848e4f41f45860163f5cc Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 6 Jan 2023 20:33:31 +0000
Subject: include/linux/mm: fix release_pages_arg kernel doc comment

Commit 449c796768c9 ("mm: teach release_pages() to take an array of
encoded page pointers too") added the kernel doc comment for
release_pages() on top of 'union release_pages_arg', so making 'make
htmldocs' complains as below:

    ./include/linux/mm.h:1268: warning: cannot understand function prototype: 'typedef union '

The kernel doc comment for the function is already on top of the
function's definition in mm/swap.c, and the new comment is actually not
for the function but indeed release_pages_arg.  Fixing the comment to
reflect the intent would be one option.  But, kernel doc cannot parse
the union as below due to the attribute.

    ./include/linux/mm.h:1272: error: Cannot parse struct or union!

Modify the comment to reflect the intent but do not mark it as a kernel
doc comment.

Link: https://lkml.kernel.org/r/20230106203331.127532-1-sj@kernel.org
Fixes: 449c796768c9 ("mm: teach release_pages() to take an array of encoded page pointers too")
Signed-off-by: SeongJae Park <sj@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f3f196e4d66d..8f857163ac89 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1270,10 +1270,10 @@ static inline void folio_put_refs(struct folio *folio, int refs)
 		__folio_put(folio);
 }
 
-/**
- * release_pages - release an array of pages or folios
+/*
+ * union release_pages_arg - an array of pages or folios
  *
- * This just releases a simple array of multiple pages, and
+ * release_pages() releases a simple array of multiple pages, and
  * accepts various different forms of said page array: either
  * a regular old boring array of pages, an array of folios, or
  * an array of encoded page pointers.
-- 
cgit 


From 8651a137e62ebfde3df95cbb1ca055d013ec5b9e Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lstoakes@gmail.com>
Date: Sat, 7 Jan 2023 00:00:05 +0000
Subject: mm: update mmap_sem comments to refer to mmap_lock

The rename from mm->mmap_sem to mm->mmap_lock was performed in commit
da1c55f1b272 ("mmap locking API: rename mmap_sem to mmap_lock") and commit
c1e8d7c6a7a6 ("map locking API: convert mmap_sem comments"), however some
incorrect comments remain.

This patch simply corrects those comments which are obviously incorrect
within mm itself.

Link: https://lkml.kernel.org/r/33fba04389ab63fc4980e7ba5442f521df6dc657.1673048927.git.lstoakes@gmail.com
Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 2 +-
 include/linux/page_ref.h | 2 +-
 mm/hugetlb.c             | 4 ++--
 mm/madvise.c             | 2 +-
 mm/mmap.c                | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3b8475007734..9757067c3053 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -581,7 +581,7 @@ struct vm_area_struct {
 	/*
 	 * For private and shared anonymous mappings, a pointer to a null
 	 * terminated string containing the name given to the vma, or NULL if
-	 * unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
+	 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
 	 */
 	struct anon_vma_name *anon_name;
 #endif
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 2e677e6ad09f..d7c2d33baa7f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -301,7 +301,7 @@ static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
  *
  * You can also use this function if you're holding a lock that prevents
  * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_sem or page table lock for page tables.  In this case,
+ * or the mmap_lock or page table lock for page tables.  In this case,
  * it will always succeed, and you could have used a plain folio_get(),
  * but it's sometimes more convenient to have a common function called
  * from both locked and RCU-protected contexts.
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f96794be71d5..7fcdb98c9e68 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1183,7 +1183,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 
 /*
  * Reset and decrement one ref on hugepage private reservation.
- * Called with mm->mmap_sem writer semaphore held.
+ * Called with mm->mmap_lock writer semaphore held.
  * This function should be only used by move_vma() and operate on
  * same sized vma. It should never come here with last ref on the
  * reservation.
@@ -5152,7 +5152,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
 
 	/*
 	 * We don't have to worry about the ordering of src and dst ptlocks
-	 * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock.
+	 * because exclusive mmap_lock (or the i_mmap_lock) prevents deadlock.
 	 */
 	if (src_ptl != dst_ptl)
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
diff --git a/mm/madvise.c b/mm/madvise.c
index a56a6d17e201..b6ea204d4e23 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -130,7 +130,7 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
 #endif /* CONFIG_ANON_VMA_NAME */
 /*
  * Update the vm_flags on region of a vma, splitting it or merging it as
- * necessary.  Must be called with mmap_sem held for writing;
+ * necessary.  Must be called with mmap_lock held for writing;
  * Caller should ensure anon_name stability by raising its refcount even when
  * anon_name belongs to a valid vma because this function might free that vma.
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index 420476fcfbfc..425a9349e610 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2294,7 +2294,7 @@ static inline int munmap_sidetree(struct vm_area_struct *vma,
  * @start: The aligned start address to munmap.
  * @end: The aligned end address to munmap.
  * @uf: The userfaultfd list_head
- * @downgrade: Set to true to attempt a write downgrade of the mmap_sem
+ * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
  *
  * If @downgrade is true, check return code for potential release of the lock.
  */
@@ -2469,7 +2469,7 @@ map_count_exceeded:
  * @len: The length of the range to munmap
  * @uf: The userfaultfd list_head
  * @downgrade: set to true if the user wants to attempt to write_downgrade the
- * mmap_sem
+ * mmap_lock
  *
  * This function takes a @mas that is either pointing to the previous VMA or set
  * to MA_START and sets it up to remove the mapping(s).  The @len will be
-- 
cgit 


From ed058eab22d64c00663563e8e1e112989c65c59f Mon Sep 17 00:00:00 2001
From: Henning Schild <henning.schild@siemens.com>
Date: Thu, 22 Dec 2022 11:37:19 +0100
Subject: platform/x86: simatic-ipc: correct name of a model

What we called IPC427G should be renamed to BX-39A to be more in line
with the actual product name.

Signed-off-by: Henning Schild <henning.schild@siemens.com>
Link: https://lore.kernel.org/r/20221222103720.8546-2-henning.schild@siemens.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/simatic-ipc.c            | 2 +-
 include/linux/platform_data/x86/simatic-ipc.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c
index ca76076fc706..2ab1f8da32b0 100644
--- a/drivers/platform/x86/simatic-ipc.c
+++ b/drivers/platform/x86/simatic-ipc.c
@@ -46,7 +46,7 @@ static struct {
 	{SIMATIC_IPC_IPC427D, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_NONE},
 	{SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E},
-	{SIMATIC_IPC_IPC427G, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+	{SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
 };
 
 static int register_platform_devices(u32 station_id)
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index 632320ec8f08..a4a6cba412cb 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -32,7 +32,7 @@ enum simatic_ipc_station_ids {
 	SIMATIC_IPC_IPC477E = 0x00000A02,
 	SIMATIC_IPC_IPC127E = 0x00000D01,
 	SIMATIC_IPC_IPC227G = 0x00000F01,
-	SIMATIC_IPC_IPC427G = 0x00001001,
+	SIMATIC_IPC_IPCBX_39A = 0x00001001,
 };
 
 static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
-- 
cgit 


From d348b1d761e358a4ba03fb34aa7e3dbd278db236 Mon Sep 17 00:00:00 2001
From: Henning Schild <henning.schild@siemens.com>
Date: Thu, 22 Dec 2022 11:37:20 +0100
Subject: platform/x86: simatic-ipc: add another model

Add IPC PX-39A support.

Signed-off-by: Henning Schild <henning.schild@siemens.com>
Link: https://lore.kernel.org/r/20221222103720.8546-3-henning.schild@siemens.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/simatic-ipc.c            | 1 +
 include/linux/platform_data/x86/simatic-ipc.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c
index 2ab1f8da32b0..b3622419cd1a 100644
--- a/drivers/platform/x86/simatic-ipc.c
+++ b/drivers/platform/x86/simatic-ipc.c
@@ -47,6 +47,7 @@ static struct {
 	{SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+	{SIMATIC_IPC_IPCPX_39A, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_227G},
 };
 
 static int register_platform_devices(u32 station_id)
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index a4a6cba412cb..a48bb5240977 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -33,6 +33,7 @@ enum simatic_ipc_station_ids {
 	SIMATIC_IPC_IPC127E = 0x00000D01,
 	SIMATIC_IPC_IPC227G = 0x00000F01,
 	SIMATIC_IPC_IPCBX_39A = 0x00001001,
+	SIMATIC_IPC_IPCPX_39A = 0x00001002,
 };
 
 static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
-- 
cgit 


From d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 9 Jan 2023 10:44:31 +0100
Subject: efi: tpm: Avoid READ_ONCE() for accessing the event log

Nathan reports that recent kernels built with LTO will crash when doing
EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a
misaligned load from the TPM event log, which is annotated with
READ_ONCE(), and under LTO, this gets translated into a LDAR instruction
which does not tolerate misaligned accesses.

Interestingly, this does not happen when booting the same kernel
straight from the UEFI shell, and so the fact that the event log may
appear misaligned in memory may be caused by a bug in GRUB or SHIM.

However, using READ_ONCE() to access firmware tables is slightly unusual
in any case, and here, we only need to ensure that 'event' is not
dereferenced again after it gets unmapped, but this is already taken
care of by the implicit barrier() semantics of the early_memunmap()
call.

Cc: <stable@vger.kernel.org>
Cc: Peter Jones <pjones@redhat.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://github.com/ClangBuiltLinux/linux/issues/1782
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/tpm_eventlog.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 20c0ff54b7a0..7d68a5cc5881 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev
 	 * The loop below will unmap these fields if the log is larger than
 	 * one page, so save them here for reference:
 	 */
-	count = READ_ONCE(event->count);
-	event_type = READ_ONCE(event->event_type);
+	count = event->count;
+	event_type = event->event_type;
 
 	/* Verify that it's the log header */
 	if (event_header->pcr_idx != 0 ||
-- 
cgit 


From cd702d18c882d5a4ea44bbdb38edd5d5577ef640 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Mon, 16 Jan 2023 16:22:15 +0200
Subject: usb: acpi: add helper to check port lpm capability using acpi _DSM

Add a helper to evaluate ACPI usb device specific method (_DSM) provided
in case the USB3 port shouldn't enter U1 and U2 link states.

This _DSM was added as port specific retimer configuration may lead to
exit latencies growing beyond U1/U2 exit limits, and OS needs a way to
find which ports can't support U1/U2 link power management states.

This _DSM is also used by windows:
Link: https://docs.microsoft.com/en-us/windows-hardware/drivers/bringup/usb-device-specific-method---dsm-

Some patch issues found in testing resolved by Ron Lee

Cc: stable@vger.kernel.org
Tested-by: Ron Lee <ron.lee@intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20230116142216.1141605-7-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/usb-acpi.c | 65 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h         |  3 +++
 2 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index 6d93428432f1..533baa85083c 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -37,6 +37,71 @@ bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
 }
 EXPORT_SYMBOL_GPL(usb_acpi_power_manageable);
 
+#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899"
+#define USB_DSM_DISABLE_U1_U2_FOR_PORT	5
+
+/**
+ * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port.
+ * @hdev: USB device belonging to the usb hub
+ * @index: zero based port index
+ *
+ * Some USB3 ports may not support USB3 link power management U1/U2 states
+ * due to different retimer setup. ACPI provides _DSM method which returns 0x01
+ * if U1 and U2 states should be disabled. Evaluate _DSM with:
+ * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899
+ * Arg1: Revision ID = 0
+ * Arg2: Function Index = 5
+ * Arg3: (empty)
+ *
+ * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0
+ */
+
+int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
+{
+	union acpi_object *obj;
+	acpi_handle port_handle;
+	int port1 = index + 1;
+	guid_t guid;
+	int ret;
+
+	ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid);
+	if (ret)
+		return ret;
+
+	port_handle = usb_get_hub_port_acpi_handle(hdev, port1);
+	if (!port_handle) {
+		dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1);
+		return -ENODEV;
+	}
+
+	if (!acpi_check_dsm(port_handle, &guid, 0,
+			    BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) {
+		dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n",
+			port1, USB_DSM_DISABLE_U1_U2_FOR_PORT);
+		return -ENODEV;
+	}
+
+	obj = acpi_evaluate_dsm(port_handle, &guid, 0,
+				USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL);
+
+	if (!obj)
+		return -ENODEV;
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1);
+		ACPI_FREE(obj);
+		return -EINVAL;
+	}
+
+	if (obj->integer.value == 0x01)
+		ret = 1;
+
+	ACPI_FREE(obj);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable);
+
 /**
  * usb_acpi_set_power_state - control usb port's power via acpi power
  * resource
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 7d5325d47c45..04a7e94fb772 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -774,11 +774,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf);
 extern int usb_acpi_set_power_state(struct usb_device *hdev, int index,
 	bool enable);
 extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index);
+extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index);
 #else
 static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index,
 	bool enable) { return 0; }
 static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index)
 	{ return true; }
+static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index)
+	{ return 0; }
 #endif
 
 /* USB autosuspend and autoresume */
-- 
cgit 


From 11cefeb2058f9f2322713b2683be9301556705e2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 12 Dec 2022 16:20:35 +0100
Subject: USB: fix misleading usb_set_intfdata() kernel doc

The struct device driver-data pointer is used for any data that a driver
may need in various callbacks while bound to the device. For
convenience, subsystems typically provide wrappers such as
usb_set_intfdata() of the generic accessor functions for use in bus
callbacks.

There is generally no longer any need for a driver to clear the pointer,
but since commit 0998d0631001 ("device-core: Ensure drvdata = NULL when
no driver is bound") the driver-data pointer is set to NULL by driver
core post unbind anyway.

For historical reasons, USB core also clears this pointer when an
explicitly claimed interface is released.

Due to a misunderstanding, a misleading kernel doc comment for
usb_set_intfdata() was recently added which claimed that the driver data
pointer must not be cleared during disconnect before "all actions [are]
completed", which is both imprecise and incorrect.

Specifically, drivers like cdc-acm which claim additional interfaces use
the driver-data pointer as a flag which is cleared when the first
interface is unbound. As long as a driver does not do something odd like
dereference the pointer in, for example, completion callbacks, this can
be done at any time during disconnect. And in any case this is no
different than for any other resource, like the driver data itself,
which may be freed by the disconnect callback.

Note that the comment actually also claimed that the interface itself
was somehow being set to NULL by driver core.

Fix the kernel doc by removing incorrect, overly specific and misleading
details and adding a comment about why some drivers do clear the
driver-data pointer.

Fixes: 27ef17849779 ("usb: add usb_set_intfdata() documentation")
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20221212152035.31806-1-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 04a7e94fb772..86d1c8e79566 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -267,16 +267,15 @@ static inline void *usb_get_intfdata(struct usb_interface *intf)
 }
 
 /**
- * usb_set_intfdata() - associate driver-specific data with the interface
- * @intf: the usb interface
- * @data: pointer to the device priv structure or %NULL
+ * usb_set_intfdata() - associate driver-specific data with an interface
+ * @intf: USB interface
+ * @data: driver data
  *
- * Drivers should use this function in their probe() to associate their
- * driver-specific data with the usb interface.
+ * Drivers can use this function in their probe() callbacks to associate
+ * driver-specific data with an interface.
  *
- * When disconnecting, the core will take care of setting @intf back to %NULL,
- * so no actions are needed on the driver side. The interface should not be set
- * to %NULL before all actions completed (e.g. no outsanding URB remaining).
+ * Note that there is generally no need to clear the driver-data pointer even
+ * if some drivers do so for historical or implementation-specific reasons.
  */
 static inline void usb_set_intfdata(struct usb_interface *intf, void *data)
 {
-- 
cgit 


From d2c865553adb7ad9abf3894cbb209cf1bc95484a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Jan 2023 17:41:14 +0100
Subject: firmware: zynqmp: fix declarations for gcc-13

gcc-13.0.1 reports a type mismatch for two functions:

drivers/firmware/xilinx/zynqmp.c:1228:5: error: conflicting types for 'zynqmp_pm_set_rpu_mode' due to enum/integer mismatch; have 'int(u32,  enum rpu_oper_mode)' {aka 'int(unsigned int,  enum rpu_oper_mode)'} [-Werror=enum-int-mismatch]
 1228 | int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode)
      |     ^~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/firmware/xilinx/zynqmp.c:25:
include/linux/firmware/xlnx-zynqmp.h:552:5: note: previous declaration of 'zynqmp_pm_set_rpu_mode' with type 'int(u32,  u32)' {aka 'int(unsigned int,  unsigned int)'}
  552 | int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1);
      |     ^~~~~~~~~~~~~~~~~~~~~~
drivers/firmware/xilinx/zynqmp.c:1246:5: error: conflicting types for 'zynqmp_pm_set_tcm_config' due to enum/integer mismatch; have 'int(u32,  enum rpu_tcm_comb)' {aka 'int(unsigned int,  enum rpu_tcm_comb)'} [-Werror=enum-int-mismatch]
 1246 | int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode)
      |     ^~~~~~~~~~~~~~~~~~~~~~~~
include/linux/firmware/xlnx-zynqmp.h:553:5: note: previous declaration of 'zynqmp_pm_set_tcm_config' with type 'int(u32,  u32)' {aka 'int(unsigned int,  unsigned int)'}
  553 | int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~

Change the declaration in the header to match the function definition.

Acked-by: Michal Simek <michal.simek@amd.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/firmware/xlnx-zynqmp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index b986e267d149..b09f443d3ab9 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -545,8 +545,8 @@ int zynqmp_pm_request_wake(const u32 node,
 			   const u64 address,
 			   const enum zynqmp_pm_request_ack ack);
 int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mode);
-int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1);
-int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1);
+int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode);
+int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode);
 int zynqmp_pm_set_sd_config(u32 node, enum pm_sd_config_type config, u32 value);
 int zynqmp_pm_set_gem_config(u32 node, enum pm_gem_config_type config,
 			     u32 value);
@@ -845,12 +845,12 @@ static inline int zynqmp_pm_get_rpu_mode(u32 node_id, enum rpu_oper_mode *rpu_mo
 	return -ENODEV;
 }
 
-static inline int zynqmp_pm_set_rpu_mode(u32 node_id, u32 arg1)
+static inline int zynqmp_pm_set_rpu_mode(u32 node_id, enum rpu_oper_mode rpu_mode)
 {
 	return -ENODEV;
 }
 
-static inline int zynqmp_pm_set_tcm_config(u32 node_id, u32 arg1)
+static inline int zynqmp_pm_set_tcm_config(u32 node_id, enum rpu_tcm_comb tcm_mode)
 {
 	return -ENODEV;
 }
-- 
cgit 


From 39f5a81f7ad80eb3fbcbfd817c6552db9de5504d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 24 Jan 2023 11:57:52 +0100
Subject: platform/x86: apple-gmux: Move port defines to apple-gmux.h

This is a preparation patch for adding a new static inline
apple_gmux_detect() helper which actually checks a supported
gmux is present, rather then only checking an ACPI device with
the HID is there as apple_gmux_present() does.

Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection")
Link: https://lore.kernel.org/platform-driver-x86/20230123113750.462144-1-hdegoede@redhat.com/
Reported-by: Emmanouil Kouroupakis <kartebi@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230124105754.62167-2-hdegoede@redhat.com
---
 drivers/platform/x86/apple-gmux.c | 23 -----------------------
 include/linux/apple-gmux.h        | 23 +++++++++++++++++++++++
 2 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index ca33df7ea550..a0af01f6a0fd 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -64,29 +64,6 @@ struct apple_gmux_data {
 
 static struct apple_gmux_data *apple_gmux_data;
 
-/*
- * gmux port offsets. Many of these are not yet used, but may be in the
- * future, and it's useful to have them documented here anyhow.
- */
-#define GMUX_PORT_VERSION_MAJOR		0x04
-#define GMUX_PORT_VERSION_MINOR		0x05
-#define GMUX_PORT_VERSION_RELEASE	0x06
-#define GMUX_PORT_SWITCH_DISPLAY	0x10
-#define GMUX_PORT_SWITCH_GET_DISPLAY	0x11
-#define GMUX_PORT_INTERRUPT_ENABLE	0x14
-#define GMUX_PORT_INTERRUPT_STATUS	0x16
-#define GMUX_PORT_SWITCH_DDC		0x28
-#define GMUX_PORT_SWITCH_EXTERNAL	0x40
-#define GMUX_PORT_SWITCH_GET_EXTERNAL	0x41
-#define GMUX_PORT_DISCRETE_POWER	0x50
-#define GMUX_PORT_MAX_BRIGHTNESS	0x70
-#define GMUX_PORT_BRIGHTNESS		0x74
-#define GMUX_PORT_VALUE			0xc2
-#define GMUX_PORT_READ			0xd0
-#define GMUX_PORT_WRITE			0xd4
-
-#define GMUX_MIN_IO_LEN			(GMUX_PORT_BRIGHTNESS + 4)
-
 #define GMUX_INTERRUPT_ENABLE		0xff
 #define GMUX_INTERRUPT_DISABLE		0x00
 
diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h
index ddb10aa67b14..80efaaf89e07 100644
--- a/include/linux/apple-gmux.h
+++ b/include/linux/apple-gmux.h
@@ -11,6 +11,29 @@
 
 #define GMUX_ACPI_HID "APP000B"
 
+/*
+ * gmux port offsets. Many of these are not yet used, but may be in the
+ * future, and it's useful to have them documented here anyhow.
+ */
+#define GMUX_PORT_VERSION_MAJOR		0x04
+#define GMUX_PORT_VERSION_MINOR		0x05
+#define GMUX_PORT_VERSION_RELEASE	0x06
+#define GMUX_PORT_SWITCH_DISPLAY	0x10
+#define GMUX_PORT_SWITCH_GET_DISPLAY	0x11
+#define GMUX_PORT_INTERRUPT_ENABLE	0x14
+#define GMUX_PORT_INTERRUPT_STATUS	0x16
+#define GMUX_PORT_SWITCH_DDC		0x28
+#define GMUX_PORT_SWITCH_EXTERNAL	0x40
+#define GMUX_PORT_SWITCH_GET_EXTERNAL	0x41
+#define GMUX_PORT_DISCRETE_POWER	0x50
+#define GMUX_PORT_MAX_BRIGHTNESS	0x70
+#define GMUX_PORT_BRIGHTNESS		0x74
+#define GMUX_PORT_VALUE			0xc2
+#define GMUX_PORT_READ			0xd0
+#define GMUX_PORT_WRITE			0xd4
+
+#define GMUX_MIN_IO_LEN			(GMUX_PORT_BRIGHTNESS + 4)
+
 #if IS_ENABLED(CONFIG_APPLE_GMUX)
 
 /**
-- 
cgit 


From d143908f80f3e5d164ac3342f73d6b9f536e8b4d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 24 Jan 2023 11:57:53 +0100
Subject: platform/x86: apple-gmux: Add apple_gmux_detect() helper

Add a new (static inline) apple_gmux_detect() helper to apple-gmux.h
which can be used for gmux detection instead of apple_gmux_present().

The latter is not really reliable since an ACPI device with a HID
of APP000B is present on some devices without a gmux at all, as well
as on devices with a newer (unsupported) MMIO based gmux model.

This causes apple_gmux_present() to return false-positives on
a number of different Apple laptop models.

This new helper uses the same probing as the actual apple-gmux
driver, so that it does not return false positives.

To avoid code duplication the gmux_probe() function of the actual
driver is also moved over to using the new apple_gmux_detect() helper.

This avoids false positives (vs _HID + IO region detection) on:

MacBookPro5,4
https://pastebin.com/8Xjq7RhS

MacBookPro8,1
https://linux-hardware.org/?probe=e513cfbadb&log=dmesg

MacBookPro9,2
https://bugzilla.kernel.org/attachment.cgi?id=278961

MacBookPro10,2
https://lkml.org/lkml/2014/9/22/657

MacBookPro11,2
https://forums.fedora-fr.org/viewtopic.php?id=70142

MacBookPro11,4
https://raw.githubusercontent.com/im-0/investigate-card-reader-suspend-problem-on-mbp11.4/master/test-16/dmesg

Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection")
Link: https://lore.kernel.org/platform-driver-x86/20230123113750.462144-1-hdegoede@redhat.com/
Reported-by: Emmanouil Kouroupakis <kartebi@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230124105754.62167-3-hdegoede@redhat.com
---
 drivers/platform/x86/apple-gmux.c | 70 ++++++++-----------------------
 include/linux/apple-gmux.h        | 86 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index a0af01f6a0fd..9333f82cfa8a 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -226,23 +226,6 @@ static void gmux_write32(struct apple_gmux_data *gmux_data, int port,
 		gmux_pio_write32(gmux_data, port, val);
 }
 
-static bool gmux_is_indexed(struct apple_gmux_data *gmux_data)
-{
-	u16 val;
-
-	outb(0xaa, gmux_data->iostart + 0xcc);
-	outb(0x55, gmux_data->iostart + 0xcd);
-	outb(0x00, gmux_data->iostart + 0xce);
-
-	val = inb(gmux_data->iostart + 0xcc) |
-		(inb(gmux_data->iostart + 0xcd) << 8);
-
-	if (val == 0x55aa)
-		return true;
-
-	return false;
-}
-
 /**
  * DOC: Backlight control
  *
@@ -582,60 +565,43 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 	int ret = -ENXIO;
 	acpi_status status;
 	unsigned long long gpe;
+	bool indexed = false;
+	u32 version;
 
 	if (apple_gmux_data)
 		return -EBUSY;
 
+	if (!apple_gmux_detect(pnp, &indexed)) {
+		pr_info("gmux device not present\n");
+		return -ENODEV;
+	}
+
 	gmux_data = kzalloc(sizeof(*gmux_data), GFP_KERNEL);
 	if (!gmux_data)
 		return -ENOMEM;
 	pnp_set_drvdata(pnp, gmux_data);
 
 	res = pnp_get_resource(pnp, IORESOURCE_IO, 0);
-	if (!res) {
-		pr_err("Failed to find gmux I/O resource\n");
-		goto err_free;
-	}
-
 	gmux_data->iostart = res->start;
 	gmux_data->iolen = resource_size(res);
 
-	if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
-		pr_err("gmux I/O region too small (%lu < %u)\n",
-		       gmux_data->iolen, GMUX_MIN_IO_LEN);
-		goto err_free;
-	}
-
 	if (!request_region(gmux_data->iostart, gmux_data->iolen,
 			    "Apple gmux")) {
 		pr_err("gmux I/O already in use\n");
 		goto err_free;
 	}
 
-	/*
-	 * Invalid version information may indicate either that the gmux
-	 * device isn't present or that it's a new one that uses indexed
-	 * io
-	 */
-
-	ver_major = gmux_read8(gmux_data, GMUX_PORT_VERSION_MAJOR);
-	ver_minor = gmux_read8(gmux_data, GMUX_PORT_VERSION_MINOR);
-	ver_release = gmux_read8(gmux_data, GMUX_PORT_VERSION_RELEASE);
-	if (ver_major == 0xff && ver_minor == 0xff && ver_release == 0xff) {
-		if (gmux_is_indexed(gmux_data)) {
-			u32 version;
-			mutex_init(&gmux_data->index_lock);
-			gmux_data->indexed = true;
-			version = gmux_read32(gmux_data,
-				GMUX_PORT_VERSION_MAJOR);
-			ver_major = (version >> 24) & 0xff;
-			ver_minor = (version >> 16) & 0xff;
-			ver_release = (version >> 8) & 0xff;
-		} else {
-			pr_info("gmux device not present\n");
-			ret = -ENODEV;
-			goto err_release;
-		}
+	if (indexed) {
+		mutex_init(&gmux_data->index_lock);
+		gmux_data->indexed = true;
+		version = gmux_read32(gmux_data, GMUX_PORT_VERSION_MAJOR);
+		ver_major = (version >> 24) & 0xff;
+		ver_minor = (version >> 16) & 0xff;
+		ver_release = (version >> 8) & 0xff;
+	} else {
+		ver_major = gmux_read8(gmux_data, GMUX_PORT_VERSION_MAJOR);
+		ver_minor = gmux_read8(gmux_data, GMUX_PORT_VERSION_MINOR);
+		ver_release = gmux_read8(gmux_data, GMUX_PORT_VERSION_RELEASE);
 	}
 	pr_info("Found gmux version %d.%d.%d [%s]\n", ver_major, ver_minor,
 		ver_release, (gmux_data->indexed ? "indexed" : "classic"));
diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h
index 80efaaf89e07..1f68b49bcd68 100644
--- a/include/linux/apple-gmux.h
+++ b/include/linux/apple-gmux.h
@@ -8,6 +8,8 @@
 #define LINUX_APPLE_GMUX_H
 
 #include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/pnp.h>
 
 #define GMUX_ACPI_HID "APP000B"
 
@@ -35,14 +37,89 @@
 #define GMUX_MIN_IO_LEN			(GMUX_PORT_BRIGHTNESS + 4)
 
 #if IS_ENABLED(CONFIG_APPLE_GMUX)
+static inline bool apple_gmux_is_indexed(unsigned long iostart)
+{
+	u16 val;
+
+	outb(0xaa, iostart + 0xcc);
+	outb(0x55, iostart + 0xcd);
+	outb(0x00, iostart + 0xce);
+
+	val = inb(iostart + 0xcc) | (inb(iostart + 0xcd) << 8);
+	if (val == 0x55aa)
+		return true;
+
+	return false;
+}
 
 /**
- * apple_gmux_present() - detect if gmux is built into the machine
+ * apple_gmux_detect() - detect if gmux is built into the machine
+ *
+ * @pnp_dev:     Device to probe or NULL to use the first matching device
+ * @indexed_ret: Returns (by reference) if the gmux is indexed or not
+ *
+ * Detect if a supported gmux device is present by actually probing it.
+ * This avoids the false positives returned on some models by
+ * apple_gmux_present().
+ *
+ * Return: %true if a supported gmux ACPI device is detected and the kernel
+ * was configured with CONFIG_APPLE_GMUX, %false otherwise.
+ */
+static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret)
+{
+	u8 ver_major, ver_minor, ver_release;
+	struct device *dev = NULL;
+	struct acpi_device *adev;
+	struct resource *res;
+	bool indexed = false;
+	bool ret = false;
+
+	if (!pnp_dev) {
+		adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1);
+		if (!adev)
+			return false;
+
+		dev = get_device(acpi_get_first_physical_node(adev));
+		acpi_dev_put(adev);
+		if (!dev)
+			return false;
+
+		pnp_dev = to_pnp_dev(dev);
+	}
+
+	res = pnp_get_resource(pnp_dev, IORESOURCE_IO, 0);
+	if (!res || resource_size(res) < GMUX_MIN_IO_LEN)
+		goto out;
+
+	/*
+	 * Invalid version information may indicate either that the gmux
+	 * device isn't present or that it's a new one that uses indexed io.
+	 */
+	ver_major = inb(res->start + GMUX_PORT_VERSION_MAJOR);
+	ver_minor = inb(res->start + GMUX_PORT_VERSION_MINOR);
+	ver_release = inb(res->start + GMUX_PORT_VERSION_RELEASE);
+	if (ver_major == 0xff && ver_minor == 0xff && ver_release == 0xff) {
+		indexed = apple_gmux_is_indexed(res->start);
+		if (!indexed)
+			goto out;
+	}
+
+	if (indexed_ret)
+		*indexed_ret = indexed;
+
+	ret = true;
+out:
+	put_device(dev);
+	return ret;
+}
+
+/**
+ * apple_gmux_present() - check if gmux ACPI device is present
  *
  * Drivers may use this to activate quirks specific to dual GPU MacBook Pros
  * and Mac Pros, e.g. for deferred probing, runtime pm and backlight.
  *
- * Return: %true if gmux is present and the kernel was configured
+ * Return: %true if gmux ACPI device is present and the kernel was configured
  * with CONFIG_APPLE_GMUX, %false otherwise.
  */
 static inline bool apple_gmux_present(void)
@@ -57,6 +134,11 @@ static inline bool apple_gmux_present(void)
 	return false;
 }
 
+static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret)
+{
+	return false;
+}
+
 #endif /* !CONFIG_APPLE_GMUX */
 
 #endif /* LINUX_APPLE_GMUX_H */
-- 
cgit