aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS3
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c12
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c28
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/trace/events/qdisc.h2
-rw-r--r--kernel/bpf/arena.c16
-rw-r--r--kernel/bpf/ringbuf.c31
-rw-r--r--kernel/bpf/verifier.c61
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c7
-rw-r--r--net/ipv4/inet_connection_sock.c17
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/unix/af_unix.c37
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c63
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
28 files changed, 1274 insertions, 496 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 972e1c8fec86..b24bb64604f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4083,12 +4083,13 @@ F: kernel/bpf/ringbuf.c
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <[email protected]>
-R: Matt Bobrowski <[email protected]>
+M: Matt Bobrowski <[email protected]>
S: Maintained
F: Documentation/bpf/prog_lsm.rst
F: include/linux/bpf_lsm.h
F: kernel/bpf/bpf_lsm.c
+F: kernel/trace/bpf_trace.c
F: security/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 0433109b42e5..0580b2fee21c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -2196,7 +2196,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d)
struct ksz_device *dev = kirq->dev;
int ret;
- ret = ksz_write32(dev, kirq->reg_mask, kirq->masked);
+ ret = ksz_write8(dev, kirq->reg_mask, kirq->masked);
if (ret)
dev_err(dev->dev, "failed to change IRQ mask\n");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5490f0f9c112..23ebeb143987 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2482,6 +2482,18 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[bufidx];
+
+ /* Sanity checks on our free map to make sure it points to an index
+ * that is not being occupied by another skb. If skb memory is
+ * not freed then we see congestion control kick in and halt tx.
+ */
+ if (unlikely(tx_buff->skb)) {
+ dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
+ skb_is_gso(skb) ? "tso_pool" : "tx_pool",
+ queue_num, bufidx);
+ dev_kfree_skb_any(tx_buff->skb);
+ }
+
tx_buff->skb = skb;
tx_buff->index = bufidx;
tx_buff->pool_index = queue_num;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index d087cf954f75..608ad31a9702 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2798,6 +2798,8 @@ static int add_adev(struct gdma_dev *gd)
if (ret)
goto init_fail;
+ /* madev is owned by the auxiliary device */
+ madev = NULL;
ret = auxiliary_device_add(adev);
if (ret)
goto add_fail;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index f30eee4a5a80..b6c01a88098d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -375,7 +375,9 @@ typedef void (*ionic_cq_done_cb)(void *done_arg);
unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do,
ionic_cq_cb cb, ionic_cq_done_cb done_cb,
void *done_arg);
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do);
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi);
int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev,
struct ionic_queue *q, unsigned int index, const char *name,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1934e9d6d9e4..1837a30ba08a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1189,7 +1189,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
ionic_rx_service, NULL, NULL);
if (lif->hwstamp_txq)
- tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget);
+ tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget, !!budget);
work_done = max(max(n_work, a_work), max(rx_work, tx_work));
if (work_done < budget && napi_complete_done(napi, work_done)) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index aed7d9cbce03..9fdd7cd3ef19 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -23,7 +23,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp);
+ struct ionic_txq_comp *comp,
+ bool in_napi);
static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell)
{
@@ -944,7 +945,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
u32 work_done = 0;
u32 flags = 0;
- work_done = ionic_tx_cq_service(cq, budget);
+ work_done = ionic_tx_cq_service(cq, budget, !!budget);
if (unlikely(!budget))
return budget;
@@ -1028,7 +1029,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
txqcq = lif->txqcqs[qi];
txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT);
+ tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, !!budget);
if (unlikely(!budget))
return budget;
@@ -1161,7 +1162,8 @@ static void ionic_tx_desc_unmap_bufs(struct ionic_queue *q,
static void ionic_tx_clean(struct ionic_queue *q,
struct ionic_tx_desc_info *desc_info,
- struct ionic_txq_comp *comp)
+ struct ionic_txq_comp *comp,
+ bool in_napi)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
struct ionic_qcq *qcq = q_to_qcq(q);
@@ -1213,11 +1215,13 @@ static void ionic_tx_clean(struct ionic_queue *q,
desc_info->bytes = skb->len;
stats->clean++;
- napi_consume_skb(skb, 1);
+ napi_consume_skb(skb, likely(in_napi) ? 1 : 0);
}
static bool ionic_tx_service(struct ionic_cq *cq,
- unsigned int *total_pkts, unsigned int *total_bytes)
+ unsigned int *total_pkts,
+ unsigned int *total_bytes,
+ bool in_napi)
{
struct ionic_tx_desc_info *desc_info;
struct ionic_queue *q = cq->bound_q;
@@ -1239,7 +1243,7 @@ static bool ionic_tx_service(struct ionic_cq *cq,
desc_info->bytes = 0;
index = q->tail_idx;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, comp);
+ ionic_tx_clean(q, desc_info, comp, in_napi);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
@@ -1253,7 +1257,9 @@ static bool ionic_tx_service(struct ionic_cq *cq,
return true;
}
-unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
+unsigned int ionic_tx_cq_service(struct ionic_cq *cq,
+ unsigned int work_to_do,
+ bool in_napi)
{
unsigned int work_done = 0;
unsigned int bytes = 0;
@@ -1262,7 +1268,7 @@ unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do)
if (work_to_do == 0)
return 0;
- while (ionic_tx_service(cq, &pkts, &bytes)) {
+ while (ionic_tx_service(cq, &pkts, &bytes, in_napi)) {
if (cq->tail_idx == cq->num_descs - 1)
cq->done_color = !cq->done_color;
cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1);
@@ -1288,7 +1294,7 @@ void ionic_tx_flush(struct ionic_cq *cq)
{
u32 work_done;
- work_done = ionic_tx_cq_service(cq, cq->num_descs);
+ work_done = ionic_tx_cq_service(cq, cq->num_descs, false);
if (work_done)
ionic_intr_credits(cq->idev->intr_ctrl, cq->bound_intr->index,
work_done, IONIC_INTR_CRED_RESET_COALESCE);
@@ -1305,7 +1311,7 @@ void ionic_tx_empty(struct ionic_queue *q)
desc_info = &q->tx_info[q->tail_idx];
desc_info->bytes = 0;
q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
- ionic_tx_clean(q, desc_info, NULL);
+ ionic_tx_clean(q, desc_info, NULL, false);
if (desc_info->skb) {
pkts++;
bytes += desc_info->bytes;
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 663e46348ce3..386d62769ded 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1372,6 +1372,8 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */
{QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */
{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7d6b1254c92d..c0deaafebfdc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child);
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout);
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
struct request_sock *req,
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index f1b5e816e7e5..ff33f41a9db7 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset,
TP_ARGS(q),
TP_STRUCT__entry(
- __string( dev, qdisc_dev(q)->name )
+ __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 583ee4fe48ef..e52b3ad231b9 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
struct vma_list {
struct vm_area_struct *vma;
struct list_head head;
+ atomic_t mmap_count;
};
static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
vml = kmalloc(sizeof(*vml), GFP_KERNEL);
if (!vml)
return -ENOMEM;
+ atomic_set(&vml->mmap_count, 1);
vma->vm_private_data = vml;
vml->vma = vma;
list_add(&vml->head, &arena->vma_list);
return 0;
}
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+ struct vma_list *vml = vma->vm_private_data;
+
+ atomic_inc(&vml->mmap_count);
+}
+
static void arena_vm_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
- struct vma_list *vml;
+ struct vma_list *vml = vma->vm_private_data;
+ if (!atomic_dec_and_test(&vml->mmap_count))
+ return;
guard(mutex)(&arena->lock);
- vml = vma->vm_private_data;
+ /* update link list under lock */
list_del(&vml->head);
vma->vm_private_data = NULL;
kfree(vml);
@@ -287,6 +298,7 @@ out:
}
static const struct vm_operations_struct arena_vm_ops = {
+ .open = arena_vm_open,
.close = arena_vm_close,
.fault = arena_vm_fault,
};
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring oldest not yet committed record until newest
+ * record does not span more than (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 010cfee7ffe9..214a9fa8c6fb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6236,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
}
reg->u32_min_value = 0;
reg->u32_max_value = U32_MAX;
+ reg->var_off = tnum_subreg(tnum_unknown);
}
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6280,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
reg->s32_max_value = s32_max;
reg->u32_min_value = (u32)s32_min;
reg->u32_max_value = (u32)s32_max;
+ reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
return;
}
@@ -12719,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
return res < a;
}
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+ /* Do the add in u16, where overflow is well-defined */
+ s16 res = (s16)((u16)a + (u16)b);
+
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
+
static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
@@ -17448,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
update_loop_entry(cur, &sl->state);
goto hit;
}
- goto skip_inf_loop_check;
}
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18730,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (signed_add32_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->imm += delta;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (signed_add16_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->off += delta;
+ }
+ }
+ return 0;
+}
+
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
u32 off, u32 cnt)
{
@@ -20004,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
stack_depth_extra = 8;
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
cnt = 4;
@@ -20546,6 +20594,13 @@ next_insn:
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
+ /*
+ * If may_goto is a first insn of a prog there could be a jmp
+ * insn that points to it, hence adjust all such jmps to point
+ * to insn after BPF_ST that inits may_goto count.
+ * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
}
/* Since poke tab is now finalized, publish aux to tracker. */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41693154e426..022c12059cf2 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
mutex_lock(&mem_id_lock);
ret = __mem_id_init_hash_table();
mutex_unlock(&mem_id_lock);
- if (ret < 0) {
- WARN_ON(1);
+ if (ret < 0)
return ERR_PTR(ret);
- }
}
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ff41bd6f99c3..5926159a6f20 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -657,8 +657,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v4_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 85f4b8fdbe5e..da5dba120bc9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -400,8 +400,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
- reqsk_put(req);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
+ reqsk_free(req);
+ else
+ reqsk_put(req);
+
return 0;
drop_and_free:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d81f74ce0f02..d4f0eff8b20f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1122,25 +1122,34 @@ drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
-static void reqsk_queue_hash_req(struct request_sock *req,
+static bool reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
+ bool found_dup_sk = false;
+
+ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
+ return false;
+
+ /* The timer needs to be setup after a successful insertion. */
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
refcount_set(&req->rsk_refcnt, 2 + 1);
+ return true;
}
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
- reqsk_queue_hash_req(req, timeout);
+ if (!reqsk_queue_hash_req(req, timeout))
+ return false;
+
inet_csk_reqsk_queue_added(sk);
+ return true;
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 01d208e0eef3..2e39cb881e20 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2782,13 +2782,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
+/* Sometimes we deduce that packets have been dropped due to reasons other than
+ * congestion, like path MTU reductions or failed client TFO attempts. In these
+ * cases we call this function to retransmit as many packets as cwnd allows,
+ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
+ * non-zero value (and may do so in a later calling context due to TSQ), we
+ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
+ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
+ * are using the correct retrans_stamp and don't declare ETIMEDOUT
+ * prematurely).
+ */
+static void tcp_non_congestion_loss_retransmit(struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (icsk->icsk_ca_state != TCP_CA_Loss) {
+ tp->high_seq = tp->snd_nxt;
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_ssthresh = 0;
+ tp->undo_marker = 0;
+ tcp_set_ca_state(sk, TCP_CA_Loss);
+ }
+ tcp_xmit_retransmit_queue(sk);
+}
+
/* Do a simple retransmit without using the backoff mechanisms in
* tcp_timer. This is used for path mtu discovery.
* The socket is already locked here.
*/
void tcp_simple_retransmit(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int mss;
@@ -2828,14 +2852,7 @@ void tcp_simple_retransmit(struct sock *sk)
* in network, but units changed and effective
* cwnd/ssthresh really reduced now.
*/
- if (icsk->icsk_ca_state != TCP_CA_Loss) {
- tp->high_seq = tp->snd_nxt;
- tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_ssthresh = 0;
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Loss);
- }
- tcp_xmit_retransmit_queue(sk);
+ tcp_non_congestion_loss_retransmit(sk);
}
EXPORT_SYMBOL(tcp_simple_retransmit);
@@ -6295,8 +6312,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data)
tcp_mark_skb_lost(sk, data);
- tcp_xmit_retransmit_queue(sk);
- tp->retrans_stamp = 0;
+ tcp_non_congestion_loss_retransmit(sk);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
@@ -7257,7 +7273,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
if (!want_cookie) {
req->timeout = tcp_timeout_init((struct sock *)req);
- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
+ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
+ req->timeout))) {
+ reqsk_free(req);
+ return 0;
+ }
+
}
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5e695a9a609c..142f56770b77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2613,10 +2613,24 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
{
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
- skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
- skb = NULL;
+ if (!unix_skb_len(skb)) {
+ struct sk_buff *unlinked_skb = NULL;
+
+ spin_lock(&sk->sk_receive_queue.lock);
+
+ if (copied && (!u->oob_skb || skb == u->oob_skb)) {
+ skb = NULL;
+ } else if (flags & MSG_PEEK) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ } else {
+ unlinked_skb = skb;
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ }
+
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(unlinked_skb);
} else {
struct sk_buff *unlinked_skb = NULL;
@@ -3093,12 +3107,23 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
case SIOCATMARK:
{
+ struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int answ = 0;
+ mutex_lock(&u->iolock);
+
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
- answ = 1;
+ if (skb) {
+ struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+
+ if (skb == oob_skb ||
+ (!oob_skb && !unix_skb_len(skb)))
+ answ = 1;
+ }
+
+ mutex_unlock(&u->iolock);
+
err = put_user(answ, (int __user *)arg);
}
break;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e0b3887b3d2d..dd49c1d23a60 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 4c6f42dae409..da430df45aa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,9 +12,11 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
+
#include "test_ringbuf.lskel.h"
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
#define EDONE 7777
@@ -84,6 +86,58 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
+static void ringbuf_write_subtest(void)
+{
+ struct test_ringbuf_write_lskel *skel;
+ int page_size = getpagesize();
+ size_t *mmap_ptr;
+ int err, rb_fd;
+
+ skel = test_ringbuf_write_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = 0x4000;
+
+ err = test_ringbuf_write_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+ goto cleanup;
+ *mmap_ptr = 0x3000;
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+ goto cleanup;
+
+ err = test_ringbuf_write_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup_ringbuf;
+
+ skel->bss->discarded = 0;
+ skel->bss->passed = 0;
+
+ /* trigger exactly two samples */
+ syscall(__NR_getpgid);
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+ ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+ test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_write_lskel__destroy(skel);
+}
+
static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
@@ -451,4 +505,6 @@ void test_ringbuf(void)
ringbuf_n_subtest();
if (test__start_subtest("ringbuf_map_key"))
ringbuf_map_key_subtest();
+ if (test__start_subtest("ringbuf_write"))
+ ringbuf_write_subtest();
}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
new file mode 100644
index 000000000000..350513c0e4c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+ int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+ void *sample1, *sample2;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample1)
+ return 0;
+ /* first one can pass */
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample2) {
+ bpf_ringbuf_discard(sample1, 0);
+ __sync_fetch_and_add(&discarded, 1);
+ return 0;
+ }
+ /* second one must not */
+ __sync_fetch_and_add(&passed, 1);
+ foo = sample2 + 4084;
+ *foo = 256;
+ bpf_ringbuf_discard(sample1, 0);
+ bpf_ringbuf_discard(sample2, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index bd676d7e615f..80c737b6d340 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void)
);
}
+int tmp_var;
+SEC("socket")
+__failure __msg("infinite loop detected at insn 2")
+__naked void jgt_imm64_and_may_goto(void)
+{
+ asm volatile (" \
+ r0 = %[tmp_var] ll; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -3; /* off -3 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm_addr(tmp_var)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("infinite loop detected at insn 1")
+__naked void may_goto_self(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -1; /* off -1 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void may_goto_neg_off(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+ goto l0_%=; \
+ goto l1_%=; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -2; /* off -2 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
SEC("tc")
__failure
__flag(BPF_F_TEST_STATE_FREQ)
@@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb)
return 0;
}
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto2(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void jlt_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: call %[bpf_jiffies64]; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ if r0 < 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm(bpf_jiffies64)
+ : __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+SEC("socket")
+__success __retval(0)
+__naked void gotol_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ gotol l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+#endif
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto_subprog(void)
+{
+ asm volatile (" \
+ call subprog_with_may_goto; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void subprog_with_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
#define ARR_SZ 1000000
int zero;
char arr[ARR_SZ];
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index cbb9d6714f53..028ec855587b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -224,6 +224,69 @@ l0_%=: \
: __clobber_all);
}
+SEC("socket")
+__description("MOV32SX, S8, var_off u32_max")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0")
+__naked void mov64sx_s32_varoff_1(void)
+{
+ asm volatile (" \
+l0_%=: \
+ r3 = *(u8 *)(r10 -387); \
+ w7 = (s8)w3; \
+ if w7 >= 0x2533823b goto l0_%=; \
+ w0 = 0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ w7 = (s8)w3; \
+ if w7 s>= 16 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ r3 |= 0x80; \
+ w7 = (s8)w3; \
+ if w7 s>= -5 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 49a56eb5d036..666ab7d9390b 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -43,7 +43,6 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
-test_unix_oob
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 3b83c797650d..50584479540b 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights
+TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
new file mode 100644
index 000000000000..16d0c172eaeb
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+#define BUF_SZ 32
+
+FIXTURE(msg_oob)
+{
+ int fd[4]; /* 0: AF_UNIX sender
+ * 1: AF_UNIX receiver
+ * 2: TCP sender
+ * 3: TCP receiver
+ */
+ int signal_fd;
+ int epoll_fd[2]; /* 0: AF_UNIX receiver
+ * 1: TCP receiver
+ */
+ bool tcp_compliant;
+};
+
+FIXTURE_VARIANT(msg_oob)
+{
+ bool peek;
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, no_peek)
+{
+ .peek = false,
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, peek)
+{
+ .peek = true
+};
+
+static void create_unix_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int ret;
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(ret, 0);
+}
+
+static void create_tcp_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ int listen_fd;
+ int ret;
+
+ listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(listen_fd, 0);
+
+ ret = listen(listen_fd, -1);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[2] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd[2], 0);
+
+ ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GE(self->fd[3], 0);
+
+ ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK);
+ ASSERT_EQ(ret, 0);
+}
+
+static void setup_sigurg(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct signalfd_siginfo siginfo;
+ int pid = getpid();
+ sigset_t mask;
+ int i, ret;
+
+ for (i = 0; i < 2; i++) {
+ ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ret = sigemptyset(&mask);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigaddset(&mask, SIGURG);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ ASSERT_GE(self->signal_fd, 0);
+
+ ret = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ ASSERT_EQ(ret, -1);
+}
+
+static void setup_epollpri(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct epoll_event event = {
+ .events = EPOLLPRI,
+ };
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ self->epoll_fd[i] = epoll_create1(0);
+ ASSERT_GE(self->epoll_fd[i], 0);
+
+ ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void close_sockets(FIXTURE_DATA(msg_oob) *self)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ close(self->fd[i]);
+}
+
+FIXTURE_SETUP(msg_oob)
+{
+ create_unix_socketpair(_metadata, self);
+ create_tcp_socketpair(_metadata, self);
+
+ setup_sigurg(_metadata, self);
+ setup_epollpri(_metadata, self);
+
+ self->tcp_compliant = true;
+}
+
+FIXTURE_TEARDOWN(msg_oob)
+{
+ close_sockets(self);
+}
+
+static void __epollpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_remaining)
+{
+ struct epoll_event event[2] = {};
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++)
+ ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0);
+
+ ASSERT_EQ(ret[0], oob_remaining);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(ret[0], ret[1]);
+
+ if (oob_remaining) {
+ ASSERT_EQ(event[0].events, EPOLLPRI);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(event[0].events, event[1].events);
+ }
+}
+
+static void __sendpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *buf, size_t len, int flags)
+{
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++) {
+ struct signalfd_siginfo siginfo = {};
+ int bytes;
+
+ ret[i] = send(self->fd[i * 2], buf, len, flags);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+
+ if (flags & MSG_OOB) {
+ ASSERT_EQ(bytes, sizeof(siginfo));
+ ASSERT_EQ(siginfo.ssi_signo, SIGURG);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ }
+
+ ASSERT_EQ(bytes, -1);
+ }
+
+ ASSERT_EQ(ret[0], len);
+ ASSERT_EQ(ret[0], ret[1]);
+}
+
+static void __recvpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *expected_buf, int expected_len,
+ int buf_len, int flags)
+{
+ int i, ret[2], recv_errno[2], expected_errno = 0;
+ char recv_buf[2][BUF_SZ] = {};
+ bool printed = false;
+
+ ASSERT_GE(BUF_SZ, buf_len);
+
+ errno = 0;
+
+ for (i = 0; i < 2; i++) {
+ ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ recv_errno[i] = errno;
+ }
+
+ if (expected_len < 0) {
+ expected_errno = -expected_len;
+ expected_len = -1;
+ }
+
+ if (ret[0] != expected_len || recv_errno[0] != expected_errno) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(ret[0], expected_len);
+ ASSERT_EQ(recv_errno[0], expected_errno);
+ }
+
+ if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+
+ printed = true;
+
+ if (self->tcp_compliant) {
+ ASSERT_EQ(ret[0], ret[1]);
+ ASSERT_EQ(recv_errno[0], recv_errno[1]);
+ }
+ }
+
+ if (expected_len >= 0) {
+ int cmp;
+
+ cmp = strncmp(expected_buf, recv_buf[0], expected_len);
+ if (cmp) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(cmp, 0);
+ }
+
+ cmp = strncmp(recv_buf[0], recv_buf[1], expected_len);
+ if (cmp) {
+ if (!printed) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+ }
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(cmp, 0);
+ }
+ }
+}
+
+static void __setinlinepair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int i, oob_inline = 1;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE,
+ &oob_inline, sizeof(oob_inline));
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void __siocatmarkpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_head)
+{
+ int answ[2] = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ASSERT_EQ(answ[0], oob_head);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(answ[0], answ[1]);
+}
+
+#define sendpair(buf, len, flags) \
+ __sendpair(_metadata, self, buf, len, flags)
+
+#define recvpair(expected_buf, expected_len, buf_len, flags) \
+ do { \
+ if (variant->peek) \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, \
+ buf_len, (flags) | MSG_PEEK); \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, buf_len, flags); \
+ } while (0)
+
+#define epollpair(oob_remaining) \
+ __epollpair(_metadata, self, oob_remaining)
+
+#define siocatmarkpair(oob_head) \
+ __siocatmarkpair(_metadata, self, oob_head)
+
+#define setinlinepair() \
+ __setinlinepair(_metadata, self)
+
+#define tcp_incompliant \
+ for (self->tcp_compliant = false; \
+ self->tcp_compliant == false; \
+ self->tcp_compliant = true)
+
+TEST_F(msg_oob, non_oob)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 5, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_break_drop)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("ld", 2, 2, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_drop_2)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+ }
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ tcp_incompliant {
+ recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */
+ }
+
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("d", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob)
+{
+ setinlinepair();
+
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_break)
+{
+ setinlinepair();
+
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */
+ }
+
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("rld", 3, 3, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_no_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ setinlinepair();
+
+ sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ setinlinepair();
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP recv()s "y". */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(false);
+ }
+}
+
+TEST_F(msg_oob, inline_ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
deleted file mode 100644
index a7c51889acd5..000000000000
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ /dev/null
@@ -1,436 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-#include <sys/un.h>
-#include <sys/signal.h>
-#include <sys/poll.h>
-
-static int pipefd[2];
-static int signal_recvd;
-static pid_t producer_id;
-static char sock_name[32];
-
-static void sig_hand(int sn, siginfo_t *si, void *p)
-{
- signal_recvd = sn;
-}
-
-static int set_sig_handler(int signal)
-{
- struct sigaction sa;
-
- sa.sa_sigaction = sig_hand;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO | SA_RESTART;
-
- return sigaction(signal, &sa, NULL);
-}
-
-static void set_filemode(int fd, int set)
-{
- int flags = fcntl(fd, F_GETFL, 0);
-
- if (set)
- flags &= ~O_NONBLOCK;
- else
- flags |= O_NONBLOCK;
- fcntl(fd, F_SETFL, flags);
-}
-
-static void signal_producer(int fd)
-{
- char cmd;
-
- cmd = 'S';
- write(fd, &cmd, sizeof(cmd));
-}
-
-static void wait_for_signal(int fd)
-{
- char buf[5];
-
- read(fd, buf, 5);
-}
-
-static void die(int status)
-{
- fflush(NULL);
- unlink(sock_name);
- kill(producer_id, SIGTERM);
- exit(status);
-}
-
-int is_sioctatmark(int fd)
-{
- int ans = -1;
-
- if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
-#ifdef DEBUG
- perror("SIOCATMARK Failed");
-#endif
- }
- return ans;
-}
-
-void read_oob(int fd, char *c)
-{
-
- *c = ' ';
- if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
-#ifdef DEBUG
- perror("Reading MSG_OOB Failed");
-#endif
- }
-}
-
-int read_data(int pfd, char *buf, int size)
-{
- int len = 0;
-
- memset(buf, size, '0');
- len = read(pfd, buf, size);
-#ifdef DEBUG
- if (len < 0)
- perror("read failed");
-#endif
- return len;
-}
-
-static void wait_for_data(int pfd, int event)
-{
- struct pollfd pfds[1];
-
- pfds[0].fd = pfd;
- pfds[0].events = event;
- poll(pfds, 1, -1);
-}
-
-void producer(struct sockaddr_un *consumer_addr)
-{
- int cfd;
- char buf[64];
- int i;
-
- memset(buf, 'x', sizeof(buf));
- cfd = socket(AF_UNIX, SOCK_STREAM, 0);
-
- wait_for_signal(pipefd[0]);
- if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(*consumer_addr)) != 0) {
- perror("Connect failed");
- kill(0, SIGTERM);
- exit(1);
- }
-
- for (i = 0; i < 2; i++) {
- /* Test 1: Test for SIGURG and OOB */
- wait_for_signal(pipefd[0]);
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 2: Test for OOB being overwitten */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '#';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 3: Test for SIOCATMARK */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- send(cfd, buf, sizeof(buf), 0);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 4: Test for 1byte OOB msg */
- memset(buf, 'x', sizeof(buf));
- buf[0] = '@';
- send(cfd, buf, 1, MSG_OOB);
- }
-}
-
-int
-main(int argc, char **argv)
-{
- int lfd, pfd;
- struct sockaddr_un consumer_addr, paddr;
- socklen_t len = sizeof(consumer_addr);
- char buf[1024];
- int on = 0;
- char oob;
- int atmark;
-
- lfd = socket(AF_UNIX, SOCK_STREAM, 0);
- memset(&consumer_addr, 0, sizeof(consumer_addr));
- consumer_addr.sun_family = AF_UNIX;
- sprintf(sock_name, "unix_oob_%d", getpid());
- unlink(sock_name);
- strcpy(consumer_addr.sun_path, sock_name);
-
- if ((bind(lfd, (struct sockaddr *)&consumer_addr,
- sizeof(consumer_addr))) != 0) {
- perror("socket bind failed");
- exit(1);
- }
-
- pipe(pipefd);
-
- listen(lfd, 1);
-
- producer_id = fork();
- if (producer_id == 0) {
- producer(&consumer_addr);
- exit(0);
- }
-
- set_sig_handler(SIGURG);
- signal_producer(pipefd[1]);
-
- pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
- fcntl(pfd, F_SETOWN, getpid());
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1:
- * veriyf that SIGURG is
- * delivered, 63 bytes are
- * read, oob is '@', and POLLPRI works.
- */
- wait_for_data(pfd, POLLPRI);
- read_oob(pfd, &oob);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 63 || oob != '@') {
- fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2:
- * Verify that the first OOB is over written by
- * the 2nd one and the first OOB is returned as
- * part of the read, and sigurg is received.
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = 0;
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- read_oob(pfd, &oob);
- if (!signal_recvd || len != 127 || oob != '#') {
- fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and atmark
- * is set.
- * oob is '%' and second read returns
- * 64 bytes.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 150)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr,
- "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
- signal_recvd, len, oob, atmark);
- die(1);
- }
-
- signal_recvd = 0;
-
- len = read_data(pfd, buf, 1024);
- if (len != 64) {
- fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4:
- * verify that a single byte
- * oob message is delivered.
- * set non blocking mode and
- * check proper error is
- * returned and sigurg is
- * received and correct
- * oob is read.
- */
-
- set_filemode(pfd, 0);
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if ((len == -1) && (errno == 11))
- len = 0;
-
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 0 || oob != '@') {
- fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- set_filemode(pfd, 1);
-
- /* Inline Testing */
-
- on = 1;
- if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
- perror("SO_OOBINLINE");
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1 -- Inline:
- * Check that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
- */
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
-
- if (!signal_recvd || len != 63) {
- fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
- signal_recvd, len);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
-
- if (len != 1) {
- fprintf(stderr,
- "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2 -- Inline:
- * Verify that the first OOB is over written by
- * the 2nd one and read breaks correctly on
- * 2nd OOB boundary with the first OOB returned as
- * part of the read, and sigurg is delivered and
- * siocatmark returns true.
- * next read returns one byte, the oob byte
- * and siocatmark returns false.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 127 || atmark != 1 || !signal_recvd) {
- fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
- len, atmark);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 1 || buf[0] != '#' || atmark == 1) {
- fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3 -- Inline:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and siocatmark
- * is true after the read.
- * subsequent read returns 65 bytes
- * because of oob which should be '%'.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 126)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (!signal_recvd || len != 127 || !atmark) {
- fprintf(stderr,
- "Test 3 Inline failed, sigurg %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 65 || buf[0] != '%' || atmark != 0) {
- fprintf(stderr,
- "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4 -- Inline:
- * verify that a single
- * byte oob message is delivered
- * and read returns one byte, the oob
- * byte and sigurg is received
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 1 || buf[0] != '@') {
- fprintf(stderr,
- "Test 4 Inline failed, signal %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
- die(0);
-}