 Documentation/networking/index.rst  |    1
 Documentation/networking/tcp_ao.rst |  444
 include/linux/sockptr.h             |   23
 include/linux/tcp.h                 |   30
 include/net/dropreason-core.h       |   30
 include/net/tcp.h                   |  288
 include/net/tcp_ao.h                |  362
 include/uapi/linux/snmp.h           |    5
 include/uapi/linux/tcp.h            |  105
 net/ipv4/Kconfig                    |   17
 net/ipv4/Makefile                   |    2
 net/ipv4/proc.c                     |    5
 net/ipv4/syncookies.c               |    4
 net/ipv4/tcp.c                      |  246
 net/ipv4/tcp_ao.c                   | 2392
 net/ipv4/tcp_input.c                |   98
 net/ipv4/tcp_ipv4.c                 |  363
 net/ipv4/tcp_minisocks.c            |   50
 net/ipv4/tcp_output.c               |  236
 net/ipv4/tcp_sigpool.c              |  358
 net/ipv6/Makefile                   |    1
 net/ipv6/syncookies.c               |    5
 net/ipv6/tcp_ao.c                   |  168
 net/ipv6/tcp_ipv6.c                 |  378
 24 files changed, 5175 insertions(+), 436 deletions(-)
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 2ffc5ad10295..683eb42309cc 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -106,6 +106,7 @@ Contents:
    sysfs-tagging
    tc-actions-env-rules
    tc-queue-filters
+   tcp_ao
    tcp-thin
    team
    timestamping
diff --git a/Documentation/networking/tcp_ao.rst b/Documentation/networking/tcp_ao.rst
new file mode 100644
index 000000000000..cfa5bf1cc542
--- /dev/null
+++ b/Documentation/networking/tcp_ao.rst
@@ -0,0 +1,444 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================================
+TCP Authentication Option Linux implementation (RFC5925)
+========================================================
+
+TCP Authentication Option (TCP-AO) provides a TCP extension aimed at verifying
+segments between trusted peers. It adds a new TCP header option with
+a Message Authentication Code (MAC). MACs are produced from the content
+of a TCP segment using a hashing function with a password known to both peers.
+The intent of TCP-AO is to deprecate TCP-MD5, providing better security,
+key rotation and support for a variety of hashing algorithms.
+
+1. Introduction
+===============
+
+.. table:: Short and Limited Comparison of TCP-AO and TCP-MD5
+
+   +----------------------+------------------------+-----------------------+
+   |                      | TCP-MD5                | TCP-AO                |
+   +======================+========================+=======================+
+   |Supported hashing     |MD5                     |Must support HMAC-SHA1 |
+   |algorithms            |(cryptographically weak)|(chosen-prefix attacks)|
+   |                      |                        |and CMAC-AES-128 (only |
+   |                      |                        |side-channel attacks). |
+   |                      |                        |May support any hashing|
+   |                      |                        |algorithm.             |
+   +----------------------+------------------------+-----------------------+
+   |Length of MACs (bytes)|16                      |Typically 12-16.       |
+   |                      |                        |Other variants that fit|
+   |                      |                        |TCP header permitted.  |
+   +----------------------+------------------------+-----------------------+
+   |Number of keys per    |1                       |Many                   |
+   |TCP connection        |                        |                       |
+   +----------------------+------------------------+-----------------------+
+   |Possibility to change |Non-practical (both     |Supported by protocol  |
+   |an active key         |peers have to change    |                       |
+   |                      |them during MSL)        |                       |
+   +----------------------+------------------------+-----------------------+
+   |Protection against    |No                      |Yes: ignoring them     |
+   |ICMP 'hard errors'    |                        |by default on          |
+   |                      |                        |established connections|
+   +----------------------+------------------------+-----------------------+
+   |Protection against    |No                      |Yes: pseudo-header     |
+   |traffic-crossing      |                        |includes TCP ports.    |
+   |attack                |                        |                       |
+   +----------------------+------------------------+-----------------------+
+   |Protection against    |No                      |Sequence Number        |
+   |replayed TCP segments |                        |Extension (SNE) and    |
+   |                      |                        |Initial Sequence       |
+   |                      |                        |Numbers (ISNs)         |
+   +----------------------+------------------------+-----------------------+
+   |Supports              |Yes                     |No. ISNs+SNE are needed|
+   |Connectionless Resets |                        |to correctly sign RST. |
+   +----------------------+------------------------+-----------------------+
+   |Standards             |RFC 2385                |RFC 5925, RFC 5926     |
+   +----------------------+------------------------+-----------------------+
+
+
+1.1 Frequently Asked Questions (FAQ) with references to RFC 5925
+----------------------------------------------------------------
+
+Q: Can either SendID or RecvID be non-unique for the same 4-tuple
+(srcaddr, srcport, dstaddr, dstport)?
+
+A: No [3.1]::
+
+   >> The IDs of MKTs MUST NOT overlap where their TCP connection
+   identifiers overlap.
+
+Q: Can Master Key Tuple (MKT) for an active connection be removed?
+
+A: No, unless it's copied to Transport Control Block (TCB) [3.1]::
+
+   It is presumed that an MKT affecting a particular connection cannot
+   be destroyed during an active connection -- or, equivalently, that
+   its parameters are copied to an area local to the connection (i.e.,
+   instantiated) and so changes would affect only new connections.
+
+Q: If an old MKT needs to be deleted, how should it be done in order
+to not remove it for an active connection? (As it can be still in use
+at any moment later)
+
+A: Not specified by RFC 5925; it seems to be a task for key management
+to ensure that no one uses such an MKT before trying to remove it.
+
+Q: Can an old MKT exist forever and be used by another peer?
+
+A: It can, it's a key management task to decide when to remove an old key [6.1]::
+
+   Deciding when to start using a key is a performance issue. Deciding
+   when to remove an MKT is a security issue. Invalid MKTs are expected
+   to be removed. TCP-AO provides no mechanism to coordinate their removal,
+   as we consider this a key management operation.
+
+also [6.1]::
+
+   The only way to avoid reuse of previously used MKTs is to remove the MKT
+   when it is no longer considered permitted.
+
+Linux TCP-AO will try its best to prevent you from removing a key that's
+being used, considering it a key management failure. But since keeping
+an outdated key may become a security issue and as a peer may
+unintentionally prevent the removal of an old key by always setting
+it as RNextKeyID, a forced key removal mechanism is provided, where
+userspace has to supply a KeyID to use instead of the one that's being
+removed and the kernel will atomically delete the old key, even if the peer
+is still requesting it. There are no guarantees for force-delete as the peer
+may not yet have the new key - the TCP connection may just break.
+Alternatively, one may choose to shut down the socket.
+
+Q: What happens when a packet is received on a new connection with no known
+MKT's RecvID?
+
+A: RFC 5925 specifies that by default it is accepted with a warning logged, but
+the behaviour can be configured by the user [7.5.1.a]::
+
+   If the segment is a SYN, then this is the first segment of a new
+   connection. Find the matching MKT for this segment, using the segment's
+   socket pair and its TCP-AO KeyID, matched against the MKT's TCP connection
+   identifier and the MKT's RecvID.
+
+   i. If there is no matching MKT, remove TCP-AO from the segment.
+      Proceed with further TCP handling of the segment.
+      NOTE: this presumes that connections that do not match any MKT
+      should be silently accepted, as noted in Section 7.3.
+
+[7.3]::
+
+   >> A TCP-AO implementation MUST allow for configuration of the behavior
+   of segments with TCP-AO but that do not match an MKT. The initial default
+   of this configuration SHOULD be to silently accept such connections.
+   If this is not the desired case, an MKT can be included to match such
+   connections, or the connection can indicate that TCP-AO is required.
+   Alternately, the configuration can be changed to discard segments with
+   the AO option not matching an MKT.
+
+[10.2.b]::
+
+   Connections not matching any MKT do not require TCP-AO. Further, incoming
+   segments with TCP-AO are not discarded solely because they include
+   the option, provided they do not match any MKT.
+
+Note that the Linux TCP-AO implementation differs in this aspect. Currently,
+TCP-AO segments with unknown key signatures are discarded with warnings logged.
+
+Q: Does the RFC imply centralized kernel key management in any way?
+(i.e. that a key on all connections MUST be rotated at the same time?)
+
+A: Not specified. MKTs can be managed in userspace, the only relevant part to
+key changes is [7.3]::
+
+   >> All TCP segments MUST be checked against the set of MKTs for matching
+   TCP connection identifiers.
+
+Q: What happens when RNextKeyID requested by a peer is unknown? Should
+the connection be reset?
+
+A: It should not, no action needs to be performed [7.5.2.e]::
+
+   ii. If they differ, determine whether the RNextKeyID MKT is ready.
+
+       1. If the MKT corresponding to the segment's socket pair and RNextKeyID
+          is not available, no action is required (RNextKeyID of a received
+          segment needs to match the MKT's SendID).
+
+Q: How is current_key set, and when does it change? Is it a user-triggered
+change, or is it changed by a request from the remote peer? Is it set by the
+user explicitly, or by a matching rule?
+
+A: current_key is set by RNextKeyID [6.1]::
+
+   Rnext_key is changed only by manual user intervention or MKT management
+   protocol operation. It is not manipulated by TCP-AO. Current_key is updated
+   by TCP-AO when processing received TCP segments as discussed in the segment
+   processing description in Section 7.5. Note that the algorithm allows
+   the current_key to change to a new MKT, then change back to a previously
+   used MKT (known as "backing up"). This can occur during an MKT change when
+   segments are received out of order, and is considered a feature of TCP-AO,
+   because reordering does not result in drops.
+
+[7.5.2.e.ii]::
+
+   2. If the matching MKT corresponding to the segment's socket pair and
+      RNextKeyID is available:
+
+      a. Set current_key to the RNextKeyID MKT.
+
+Q: If both peers have multiple MKTs matching the connection's socket pair
+(with different KeyIDs), how should the sender/receiver pick a KeyID to use?
+
+A: Some mechanism should pick the "desired" MKT [3.3]::
+
+   Multiple MKTs may match a single outgoing segment, e.g., when MKTs
+   are being changed. Those MKTs cannot have conflicting IDs (as noted
+   elsewhere), and some mechanism must determine which MKT to use for each
+   given outgoing segment.
+
+   >> An outgoing TCP segment MUST match at most one desired MKT, indicated
+   by the segment's socket pair. The segment MAY match multiple MKTs, provided
+   that exactly one MKT is indicated as desired. Other information in
+   the segment MAY be used to determine the desired MKT when multiple MKTs
+   match; such information MUST NOT include values in any TCP option fields.
+
+Q: Can a TCP-MD5 connection migrate to TCP-AO (and vice versa)?
+
+A: No [1]::
+
+   TCP MD5-protected connections cannot be migrated to TCP-AO because TCP MD5
+   does not support any changes to a connection's security algorithm
+   once established.
+
+Q: If all MKTs are removed on a connection, can it become a non-TCP-AO signed
+connection?
+
+A: [7.5.2] doesn't have the same choice as SYN packet handling in [7.5.1.i]
+that would allow accepting segments without a signature (which would be
+insecure). While switching to a non-TCP-AO connection is not prohibited
+directly, this seems to be what the RFC intends. Also, there's a requirement
+for TCP-AO connections to always have one current_key [3.3]::
+
+   TCP-AO requires that every protected TCP segment match exactly one MKT.
+
+[3.3]::
+
+   >> An incoming TCP segment including TCP-AO MUST match exactly one MKT,
+   indicated solely by the segment's socket pair and its TCP-AO KeyID.
+
+[4.4]::
+
+   One or more MKTs. These are the MKTs that match this connection's
+   socket pair.
+
+Q: Can a non-TCP-AO connection become a TCP-AO-enabled one?
+
+A: No: for an already established non-TCP-AO connection it would be impossible
+to switch to using TCP-AO, as the traffic key generation requires the initial
+sequence numbers. Paraphrasing: starting to use TCP-AO would require
+re-establishing the TCP connection.
+
+2. In-kernel MKTs database vs database in userspace
+===================================================
+
+Linux TCP-AO support is implemented using ``setsockopt()s``, in a similar way
+to TCP-MD5. It means that a userspace application that wants to use TCP-AO
+should perform ``setsockopt()`` on a TCP socket when it wants to add,
+remove or rotate MKTs. This approach moves the key management responsibility
+to userspace, as well as decisions on corner cases, i.e. what to do if
+the peer doesn't respect RNextKeyID; it moves more code to userspace,
+especially the code responsible for policy decisions. Besides, it's flexible
+and scales well (with less locking needed than in the case of an in-kernel
+database). One should also keep in mind that the main intended users are BGP
+processes, not arbitrary applications, which means that compared to IPsec
+tunnels, no transparency is really needed and modern BGP daemons already
+have ``setsockopt()s`` for TCP-MD5 support.
+
+.. table:: Considered pros and cons of the approaches
+
+   +----------------------+------------------------+-----------------------+
+   |                      | ``setsockopt()``       | in-kernel DB          |
+   +======================+========================+=======================+
+   | Extendability        | ``setsockopt()``       | Netlink messages are  |
+   |                      | commands should be     | simple and extendable |
+   |                      | extendable syscalls    |                       |
+   +----------------------+------------------------+-----------------------+
+   | Required userspace   | BGP or any application | could be transparent  |
+   | changes              | that wants TCP-AO needs| as tunnels, providing |
+   |                      | to perform             | something like        |
+   |                      | ``setsockopt()s``      | ``ip tcpao add key``  |
+   |                      | and do key management  | (delete/show/rotate)  |
+   +----------------------+------------------------+-----------------------+
+   |MKTs removal or adding| harder for userspace   | harder for kernel     |
+   +----------------------+------------------------+-----------------------+
+   | Dump-ability         | ``getsockopt()``       | Netlink .dump()       |
+   |                      |                        | callback              |
+   +----------------------+------------------------+-----------------------+
+   | Limits on kernel     | equal                                          |
+   | resources/memory     |                                                |
+   +----------------------+------------------------+-----------------------+
+   | Scalability          | contention on          | contention on         |
+   |                      | ``TCP_LISTEN`` sockets | the whole database    |
+   +----------------------+------------------------+-----------------------+
+   | Monitoring & warnings| ``TCP_DIAG``           | same Netlink socket   |
+   +----------------------+------------------------+-----------------------+
+   | Matching of MKTs     | half-problem: only     | hard                  |
+   |                      | listen sockets         |                       |
+   +----------------------+------------------------+-----------------------+
+
+
+3. uAPI
+=======
+
+Linux provides a set of ``setsockopt()s`` and ``getsockopt()s`` that let
+userspace manage TCP-AO on a per-socket basis.
+In order to add/delete MKTs, the ``TCP_AO_ADD_KEY`` and ``TCP_AO_DEL_KEY``
+TCP socket options must be used. It is not allowed to add a key on an
+established non-TCP-AO connection, nor to remove the last key from a TCP-AO
+connection.
+
+``setsockopt(TCP_AO_DEL_KEY)`` command may specify ``tcp_ao_del::current_key``
++ ``tcp_ao_del::set_current`` and/or ``tcp_ao_del::rnext``
++ ``tcp_ao_del::set_rnext`` which makes such delete "forced": it
+provides userspace a way to delete a key that's being used and atomically set
+another one instead. This is not intended for normal use and should be used
+only when the peer ignores RNextKeyID and keeps requesting/using an old key.
+It provides a way to force-delete a key that's not trusted, but it may break
+the TCP-AO connection.
+
+The usual/normal key rotation can be performed with ``setsockopt(TCP_AO_INFO)``.
+It also provides a uAPI to change per-socket TCP-AO settings, such as
+ignoring ICMPs, as well as to clear per-socket TCP-AO packet counters.
+The corresponding ``getsockopt(TCP_AO_INFO)`` can be used to get those
+per-socket TCP-AO settings.
+
+Another useful command is ``getsockopt(TCP_AO_GET_KEYS)``. One can use it
+to list all MKTs on a TCP socket, or use a filter to get keys for a specific
+peer and/or sndid/rcvid or VRF L3 interface, or to get current_key/rnext_key.
+
+To repair TCP-AO connections ``setsockopt(TCP_AO_REPAIR)`` is available,
+provided that the user previously has checkpointed/dumped the socket with
+``getsockopt(TCP_AO_REPAIR)``.
+
+A tip here for scaled ``TCP_LISTEN`` sockets, which may have thousands of
+TCP-AO keys: use filters in ``getsockopt(TCP_AO_GET_KEYS)`` and asynchronous
+delete with ``setsockopt(TCP_AO_DEL_KEY)``.
+
+Linux TCP-AO also provides a bunch of segment counters that can be helpful
+with troubleshooting/debugging issues. Every MKT has good/bad counters
+that reflect how many packets passed/failed verification.
+Each TCP-AO socket has the following counters:
+
+- for good segments (properly signed)
+- for bad segments (failed TCP-AO verification)
+- for segments with unknown keys
+- for segments where an AO signature was expected, but wasn't found
+- for the number of ignored ICMPs
+
+TCP-AO per-socket counters are also duplicated with per-netns counters,
+exposed with SNMP. Those are ``TCPAOGood``, ``TCPAOBad``, ``TCPAOKeyNotFound``,
+``TCPAORequired`` and ``TCPAODroppedIcmps``.
+
+RFC 5925 very permissively specifies how TCP port matching can be done for
+MKTs::
+
+   TCP connection identifier. A TCP socket pair, i.e., a local IP
+   address, a remote IP address, a TCP local port, and a TCP remote port.
+   Values can be partially specified using ranges (e.g., 2-30), masks
+   (e.g., 0xF0), wildcards (e.g., "*"), or any other suitable indication.
+
+Currently the Linux TCP-AO implementation doesn't provide any TCP port
+matching. Port ranges are probably the most flexible option for the uAPI,
+but they are not implemented so far.
+
+4. ``setsockopt()`` vs ``accept()`` race
+========================================
+
+In contrast to a TCP-MD5 established connection, which has exactly one key,
+TCP-AO connections may have many keys, which means that connections accepted
+on a listen socket may have any number of keys as well. As copying all those
+keys on a first properly signed SYN would make the request socket bigger,
+that would be undesirable. Currently, the implementation doesn't copy keys
+to request sockets, but rather looks them up on the "parent" listener socket.
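For illustration, here is a minimal sketch of installing an MKT with
``setsockopt(TCP_AO_ADD_KEY)`` on a socket that is still ``TCP_CLOSE`` or
``TCP_LISTEN``, using ``struct tcp_ao_add`` from this patch. The peer
address, KeyIDs and secret are made-up example values and error handling
is elided::

	#include <arpa/inet.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/tcp.h>	/* struct tcp_ao_add, TCP_AO_ADD_KEY */

	/* Install an HMAC-SHA1 MKT for peer 10.0.0.1 with SendID/RecvID 100 */
	static int ao_add_key(int sk)
	{
		struct tcp_ao_add ao = {};
		struct sockaddr_in *addr = (struct sockaddr_in *)&ao.addr;
		const char secret[] = "test_password";

		addr->sin_family = AF_INET;
		addr->sin_addr.s_addr = inet_addr("10.0.0.1");
		strcpy(ao.alg_name, "hmac(sha1)");	/* kernel crypto algorithm name */
		ao.prefix = 32;		/* match this exact peer address */
		ao.sndid = 100;		/* KeyID sent in outgoing segments */
		ao.rcvid = 100;		/* KeyID to match in incoming segments */
		ao.maclen = 12;		/* truncated MAC length in the AO option */
		ao.keylen = sizeof(secret) - 1;
		memcpy(ao.key, secret, ao.keylen);

		return setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao));
	}

A client would do the same before ``connect()``; as noted above, adding
a first key on an already-established non-TCP-AO connection is rejected.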
+
+The result of this listener-based lookup is that when userspace removes
+TCP-AO keys, that may break not-yet-established connections on request
+sockets, as well as not remove keys from sockets that were already
+established, but not yet ``accept()``'ed, hanging in the accept queue.
+
+The reverse is valid as well: if userspace adds a new key for a peer on
+a listener socket, the established sockets in the accept queue won't
+have the new keys.
+
+At this moment, the resolution for the two races,
+``setsockopt(TCP_AO_ADD_KEY)`` vs ``accept()``
+and ``setsockopt(TCP_AO_DEL_KEY)`` vs ``accept()``, is delegated to userspace.
+This means that it's expected that userspace would check the MKTs on the socket
+that was returned by ``accept()`` to verify that any key rotation that
+happened on the listen socket is reflected on the newly established connection.
+
+This is a similar "do-nothing" approach to TCP-MD5 from the kernel side and
+may be changed later by introducing new flags to ``tcp_ao_add``
+and ``tcp_ao_del``.
+
+Note that this race is rare, as it needs TCP-AO key rotation to happen
+during the 3-way handshake for the new TCP connection.
+
+5. Interaction with TCP-MD5
+===========================
+
+A TCP connection can not migrate between TCP-AO and TCP-MD5 options. The
+established sockets that have either AO or MD5 keys are restricted from
+adding keys of the other option.
+
+For listening sockets the picture is different: a BGP server may want to
+receive both TCP-AO and (deprecated) TCP-MD5 clients. As a result, both types
+of keys may be added to TCP_CLOSED or TCP_LISTEN sockets. It's not allowed
+to add different types of keys for the same peer.
+
+6. SNE Linux implementation
+===========================
+
+RFC 5925 [6.2] describes the algorithm of how to extend TCP sequence numbers
+with SNE. In short: TCP has to track the previous sequence numbers and set
+a sne_flag when the current SEQ number rolls over. The flag is cleared when
+both the current and previous SEQ numbers cross 0x7fff, which is 32Kb.
+
+While sne_flag is set, the algorithm compares the SEQ of each packet with
+0x7fff and if it's higher than 32Kb, it assumes that the packet should be
+verified with the SNE from before the increment. As a result, while SEQ
+numbers stay within this [0; 32Kb] window, packets with (SNE - 1) can still
+be accepted.
+
+The Linux implementation simplifies this a bit: as the network stack already
+tracks the first SEQ byte that an ACK is wanted for (snd_una) and the next
+SEQ byte that is wanted (rcv_nxt), that's enough information for a rough
+estimation of where in the 4GB SEQ number space both sender and receiver are.
+When they roll over to zero, the corresponding SNE gets incremented.
+
+tcp_ao_compute_sne() is called for each TCP-AO segment. It compares the SEQ
+numbers from the segment with snd_una or rcv_nxt and fits the result into
+a 2GB window around them, detecting SEQ numbers rolling over. That simplifies
+the code a lot and only requires the SNE numbers to be stored on every TCP-AO
+socket.
+
+The 2GB window at first glance seems much more permissive compared to
+RFC 5925. But it is only used to pick the correct SNE before/after
+a rollover. It allows more TCP segment replays, but all the regular
+TCP checks in tcp_sequence() are still applied on the verified segment.
+So, it trades a bit more permissive acceptance of replayed/retransmitted
+segments for the simplicity of the algorithm and what seems to be better
+behaviour for large TCP windows.
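To make the 2GB-window SNE selection concrete, below is a standalone sketch
of the logic described above (an illustration consistent with this
description, not a copy of the patch's code). ``basis`` stands for snd_una
(TX) or rcv_nxt (RX), and ``sne`` is the SNE that corresponds to it::

	/* seq1 is circularly "before" seq2 if it's within 2GB behind it */
	static int seq_before(unsigned int seq1, unsigned int seq2)
	{
		return (int)(seq1 - seq2) < 0;
	}

	/* Pick the SNE that applies to @seq */
	static unsigned int pick_sne(unsigned int sne, unsigned int basis,
				     unsigned int seq)
	{
		if (seq_before(seq, basis)) {
			/* seq lags basis: if it's numerically larger, basis
			 * has already wrapped past 0, so the segment belongs
			 * to the previous SNE epoch.
			 */
			if (seq > basis)
				sne--;
		} else {
			/* seq is ahead of basis: if it's numerically smaller,
			 * seq itself has wrapped past 0 first.
			 */
			if (seq < basis)
				sne++;
		}
		return sne;
	}

For example, with ``basis = 0xffffff00`` (about to roll over), a retransmit
with ``seq = 0xfffffe00`` keeps the current SNE, while a segment with
``seq = 0x00000100`` is verified with ``sne + 1``.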
+
+7. Links
+========
+
+RFC 5925 The TCP Authentication Option
+   https://www.rfc-editor.org/rfc/pdfrfc/rfc5925.txt.pdf
+
+RFC 5926 Cryptographic Algorithms for the TCP Authentication Option (TCP-AO)
+   https://www.rfc-editor.org/rfc/pdfrfc/rfc5926.txt.pdf
+
+Draft "SHA-2 Algorithm for the TCP Authentication Option (TCP-AO)"
+   https://datatracker.ietf.org/doc/html/draft-nayak-tcp-sha2-03
+
+RFC 2385 Protection of BGP Sessions via the TCP MD5 Signature Option
+   https://www.rfc-editor.org/rfc/pdfrfc/rfc2385.txt.pdf
+
+:Author: Dmitry Safonov <[email protected]>
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index bae5e2369b4f..307961b41541 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -55,6 +55,29 @@ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
 	return copy_from_sockptr_offset(dst, src, 0, size);
 }
 
+static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
+		sockptr_t src, size_t usize)
+{
+	size_t size = min(ksize, usize);
+	size_t rest = max(ksize, usize) - size;
+
+	/* copy_struct_from_user() zero-pads a smaller usize and rejects
+	 * a larger one with non-zero trailing bytes (-E2BIG), so pass it
+	 * the full user-claimed size.
+	 */
+	if (!sockptr_is_kernel(src))
+		return copy_struct_from_user(dst, ksize, src.user, usize);
+
+	if (usize < ksize) {
+		memset(dst + size, 0, rest);
+	} else if (usize > ksize) {
+		/* For kernel pointers, check the trailing bytes ourselves */
+		char *p = src.kernel + ksize;
+
+		while (rest--) {
+			if (*p++)
+				return -E2BIG;
+		}
+	}
+	memcpy(dst, src.kernel, size);
+	return 0;
+}
+
 static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset,
 		const void *src, size_t size)
 {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6df715b6e51d..ec4e9367f5b0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -166,6 +166,11 @@ struct tcp_request_sock {
 						  * after data-in-SYN.
 						  */
 	u8				syn_tos;
+#ifdef CONFIG_TCP_AO
+	u8				ao_keyid;
+	u8				ao_rcv_next;
+	u8				maclen;
+#endif
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -173,6 +178,19 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 	return (struct tcp_request_sock *)req;
 }
 
+static inline bool tcp_rsk_used_ao(const struct request_sock *req)
+{
+	/* The real length of MAC is saved in the request socket,
+	 * signing anything with zero-length makes no sense, so here is
+	 * a little hack..
+ */ +#ifndef CONFIG_TCP_AO + return false; +#else + return tcp_rsk(req)->maclen != 0; +#endif +} + #define TCP_RMEM_TO_WIN_SCALE 8 struct tcp_sock { @@ -447,13 +465,18 @@ struct tcp_sock { bool syn_smc; /* SYN includes SMC */ #endif -#ifdef CONFIG_TCP_MD5SIG -/* TCP AF-Specific parts; only used by MD5 Signature support so far */ +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */ const struct tcp_sock_af_ops *af_specific; +#ifdef CONFIG_TCP_MD5SIG /* TCP MD5 Signature Option information */ struct tcp_md5sig_info __rcu *md5sig_info; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif +#endif /* TCP fastopen related information */ struct tcp_fastopen_request *fastopen_req; @@ -509,6 +532,9 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_info __rcu *ao_info; +#endif }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 845dce805de7..3c70ad53a49c 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -20,9 +20,14 @@ FN(IP_NOPROTO) \ FN(SOCKET_RCVBUFF) \ FN(PROTO_MEM) \ + FN(TCP_AUTH_HDR) \ FN(TCP_MD5NOTFOUND) \ FN(TCP_MD5UNEXPECTED) \ FN(TCP_MD5FAILURE) \ + FN(TCP_AONOTFOUND) \ + FN(TCP_AOUNEXPECTED) \ + FN(TCP_AOKEYNOTFOUND) \ + FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ FN(TCP_ZEROWINDOW) \ @@ -143,6 +148,11 @@ enum skb_drop_reason { */ SKB_DROP_REASON_PROTO_MEM, /** + * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met + * twice or set incorrectly. + */ + SKB_DROP_REASON_TCP_AUTH_HDR, + /** * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, * corresponding to LINUX_MIB_TCPMD5NOTFOUND */ @@ -158,6 +168,26 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_MD5FAILURE, /** + * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected, + * corresponding to LINUX_MIB_TCPAOREQUIRED + */ + SKB_DROP_REASON_TCP_AONOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it + * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOUNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown, + * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND + */ + SKB_DROP_REASON_TCP_AOKEYNOTFOUND, + /** + * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong, + * corresponding to LINUX_MIB_TCPAOBAD + */ + SKB_DROP_REASON_TCP_AOFAILURE, + /** * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( * see LINUX_MIB_TCPBACKLOGDROP) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 993b7fcd4e46..d2f0736b76b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -37,6 +37,7 @@ #include <net/snmp.h> #include <net/ip.h> #include <net/tcp_states.h> +#include <net/tcp_ao.h> #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> @@ -194,6 +195,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ @@ -436,7 +438,6 @@ int tcp_mmap(struct file *file, struct socket *sock, void 
tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * BPF SKB-less helpers @@ -1688,12 +1689,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->retransmit_skb_hint = NULL; } -union tcp_md5_addr { - struct in_addr a4; -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr a6; -#endif -}; +#define tcp_md5_addr tcp_ao_addr /* - key database */ struct tcp_md5sig_key { @@ -1737,12 +1733,39 @@ union tcp_md5sum_block { #endif }; -/* - pool: digest algorithm, hash description and scratch buffer */ -struct tcp_md5sig_pool { - struct ahash_request *md5_req; - void *scratch; +/* + * struct tcp_sigpool - per-CPU pool of ahash_requests + * @scratch: per-CPU temporary area, that can be used between + * tcp_sigpool_start() and tcp_sigpool_end() to perform + * crypto request + * @req: pre-allocated ahash request + */ +struct tcp_sigpool { + void *scratch; + struct ahash_request *req; }; +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size); +void tcp_sigpool_get(unsigned int id); +void tcp_sigpool_release(unsigned int id); +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len); + +/** + * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @c: returned tcp_sigpool for usage (uninitialized on failure) + * + * Returns 0 on success, error otherwise. + */ +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); +/** + * tcp_sigpool_end - enable bh and stop using tcp_sigpool + * @c: tcp_sigpool context that was returned by tcp_sigpool_start() + */ +void tcp_sigpool_end(struct tcp_sigpool *c); +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len); /* - functions */ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); @@ -1755,6 +1778,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); +void tcp_clear_md5_list(struct sock *sk); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1763,20 +1787,29 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family); + int family, bool any_l3index); static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; - return __tcp_md5_do_lookup(sk, l3index, addr, family); + return __tcp_md5_do_lookup(sk, l3index, addr, family, false); +} + +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return NULL; + return __tcp_md5_do_lookup(sk, 0, addr, family, true); } enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif); + int family, int l3index, const __u8 *hash_location); #define 
tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) @@ -1788,27 +1821,29 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index, return NULL; } +static inline struct tcp_md5sig_key * +tcp_md5_do_lookup_any_l3index(const struct sock *sk, + const union tcp_md5_addr *addr, int family) +{ + return NULL; +} + static inline enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { return SKB_NOT_DROPPED_YET; } #define tcp_twsk_md5_key(twsk) NULL #endif -bool tcp_alloc_md5sig_pool(void); - -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -static inline void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); -} +int tcp_md5_alloc_sigpool(void); +void tcp_md5_release_sigpool(void); +void tcp_md5_add_sigpool(void); +extern int tcp_md5_sigpool_id; -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, - unsigned int header_len); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, +int tcp_md5_hash_key(struct tcp_sigpool *hp, const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ @@ -2157,6 +2192,18 @@ struct tcp_sock_af_ops { sockptr_t optval, int optlen); #endif +#ifdef CONFIG_TCP_AO + int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen); + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid); + int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); + int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +#endif }; struct tcp_request_sock_ops { @@ -2169,6 +2216,15 @@ struct tcp_request_sock_ops { const struct sock *sk, const struct sk_buff *skb); #endif +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *(*ao_lookup)(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); + int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk); + int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +#endif #ifdef CONFIG_SYN_COOKIES __u32 (*cookie_init_seq)(const struct sk_buff *skb, __u16 *mss); @@ -2209,6 +2265,76 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, } #endif +struct tcp_key { + union { + struct { + struct tcp_ao_key *ao_key; + char *traffic_key; + u32 sne; + u8 rcv_next; + }; + struct tcp_md5sig_key *md5_key; + }; + enum { + TCP_KEY_NONE = 0, + TCP_KEY_MD5, + TCP_KEY_AO, + } type; +}; + +static inline void tcp_get_current_key(const struct sock *sk, + struct tcp_key *out) +{ +#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG) + const struct tcp_sock *tp = tcp_sk(sk); +#endif + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key)) { + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + out->ao_key = READ_ONCE(ao->current_key); + out->type = TCP_KEY_AO; + return; + } + } +#endif +#ifdef CONFIG_TCP_MD5SIG + if (static_branch_unlikely(&tcp_md5_needed.key) && + rcu_access_pointer(tp->md5sig_info)) { + out->md5_key = tp->af_specific->md5_lookup(sk, sk); + if (out->md5_key) { + out->type = TCP_KEY_MD5; + return; + } + } +#endif + out->type = TCP_KEY_NONE; +} + +static inline bool tcp_key_is_md5(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_MD5SIG + 
if (static_branch_unlikely(&tcp_md5_needed.key) && + key->type == TCP_KEY_MD5) + return true; +#endif + return false; +} + +static inline bool tcp_key_is_ao(const struct tcp_key *key) +{ +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + key->type == TCP_KEY_AO) + return true; +#endif + return false; +} + int tcpv4_offload_init(void); void tcp_v4_init(void); @@ -2567,4 +2693,116 @@ static inline u64 tcp_transmit_time(const struct sock *sk) return 0; } +static inline int tcp_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const struct tcp_ao_hdr **aoh) +{ + const u8 *md5_tmp, *ao_tmp; + int ret; + + ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp); + if (ret) + return ret; + + if (md5_hash) + *md5_hash = md5_tmp; + + if (aoh) { + if (!ao_tmp) + *aoh = NULL; + else + *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2); + } + + return 0; +} + +static inline bool tcp_ao_required(struct sock *sk, const void *saddr, + int family, int l3index, bool stat_inc) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + struct tcp_ao_key *ao_key; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + + ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return false; + + ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1); + if (ao_info->ao_required || ao_key) { + if (stat_inc) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED); + atomic64_inc(&ao_info->counters.ao_required); + } + return true; + } +#endif + return false; +} + +/* Called with rcu_read_lock() */ +static inline enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. 
+ */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} + #endif /* _TCP_H */ diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h new file mode 100644 index 000000000000..a375a171ef3c --- /dev/null +++ b/include/net/tcp_ao.h @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_AO_H +#define _TCP_AO_H + +#define TCP_AO_KEY_ALIGN 1 +#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN) + +union tcp_ao_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + +struct tcp_ao_hdr { + u8 kind; + u8 length; + u8 keyid; + u8 rnext_keyid; +}; + +struct tcp_ao_counters { + atomic64_t pkt_good; + atomic64_t pkt_bad; + atomic64_t key_not_found; + atomic64_t ao_required; + atomic64_t dropped_icmp; +}; + +struct tcp_ao_key { + struct hlist_node node; + union tcp_ao_addr addr; + u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align; + unsigned int tcp_sigpool_id; + unsigned int digest_size; + int l3index; + u8 prefixlen; + u8 family; + u8 keylen; + u8 keyflags; + u8 sndid; + u8 rcvid; + u8 maclen; + struct rcu_head rcu; + atomic64_t pkt_good; + atomic64_t pkt_bad; + u8 traffic_keys[]; +}; + +static inline u8 *rcv_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys; +} + +static inline u8 *snd_other_key(struct tcp_ao_key *key) +{ + return key->traffic_keys + key->digest_size; +} + +static inline int tcp_ao_maclen(const struct tcp_ao_key *key) +{ + return key->maclen; +} + +static inline int tcp_ao_len(const struct tcp_ao_key *key) +{ + return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); +} + +static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) +{ + return key->digest_size; +} + +static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key) +{ + return sizeof(struct tcp_ao_key) + (key->digest_size << 1); +} + +struct tcp_ao_info { + /* List of tcp_ao_key's */ + struct hlist_head head; + /* current_key and rnext_key aren't maintained on listen sockets. + * Their purpose is to cache keys on established connections, + * saving needless lookups. Never dereference any of them from + * listen sockets. + * ::current_key may change in RX to the key that was requested by + * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid + * load/store tearing. + * Do the same for ::rnext_key, if you don't hold socket lock + * (it's changed only by userspace request in setsockopt()). + */ + struct tcp_ao_key *current_key; + struct tcp_ao_key *rnext_key; + struct tcp_ao_counters counters; + u32 ao_required :1, + accept_icmps :1, + __unused :30; + __be32 lisn; + __be32 risn; + /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ, + * that protect TCP-AO connection from replayed old TCP segments. + * See RFC5925 (6.2). + * In order to get correct SNE, there's a helper tcp_ao_compute_sne(). + * It needs SEQ basis to understand whereabouts are lower SEQ numbers. 
+ * According to that basis vector, it can provide incremented SNE + * when SEQ rolls over or provide decremented SNE when there's + * a retransmitted segment from before-rolling over. + * - for request sockets such basis is rcv_isn/snt_isn, which seems + * good enough as it's unexpected to receive 4 Gbytes on reqsk. + * - for full sockets the basis is rcv_nxt/snd_una. snd_una is + * taken instead of snd_nxt as currently it's easier to track + * in tcp_snd_una_update(), rather than updating SNE in all + * WRITE_ONCE(tp->snd_nxt, ...) + * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt. + * tw_snd_nxt is not expected to change, while tw_rcv_nxt may. + */ + u32 snd_sne; + u32 rcv_sne; + refcount_t refcnt; /* Protects twsk destruction */ + struct rcu_head rcu; +}; + +#define tcp_hash_fail(msg, family, skb, fmt, ...) \ +do { \ + const struct tcphdr *th = tcp_hdr(skb); \ + char hdr_flags[5] = {}; \ + char *f = hdr_flags; \ + \ + if (th->fin) \ + *f++ = 'F'; \ + if (th->syn) \ + *f++ = 'S'; \ + if (th->rst) \ + *f++ = 'R'; \ + if (th->ack) \ + *f++ = 'A'; \ + if (f != hdr_flags) \ + *f = ' '; \ + if ((family) == AF_INET) { \ + net_info_ratelimited("%s for (%pI4, %d)->(%pI4, %d) %s" fmt "\n", \ + msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ + &ip_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } else { \ + net_info_ratelimited("%s for [%pI6c]:%u->[%pI6c]:%u %s" fmt "\n", \ + msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ + &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ + hdr_flags, ##__VA_ARGS__); \ + } \ +} while (0) + +#ifdef CONFIG_TCP_AO +/* TCP-AO structures and functions */ +#include <linux/jump_label.h> +extern struct static_key_false_deferred tcp_ao_needed; + +struct tcp4_ao_context { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp6_ao_context { + struct in6_addr saddr; + struct in6_addr daddr; + __be16 sport; + __be16 dport; + __be32 sisn; + __be32 disn; +}; + +struct tcp_sigpool; +#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \ + TCPF_CLOSE | TCPF_CLOSE_WAIT | \ + TCPF_LAST_ACK | TCPF_CLOSING) + +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location); +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid); +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family); +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp); +void tcp_ao_destroy_sock(struct sock *sk, bool twsk); +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp); +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code); +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen); +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen); +enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk, + const struct sk_buff *skb, 
unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh); +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq); +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid); +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne); +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne); + +/* ipv4 specific functions */ +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid); +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send); +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +/* ipv6 specific functions */ +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes); +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, __be32 sisn, __be32 disn); +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send); +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req); +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, int sndid, int rcvid); +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid); +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne); +int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne); +void tcp_ao_established(struct sock *sk); +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); +void tcp_ao_connect_init(struct sock *sk); +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index); +#else /* CONFIG_TCP_AO */ + +static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + return 0; +} + +static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index) +{ +} + +static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family, + int type, int code) +{ + return false; +} + +static inline enum skb_drop_reason tcp_inbound_ao_hash(struct 
sock *sk, + const struct sk_buff *skb, unsigned short int family, + const struct request_sock *req, int l3index, + const struct tcp_ao_hdr *aoh) +{ + return SKB_NOT_DROPPED_YET; +} + +static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, + int l3index, const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return NULL; +} + +static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ +} + +static inline void tcp_ao_established(struct sock *sk) +{ +} + +static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, + struct tcp_sock *tp) +{ +} + +static inline void tcp_ao_connect_init(struct sock *sk) +{ +} + +static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_get_repair(struct sock *sk, + sockptr_t optval, sockptr_t optlen) +{ + return -ENOPROTOOPT; +} + +static inline int tcp_ao_set_repair(struct sock *sk, + sockptr_t optval, unsigned int optlen) +{ + return -ENOPROTOOPT; +} +#endif + +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash); +#else +static inline int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + *md5_hash = NULL; + *ao_hash = NULL; + return 0; +} +#endif + +#endif /* _TCP_AO_H */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b2b72886cb6d..a0819c6a5988 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -297,6 +297,11 @@ enum LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */ + LINUX_MIB_TCPAOREQUIRED, /* TCPAORequired */ + LINUX_MIB_TCPAOBAD, /* TCPAOBad */ + LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */ + LINUX_MIB_TCPAOGOOD, /* TCPAOGood */ + LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8aa3916e14f6..c07e9f90c084 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -129,6 +129,11 @@ enum { #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ +#define TCP_AO_ADD_KEY 38 /* Add/Set MKT */ +#define TCP_AO_DEL_KEY 39 /* Delete MKT */ +#define TCP_AO_INFO 40 /* Set/list TCP-AO per-socket options */ +#define TCP_AO_GET_KEYS 41 /* List MKT(s) */ +#define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 @@ -361,6 +366,106 @@ struct tcp_diag_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; }; +#define TCP_AO_MAXKEYLEN 80 + +#define TCP_AO_KEYF_IFINDEX (1 << 0) /* L3 ifindex for VRF */ +#define TCP_AO_KEYF_EXCLUDE_OPT (1 << 1) /* "Indicates whether TCP + * options other than TCP-AO + * are included in the MAC + * calculation" + */ + +struct tcp_ao_add { /* setsockopt(TCP_AO_ADD_KEY) */ + struct __kernel_sockaddr_storage addr; /* peer's address for the key */ + char alg_name[64]; /* crypto hash algorithm to use */ + __s32 ifindex; /* L3 dev index for VRF */ + __u32 set_current :1, /* set key as Current_key at once */ + set_rnext :1, /* request it from peer with RNext_key */ + reserved :30; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ + 
__u8 prefix; /* peer's address prefix */ + __u8 sndid; /* SendID for outgoing segments */ + __u8 rcvid; /* RecvID to match for incoming seg */ + __u8 maclen; /* length of authentication code (hash) */ + __u8 keyflags; /* see TCP_AO_KEYF_ */ + __u8 keylen; /* length of ::key */ + __u8 key[TCP_AO_MAXKEYLEN]; +} __attribute__((aligned(8))); + +struct tcp_ao_del { /* setsockopt(TCP_AO_DEL_KEY) */ + struct __kernel_sockaddr_storage addr; /* peer's address for the key */ + __s32 ifindex; /* L3 dev index for VRF */ + __u32 set_current :1, /* corresponding ::current_key */ + set_rnext :1, /* corresponding ::rnext */ + del_async :1, /* only valid for listen sockets */ + reserved :29; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ + __u8 prefix; /* peer's address prefix */ + __u8 sndid; /* SendID for outgoing segments */ + __u8 rcvid; /* RecvID to match for incoming seg */ + __u8 current_key; /* KeyID to set as Current_key */ + __u8 rnext; /* KeyID to set as Rnext_key */ + __u8 keyflags; /* see TCP_AO_KEYF_ */ +} __attribute__((aligned(8))); + +struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO), getsockopt(TCP_AO_INFO) */ + /* Here 'in' is for setsockopt(), 'out' is for getsockopt() */ + __u32 set_current :1, /* in/out: corresponding ::current_key */ + set_rnext :1, /* in/out: corresponding ::rnext */ + ao_required :1, /* in/out: don't accept non-AO connects */ + set_counters :1, /* in: set/clear ::pkt_* counters */ + accept_icmps :1, /* in/out: accept incoming ICMPs */ + reserved :27; /* must be 0 */ + __u16 reserved2; /* padding, must be 0 */ + __u8 current_key; /* in/out: KeyID of Current_key */ + __u8 rnext; /* in/out: keyid of RNext_key */ + __u64 pkt_good; /* in/out: verified segments */ + __u64 pkt_bad; /* in/out: failed verification */ + __u64 pkt_key_not_found; /* in/out: could not find a key to verify */ + __u64 pkt_ao_required; /* in/out: segments missing TCP-AO sign */ + __u64 pkt_dropped_icmp; /* in/out: ICMPs that were ignored */ +} __attribute__((aligned(8))); + +struct tcp_ao_getsockopt { /* getsockopt(TCP_AO_GET_KEYS) */ + struct __kernel_sockaddr_storage addr; /* in/out: dump keys for peer + * with this address/prefix + */ + char alg_name[64]; /* out: crypto hash algorithm */ + __u8 key[TCP_AO_MAXKEYLEN]; + __u32 nkeys; /* in: size of the userspace buffer + * @optval, measured in @optlen - the + * sizeof(struct tcp_ao_getsockopt) + * out: number of keys that matched + */ + __u16 is_current :1, /* in: match and dump Current_key, + * out: the dumped key is Current_key + */ + + is_rnext :1, /* in: match and dump RNext_key, + * out: the dumped key is RNext_key + */ + get_all :1, /* in: dump all keys */ + reserved :13; /* padding, must be 0 */ + __u8 sndid; /* in/out: dump keys with SendID */ + __u8 rcvid; /* in/out: dump keys with RecvID */ + __u8 prefix; /* in/out: dump keys with address/prefix */ + __u8 maclen; /* out: key's length of authentication + * code (hash) + */ + __u8 keyflags; /* in/out: see TCP_AO_KEYF_ */ + __u8 keylen; /* out: length of ::key */ + __s32 ifindex; /* in/out: L3 dev index for VRF */ + __u64 pkt_good; /* out: verified segments */ + __u64 pkt_bad; /* out: segments that failed verification */ +} __attribute__((aligned(8))); + +struct tcp_ao_repair { /* {s,g}etsockopt(TCP_AO_REPAIR) */ + __be32 snt_isn; + __be32 rcv_isn; + __u32 snd_sne; + __u32 rcv_sne; +} __attribute__((aligned(8))); + /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) 
*/ #define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 2dfb12230f08..8e94ed7c56a0 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -741,10 +741,27 @@ config DEFAULT_TCP_CONG default "bbr" if DEFAULT_BBR default "cubic" +config TCP_SIGPOOL + tristate + +config TCP_AO + bool "TCP: Authentication Option (RFC5925)" + select CRYPTO + select TCP_SIGPOOL + depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) + help + TCP-AO specifies the use of stronger Message Authentication Codes (MACs), + protects against replays for long-lived TCP connections, and + provides more details on the association of security with TCP + connections than TCP MD5 (See RFC5925) + + If unsure, say N. + config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 + select TCP_SIGPOOL help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index b18ba8ef93ad..e144a02a6a61 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -62,12 +62,14 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_TCP_SIGPOOL) += tcp_sigpool.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o ifeq ($(CONFIG_BPF_JIT),y) obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a85b0aba3646..5f4654ebff48 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -299,6 +299,11 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH), + SNMP_MIB_ITEM("TCPAORequired", LINUX_MIB_TCPAOREQUIRED), + SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD), + SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND), + SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD), + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c64334363230..98b25e5d147b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -344,6 +344,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; struct flowi4 fl4; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -405,6 +406,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); + /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 156264531124..53bcc17c91e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3593,6 +3593,31 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, __tcp_sock_set_quickack(sk, val); break; + case TCP_AO_REPAIR: + err = tcp_ao_set_repair(sk, optval, optlen); + 
break; +#ifdef CONFIG_TCP_AO + case TCP_AO_ADD_KEY: + case TCP_AO_DEL_KEY: + case TCP_AO_INFO: { + /* If this is the first TCP-AO setsockopt() on the socket, + * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR + * in any state. + */ + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + goto ao_parse; + if (rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk))) + goto ao_parse; + if (tp->repair) + goto ao_parse; + err = -EISCONN; + break; +ao_parse: + err = tp->af_specific->ao_parse(sk, optname, optval, optlen); + break; + } +#endif #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: @@ -4267,6 +4292,21 @@ zerocopy_rcv_out: return err; } #endif + case TCP_AO_REPAIR: + return tcp_ao_get_repair(sk, optval, optlen); + case TCP_AO_GET_KEYS: + case TCP_AO_INFO: { + int err; + + sockopt_lock_sock(sk); + if (optname == TCP_AO_GET_KEYS) + err = tcp_ao_get_mkts(sk, optval, optlen); + else + err = tcp_ao_get_sock_info(sk, optval, optlen); + sockopt_release_sock(sk); + + return err; + } default: return -ENOPROTOOPT; } @@ -4305,141 +4345,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG -static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); -static DEFINE_MUTEX(tcp_md5sig_mutex); -static bool tcp_md5sig_pool_populated = false; +int tcp_md5_sigpool_id = -1; +EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); -static void __tcp_alloc_md5sig_pool(void) +int tcp_md5_alloc_sigpool(void) { - struct crypto_ahash *hash; - int cpu; - - hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - return; - - for_each_possible_cpu(cpu) { - void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; - struct ahash_request *req; - - if (!scratch) { - scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + - sizeof(struct tcphdr), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!scratch) - return; - per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; - } - if (per_cpu(tcp_md5sig_pool, cpu).md5_req) - continue; - - req = ahash_request_alloc(hash, GFP_KERNEL); - if (!req) - return; - - ahash_request_set_callback(req, 0, NULL, NULL); - - per_cpu(tcp_md5sig_pool, cpu).md5_req = req; - } - /* before setting tcp_md5sig_pool_populated, we must commit all writes - * to memory. See smp_rmb() in tcp_get_md5sig_pool() - */ - smp_wmb(); - /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool() - * and tcp_get_md5sig_pool(). - */ - WRITE_ONCE(tcp_md5sig_pool_populated, true); -} - -bool tcp_alloc_md5sig_pool(void) -{ - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) { - mutex_lock(&tcp_md5sig_mutex); - - if (!tcp_md5sig_pool_populated) - __tcp_alloc_md5sig_pool(); + size_t scratch_size; + int ret; - mutex_unlock(&tcp_md5sig_mutex); + scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr); + ret = tcp_sigpool_alloc_ahash("md5", scratch_size); + if (ret >= 0) { + /* As long as any md5 sigpool was allocated, the return + * id would stay the same. Re-write the id only for the case + * when previously all MD5 keys were deleted and this call + * allocates the first MD5 key, which may return a different + * sigpool id than was used previously. 
+ */ + WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */ + return 0; } - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - return READ_ONCE(tcp_md5sig_pool_populated); + return ret; } -EXPORT_SYMBOL(tcp_alloc_md5sig_pool); - -/** - * tcp_get_md5sig_pool - get md5sig_pool for this user - * - * We use percpu structure, so if we succeed, we exit with preemption - * and BH disabled, to make sure another thread or softirq handling - * wont try to get same context. - */ -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) +void tcp_md5_release_sigpool(void) { - local_bh_disable(); - - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (READ_ONCE(tcp_md5sig_pool_populated)) { - /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ - smp_rmb(); - return this_cpu_ptr(&tcp_md5sig_pool); - } - local_bh_enable(); - return NULL; + tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - const struct sk_buff *skb, unsigned int header_len) +void tcp_md5_add_sigpool(void) { - struct scatterlist sg; - const struct tcphdr *tp = tcp_hdr(skb); - struct ahash_request *req = hp->md5_req; - unsigned int i; - const unsigned int head_data_len = skb_headlen(skb) > header_len ? - skb_headlen(skb) - header_len : 0; - const struct skb_shared_info *shi = skb_shinfo(skb); - struct sk_buff *frag_iter; - - sg_init_table(&sg, 1); - - sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); - ahash_request_set_crypt(req, &sg, NULL, head_data_len); - if (crypto_ahash_update(req)) - return 1; - - for (i = 0; i < shi->nr_frags; ++i) { - const skb_frag_t *f = &shi->frags[i]; - unsigned int offset = skb_frag_off(f); - struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); - - sg_set_page(&sg, page, skb_frag_size(f), - offset_in_page(offset)); - ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); - if (crypto_ahash_update(req)) - return 1; - } - - skb_walk_frags(skb, frag_iter) - if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) - return 1; - - return 0; + tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_sigpool *hp, + const struct tcp_md5sig_key *key) { u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; sg_init_one(&sg, key->key, keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + ahash_request_set_crypt(hp->req, &sg, NULL, keylen); - /* We use data_race() because tcp_md5_do_add() might change key->key under us */ - return data_race(crypto_ahash_update(hp->md5_req)); + /* We use data_race() because tcp_md5_do_add() might change + * key->key under us + */ + return data_race(crypto_ahash_update(hp->req)); } EXPORT_SYMBOL(tcp_md5_hash_key); @@ -4447,42 +4398,24 @@ EXPORT_SYMBOL(tcp_md5_hash_key); enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { - /* - * This gets called for each TCP segment that arrives - * so we want to be efficient. + /* This gets called for each TCP segment that has TCP-MD5 option. * We have 3 drop cases: * o No MD5 hash and one expected. * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. 
*/ - const __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; - const struct tcphdr *th = tcp_hdr(skb); const struct tcp_sock *tp = tcp_sk(sk); - int genhash, l3index; + struct tcp_md5sig_key *key; u8 newhash[16]; + int genhash; - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family); - hash_location = tcp_parse_md5sig_option(th); - - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return SKB_NOT_DROPPED_YET; - - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } + key = tcp_md5_do_lookup(sk, l3index, saddr, family); - if (!hash_expected && hash_location) { + if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4491,27 +4424,26 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, * IPv4-mapped case. */ if (family == AF_INET) - genhash = tcp_v4_md5_hash_skb(newhash, - hash_expected, - NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else - genhash = tp->af_specific->calc_md5_hash(newhash, - hash_expected, + genhash = tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); if (family == AF_INET) { - net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" - : "", l3index); + tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", + genhash ? "tcp_v4_calc_md5_hash failed" + : "", l3index); } else { - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", - genhash ? "failed" : "mismatch", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), l3index); + if (genhash) { + tcp_hash_fail("MD5 Hash failed", + AF_INET6, skb, "L3 index %d", + l3index); + } else { + tcp_hash_fail("MD5 Hash mismatch", + AF_INET6, skb, "L3 index %d", + l3index); + } } return SKB_DROP_REASON_TCP_MD5FAILURE; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c new file mode 100644 index 000000000000..6a845e906a1d --- /dev/null +++ b/net/ipv4/tcp_ao.c @@ -0,0 +1,2392 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. 
+ * + * Authors: Dmitry Safonov <[email protected]> + * Francesco Ruggeri <[email protected]> + * Salam Noureddine <[email protected]> + */ +#define pr_fmt(fmt) "TCP: " fmt + +#include <crypto/hash.h> +#include <linux/inetdevice.h> +#include <linux/tcp.h> + +#include <net/tcp.h> +#include <net/ipv6.h> +#include <net/icmp.h> + +DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); + +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp) +{ + struct scatterlist sg; + int ret; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp->req), + mkt->key, mkt->keylen)) + goto clear_hash; + + ret = crypto_ahash_init(hp->req); + if (ret) + goto clear_hash; + + sg_init_one(&sg, ctx, len); + ahash_request_set_crypt(hp->req, &sg, key, len); + crypto_ahash_update(hp->req); + + ret = crypto_ahash_final(hp->req); + if (ret) + goto clear_hash; + + return 0; +clear_hash: + memset(key, 0, tcp_ao_digest_size(mkt)); + return 1; +} + +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) +{ + bool ignore_icmp = false; + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + + /* RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. + */ + if (family == AF_INET) { + if (type != ICMP_DEST_UNREACH) + return false; + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED) + return false; + } else { + if (type != ICMPV6_DEST_UNREACH) + return false; + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH) + return false; + } + + rcu_read_lock(); + switch (sk->sk_state) { + case TCP_TIME_WAIT: + ao = rcu_dereference(tcp_twsk(sk)->ao_info); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: + case TCP_LISTEN: + case TCP_NEW_SYN_RECV: + /* RFC5925 specifies to ignore ICMPs *only* on connections + * in synchronized states. + */ + rcu_read_unlock(); + return false; + default: + ao = rcu_dereference(tcp_sk(sk)->ao_info); + } + + if (ao && !ao->accept_icmps) { + ignore_icmp = true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS); + atomic64_inc(&ao->counters.dropped_icmp); + } + rcu_read_unlock(); + + return ignore_icmp; +} + +/* Optimized version of tcp_ao_do_lookup(): only for sockets for which + * it's known that the keys in ao_info are matching peer's + * family/address/VRF/etc. 
+ */ +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if ((sndid >= 0 && key->sndid != sndid) || + (rcvid >= 0 && key->rcvid != rcvid)) + continue; + return key; + } + + return NULL; +} + +static int ipv4_prefix_cmp(const struct in_addr *addr1, + const struct in_addr *addr2, + unsigned int prefixlen) +{ + __be32 mask = inet_make_mask(prefixlen); + __be32 a1 = addr1->s_addr & mask; + __be32 a2 = addr2->s_addr & mask; + + if (a1 == a2) + return 0; + return memcmp(&a1, &a2, sizeof(a1)); +} + +static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ + if (sndid >= 0 && key->sndid != sndid) + return (key->sndid > sndid) ? 1 : -1; + if (rcvid >= 0 && key->rcvid != rcvid) + return (key->rcvid > rcvid) ? 1 : -1; + if (l3index >= 0 && (key->keyflags & TCP_AO_KEYF_IFINDEX)) { + if (key->l3index != l3index) + return (key->l3index > l3index) ? 1 : -1; + } + + if (family == AF_UNSPEC) + return 0; + if (key->family != family) + return (key->family > family) ? 1 : -1; + + if (family == AF_INET) { + if (ntohl(key->addr.a4.s_addr) == INADDR_ANY) + return 0; + if (ntohl(addr->a4.s_addr) == INADDR_ANY) + return 0; + return ipv4_prefix_cmp(&key->addr.a4, &addr->a4, prefixlen); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_any(&key->addr.a6) || ipv6_addr_any(&addr->a6)) + return 0; + if (ipv6_prefix_equal(&key->addr.a6, &addr->a6, prefixlen)) + return 0; + return memcmp(&key->addr.a6, &addr->a6, sizeof(addr->a6)); +#endif + } + return -1; +} + +static int tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&addr->a6)) { + __be32 addr4 = addr->a6.s6_addr32[3]; + + return __tcp_ao_key_cmp(key, l3index, + (union tcp_ao_addr *)&addr4, + prefixlen, AF_INET, sndid, rcvid); + } +#endif + return __tcp_ao_key_cmp(key, l3index, addr, + prefixlen, family, sndid, rcvid); +} + +static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, int family, u8 prefix, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return NULL; + + ao = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return NULL; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + u8 prefixlen = min(prefix, key->prefixlen); + + if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen, + family, sndid, rcvid)) + return key; + } + return NULL; +} + +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return __tcp_ao_do_lookup(sk, l3index, addr, family, U8_MAX, sndid, rcvid); +} + +static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) +{ + struct tcp_ao_info *ao; + + ao = kzalloc(sizeof(*ao), flags); + if (!ao) + return NULL; + INIT_HLIST_HEAD(&ao->head); + refcount_set(&ao->refcnt, 1); + + return ao; +} + +static void tcp_ao_link_mkt(struct tcp_ao_info *ao, struct tcp_ao_key *mkt) +{ + hlist_add_head_rcu(&mkt->node, &ao->head); +} + +static struct tcp_ao_key *tcp_ao_copy_key(struct sock *sk, + struct tcp_ao_key *key) +{ + struct tcp_ao_key *new_key; + + new_key = sock_kmalloc(sk, tcp_ao_sizeof_key(key), + 
GFP_ATOMIC); + if (!new_key) + return NULL; + + *new_key = *key; + INIT_HLIST_NODE(&new_key->node); + tcp_sigpool_get(new_key->tcp_sigpool_id); + atomic64_set(&new_key->pkt_good, 0); + atomic64_set(&new_key->pkt_bad, 0); + + return new_key; +} + +static void tcp_ao_key_free_rcu(struct rcu_head *head) +{ + struct tcp_ao_key *key = container_of(head, struct tcp_ao_key, rcu); + + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + struct hlist_node *n; + + if (twsk) { + ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); + tcp_twsk(sk)->ao_info = NULL; + } else { + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); + tcp_sk(sk)->ao_info = NULL; + } + + if (!ao || !refcount_dec_and_test(&ao->refcnt)) + return; + + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del_rcu(&key->node); + if (!twsk) + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + kfree_rcu(ao, rcu); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) +{ + struct tcp_ao_info *ao_info = rcu_dereference_protected(tp->ao_info, 1); + + if (ao_info) { + struct tcp_ao_key *key; + struct hlist_node *n; + int omem = 0; + + hlist_for_each_entry_safe(key, n, &ao_info->head, node) { + omem += tcp_ao_sizeof_key(key); + } + + refcount_inc(&ao_info->refcnt); + atomic_sub(omem, &(((struct sock *)tp)->sk_omem_alloc)); + rcu_assign_pointer(tcptw->ao_info, ao_info); + } else { + tcptw->ao_info = NULL; + } +} + +/* 4 tuple and ISNs are expected in NBO */ +static int tcp_v4_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + /* See RFC5926 3.1.1 */ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp4_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = saddr; + tmp->ctx.daddr = daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (send) + return tcp_v4_ao_calc_key(mkt, key, sk->sk_rcv_saddr, + sk->sk_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v4_ao_calc_key(mkt, key, sk->sk_daddr, + sk->sk_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (mkt->family == AF_INET) + return tcp_v4_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#if IS_ENABLED(CONFIG_IPV6) + else if (mkt->family == AF_INET6) + return tcp_v6_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#endif + else + return -EOPNOTSUPP; +} + +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v4_ao_calc_key(mkt, key, + ireq->ir_loc_addr, ireq->ir_rmt_addr, + 
htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +static int tcp_v4_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v4_ao_calc_key(mkt, key, iph->saddr, iph->daddr, + th->source, th->dest, sisn, disn); +} + +static int tcp_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn, int family) +{ + if (family == AF_INET) + return tcp_v4_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#endif + return -EAFNOSUPPORT; +} + +static int tcp_v4_ao_hash_pseudoheader(struct tcp_sigpool *hp, + __be32 daddr, __be32 saddr, + int nbytes) +{ + struct tcp4_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + bp->saddr = saddr; + bp->daddr = daddr; + bp->pad = 0; + bp->protocol = IPPROTO_TCP; + bp->len = cpu_to_be16(nbytes); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_pseudoheader(unsigned short int family, + const struct sock *sk, + const struct sk_buff *skb, + struct tcp_sigpool *hp, int nbytes) +{ + const struct tcphdr *th = tcp_hdr(skb); + + /* TODO: Can we rely on checksum being zero to mean outbound pkt? */ + if (!th->check) { + if (family == AF_INET) + return tcp_v4_ao_hash_pseudoheader(hp, sk->sk_daddr, + sk->sk_rcv_saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_hash_pseudoheader(hp, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, skb->len); +#endif + else + return -EAFNOSUPPORT; + } + + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_v4_ao_hash_pseudoheader(hp, iph->daddr, + iph->saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_v6_ao_hash_pseudoheader(hp, &iph->daddr, + &iph->saddr, skb->len); +#endif + } + return -EAFNOSUPPORT; +} + +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq) +{ + u32 sne = next_sne; + + if (before(seq, next_seq)) { + if (seq > next_seq) + sne--; + } else { + if (seq < next_seq) + sne++; + } + + return sne; +} + +/* tcp_ao_hash_sne(struct tcp_sigpool *hp) + * @hp - used for hashing + * @sne - sne value + */ +static int tcp_ao_hash_sne(struct tcp_sigpool *hp, u32 sne) +{ + struct scatterlist sg; + __be32 *bp; + + bp = (__be32 *)hp->scratch; + *bp = htonl(sne); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_header(struct tcp_sigpool *hp, + const struct tcphdr *th, + bool exclude_options, u8 *hash, + int hash_offset, int hash_len) +{ + int err, len = th->doff << 2; + struct scatterlist sg; + u8 *hdr = hp->scratch; + + /* We are not allowed to change tcphdr, make a local copy */ + if (exclude_options) { + len = sizeof(*th) + sizeof(struct tcp_ao_hdr) + hash_len; + memcpy(hdr, th, sizeof(*th)); + memcpy(hdr + sizeof(*th), + (u8 *)th + hash_offset - sizeof(struct tcp_ao_hdr), + sizeof(struct tcp_ao_hdr)); + memset(hdr + sizeof(*th) + sizeof(struct tcp_ao_hdr), + 0, hash_len); + ((struct tcphdr *)hdr)->check = 0; + } else { + len = th->doff << 2; + memcpy(hdr, th, len); + /* 
zero out tcp-ao hash */ + ((struct tcphdr *)hdr)->check = 0; + memset(hdr + hash_offset, 0, hash_len); + } + + sg_init_one(&sg, hdr, len); + ahash_request_set_crypt(hp->req, &sg, NULL, len); + err = crypto_ahash_update(hp->req); + WARN_ON_ONCE(err != 0); + return err; +} + +int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne) +{ + int tkey_len = tcp_ao_digest_size(key); + int hash_offset = ao_hash - (char *)th; + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (family == AF_INET) { + if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr, + saddr->a4.s_addr, th->doff * 4)) + goto clear_hash; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6, + &saddr->a6, th->doff * 4)) + goto clear_hash; +#endif + } else { + WARN_ON_ONCE(1); + goto clear_hash; + } + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + const struct tcphdr *th = tcp_hdr(skb); + int tkey_len = tcp_ao_digest_size(key); + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + /* For now use sha1 by default. 
Depends on alg in tcp_ao_key */ + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (tcp_ao_hash_pseudoheader(family, sk, skb, &hp, skb->len)) + goto clear_hash; + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET, ao_hash, key, sk, skb, + tkey, hash_offset, sne); +} + +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v4_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} + +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct inet_request_sock *ireq = inet_rsk(req); + union tcp_ao_addr *addr = (union tcp_ao_addr *)&ireq->ir_rmt_addr; + int l3index; + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); +} + +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid) +{ + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + union tcp_ao_addr *addr = (union tcp_ao_addr *)&addr_sk->sk_daddr; + + return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); +} + +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_ao_info *ao_info; + + *allocated_traffic_key = false; + /* If there's no socket - than initial sisn/disn are unknown. + * Drop the segment. RFC5925 (7.7) advises to require graceful + * restart [RFC4724]. Alternatively, the RFC5925 advises to + * save/restore traffic keys before/after reboot. + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR + * options to restore a socket post-reboot. 
+ */ + if (!sk) + return -ENOTCONN; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + unsigned int family = READ_ONCE(sk->sk_family); + union tcp_ao_addr *addr; + __be32 disn, sisn; + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + *sne = tcp_ao_compute_sne(0, tcp_rsk(req)->snt_isn, seq); + } else { + sisn = th->seq; + disn = 0; + } + if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + addr = (union tcp_md5_addr *)&ipv6_hdr(skb)->saddr; + else + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + family = AF_INET; +#endif + + sk = sk_const_to_full_sk(sk); + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao_info) + return -ENOENT; + *key = tcp_ao_do_lookup(sk, l3index, addr, family, + -1, aoh->rnext_keyid); + if (!*key) + return -ENOENT; + *traffic_key = kmalloc(tcp_ao_digest_size(*key), GFP_ATOMIC); + if (!*traffic_key) + return -ENOMEM; + *allocated_traffic_key = true; + if (tcp_ao_calc_key_skb(*key, *traffic_key, skb, + sisn, disn, family)) + return -1; + *keyid = (*key)->rcvid; + } else { + struct tcp_ao_key *rnext_key; + u32 snd_basis; + + if (sk->sk_state == TCP_TIME_WAIT) { + ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); + snd_basis = tcp_twsk(sk)->tw_snd_nxt; + } else { + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + snd_basis = tcp_sk(sk)->snd_una; + } + if (!ao_info) + return -ENOENT; + + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + if (!*key) + return -ENOENT; + *traffic_key = snd_other_key(*key); + rnext_key = READ_ONCE(ao_info->rnext_key); + *keyid = rnext_key->rcvid; + *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), + snd_basis, seq); + } + return 0; +} + +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao; + void *tkey_buf = NULL; + u8 *traffic_key; + u32 sne; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + traffic_key = snd_other_key(key); + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { + __be32 disn; + + if (!(tcb->tcp_flags & TCPHDR_ACK)) { + disn = 0; + tkey_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!tkey_buf) + return -ENOMEM; + traffic_key = tkey_buf; + } else { + disn = ao->risn; + } + tp->af_specific->ao_calc_key_sk(key, traffic_key, + sk, ao->lisn, disn, true); + } + sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una), + ntohl(th->seq)); + tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key, + hash_location - (u8 *)th, sne); + kfree(tkey_buf); + return 0; +} + +static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, + const struct sock *sk, const struct sk_buff *skb, + int sndid, int rcvid, int l3index) +{ + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET, sndid, rcvid); + } else { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET6, sndid, rcvid); + } +} + +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index) +{ + const struct 
tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + struct tcp_ao_key *key; + + treq->maclen = 0; + + if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) + return; + + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); + if (!key) + /* Key not found, continue without TCP-AO */ + return; + + treq->ao_rcv_next = aoh->keyid; + treq->ao_keyid = aoh->rnext_keyid; + treq->maclen = tcp_ao_maclen(key); +} + +static enum skb_drop_reason +tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, + unsigned short int family, struct tcp_ao_info *info, + const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, + u8 *traffic_key, u8 *phash, u32 sne, int l3index) +{ + u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); + const struct tcphdr *th = tcp_hdr(skb); + void *hash_buf = NULL; + + if (maclen != tcp_ao_maclen(key)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash wrong length", family, skb, + "%u != %d L3index: %d", maclen, + tcp_ao_maclen(key), l3index); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + + hash_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!hash_buf) + return SKB_DROP_REASON_NOT_SPECIFIED; + + /* XXX: make it per-AF callback? */ + tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, + (phash - (u8 *)th), sne); + if (memcmp(phash, hash_buf, maclen)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash mismatch", family, skb, + "L3index: %d", l3index); + kfree(hash_buf); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOGOOD); + atomic64_inc(&info->counters.pkt_good); + atomic64_inc(&key->pkt_good); + kfree(hash_buf); + return SKB_NOT_DROPPED_YET; +} + +enum skb_drop_reason +tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, + unsigned short int family, const struct request_sock *req, + int l3index, const struct tcp_ao_hdr *aoh) +{ + const struct tcphdr *th = tcp_hdr(skb); + u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ + struct tcp_ao_info *info; + enum skb_drop_reason ret; + struct tcp_ao_key *key; + __be32 sisn, disn; + u8 *traffic_key; + u32 sne = 0; + + info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!info) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + tcp_hash_fail("AO key not found", family, skb, + "keyid: %u L3index: %d", aoh->keyid, l3index); + return SKB_DROP_REASON_TCP_AOUNEXPECTED; + } + + if (unlikely(th->syn)) { + sisn = th->seq; + disn = 0; + } + + /* Fast-path */ + if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) { + enum skb_drop_reason err; + struct tcp_ao_key *current_key; + + /* Check if this socket's rnext_key matches the keyid in the + * packet. If not we lookup the key based on the keyid + * matching the rcvid in the mkt. 
+ */ + key = READ_ONCE(info->rnext_key); + if (key->rcvid != aoh->keyid) { + key = tcp_ao_established_key(info, -1, aoh->keyid); + if (!key) + goto key_not_found; + } + + /* Delayed retransmitted SYN */ + if (unlikely(th->syn && !th->ack)) + goto verify_hash; + + sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt, + ntohl(th->seq)); + /* Established socket, traffic key are cached */ + traffic_key = rcv_other_key(key); + err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne, l3index); + if (err) + return err; + current_key = READ_ONCE(info->current_key); + /* Key rotation: the peer asks us to use new key (RNext) */ + if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + /* If the key is not found we do nothing. */ + key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); + if (key) + /* pairs with tcp_ao_del_cmd */ + WRITE_ONCE(info->current_key, key); + } + return SKB_NOT_DROPPED_YET; + } + + /* Lookup key based on peer address and keyid. + * current_key and rnext_key must not be used on tcp listen + * sockets as otherwise: + * - request sockets would race on those key pointers + * - tcp_ao_del_cmd() allows async key removal + */ + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); + if (!key) + goto key_not_found; + + if (th->syn && !th->ack) + goto verify_hash; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + /* Make the initial syn the likely case here */ + if (unlikely(req)) { + sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn, + ntohl(th->seq)); + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + } else if (unlikely(th->ack && !th->syn)) { + /* Possible syncookie packet */ + sisn = htonl(ntohl(th->seq) - 1); + disn = htonl(ntohl(th->ack_seq) - 1); + sne = tcp_ao_compute_sne(0, ntohl(sisn), + ntohl(th->seq)); + } else if (unlikely(!th->syn)) { + /* no way to figure out initial sisn/disn - drop */ + return SKB_DROP_REASON_TCP_FLAGS; + } + } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + disn = info->lisn; + if (th->syn || th->rst) + sisn = th->seq; + else + sisn = info->risn; + } else { + WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state); + return SKB_DROP_REASON_TCP_AOFAILURE; + } +verify_hash: + traffic_key = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!traffic_key) + return SKB_DROP_REASON_NOT_SPECIFIED; + tcp_ao_calc_key_skb(key, traffic_key, skb, sisn, disn, family); + ret = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne, l3index); + kfree(traffic_key); + return ret; + +key_not_found: + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + atomic64_inc(&info->counters.key_not_found); + tcp_hash_fail("Requested by the peer AO key id not found", + family, skb, "L3index: %d", l3index); + return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; +} + +static int tcp_ao_cache_traffic_keys(const struct sock *sk, + struct tcp_ao_info *ao, + struct tcp_ao_key *ao_key) +{ + u8 *traffic_key = snd_other_key(ao_key); + int ret; + + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, true); + if (ret) + return ret; + + traffic_key = rcv_other_key(ao_key); + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, false); + return ret; +} + +void tcp_ao_connect_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + int family, l3index; + + ao_info = rcu_dereference_protected(tp->ao_info, + 
lockdep_sock_is_held(sk)); + if (!ao_info) + return; + + /* Remove all keys that don't match the peer */ + family = sk->sk_family; + if (family == AF_INET) + addr = (union tcp_ao_addr *)&sk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + addr = (union tcp_ao_addr *)&sk->sk_v6_daddr; +#endif + else + return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + sk->sk_bound_dev_if); + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) + continue; + + if (key == ao_info->current_key) + ao_info->current_key = NULL; + if (key == ao_info->rnext_key) + ao_info->rnext_key = NULL; + hlist_del_rcu(&key->node); + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + key = tp->af_specific->ao_lookup(sk, sk, -1, -1); + if (key) { + /* if current_key or rnext_key were not provided, + * use the first key matching the peer + */ + if (!ao_info->current_key) + ao_info->current_key = key; + if (!ao_info->rnext_key) + ao_info->rnext_key = key; + tp->tcp_header_len += tcp_ao_len(key); + + ao_info->lisn = htonl(tp->write_seq); + ao_info->snd_sne = 0; + } else { + /* Can't happen: tcp_connect() verifies that there's + * at least one tcp-ao key that matches the remote peer. + */ + WARN_ON_ONCE(1); + rcu_assign_pointer(tp->ao_info, NULL); + kfree(ao_info); + } +} + +void tcp_ao_established(struct sock *sk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); + ao->rcv_sne = 0; + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family) +{ + struct tcp_ao_key *key, *new_key, *first_key; + struct tcp_ao_info *new_ao, *ao; + struct hlist_node *key_head; + int l3index, ret = -ENOMEM; + union tcp_ao_addr *addr; + bool match = false; + + ao = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao) + return 0; + + /* New socket without TCP-AO on it */ + if (!tcp_rsk_used_ao(req)) + return 0; + + new_ao = tcp_ao_alloc_info(GFP_ATOMIC); + if (!new_ao) + return -ENOMEM; + new_ao->lisn = htonl(tcp_rsk(req)->snt_isn); + new_ao->risn = htonl(tcp_rsk(req)->rcv_isn); + new_ao->ao_required = ao->ao_required; + new_ao->accept_icmps = ao->accept_icmps; + + if (family == AF_INET) { + addr = (union tcp_ao_addr *)&newsk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + addr = (union tcp_ao_addr *)&newsk->sk_v6_daddr; +#endif + } else { + ret = -EAFNOSUPPORT; + goto free_ao; + } + l3index = l3mdev_master_ifindex_by_index(sock_net(newsk), + newsk->sk_bound_dev_if); + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if (tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) + continue; + + new_key = tcp_ao_copy_key(newsk, key); + if (!new_key) + goto free_and_exit; + + tcp_ao_cache_traffic_keys(newsk, new_ao, new_key); + tcp_ao_link_mkt(new_ao, new_key); + match = true; + } + + if (!match) { + /* 
RFC5925 (7.4.1) specifies that the TCP-AO status + * of a connection is determined on the initial SYN. + * At this point the connection was TCP-AO enabled, so + * it can't switch to being unsigned if peer's key + * disappears on the listening socket. + */ + ret = -EKEYREJECTED; + goto free_and_exit; + } + + if (!static_key_fast_inc_not_disabled(&tcp_ao_needed.key.key)) { + ret = -EUSERS; + goto free_and_exit; + } + + key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); + first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); + + key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1); + if (key) + new_ao->current_key = key; + else + new_ao->current_key = first_key; + + /* set rnext_key */ + key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next); + if (key) + new_ao->rnext_key = key; + else + new_ao->rnext_key = first_key; + + sk_gso_disable(newsk); + rcu_assign_pointer(tcp_sk(newsk)->ao_info, new_ao); + + return 0; + +free_and_exit: + hlist_for_each_entry_safe(key, key_head, &new_ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + atomic_sub(tcp_ao_sizeof_key(key), &newsk->sk_omem_alloc); + kfree_sensitive(key); + } +free_ao: + kfree(new_ao); + return ret; +} + +static bool tcp_ao_can_set_current_rnext(struct sock *sk) +{ + /* There aren't current/rnext keys on TCP_LISTEN sockets */ + if (sk->sk_state == TCP_LISTEN) + return false; + return true; +} + +static int tcp_ao_verify_ipv4(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **addr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd->addr; + struct inet_sock *inet = inet_sk(sk); + + if (sin->sin_family != AF_INET) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin->sin_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0) { + __be32 mask; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY) + return -EINVAL; + if (cmd->prefix > 32) + return -EINVAL; + + mask = inet_make_mask(cmd->prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (ntohl(inet->inet_daddr) != INADDR_ANY && + (inet->inet_daddr & mask) != sin->sin_addr.s_addr) + return -EINVAL; + } else { + if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY) + return -EINVAL; + } + + *addr = (union tcp_ao_addr *)&sin->sin_addr; + return 0; +} + +static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) +{ + unsigned int syn_tcp_option_space; + bool is_kdf_aes_128_cmac = false; + struct crypto_ahash *tfm; + struct tcp_sigpool hp; + void *tmp_key = NULL; + int err; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", cmd->alg_name)) { + strscpy(cmd->alg_name, "cmac(aes)", sizeof(cmd->alg_name)); + is_kdf_aes_128_cmac = (cmd->keylen != 16); + tmp_key = kmalloc(cmd->keylen, GFP_KERNEL); + if (!tmp_key) + return -ENOMEM; + } + + key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ + + /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss + * - tstamp - wscale - sackperm), + * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. + * + * In order to allow D-SACK with TCP-AO, the header size should be: + * (MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED + * - TCPOLEN_SACK_BASE_ALIGNED + * - 2 * TCPOLEN_SACK_PERBLOCK) = 8 (maclen = 4), + * see tcp_established_options(). 
+ * + * RFC5925, 2.2: + * Typical MACs are 96-128 bits (12-16 bytes), but any length + * that fits in the header of the segment being authenticated + * is allowed. + * + * RFC5925, 7.6: + * TCP-AO continues to consume 16 bytes in non-SYN segments, + * leaving a total of 24 bytes for other options, of which + * the timestamp consumes 10. This leaves 14 bytes, of which 10 + * are used for a single SACK block. When two SACK blocks are used, + * such as to handle D-SACK, a smaller TCP-AO MAC would be required + * to make room for the additional SACK block (i.e., to leave 18 + * bytes for the D-SACK variant of the SACK option) [RFC2883]. + * Note that D-SACK is not supportable in TCP MD5 in the presence + * of timestamps, because TCP MD5’s MAC length is fixed and too + * large to leave sufficient option space. + */ + syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; + syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; + syn_tcp_option_space -= TCPOLEN_SACKPERM_ALIGNED; + if (tcp_ao_len(key) > syn_tcp_option_space) { + err = -EMSGSIZE; + goto err_kfree; + } + + key->keylen = cmd->keylen; + memcpy(key->key, cmd->key, cmd->keylen); + + err = tcp_sigpool_start(key->tcp_sigpool_id, &hp); + if (err) + goto err_kfree; + + tfm = crypto_ahash_reqtfm(hp.req); + if (is_kdf_aes_128_cmac) { + void *scratch = hp.scratch; + struct scatterlist sg; + + memcpy(tmp_key, cmd->key, cmd->keylen); + sg_init_one(&sg, tmp_key, cmd->keylen); + + /* Using zero-key of 16 bytes as described in RFC5926 */ + memset(scratch, 0, 16); + err = crypto_ahash_setkey(tfm, scratch, 16); + if (err) + goto err_pool_end; + + err = crypto_ahash_init(hp.req); + if (err) + goto err_pool_end; + + ahash_request_set_crypt(hp.req, &sg, key->key, cmd->keylen); + err = crypto_ahash_update(hp.req); + if (err) + goto err_pool_end; + + err |= crypto_ahash_final(hp.req); + if (err) + goto err_pool_end; + key->keylen = 16; + } + + err = crypto_ahash_setkey(tfm, key->key, key->keylen); + if (err) + goto err_pool_end; + + tcp_sigpool_end(&hp); + kfree_sensitive(tmp_key); + + if (tcp_ao_maclen(key) > key->digest_size) + return -EINVAL; + + return 0; + +err_pool_end: + tcp_sigpool_end(&hp); +err_kfree: + kfree_sensitive(tmp_key); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd->addr; + struct in6_addr *addr = &sin6->sin6_addr; + u8 prefix = cmd->prefix; + + if (sin6->sin6_family != AF_INET6) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin6->sin6_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0 && ipv6_addr_v4mapped(addr)) { + __be32 addr4 = addr->s6_addr32[3]; + __be32 mask; + + if (prefix > 32 || ntohl(addr4) == INADDR_ANY) + return -EINVAL; + + mask = inet_make_mask(prefix); + if (addr4 & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + __be32 daddr4 = sk->sk_v6_daddr.s6_addr32[3]; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return -EINVAL; + if ((daddr4 & mask) != addr4) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)&addr->s6_addr32[3]; + *family = AF_INET; + return 0; + } else if (cmd->prefix != 0) { + struct in6_addr pfx; + + if (ipv6_addr_any(addr) || prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr, prefix); + 
if (ipv6_addr_cmp(&pfx, addr)) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_prefix_equal(&sk->sk_v6_daddr, addr, prefix)) + + return -EINVAL; + } else { + if (!ipv6_addr_any(addr)) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)addr; + return 0; +} +#else +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + return -EOPNOTSUPP; +} +#endif + +static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) { + return rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + } else if (sk->sk_state == TCP_TIME_WAIT) { + return rcu_dereference_protected(tcp_twsk(sk)->ao_info, + lockdep_sock_is_held(sk)); + } + return ERR_PTR(-ESOCKTNOSUPPORT); +} + +static struct tcp_ao_info *getsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) + return rcu_dereference(tcp_sk(sk)->ao_info); + else if (sk->sk_state == TCP_TIME_WAIT) + return rcu_dereference(tcp_twsk(sk)->ao_info); + + return ERR_PTR(-ESOCKTNOSUPPORT); +} + +#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_IFINDEX | TCP_AO_KEYF_EXCLUDE_OPT) +#define TCP_AO_GET_KEYF_VALID (TCP_AO_KEYF_IFINDEX) + +static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, + struct tcp_ao_add *cmd) +{ + const char *algo = cmd->alg_name; + unsigned int digest_size; + struct crypto_ahash *tfm; + struct tcp_ao_key *key; + struct tcp_sigpool hp; + int err, pool_id; + size_t size; + + /* Force null-termination of alg_name */ + cmd->alg_name[ARRAY_SIZE(cmd->alg_name) - 1] = '\0'; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", algo)) + algo = "cmac(aes)"; + + /* Full TCP header (th->doff << 2) should fit into scratch area, + * see tcp_ao_hash_header(). 
+ */ + pool_id = tcp_sigpool_alloc_ahash(algo, 60); + if (pool_id < 0) + return ERR_PTR(pool_id); + + err = tcp_sigpool_start(pool_id, &hp); + if (err) + goto err_free_pool; + + tfm = crypto_ahash_reqtfm(hp.req); + if (crypto_ahash_alignmask(tfm) > TCP_AO_KEY_ALIGN) { + err = -EOPNOTSUPP; + goto err_pool_end; + } + digest_size = crypto_ahash_digestsize(tfm); + tcp_sigpool_end(&hp); + + size = sizeof(struct tcp_ao_key) + (digest_size << 1); + key = sock_kmalloc(sk, size, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto err_free_pool; + } + + key->tcp_sigpool_id = pool_id; + key->digest_size = digest_size; + return key; + +err_pool_end: + tcp_sigpool_end(&hp); +err_free_pool: + tcp_sigpool_release(pool_id); + return ERR_PTR(err); +} + +static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + struct tcp_ao_add cmd; + int ret, l3index = 0; + bool first = false; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + ret = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (ret) + return ret; + + if (cmd.keylen > TCP_AO_MAXKEYLEN) + return -EINVAL; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (family == AF_INET) + ret = tcp_ao_verify_ipv4(sk, &cmd, &addr); + else + ret = tcp_ao_verify_ipv6(sk, &cmd, &addr, &family); + if (ret) + return ret; + + if (cmd.keyflags & ~TCP_AO_KEYF_ALL) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + /* For cmd.tcp_ifindex = 0 the key will apply to the default VRF */ + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX && cmd.ifindex) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), cmd.ifindex); + if (dev && netif_is_l3_master(dev)) + l3index = dev->ifindex; + rcu_read_unlock(); + + if (!dev || !l3index) + return -EINVAL; + + /* It's still possible to bind after adding keys or even + * re-bind to a different dev (with CAP_NET_RAW). + * So, no reason to return error here, rather try to be + * nice and warn the user. + */ + if (bound_dev_if && bound_dev_if != cmd.ifindex) + net_warn_ratelimited("AO key ifindex %d != sk bound ifindex %d\n", + cmd.ifindex, bound_dev_if); + } + + /* Don't allow keys for peers that have a matching TCP-MD5 key */ + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX) { + /* Non-_exact version of tcp_md5_do_lookup() will + * as well match keys that aren't bound to a specific VRF + * (that will make them match AO key with + * sysctl_tcp_l3dev_accept = 1 + */ + if (tcp_md5_do_lookup(sk, l3index, addr, family)) + return -EKEYREJECTED; + } else { + if (tcp_md5_do_lookup_any_l3index(sk, addr, family)) + return -EKEYREJECTED; + } + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + + if (!ao_info) { + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } else { + /* Check that neither RecvID nor SendID match any + * existing key for the peer, RFC5925 3.1: + * > The IDs of MKTs MUST NOT overlap where their + * > TCP connection identifiers overlap. 
+ */ + if (__tcp_ao_do_lookup(sk, l3index, addr, family, cmd.prefix, -1, cmd.rcvid)) + return -EEXIST; + if (__tcp_ao_do_lookup(sk, l3index, addr, family, + cmd.prefix, cmd.sndid, -1)) + return -EEXIST; + } + + key = tcp_ao_key_alloc(sk, &cmd); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto err_free_ao; + } + + INIT_HLIST_NODE(&key->node); + memcpy(&key->addr, addr, (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); + key->prefixlen = cmd.prefix; + key->family = family; + key->keyflags = cmd.keyflags; + key->sndid = cmd.sndid; + key->rcvid = cmd.rcvid; + key->l3index = l3index; + atomic64_set(&key->pkt_good, 0); + atomic64_set(&key->pkt_bad, 0); + + ret = tcp_ao_parse_crypto(&cmd, key); + if (ret < 0) + goto err_free_sock; + + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { + tcp_ao_cache_traffic_keys(sk, ao_info, key); + if (first) { + ao_info->current_key = key; + ao_info->rnext_key = key; + } + } + + tcp_ao_link_mkt(ao_info, key); + if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + ret = -EUSERS; + goto err_free_sock; + } + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + + if (cmd.set_current) + WRITE_ONCE(ao_info->current_key, key); + if (cmd.set_rnext) + WRITE_ONCE(ao_info->rnext_key, key); + return 0; + +err_free_sock: + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +err_free_ao: + if (first) + kfree(ao_info); + return ret; +} + +static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, + bool del_async, struct tcp_ao_key *key, + struct tcp_ao_key *new_current, + struct tcp_ao_key *new_rnext) +{ + int err; + + hlist_del_rcu(&key->node); + + /* Support for async delete on listening sockets: as they don't + * need current_key/rnext_key maintaining, we don't need to check + * them and we can just free all resources in RCU fashion. + */ + if (del_async) { + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + return 0; + } + + /* At this moment another CPU could have looked this key up + * while it was unlinked from the list. Wait for RCU grace period, + * after which the key is off-list and can't be looked up again; + * the rx path [just before RCU came] might have used it and set it + * as current_key (very unlikely). + * Free the key with next RCU grace period (in case it was + * current_key before tcp_ao_current_rnext() might have + * changed it in forced-delete). 
+ */ + synchronize_rcu(); + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + + if (unlikely(READ_ONCE(ao_info->current_key) == key || + READ_ONCE(ao_info->rnext_key) == key)) { + err = -EBUSY; + goto add_key; + } + + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + + return 0; +add_key: + hlist_add_head_rcu(&key->node, &ao_info->head); + return err; +} + +#define TCP_AO_DEL_KEYF_ALL (TCP_AO_KEYF_IFINDEX) +static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL; + int err, addr_len, l3index = 0; + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_del cmd; + __u8 prefix; + u16 port; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.keyflags & ~TCP_AO_DEL_KEYF_ALL) + return -EINVAL; + + /* No sanity check for TCP_AO_KEYF_IFINDEX as if a VRF + * was destroyed, there still should be a way to delete keys, + * that were bound to that l3intf. So, fail late at lookup stage + * if there is no key for that ifindex. + */ + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + /* For sockets in TCP_CLOSED it's possible set keys that aren't + * matching the future peer (address/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any. + */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) + return -ENOENT; + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) + return -ENOENT; + } + if (cmd.del_async && sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr; + + addr = (union tcp_ao_addr *)&sin->sin_addr; + addr_len = sizeof(struct in_addr); + port = ntohs(sin->sin_port); + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.addr; + struct in6_addr *addr6 = &sin6->sin6_addr; + + if (ipv6_addr_v4mapped(addr6)) { + addr = (union tcp_ao_addr *)&addr6->s6_addr32[3]; + addr_len = sizeof(struct in_addr); + family = AF_INET; + } else { + addr = (union tcp_ao_addr *)addr6; + addr_len = sizeof(struct in6_addr); + } + port = ntohs(sin6->sin6_port); + } + prefix = cmd.prefix; + + /* Currently matching is not performed on port (or port ranges) */ + if (port != 0) + return -EINVAL; + + /* We could choose random present key here for current/rnext + * but that's less predictable. Let's be strict and don't + * allow removing a key that's in use. 
RFC5925 doesn't + * specify how-to coordinate key removal, but says: + * "It is presumed that an MKT affecting a particular + * connection cannot be destroyed during an active connection" + */ + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (cmd.sndid != key->sndid || + cmd.rcvid != key->rcvid) + continue; + + if (family != key->family || + prefix != key->prefixlen || + memcmp(addr, &key->addr, addr_len)) + continue; + + if ((cmd.keyflags & TCP_AO_KEYF_IFINDEX) != + (key->keyflags & TCP_AO_KEYF_IFINDEX)) + continue; + + if (key->l3index != l3index) + continue; + + if (key == new_current || key == new_rnext) + continue; + + return tcp_ao_delete_key(sk, ao_info, cmd.del_async, key, + new_current, new_rnext); + } + return -ENOENT; +} + +/* cmd.ao_required makes a socket TCP-AO only. + * Don't allow any md5 keys for any l3intf on the socket together with it. + * Restricting it early in setsockopt() removes a check for + * ao_info->ao_required on inbound tcp segment fast-path. + */ +static int tcp_ao_required_verify(struct sock *sk) +{ +#ifdef CONFIG_TCP_MD5SIG + const struct tcp_md5sig_info *md5sig; + + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return 0; + + md5sig = rcu_dereference_check(tcp_sk(sk)->md5sig_info, + lockdep_sock_is_held(sk)); + if (!md5sig) + return 0; + + if (rcu_dereference_check(hlist_first_rcu(&md5sig->head), + lockdep_sock_is_held(sk))) + return 1; +#endif + return 0; +} + +static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *new_current = NULL, *new_rnext = NULL; + struct tcp_ao_info *ao_info; + struct tcp_ao_info_opt cmd; + bool first = false; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) { + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } + + if (cmd.ao_required && tcp_ao_required_verify(sk)) + return -EKEYREJECTED; + + /* For sockets in TCP_CLOSED it's possible set keys that aren't + * matching the future peer (address/port/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any. 
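+	 *
+	 * For illustration only, a minimal user-space caller of this
+	 * handler could look like the sketch below (the values are just
+	 * an example; fields are those of struct tcp_ao_info_opt used
+	 * as @cmd here):
+	 *
+	 *	struct tcp_ao_info_opt in = {
+	 *		.ao_required	= 1,
+	 *		.set_rnext	= 1,
+	 *		.rnext		= 100,
+	 *	};
+	 *	setsockopt(fd, IPPROTO_TCP, TCP_AO_INFO, &in, sizeof(in));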
+ */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) { + err = -ENOENT; + goto out; + } + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) { + err = -ENOENT; + goto out; + } + } + if (cmd.set_counters) { + atomic64_set(&ao_info->counters.pkt_good, cmd.pkt_good); + atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad); + atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found); + atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required); + atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp); + } + + ao_info->ao_required = cmd.ao_required; + ao_info->accept_icmps = cmd.accept_icmps; + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + err = -EUSERS; + goto out; + } + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + return 0; +out: + if (first) + kfree(ao_info); + return err; +} + +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen) +{ + if (WARN_ON_ONCE(family != AF_INET && family != AF_INET6)) + return -EAFNOSUPPORT; + + switch (cmd) { + case TCP_AO_ADD_KEY: + return tcp_ao_add_cmd(sk, family, optval, optlen); + case TCP_AO_DEL_KEY: + return tcp_ao_del_cmd(sk, family, optval, optlen); + case TCP_AO_INFO: + return tcp_ao_info_cmd(sk, family, optval, optlen); + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} + +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET, optval, optlen); +} + +/* tcp_ao_copy_mkts_to_user(ao_info, optval, optlen) + * + * @ao_info: struct tcp_ao_info on the socket that + * socket getsockopt(TCP_AO_GET_KEYS) is executed on + * @optval: pointer to array of tcp_ao_getsockopt structures in user space. + * Must be != NULL. + * @optlen: pointer to size of tcp_ao_getsockopt structure. + * Must be != NULL. + * + * Return value: 0 on success, a negative error number otherwise. + * + * optval points to an array of tcp_ao_getsockopt structures in user space. + * optval[0] is used as both input and output to getsockopt. It determines + * which keys are returned by the kernel. + * optval[0].nkeys is the size of the array in user space. On return it contains + * the number of keys matching the search criteria. + * If tcp_ao_getsockopt::get_all is set, then all keys in the socket are + * returned, otherwise only keys matching <addr, prefix, sndid, rcvid> + * in optval[0] are returned. + * optlen is also used as both input and output. The user provides the size + * of struct tcp_ao_getsockopt in user space, and the kernel returns the size + * of the structure in kernel space. + * The size of struct tcp_ao_getsockopt may differ between user and kernel. + * There are three cases to consider: + * * If usize == ksize, then keys are copied verbatim. + * * If usize < ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in optval[0] + * (ksize - usize) are interpreted as 0 by the kernel. + * * If usize > ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (usize - ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * On return the kernel fills in min(usize, ksize) in each entry of the array. 
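+ * For illustration only, dumping all keys of a socket from user space
+ * could look roughly like this (error checking omitted):
+ *
+ *	struct tcp_ao_getsockopt keys[16] = {};
+ *	socklen_t len = sizeof(keys[0]);
+ *
+ *	keys[0].nkeys = 16;
+ *	keys[0].get_all = 1;
+ *	getsockopt(fd, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len);
+ *	(on return keys[0].nkeys holds the number of keys that matched)
+ *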
+ * The layout of the fields in the user and kernel structures is expected to + * be the same (including in the 32bit vs 64bit case). + */ +static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, + sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_getsockopt opt_in, opt_out; + struct tcp_ao_key *key, *current_key; + bool do_address_matching = true; + union tcp_ao_addr *addr = NULL; + int err, l3index, user_len; + unsigned int max_keys; /* maximum number of keys to copy to user */ + size_t out_offset = 0; + size_t bytes_to_write; /* number of bytes to write to user level */ + u32 matched_keys; /* keys from ao_info matched so far */ + int optlen_out; + __be16 port = 0; + + if (copy_from_sockptr(&user_len, optlen, sizeof(int))) + return -EFAULT; + + if (user_len <= 0) + return -EINVAL; + + memset(&opt_in, 0, sizeof(struct tcp_ao_getsockopt)); + err = copy_struct_from_sockptr(&opt_in, sizeof(opt_in), + optval, user_len); + if (err < 0) + return err; + + if (opt_in.pkt_good || opt_in.pkt_bad) + return -EINVAL; + if (opt_in.keyflags & ~TCP_AO_GET_KEYF_VALID) + return -EINVAL; + if (opt_in.ifindex && !(opt_in.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + if (opt_in.reserved != 0) + return -EINVAL; + + max_keys = opt_in.nkeys; + l3index = (opt_in.keyflags & TCP_AO_KEYF_IFINDEX) ? opt_in.ifindex : -1; + + if (opt_in.get_all || opt_in.is_current || opt_in.is_rnext) { + if (opt_in.get_all && (opt_in.is_current || opt_in.is_rnext)) + return -EINVAL; + do_address_matching = false; + } + + switch (opt_in.addr.ss_family) { + case AF_INET: { + struct sockaddr_in *sin; + __be32 mask; + + sin = (struct sockaddr_in *)&opt_in.addr; + port = sin->sin_port; + addr = (union tcp_ao_addr *)&sin->sin_addr; + + if (opt_in.prefix > 32) + return -EINVAL; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY && + opt_in.prefix != 0) + return -EINVAL; + + mask = inet_make_mask(opt_in.prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6; + struct in6_addr *addr6; + + sin6 = (struct sockaddr_in6 *)&opt_in.addr; + addr = (union tcp_ao_addr *)&sin6->sin6_addr; + addr6 = &sin6->sin6_addr; + port = sin6->sin6_port; + + /* We don't have to change family and @addr here if + * ipv6_addr_v4mapped() like in key adding: + * tcp_ao_key_cmp() does it. Do the sanity checks though. 
+ */ + if (opt_in.prefix != 0) { + if (ipv6_addr_v4mapped(addr6)) { + __be32 mask, addr4 = addr6->s6_addr32[3]; + + if (opt_in.prefix > 32 || + ntohl(addr4) == INADDR_ANY) + return -EINVAL; + mask = inet_make_mask(opt_in.prefix); + if (addr4 & ~mask) + return -EINVAL; + } else { + struct in6_addr pfx; + + if (ipv6_addr_any(addr6) || + opt_in.prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr6, opt_in.prefix); + if (ipv6_addr_cmp(&pfx, addr6)) + return -EINVAL; + } + } else if (!ipv6_addr_any(addr6)) { + return -EINVAL; + } + break; + } + case 0: + if (!do_address_matching) + break; + fallthrough; + default: + return -EAFNOSUPPORT; + } + + if (!do_address_matching) { + /* We could just ignore those, but let's do stricter checks */ + if (addr || port) + return -EINVAL; + if (opt_in.prefix || opt_in.sndid || opt_in.rcvid) + return -EINVAL; + } + + bytes_to_write = min_t(int, user_len, sizeof(struct tcp_ao_getsockopt)); + matched_keys = 0; + /* May change in RX, while we're dumping, pre-fetch it */ + current_key = READ_ONCE(ao_info->current_key); + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (opt_in.get_all) + goto match; + + if (opt_in.is_current || opt_in.is_rnext) { + if (opt_in.is_current && key == current_key) + goto match; + if (opt_in.is_rnext && key == ao_info->rnext_key) + goto match; + continue; + } + + if (tcp_ao_key_cmp(key, l3index, addr, opt_in.prefix, + opt_in.addr.ss_family, + opt_in.sndid, opt_in.rcvid) != 0) + continue; +match: + matched_keys++; + if (matched_keys > max_keys) + continue; + + memset(&opt_out, 0, sizeof(struct tcp_ao_getsockopt)); + + if (key->family == AF_INET) { + struct sockaddr_in *sin_out = (struct sockaddr_in *)&opt_out.addr; + + sin_out->sin_family = key->family; + sin_out->sin_port = 0; + memcpy(&sin_out->sin_addr, &key->addr, sizeof(struct in_addr)); + } else { + struct sockaddr_in6 *sin6_out = (struct sockaddr_in6 *)&opt_out.addr; + + sin6_out->sin6_family = key->family; + sin6_out->sin6_port = 0; + memcpy(&sin6_out->sin6_addr, &key->addr, sizeof(struct in6_addr)); + } + opt_out.sndid = key->sndid; + opt_out.rcvid = key->rcvid; + opt_out.prefix = key->prefixlen; + opt_out.keyflags = key->keyflags; + opt_out.is_current = (key == current_key); + opt_out.is_rnext = (key == ao_info->rnext_key); + opt_out.nkeys = 0; + opt_out.maclen = key->maclen; + opt_out.keylen = key->keylen; + opt_out.ifindex = key->l3index; + opt_out.pkt_good = atomic64_read(&key->pkt_good); + opt_out.pkt_bad = atomic64_read(&key->pkt_bad); + memcpy(&opt_out.key, key->key, key->keylen); + tcp_sigpool_algo(key->tcp_sigpool_id, opt_out.alg_name, 64); + + /* Copy key to user */ + if (copy_to_sockptr_offset(optval, out_offset, + &opt_out, bytes_to_write)) + return -EFAULT; + out_offset += user_len; + } + + optlen_out = (int)sizeof(struct tcp_ao_getsockopt); + if (copy_to_sockptr(optlen, &optlen_out, sizeof(int))) + return -EFAULT; + + out_offset = offsetof(struct tcp_ao_getsockopt, nkeys); + if (copy_to_sockptr_offset(optval, out_offset, + &matched_keys, sizeof(u32))) + return -EFAULT; + + return 0; +} + +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info *ao_info; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen); +} + +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info_opt out, in = {}; + struct tcp_ao_key *current_key; + struct 
tcp_ao_info *ao; + int err, len; + + if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + /* Copying this "in" only to check ::reserved, ::reserved2, + * that may be needed to extend (struct tcp_ao_info_opt) and + * what getsockopt() provides in future. + */ + err = copy_struct_from_sockptr(&in, sizeof(in), optval, len); + if (err) + return err; + + if (in.reserved != 0 || in.reserved2 != 0) + return -EINVAL; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + memset(&out, 0, sizeof(out)); + out.ao_required = ao->ao_required; + out.accept_icmps = ao->accept_icmps; + out.pkt_good = atomic64_read(&ao->counters.pkt_good); + out.pkt_bad = atomic64_read(&ao->counters.pkt_bad); + out.pkt_key_not_found = atomic64_read(&ao->counters.key_not_found); + out.pkt_ao_required = atomic64_read(&ao->counters.ao_required); + out.pkt_dropped_icmp = atomic64_read(&ao->counters.dropped_icmp); + + current_key = READ_ONCE(ao->current_key); + if (current_key) { + out.set_current = 1; + out.current_key = current_key->sndid; + } + if (ao->rnext_key) { + out.set_rnext = 1; + out.rnext = ao->rnext_key->rcvid; + } + + if (copy_to_sockptr(optval, &out, min_t(int, len, sizeof(out)))) + return -EFAULT; + + return 0; +} + +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair cmd; + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (!tp->repair) + return -EPERM; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + WRITE_ONCE(ao->lisn, cmd.snt_isn); + WRITE_ONCE(ao->risn, cmd.rcv_isn); + WRITE_ONCE(ao->snd_sne, cmd.snd_sne); + WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne); + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); + + return 0; +} + +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair opt; + struct tcp_ao_info *ao; + int len; + + if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + rcu_read_lock(); + ao = getsockopt_ao_info(sk); + if (IS_ERR_OR_NULL(ao)) { + rcu_read_unlock(); + return ao ? 
PTR_ERR(ao) : -ENOENT; + } + + opt.snt_isn = ao->lisn; + opt.rcv_isn = ao->risn; + opt.snd_sne = READ_ONCE(ao->snd_sne); + opt.rcv_sne = READ_ONCE(ao->rcv_sne); + rcu_read_unlock(); + + if (copy_to_sockptr(optval, &opt, min_t(int, len, sizeof(opt)))) + return -EFAULT; + return 0; +} diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 00d04ab68958..50aaa1527150 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3572,6 +3572,21 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin)); } +static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && ack < tp->snd_una) + ao->snd_sne++; +#endif +} + /* If we update tp->snd_una, also update tp->bytes_acked */ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { @@ -3579,9 +3594,25 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) sock_owned_by_me((struct sock *)tp); tp->bytes_acked += delta; + tcp_snd_sne_update(tp, ack); tp->snd_una = ack; } +static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && seq < tp->rcv_nxt) + ao->rcv_sne++; +#endif +} + /* If we update tp->rcv_nxt, also update tp->bytes_received */ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { @@ -3589,6 +3620,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) sock_owned_by_me((struct sock *)tp); tp->bytes_received += delta; + tcp_rcv_sne_update(tp, seq); WRITE_ONCE(tp->rcv_nxt, seq); } @@ -4255,39 +4287,58 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) /* - * Parse MD5 Signature option + * Parse Signature options */ -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) { int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); + unsigned int minlen = TCPOLEN_MD5SIG; + + if (IS_ENABLED(CONFIG_TCP_AO)) + minlen = sizeof(struct tcp_ao_hdr) + 1; + + *md5_hash = NULL; + *ao_hash = NULL; /* If not enough data remaining, we can short cut */ - while (length >= TCPOLEN_MD5SIG) { + while (length >= minlen) { int opcode = *ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: - return NULL; + return 0; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2 || opsize > length) - return NULL; - if (opcode == TCPOPT_MD5SIG) - return opsize == TCPOLEN_MD5SIG ? ptr : NULL; + return -EINVAL; + if (opcode == TCPOPT_MD5SIG) { + if (opsize != TCPOLEN_MD5SIG) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *md5_hash = ptr; + } else if (opcode == TCPOPT_AO) { + if (opsize <= sizeof(struct tcp_ao_hdr)) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *ao_hash = ptr; + } } ptr += opsize - 2; length -= opsize; } - return NULL; + return 0; } -EXPORT_SYMBOL(tcp_parse_md5sig_option); +EXPORT_SYMBOL(tcp_do_parse_auth_options); #endif /* Sorry, PAWS as specified is broken wrt. 
pure-ACKs -DaveM @@ -6151,6 +6202,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + tcp_ao_finish_connect(sk, skb); tcp_set_state(sk, TCP_ESTABLISHED); icsk->icsk_ack.lrcvtime = tcp_jiffies32; @@ -6436,6 +6488,16 @@ consume: * simultaneous connect with crossed SYNs. * Particularly, it can be connect to self. */ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + WRITE_ONCE(ao->risn, th->seq); + ao->rcv_sne = 0; + } +#endif tcp_set_state(sk, TCP_SYN_RECV); if (tp->rx_opt.saw_tstamp) { @@ -6648,6 +6710,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) skb); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); } + tcp_ao_established(sk); smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -7024,6 +7087,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct flowi fl; u8 syncookies; +#ifdef CONFIG_TCP_AO + const struct tcp_ao_hdr *aoh; +#endif + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without @@ -7110,6 +7177,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, inet_rsk(req)->ecn_ok = 0; } +#ifdef CONFIG_TCP_AO + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto drop_and_release; /* Invalid TCP options */ + if (aoh) { + tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr); + tcp_rsk(req)->ao_rcv_next = aoh->keyid; + tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + } else { + tcp_rsk(req)->maclen = 0; + } +#endif tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7583d4e34c8c..5f693bbd578d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -507,6 +509,11 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. @@ -657,6 +664,52 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) + +static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, + struct ip_reply_arg *arg, struct tcphdr *reply, + __be32 reply_options[REPLY_OPTIONS_LEN]) +{ +#ifdef CONFIG_TCP_AO + int sdif = tcp_v4_sdif(skb); + int dif = inet_iif(skb); + int l3index = sdif ? 
dif : 0; + bool allocated_traffic_key; + struct tcp_ao_key *key; + char *traffic_key; + bool drop = true; + u32 ao_sne = 0; + u8 keyid; + + rcu_read_lock(); + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, ntohl(reply->seq), + &key, &traffic_key, &allocated_traffic_key, + &keyid, &ao_sne)) + goto out; + + reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | + (aoh->rnext_keyid << 8) | keyid); + arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + reply->doff = arg->iov[0].iov_len / 4; + + if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], + key, traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + reply, ao_sne)) + goto out; + drop = false; +out: + rcu_read_unlock(); + if (allocated_traffic_key) + kfree(traffic_key); + return drop; +#else + return true; +#endif +} + /* * This routine will send an RST to the other tcp. * @@ -670,26 +723,21 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -#ifdef CONFIG_TCP_MD5SIG -#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED -#else -#define OPTION_BYTES sizeof(__be32) -#endif - static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[OPTION_BYTES / sizeof(__be32)]; + __be32 opt[REPLY_OPTIONS_LEN]; } rep; + const __u8 *md5_hash_location = NULL; + const struct tcp_ao_hdr *aoh; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key = NULL; - const __u8 *hash_location = NULL; unsigned char newhash[16]; - int genhash; struct sock *sk1 = NULL; + int genhash; #endif u64 transmit_time = 0; struct sock *ctl_sk; @@ -726,9 +774,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) + return; + + if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt)) + return; + #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { const union tcp_md5_addr *addr; int l3index; @@ -739,7 +794,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) l3index = tcp_v4_sdif(skb) ? 
inet_iif(skb) : 0; addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - } else if (hash_location) { + } else if (md5_hash_location) { const union tcp_md5_addr *addr; int sdif = tcp_v4_sdif(skb); int dif = inet_iif(skb); @@ -771,7 +826,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } @@ -863,17 +918,13 @@ out: static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, + struct tcp_key *key, int reply_flags, u8 tos, u32 txhash) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) -#ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -#endif - ]; + __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)]; } rep; struct net *net = sock_net(sk); struct ip_reply_arg arg; @@ -904,7 +955,7 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | @@ -915,10 +966,28 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->saddr, + key->md5_key, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &rep.th); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + int offset = (tsecr) ? 3 : 0; + + rep.opt[offset++] = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + key->rcv_next); + arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4); + rep.th.doff = arg.iov[0].iov_len / 4; + + tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], + key->ao_key, key->traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + &rep.th, key->sne); + } +#endif arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ @@ -951,18 +1020,53 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + if (static_branch_unlikely(&tcp_ao_needed.key)) { + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) { + inet_twsk_put(tw); + return; + } + + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + key.sne = READ_ONCE(ao_info->snd_sne); + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), tcptw->tw_ts_recent, - tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw), + 
tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, tw->tw_tos, - tw->tw_txhash - ); + tw->tw_txhash); inet_twsk_put(tw); } @@ -970,8 +1074,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - const union tcp_md5_addr *addr; - int l3index; + struct tcp_key key = {}; /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -979,23 +1082,77 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { + const union tcp_md5_addr *addr; + const struct tcp_ao_hdr *aoh; + int l3index; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, + aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. + */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI4, %d)->(%pI4, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ip_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; + + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v4_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + const union tcp_md5_addr *addr; + int l3index; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } + /* RFC 7323 2.3 * The window field (SEG.WND) of every outgoing segment, with the * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; - l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), - 0, - tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + 0, &key, inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos, READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } /* @@ -1082,7 +1239,7 @@ static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key * /* Find the Key structure for an address. 
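+ * When @any_l3index is true, keys are matched regardless of their
+ * L3/VRF binding, i.e. TCP_MD5SIG_FLAG_IFINDEX is not taken into account.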
*/ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family) + int family, bool any_l3index) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1101,7 +1258,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index) + if (!any_l3index && key->flags & TCP_MD5SIG_FLAG_IFINDEX && + key->l3index != l3index) continue; if (family == AF_INET) { mask = inet_make_mask(key->prefixlen); @@ -1221,10 +1379,6 @@ static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; - if (!tcp_alloc_md5sig_pool()) { - sock_kfree_s(sk, key, sizeof(*key)); - return -ENOMEM; - } memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; @@ -1246,8 +1400,13 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, GFP_KERNEL)) + if (tcp_md5_alloc_sigpool()) + return -ENOMEM; + + if (tcp_md5sig_info_add(sk, GFP_KERNEL)) { + tcp_md5_release_sigpool(); return -ENOMEM; + } if (!static_branch_inc(&tcp_md5_needed.key)) { struct tcp_md5sig_info *md5sig; @@ -1255,6 +1414,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1271,8 +1431,12 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) + tcp_md5_add_sigpool(); + + if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) { + tcp_md5_release_sigpool(); return -ENOMEM; + } if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { struct tcp_md5sig_info *md5sig; @@ -1281,6 +1445,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1306,7 +1471,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, } EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1330,6 +1495,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, const union tcp_md5_addr *addr; u8 prefixlen = 32; int l3index = 0; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -1342,6 +1508,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -1376,11 +1543,17 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; + /* Don't allow keys for peers that have a matching TCP-AO key. 
+ * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp, __be32 daddr, __be32 saddr, const struct tcphdr *th, int nbytes) { @@ -1400,38 +1573,35 @@ static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -1440,9 +1610,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sigpool hp; __be32 saddr, daddr; if (sk) { /* valid for establish/request sockets */ @@ -1454,30 +1623,28 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, daddr = iph->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -1526,6 +1693,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = 
tcp_v4_ao_lookup_rsk, + .ao_calc_key = tcp_v4_ao_calc_key_rsk, + .ao_synack_hash = tcp_v4_ao_synack_hash, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, #endif @@ -1627,12 +1799,16 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Copy over the MD5 key from the original socket */ addr = (union tcp_md5_addr *)&newinet->inet_daddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - if (key) { + if (key && !tcp_rsk_used_ao(req)) { if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key)) goto put_and_exit; sk_gso_disable(newsk); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET)) + goto put_and_exit; /* OOM, release back memory */ +#endif if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; @@ -2043,9 +2219,9 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &iph->saddr, &iph->daddr, - AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (unlikely(drop_reason)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -2122,8 +2298,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, - &iph->daddr, AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -2270,11 +2446,19 @@ const struct inet_connection_sock_af_ops ipv4_specific = { }; EXPORT_SYMBOL(ipv4_specific); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v4_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v4_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, + .ao_parse = tcp_v4_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, +#endif }; #endif @@ -2289,13 +2473,25 @@ static int tcp_v4_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv4_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; #endif return 0; } +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5sig_info_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_info *md5sig; + + md5sig = container_of(head, struct tcp_md5sig_info, rcu); + kfree(md5sig); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2320,12 +2516,15 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { + struct tcp_md5sig_info *md5sig; + + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); tcp_clear_md5_list(sk); - kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); - tp->md5sig_info = NULL; - static_branch_slow_dec_deferred(&tcp_md5_needed); + call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu); + rcu_assign_pointer(tp->md5sig_info, NULL); } #endif + tcp_ao_destroy_sock(sk, false); /* Clean up a referenced TCP bind bucket. 
*/ if (inet_csk(sk)->icsk_bind_hash) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ace806c5bd0c..a9807eeb311c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -51,6 +51,18 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, return TCP_TW_SUCCESS; } +static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference(tcptw->ao_info); + if (unlikely(ao && seq < tcptw->tw_rcv_nxt)) + WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1); +#endif + tcptw->tw_rcv_nxt = seq; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -136,7 +148,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, /* FIN arrived, enter true time-wait state. */ tw->tw_substate = TCP_TIME_WAIT; - tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq); + if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent_stamp = ktime_get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -261,10 +274,9 @@ static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (!tcptw->tw_md5_key) return; - if (!tcp_alloc_md5sig_pool()) - goto out_free; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; + tcp_md5_add_sigpool(); } return; out_free: @@ -280,7 +292,7 @@ out_free: void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_timewait_sock *tw; @@ -317,6 +329,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #endif tcp_time_wait_init(sk, tcptw); + tcp_ao_time_wait(tcptw, tp); /* Get the TIME_WAIT timeout firing. 
*/ if (timeo < rto) @@ -349,18 +362,29 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } EXPORT_SYMBOL(tcp_time_wait); +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5_twsk_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_key *key; + + key = container_of(head, struct tcp_md5sig_key, rcu); + kfree(key); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - kfree_rcu(twsk->tw_md5_key, rcu); - static_branch_slow_dec_deferred(&tcp_md5_needed); - } + if (twsk->tw_md5_key) + call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif + tcp_ao_destroy_sock(sk, true); } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); @@ -495,6 +519,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key; +#endif if (!newsk) return NULL; @@ -583,6 +610,13 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ #endif +#ifdef CONFIG_TCP_AO + newtp->ao_info = NULL; + ao_key = treq->af_specific->ao_lookup(sk, req, + tcp_rsk(req)->ao_keyid, -1); + if (ao_key) + newtp->tcp_header_len += tcp_ao_len(ao_key); + #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ca4d7594efd4..f558c054cf6e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -422,6 +422,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_FAST_OPEN_COOKIE BIT(8) #define OPTION_SMC BIT(9) #define OPTION_MPTCP BIT(10) +#define OPTION_AO BIT(11) static void smc_options_write(__be32 *ptr, u16 *options) { @@ -614,19 +615,52 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, * (but it may well be that other scenarios fail similarly). 
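+ *
+ * For reference, the TCP-AO option emitted below for OPTION_AO has the
+ * layout defined by RFC 5925:
+ *
+ *	+---------+----------+---------+------------+--------------------+
+ *	| Kind=29 |  Length  |  KeyID  | RNextKeyID | MAC (Length - 4 B) |
+ *	+---------+----------+---------+------------+--------------------+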
*/ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, - struct tcp_out_options *opts) + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key) { __be32 *ptr = (__be32 *)(th + 1); u16 options = opts->options; /* mungable copy */ - if (unlikely(OPTION_MD5 & options)) { + if (tcp_key_is_md5(key)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; - } + } else if (tcp_key_is_ao(key)) { +#ifdef CONFIG_TCP_AO + u8 maclen = tcp_ao_maclen(key->ao_key); + + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + goto out_ao; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +out_ao: +#endif + } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | @@ -767,23 +801,25 @@ static void mptcp_set_option_cond(const struct request_sock *req, */ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; + bool timestamps; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - remaining -= TCPOLEN_MD5SIG_ALIGNED; + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + timestamps = false; + opts->options |= OPTION_MD5; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + } else { + timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); + if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len(key->ao_key); } } -#endif /* We always get an MSS option. 
The option bytes which will be seen in * normal data packets should timestamps be used, must be in the MSS @@ -797,7 +833,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { + if (likely(timestamps)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; @@ -850,7 +886,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, + const struct tcp_key *key, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) @@ -858,8 +894,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; -#ifdef CONFIG_TCP_MD5SIG - if (md5) { + if (tcp_key_is_md5(key)) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -870,8 +905,11 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ if (synack_type != TCP_SYNACK_COOKIE) ireq->tstamp_ok &= !ireq->sack_ok; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len(key->ao_key); + ireq->tstamp_ok &= !ireq->sack_ok; } -#endif /* We always send an MSS option. */ opts->mss = mss; @@ -922,7 +960,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; @@ -930,17 +968,14 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->options = 0; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; - } + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + size += tcp_ao_len(key->ao_key); } -#endif if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; @@ -1245,7 +1280,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; struct sk_buff *oskb = NULL; - struct tcp_md5sig_key *md5; + struct tcp_key key; struct tcphdr *th; u64 prior_wstamp; int err; @@ -1277,11 +1312,11 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); + tcp_get_current_key(sk, &key); if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { - tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); + tcp_options_size = tcp_syn_options(sk, skb, &opts, &key); } else { - tcp_options_size = tcp_established_options(sk, skb, &opts, - &md5); + tcp_options_size = tcp_established_options(sk, skb, &opts, &key); /* Force a PSH flag on all (GSO) packets to expedite GRO flush * at receiver : This slightly improve GRO performance. 
* Note that we do not force the PSH flag for non GSO packets, @@ -1362,16 +1397,25 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->window = htons(min(tp->rcv_wnd, 65535U)); } - tcp_options_write(th, tp, &opts); + tcp_options_write(th, tp, NULL, &opts, &key); + if (tcp_key_is_md5(&key)) { #ifdef CONFIG_TCP_MD5SIG - /* Calculate the MD5 hash, as we have all we need now */ - if (md5) { + /* Calculate the MD5 hash, as we have all we need now */ sk_gso_disable(sk); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, skb); - } + key.md5_key, sk, skb); #endif + } else if (tcp_key_is_ao(&key)) { + int err; + + err = tcp_ao_transmit_skb(sk, skb, key.ao_key, th, + opts.hash_location); + if (err) { + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); + return -ENOMEM; + } + } /* BPF prog is the last one writing header option */ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); @@ -1804,7 +1848,7 @@ unsigned int tcp_current_mss(struct sock *sk) u32 mss_now; unsigned int header_len; struct tcp_out_options opts; - struct tcp_md5sig_key *md5; + struct tcp_key key; mss_now = tp->mss_cache; @@ -1813,8 +1857,8 @@ unsigned int tcp_current_mss(struct sock *sk) if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } - - header_len = tcp_established_options(sk, NULL, &opts, &md5) + + tcp_get_current_key(sk, &key); + header_len = tcp_established_options(sk, NULL, &opts, &key) + sizeof(struct tcphdr); /* The mss_cache is sized based on tp->tcp_header_len, which assumes * some common options. If this is an odd packet (because we have SACK @@ -3611,8 +3655,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_key *md5 = NULL; struct tcp_out_options opts; + struct tcp_key key = {}; struct sk_buff *skb; int tcp_header_size; struct tcphdr *th; @@ -3664,16 +3708,48 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif + if (tcp_rsk_used_ao(req)) { +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key = NULL; + u8 maclen = tcp_rsk(req)->maclen; + u8 keyid = tcp_rsk(req)->ao_keyid; + + ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), + keyid, -1); + /* If there is no matching key - avoid sending anything, + * especially usigned segments. It could try harder and lookup + * for another peer-matching key, but the peer has requested + * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. + */ + if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) { + u8 key_maclen = ao_key ? 
tcp_ao_maclen(ao_key) : 0; + + rcu_read_unlock(); + kfree_skb(skb); + net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n", + keyid, maclen, key_maclen); + return NULL; + } + key.ao_key = ao_key; + key.type = TCP_KEY_AO; +#endif + } else { +#ifdef CONFIG_TCP_MD5SIG + key.md5_key = tcp_rsk(req)->af_specific->req_md5_lookup(sk, + req_to_sk(req)); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4); /* bpf program will be interested in the tcp_flags */ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK; - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc, synack_type, - syn_skb) + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, + &key, foc, synack_type, syn_skb) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -3693,15 +3769,24 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write(th, NULL, &opts); + tcp_options_write(th, NULL, tcp_rsk(req), &opts, &key); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); -#ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) + if (tcp_key_is_md5(&key)) { +#ifdef CONFIG_TCP_MD5SIG tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, req_to_sk(req), skb); + key.md5_key, req_to_sk(req), skb); +#endif + } else if (tcp_key_is_ao(&key)) { +#ifdef CONFIG_TCP_AO + tcp_rsk(req)->af_specific->ao_synack_hash(opts.hash_location, + key.ao_key, req, skb, + opts.hash_location - (u8 *)th, 0); +#endif + } +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_unlock(); #endif @@ -3749,6 +3834,8 @@ static void tcp_connect_init(struct sock *sk) if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + tcp_ao_connect_init(sk); + /* If user gave his TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; @@ -3931,6 +4018,53 @@ int tcp_connect(struct sock *sk) tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); +#if defined(CONFIG_TCP_MD5SIG) && defined(CONFIG_TCP_AO) + /* Has to be checked late, after setting daddr/saddr/ops. + * Return error if the peer has both a md5 and a tcp-ao key + * configured as this is ambiguous. + */ + if (unlikely(rcu_dereference_protected(tp->md5sig_info, + lockdep_sock_is_held(sk)))) { + bool needs_ao = !!tp->af_specific->ao_lookup(sk, sk, -1, -1); + bool needs_md5 = !!tp->af_specific->md5_lookup(sk, sk); + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao_info) { + /* This is an extra check: tcp_ao_required() in + * tcp_v{4,6}_parse_md5_keys() should prevent adding + * md5 keys on ao_required socket. + */ + needs_ao |= ao_info->ao_required; + WARN_ON_ONCE(ao_info->ao_required && needs_md5); + } + if (needs_md5 && needs_ao) + return -EKEYREJECTED; + + /* If we have a matching md5 key and no matching tcp-ao key + * then free up ao_info if allocated. 
+ */ + if (needs_md5) { + tcp_ao_destroy_sock(sk, false); + } else if (needs_ao) { + tcp_clear_md5_list(sk); + kfree(rcu_replace_pointer(tp->md5sig_info, NULL, + lockdep_sock_is_held(sk))); + } + } +#endif +#ifdef CONFIG_TCP_AO + if (unlikely(rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)))) { + /* Don't allow connecting if ao is configured but no + * matching key is found. + */ + if (!tp->af_specific->ao_lookup(sk, sk, -1, -1)) + return -EKEYREJECTED; + } +#endif + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c new file mode 100644 index 000000000000..65a8eaae2fec --- /dev/null +++ b/net/ipv4/tcp_sigpool.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <crypto/hash.h> +#include <linux/cpu.h> +#include <linux/kref.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/workqueue.h> +#include <net/tcp.h> + +static size_t __scratch_size; +static DEFINE_PER_CPU(void __rcu *, sigpool_scratch); + +struct sigpool_entry { + struct crypto_ahash *hash; + const char *alg; + struct kref kref; + uint16_t needs_key:1, + reserved:15; +}; + +#define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry)) +static struct sigpool_entry cpool[CPOOL_SIZE]; +static unsigned int cpool_populated; +static DEFINE_MUTEX(cpool_mutex); + +/* Slow-path */ +struct scratches_to_free { + struct rcu_head rcu; + unsigned int cnt; + void *scratches[]; +}; + +static void free_old_scratches(struct rcu_head *head) +{ + struct scratches_to_free *stf; + + stf = container_of(head, struct scratches_to_free, rcu); + while (stf->cnt--) + kfree(stf->scratches[stf->cnt]); + kfree(stf); +} + +/** + * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path + * @size: request size for the scratch/temp buffer + */ +static int sigpool_reserve_scratch(size_t size) +{ + struct scratches_to_free *stf; + size_t stf_sz = struct_size(stf, scratches, num_possible_cpus()); + int cpu, err = 0; + + lockdep_assert_held(&cpool_mutex); + if (__scratch_size >= size) + return 0; + + stf = kmalloc(stf_sz, GFP_KERNEL); + if (!stf) + return -ENOMEM; + stf->cnt = 0; + + size = max(size, __scratch_size); + cpus_read_lock(); + for_each_possible_cpu(cpu) { + void *scratch, *old_scratch; + + scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + if (!scratch) { + err = -ENOMEM; + break; + } + + old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + scratch, lockdep_is_held(&cpool_mutex)); + if (!cpu_online(cpu) || !old_scratch) { + kfree(old_scratch); + continue; + } + stf->scratches[stf->cnt++] = old_scratch; + } + cpus_read_unlock(); + if (!err) + __scratch_size = size; + + call_rcu(&stf->rcu, free_old_scratches); + return err; +} + +static void sigpool_scratch_free(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + NULL, lockdep_is_held(&cpool_mutex))); + __scratch_size = 0; +} + +static int __cpool_try_clone(struct crypto_ahash *hash) +{ + struct crypto_ahash *tmp; + + tmp = crypto_clone_ahash(hash); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + crypto_free_ahash(tmp); + return 0; +} + +static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg) +{ + struct crypto_ahash *cpu0_hash; + int ret; + + e->alg = kstrdup(alg, GFP_KERNEL); + if (!e->alg) + return -ENOMEM; + + cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(cpu0_hash)) { 
+ ret = PTR_ERR(cpu0_hash); + goto out_free_alg; + } + + e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY; + + ret = __cpool_try_clone(cpu0_hash); + if (ret) + goto out_free_cpu0_hash; + e->hash = cpu0_hash; + kref_init(&e->kref); + return 0; + +out_free_cpu0_hash: + crypto_free_ahash(cpu0_hash); +out_free_alg: + kfree(e->alg); + e->alg = NULL; + return ret; +} + +/** + * tcp_sigpool_alloc_ahash - allocates pool for ahash requests + * @alg: name of async hash algorithm + * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size + */ +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) +{ + int i, ret; + + /* slow-path */ + mutex_lock(&cpool_mutex); + ret = sigpool_reserve_scratch(scratch_size); + if (ret) + goto out; + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + continue; + if (strcmp(cpool[i].alg, alg)) + continue; + + if (kref_read(&cpool[i].kref) > 0) + kref_get(&cpool[i].kref); + else + kref_init(&cpool[i].kref); + ret = i; + goto out; + } + + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + break; + } + if (i >= CPOOL_SIZE) { + ret = -ENOSPC; + goto out; + } + + ret = __cpool_alloc_ahash(&cpool[i], alg); + if (!ret) { + ret = i; + if (i == cpool_populated) + cpool_populated++; + } +out: + mutex_unlock(&cpool_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash); + +static void __cpool_free_entry(struct sigpool_entry *e) +{ + crypto_free_ahash(e->hash); + kfree(e->alg); + memset(e, 0, sizeof(*e)); +} + +static void cpool_cleanup_work_cb(struct work_struct *work) +{ + bool free_scratch = true; + unsigned int i; + + mutex_lock(&cpool_mutex); + for (i = 0; i < cpool_populated; i++) { + if (kref_read(&cpool[i].kref) > 0) { + free_scratch = false; + continue; + } + if (!cpool[i].alg) + continue; + __cpool_free_entry(&cpool[i]); + } + if (free_scratch) + sigpool_scratch_free(); + mutex_unlock(&cpool_mutex); +} + +static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb); +static void cpool_schedule_cleanup(struct kref *kref) +{ + schedule_work(&cpool_cleanup_work); +} + +/** + * tcp_sigpool_release - decreases number of users for a pool. If it was + * the last user of the pool, releases any memory that was consumed. 
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_release(unsigned int id) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return; + + /* slow-path */ + kref_put(&cpool[id].kref, cpool_schedule_cleanup); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_release); + +/** + * tcp_sigpool_get - increases number of users (refcounter) for a pool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_get(unsigned int id) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return; + kref_get(&cpool[id].kref); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_get); + +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH) +{ + struct crypto_ahash *hash; + + rcu_read_lock_bh(); + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) { + rcu_read_unlock_bh(); + return -EINVAL; + } + + hash = crypto_clone_ahash(cpool[id].hash); + if (IS_ERR(hash)) { + rcu_read_unlock_bh(); + return PTR_ERR(hash); + } + + c->req = ahash_request_alloc(hash, GFP_ATOMIC); + if (!c->req) { + crypto_free_ahash(hash); + rcu_read_unlock_bh(); + return -ENOMEM; + } + ahash_request_set_callback(c->req, 0, NULL, NULL); + + /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is + * valid (allocated) until tcp_sigpool_end(). + */ + c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch)); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_start); + +void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH) +{ + struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req); + + rcu_read_unlock_bh(); + ahash_request_free(c->req); + crypto_free_ahash(hash); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_end); + +/** + * tcp_sigpool_algo - return algorithm of tcp_sigpool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @buf: buffer to return name of algorithm + * @buf_len: size of @buf + */ +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) +{ + if (WARN_ON_ONCE(id > cpool_populated || !cpool[id].alg)) + return -EINVAL; + + return strscpy(buf, cpool[id].alg, buf_len); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_algo); + +/** + * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool + * @hp: tcp_sigpool pointer + * @skb: buffer to add sign for + * @header_len: TCP header length for this segment + */ +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len) +{ + const unsigned int head_data_len = skb_headlen(skb) > header_len ? 
+ skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + const struct tcphdr *tp = tcp_hdr(skb); + struct ahash_request *req = hp->req; + struct sk_buff *frag_iter; + struct scatterlist sg; + unsigned int i; + + sg_init_table(&sg, 1); + + sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len); + ahash_request_set_crypt(req, &sg, NULL, head_data_len); + if (crypto_ahash_update(req)) + return 1; + + for (i = 0; i < shi->nr_frags; ++i) { + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); + struct page *page; + + page = skb_frag_page(f) + (offset >> PAGE_SHIFT); + sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset)); + ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); + if (crypto_ahash_update(req)) + return 1; + } + + skb_walk_frags(skb, frag_iter) + if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0)) + return 1; + + return 0; +} +EXPORT_SYMBOL(tcp_sigpool_hash_skb_data); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Per-CPU pool of crypto requests"); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 3036a45e8a1e..d283c59df4c1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o obj-y += mcast_snoop.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o endif diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5014aa663452..500f6ed3b8cf 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -140,6 +140,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -214,6 +215,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_isn = cookie; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c new file mode 100644 index 000000000000..3c09ac26206e --- /dev/null +++ b/net/ipv6/tcp_ao.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. 
+ * + * Authors: Dmitry Safonov <[email protected]> + * Francesco Ruggeri <[email protected]> + * Salam Noureddine <[email protected]> + */ +#include <crypto/hash.h> +#include <linux/tcp.h> + +#include <net/tcp.h> +#include <net/ipv6.h> + +static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp6_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = *saddr; + tmp->ctx.daddr = *daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, + &iph->daddr, th->source, + th->dest, sisn, disn); +} + +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send) +{ + if (send) + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr, + &sk->sk_v6_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v6_ao_calc_key(mkt, key, + &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid) +{ + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + struct in6_addr *addr = &addr_sk->sk_v6_daddr; + + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *addr = &ireq->ir_v6_rmt_addr; + int l3index; + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes) +{ + struct tcp6_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + /* 1. 
TCP pseudo-header (RFC2460) */ + bp->saddr = *saddr; + bp->daddr = *daddr; + bp->len = cpu_to_be32(nbytes); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey, + hash_offset, sne); +} + +int tcp_v6_parse_ao(struct sock *sk, int cmd, + sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen); +} + +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc27988512a6..937a02c2e534 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; -#else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int l3index) -{ - return NULL; -} #endif /* Helper returning the inet6 address from a given tcp socket. 
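The key derivation just above follows RFC 5926: tcp_v6_ao_calc_key() packs a single KDF input block — a one-byte counter, the ASCII label "TCP-AO", the connection context (addresses, ports, both ISNs) and the requested output length in bits — and runs it through the MKT's hash via tcp_ao_calc_traffic_key(). A minimal user-space sketch of that input layout (the struct and field names are invented for illustration, and the addresses, ports and ISNs are made up):

    /* Sketch only: mirrors the kdf_input_block that tcp_v6_ao_calc_key()
     * assembles for the RFC 5926 KDF.  Builds the block and hex-dumps it;
     * nothing here is kernel API.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    struct kdf_input {
    	uint8_t counter;		/* i = 1: a single block suffices here */
    	uint8_t label[6];		/* "TCP-AO", no NUL terminator */
    	struct in6_addr saddr;		/* context: source address */
    	struct in6_addr daddr;		/* context: destination address */
    	uint16_t sport, dport;		/* context: ports, network order */
    	uint32_t sisn, disn;		/* context: initial sequence numbers */
    	uint16_t outlen;		/* requested key length, in bits */
    } __attribute__((packed));

    int main(void)
    {
    	struct kdf_input blk = { .counter = 1 };

    	memcpy(blk.label, "TCP-AO", 6);
    	inet_pton(AF_INET6, "2001:db8::1", &blk.saddr);
    	inet_pton(AF_INET6, "2001:db8::2", &blk.daddr);
    	blk.sport = htons(179);		/* e.g. a BGP session */
    	blk.dport = htons(54321);
    	blk.sisn  = htonl(0x11111111);	/* local ISN */
    	blk.disn  = htonl(0x22222222);	/* peer ISN; 0 when signing a SYN */
    	blk.outlen = htons(20 * 8);	/* HMAC-SHA1 traffic key: 160 bits */

    	for (size_t i = 0; i < sizeof(blk); i++)
    		printf("%02x%c", ((uint8_t *)&blk)[i],
    		       (i % 16 == 15) ? '\n' : ' ');
    	putchar('\n');
    	return 0;
    }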
@@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; @@ -403,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -413,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); @@ -607,8 +607,10 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -621,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -661,17 +664,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { + addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; + + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET, + l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); + } - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, flags, + addr = (union tcp_md5_addr *)&sin6->sin6_addr; + + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? 
l3index : -1, false)) + return -EKEYREJECTED; + + return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) @@ -692,39 +711,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -734,10 +750,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, const struct sock *sk, const struct sk_buff *skb) { - const struct in6_addr *saddr, *daddr; - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + const struct in6_addr *saddr, *daddr; + struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; @@ -748,34 +763,31 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, daddr = &ip6h->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } - #endif static void tcp_v6_init_req(struct request_sock *req, @@ -834,6 +846,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, 
#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup_rsk, + .ao_calc_key = tcp_v6_ao_calc_key_rsk, + .ao_synack_hash = tcp_v6_ao_synack_hash, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -845,8 +862,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label, u32 priority, u32 txhash) + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -861,13 +878,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) + if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + if (tcp_key_is_ao(key)) + tot_len += tcp_ao_len(key->ao_key); #ifdef CONFIG_MPTCP - if (rst && !key) { + if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) @@ -908,14 +925,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_md5_hash_hdr((__u8 *)topt, key, + tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + *topt++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (key->rcv_next)); + + tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, + key->traffic_key, + (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, + (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, + t1, key->sne); + } +#endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; @@ -978,19 +1009,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 seq = 0, ack_seq = 0; - struct tcp_md5sig_key *key = NULL; -#ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; - unsigned char newhash[16]; - int genhash; - struct sock *sk1 = NULL; + const __u8 *md5_hash_location = NULL; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + bool allocated_traffic_key = false; #endif + const struct tcp_ao_hdr *aoh; + struct tcp_key key = {}; + u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; +#ifdef CONFIG_TCP_MD5SIG + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -1002,9 +1037,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) + return; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); +#endif +#ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; @@ -1012,8 +1051,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); - } else if (hash_location) { + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; + } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; @@ -1037,12 +1078,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? dif : 0; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); - if (!key) + key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); + if (!key.md5_key) goto out; + key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif @@ -1053,6 +1095,20 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); +#ifdef CONFIG_TCP_AO + if (aoh) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, + &key.ao_key, &key.traffic_key, + &allocated_traffic_key, + &key.rcv_next, &key.sne)) + goto out; + key.type = TCP_KEY_AO; + } +#endif + if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { @@ -1072,45 +1128,141 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, + ipv6_get_dsfield(ipv6h), label, priority, txhash, + &key); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: + if (allocated_traffic_key) + kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, + struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, + tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + if (static_branch_unlikely(&tcp_ao_needed.key)) { + + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto out; + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, + aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + /* rcv_next switches to our rcv_next */ + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.sne = READ_ONCE(ao_info->snd_sne); + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if 
(static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), - tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); +#ifdef CONFIG_TCP_AO +out: +#endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - int l3index; + struct tcp_key key = {}; + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { + const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; + const struct tcp_ao_hdr *aoh; + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. + */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; - l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, + l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 
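The bare `if (0) {` under `#else` in tcp_v6_timewait_ack() and tcp_v6_reqsk_send_ack() above is a preprocessor idiom rather than leftover dead code: it keeps the MD5 branch's `} else if (...)` syntactically valid for every combination of CONFIG_TCP_AO and CONFIG_TCP_MD5SIG. A standalone sketch of the same pattern (FEATURE_A and FEATURE_B are stand-ins for the two config options):

    /* Demo of the #ifdef chaining used above.  Any of the four define
     * combinations compiles to a valid if/else chain.
     */
    #include <stdio.h>

    #define FEATURE_A	/* comment out either define: it still compiles */
    #define FEATURE_B

    static int a_key_found(void) { return 0; }
    static int b_key_found(void) { return 1; }

    int main(void)
    {
    #ifdef FEATURE_A
    	if (a_key_found()) {
    		puts("sign with A");
    #else
    	if (0) {
    		/* dead branch: exists only so the chain below parses */
    #endif
    #ifdef FEATURE_B
    	} else if (b_key_found()) {
    		puts("sign with B");
    #endif
    	} else {
    		puts("send unsigned");
    	}
    	return 0;
    }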
@@ -1126,10 +1278,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, + &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } @@ -1234,7 +1387,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -1359,19 +1512,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - /* Copy over the MD5 key from the original socket */ - key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); - if (key) { - const union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; + if (!tcp_rsk_used_ao(req)) { + /* Copy over the MD5 key from the original socket */ + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); + if (key) { + const union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto out; + } } } #endif +#ifdef CONFIG_TCP_AO + /* Copy over tcp_ao_info if any */ + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) + goto out; /* OOM */ +#endif if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); @@ -1643,9 +1803,9 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1719,8 +1879,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -1902,11 +2062,19 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .mtu_reduced = tcp_v6_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v6_ao_hash_skb, + .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, +#endif }; #endif @@ -1928,11 +2096,19 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .mtu_reduced = tcp_v4_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops 
tcp_sock_ipv6_mapped_specific = {
+#ifdef CONFIG_TCP_MD5SIG
 	.md5_lookup	=	tcp_v4_md5_lookup,
 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
 	.md5_parse	=	tcp_v6_parse_md5_keys,
+#endif
+#ifdef CONFIG_TCP_AO
+	.ao_lookup	=	tcp_v6_ao_lookup,
+	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
+	.ao_parse	=	tcp_v6_parse_ao,
+	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
+#endif
 };
 #endif

@@ -1947,7 +2123,7 @@ static int tcp_v6_init_sock(struct sock *sk)

 	icsk->icsk_af_ops = &ipv6_specific;

-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
 #endif
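Zooming out, tcp_sigpool is what makes both the MD5 conversion and the new TCP-AO code possible: a reference-counted, per-algorithm pool of clonable ahash transforms plus a shared per-CPU scratch buffer. The sketch below strings the API together the way a kernel-side user would, based only on the signatures added in net/ipv4/tcp_sigpool.c above; the wrapper function and the "hmac(sha1)" algorithm choice are illustrative, not part of the series:

    /* Hedged sketch of the tcp_sigpool lifecycle; only tcp_sigpool_*()
     * and crypto_ahash_*() calls visible in this series are used.
     * @data must live in the linear kernel map (e.g. kmalloc'ed).
     */
    #include <crypto/hash.h>
    #include <linux/scatterlist.h>
    #include <net/tcp.h>

    static int example_hmac(const u8 *key, unsigned int keylen,
    			const u8 *data, unsigned int len, u8 *digest)
    {
    	struct tcp_sigpool hp;
    	struct scatterlist sg;
    	int pool_id, err;

    	/* Slow path: reserve (or refcount) the pool for this algorithm,
    	 * together with a scratch buffer of at least the given size.
    	 */
    	pool_id = tcp_sigpool_alloc_ahash("hmac(sha1)", HASH_MAX_DIGESTSIZE);
    	if (pool_id < 0)
    		return pool_id;

    	/* Fast path: pin the pool on this CPU (BH disabled from here). */
    	err = tcp_sigpool_start(pool_id, &hp);
    	if (err)
    		goto out_release;

    	err = crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), key, keylen);
    	if (err)
    		goto out_end;
    	err = crypto_ahash_init(hp.req);
    	if (err)
    		goto out_end;

    	sg_init_one(&sg, data, len);
    	ahash_request_set_crypt(hp.req, &sg, NULL, len);
    	err = crypto_ahash_update(hp.req);
    	if (err)
    		goto out_end;

    	ahash_request_set_crypt(hp.req, NULL, digest, 0);
    	err = crypto_ahash_final(hp.req);
    out_end:
    	tcp_sigpool_end(&hp);		/* re-enables BH, frees the clone */
    out_release:
    	tcp_sigpool_release(pool_id);	/* drop the reference taken above */
    	return err;
    }

In the series itself the two halves are split: tcp_sigpool_alloc_ahash() runs once per key at setsockopt time and its reference is held for the key's lifetime, while only the start/end pair runs in the packet path — which is why tcp_sigpool_start() pins the pool with BH disabled and allocates the request with GFP_ATOMIC.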