1
0
mirror of https://github.com/openbsd/src.git synced 2024-12-22 16:42:56 -08:00

Fix path MTU discovery for TCP LRO/TSO when forwarding.

When doing LRO (Large Receive Offload), the drivers, currently ix(4)
and lo(4) only, record an upper bound of the size of the original
packets in ph_mss.  When sending, either stack or hardware must
chop the packets with TSO (TCP Segmentation Offload) to that size.
That means we have to call tcp_if_output_tso() before ifp->if_output().
Put that logic into if_output_tso() to avoid code duplication.  As
TCP packets on the wire do not get larger that way, path MTU discovery
should still work.

tested by and OK jan@
This commit is contained in:
bluhm 2023-07-07 08:05:02 +00:00
parent 0b88608e55
commit 5ebaba9d29
6 changed files with 75 additions and 58 deletions

View File

@ -1,4 +1,4 @@
/* $OpenBSD: if.c,v 1.704 2023/07/06 04:55:04 dlg Exp $ */
/* $OpenBSD: if.c,v 1.705 2023/07/07 08:05:02 bluhm Exp $ */
/* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
/*
@ -885,6 +885,57 @@ if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
return error;
}
/*
 * Hand a packet to the interface output routine, giving TCP
 * segmentation offload the first chance to deal with it.
 *
 * ifp	outgoing interface
 * mp	in/out pointer to the packet; set to NULL once the packet has
 *	been passed to ifp->if_output() here
 * dst	destination address; its sa_family selects the IPv4 or IPv6
 *	TSO capability flag and the checksum routines
 * rt	route entry passed through to the output routines
 * mtu	size limit used for the TSO attempt and the direct send
 *
 * Returns the error from tcp_if_output_tso() or ifp->if_output().
 * A return of 0 with *mp still non-NULL means the packet exceeds
 * mtu and the caller has to fragment or drop it.
 */
int
if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
    struct rtentry *rt, u_int mtu)
{
	struct mbuf *m;
	uint32_t tso_cap;
	int rv;

	/* Select the TSO capability bit matching the address family. */
	switch (dst->sa_family) {
	case AF_INET:
		tso_cap = IFCAP_TSOv4;
		break;
#ifdef INET6
	case AF_INET6:
		tso_cap = IFCAP_TSOv6;
		break;
#endif
	default:
		/*
		 * NOTE(review): unhandled_af() presumably does not
		 * return, otherwise tso_cap would be used
		 * uninitialized below -- confirm.
		 */
		unhandled_af(dst->sa_family);
	}

	/*
	 * TSO goes first.  On the forwarding path LRO may have stored
	 * a maximum segment size in the mbuf header, so a TCP segment
	 * is chopped even when it would fit the interface MTU; this
	 * preserves the maximum path MTU.
	 */
	rv = tcp_if_output_tso(ifp, mp, dst, rt, tso_cap, mtu);
	if (rv != 0)
		return rv;

	m = *mp;
	if (m == NULL)
		return 0;

	/* Too big to send as is: leave *mp for the caller to handle. */
	if (m->m_pkthdr.len > mtu)
		return 0;

	/* Small enough: finish checksums and send directly. */
	switch (dst->sa_family) {
	case AF_INET:
		in_hdr_cksum_out(m, ifp);
		in_proto_cksum_out(m, ifp);
		break;
#ifdef INET6
	case AF_INET6:
		in6_proto_cksum_out(m, ifp);
		break;
#endif
	}

	rv = ifp->if_output(ifp, m, dst, rt);
	*mp = NULL;
	return rv;
}
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
struct sockaddr *dst, struct rtentry *rt)

View File

@ -1,4 +1,4 @@
/* $OpenBSD: if_var.h,v 1.128 2023/06/28 11:49:49 kn Exp $ */
/* $OpenBSD: if_var.h,v 1.129 2023/07/07 08:05:02 bluhm Exp $ */
/* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */
/*
@ -329,6 +329,8 @@ int if_output_ml(struct ifnet *, struct mbuf_list *,
struct sockaddr *, struct rtentry *);
int if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *,
struct sockaddr *, struct rtentry *);
int if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *,
struct rtentry *, u_int);
int if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
void if_rtrequest_dummy(struct ifnet *, int, struct rtentry *);
void p2p_rtrequest(struct ifnet *, int, struct rtentry *);

View File

@ -1,4 +1,4 @@
/* $OpenBSD: pf.c,v 1.1182 2023/07/06 04:55:05 dlg Exp $ */
/* $OpenBSD: pf.c,v 1.1183 2023/07/07 08:05:02 bluhm Exp $ */
/*
* Copyright (c) 2001 Daniel Hartmeier
@ -6610,15 +6610,8 @@ pf_route(struct pf_pdesc *pd, struct pf_state *st)
ip = mtod(m0, struct ip *);
}
if (ntohs(ip->ip_len) <= ifp->if_mtu) {
in_hdr_cksum_out(m0, ifp);
in_proto_cksum_out(m0, ifp);
ifp->if_output(ifp, m0, sintosa(dst), rt);
goto done;
}
if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt,
IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL)
if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) ||
m0 == NULL)
goto done;
/*
@ -6745,14 +6738,8 @@ pf_route6(struct pf_pdesc *pd, struct pf_state *st)
goto done;
}
if (m0->m_pkthdr.len <= ifp->if_mtu) {
in6_proto_cksum_out(m0, ifp);
ifp->if_output(ifp, m0, sin6tosa(dst), rt);
goto done;
}
if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt,
IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL)
if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) ||
m0 == NULL)
goto done;
ip6stat_inc(ip6s_cantfrag);

View File

@ -1,4 +1,4 @@
/* $OpenBSD: ip_output.c,v 1.389 2023/07/04 10:48:19 bluhm Exp $ */
/* $OpenBSD: ip_output.c,v 1.390 2023/07/07 08:05:02 bluhm Exp $ */
/* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */
/*
@ -451,17 +451,9 @@ sendit:
#endif
/*
* If small enough for interface, can just send directly.
* If TSO or small enough for interface, can just send directly.
*/
if (ntohs(ip->ip_len) <= mtu) {
in_hdr_cksum_out(m, ifp);
in_proto_cksum_out(m, ifp);
error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
goto done;
}
error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt,
IFCAP_TSOv4, mtu);
error = if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, mtu);
if (error || m == NULL)
goto done;

View File

@ -1,4 +1,4 @@
/* $OpenBSD: ip6_forward.c,v 1.111 2023/06/16 19:18:56 bluhm Exp $ */
/* $OpenBSD: ip6_forward.c,v 1.112 2023/07/07 08:05:02 bluhm Exp $ */
/* $KAME: ip6_forward.c,v 1.75 2001/06/29 12:42:13 jinmei Exp $ */
/*
@ -319,8 +319,7 @@ reroute:
}
#endif
error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6,
ifp->if_mtu);
error = if_output_tso(ifp, &m, sin6tosa(sin6), rt, ifp->if_mtu);
if (error)
ip6stat_inc(ip6s_cantforward);
else if (m == NULL)
@ -328,17 +327,6 @@ reroute:
if (error || m == NULL)
goto senderr;
/* Check the size after pf_test to give pf a chance to refragment. */
if (m->m_pkthdr.len <= ifp->if_mtu) {
in6_proto_cksum_out(m, ifp);
error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
if (error)
ip6stat_inc(ip6s_cantforward);
else
ip6stat_inc(ip6s_forward);
goto senderr;
}
if (mcopy != NULL)
icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
m_freem(m);

View File

@ -1,4 +1,4 @@
/* $OpenBSD: ip6_output.c,v 1.278 2023/06/13 19:34:12 bluhm Exp $ */
/* $OpenBSD: ip6_output.c,v 1.279 2023/07/07 08:05:02 bluhm Exp $ */
/* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
/*
@ -677,7 +677,8 @@ reroute:
* 2-a: send as is if tlen <= interface mtu
* 2-b: error if tlen > interface mtu
*/
tlen = m->m_pkthdr.len;
tlen = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
m->m_pkthdr.ph_mss : m->m_pkthdr.len;
if (ISSET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT)) {
CLR(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
@ -686,9 +687,8 @@ reroute:
dontfrag = 1;
else
dontfrag = 0;
if (dontfrag && /* case 2-b */
(ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
m->m_pkthdr.ph_mss : tlen) > ifp->if_mtu) {
if (dontfrag && tlen > ifp->if_mtu) { /* case 2-b */
#ifdef IPSEC
if (ip_mtudisc)
ipsec_adjust_mtu(m, mtu);
@ -701,16 +701,13 @@ reroute:
* transmit packet without fragmentation
*/
if (dontfrag || tlen <= mtu) { /* case 1-a and 2-a */
in6_proto_cksum_out(m, ifp);
error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt);
goto done;
error = if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
ifp->if_mtu);
if (error || m == NULL)
goto done;
goto bad; /* should not happen */
}
error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
IFCAP_TSOv6, mtu);
if (error || m == NULL)
goto done;
/*
* try to fragment the packet. case 1-b
*/