diff -urN linux-2.0.32/include/linux/socket.h linux/include/linux/socket.h --- linux-2.0.32/include/linux/socket.h Tue Aug 12 22:00:14 1997 +++ linux/include/linux/socket.h Sun Mar 29 13:15:37 1998 @@ -91,6 +91,8 @@ /*#define MSG_CTRUNC 8 - We need to support this for BSD oddments */ #define MSG_PROXY 16 /* Supply or ask second address. */ +#define MSG_EOF 32 /* indicate that TTCP is to be used for transfer */ + /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 #define SOL_IPX 256 diff -urN linux-2.0.32/include/linux/tcp.h linux/include/linux/tcp.h --- linux-2.0.32/include/linux/tcp.h Fri Nov 14 00:13:08 1997 +++ linux/include/linux/tcp.h Fri Apr 17 15:43:00 1998 @@ -56,16 +56,23 @@ enum { TCP_ESTABLISHED = 1, + TCP_ESTABLISHED_TTCP, TCP_SYN_SENT, + TCP_SYN_SENT_TTCP, TCP_SYN_RECV, + TCP_SYN_RECV_TTCP, TCP_FIN_WAIT1, + TCP_FIN_WAIT1_TTCP, TCP_FIN_WAIT2, TCP_TIME_WAIT, TCP_CLOSE, TCP_CLOSE_WAIT, + TCP_CLOSE_WAIT_TTCP, TCP_LAST_ACK, + TCP_LAST_ACK_TTCP, TCP_LISTEN, - TCP_CLOSING /* now a valid state */ + TCP_CLOSING, /* now a valid state */ + TCP_CLOSING_TTCP }; #endif /* _LINUX_TCP_H */ diff -urN linux-2.0.32/include/net/route.h linux/include/net/route.h --- linux-2.0.32/include/net/route.h Fri Nov 14 00:14:11 1997 +++ linux/include/net/route.h Fri Apr 17 15:45:26 1998 @@ -79,6 +79,9 @@ unsigned short rt_mtu; unsigned short rt_irtt; unsigned char rt_tos; + /* extra variables for TTCP */ + unsigned long rt_tao_cc; /* latest CC received from host */ + unsigned long rt_tao_ccsent; /* latest CC sent to host */ }; extern void ip_rt_flush(struct device *dev); diff -urN linux-2.0.32/include/net/snmp.h linux/include/net/snmp.h --- linux-2.0.32/include/net/snmp.h Tue Jun 6 08:22:18 1995 +++ linux/include/net/snmp.h Sun Mar 29 13:15:37 1998 @@ -93,6 +93,12 @@ unsigned long TcpInSegs; unsigned long TcpOutSegs; unsigned long TcpRetransSegs; + + /* TTCP statistics */ + unsigned long TcpTaoOk; + unsigned long TcpTaoFail; + unsigned long 
TcpBadCCecho; + unsigned long TcpCCdrop; }; struct udp_mib diff -urN linux-2.0.32/include/net/sock.h linux/include/net/sock.h --- linux-2.0.32/include/net/sock.h Fri Nov 14 00:13:10 1997 +++ linux/include/net/sock.h Fri Apr 17 15:43:00 1998 @@ -332,6 +332,24 @@ struct socket *socket; + /* extra variables for TTCP */ + unsigned char PRU_SEND_EOF:1, /* if MSG_EOF was specified */ + TF_REQ_CC:1, /* whether to send CC options */ + TF_SENDSYN:1, /* whether to set SYN flag */ + TF_SENDFIN:1, /* whether to set FIN flag */ + TF_RCVD_CC:1, /* if any CC options were received */ + TOF_CC:1, /* if CC option was received */ + TOF_CCNEW:1, /* if CCnew option was received */ + TOF_CCECHO:1; /* if CCecho option was received */ + unsigned char tao_test:4, /* did the TAO test pass */ + sent_mss:3, /* has MSS options been sent */ + multi_pkts:1; /* are multiple packets sent by client */ + unsigned long ccecho_rcv; /* ccecho value received */ + unsigned long cc_send; /* CC value to send on this connection */ + unsigned long cc_recv; /* CC value expected to be received */ + unsigned long t_duration; /* duration of connection */ + __u32 ttcp_datalen; /* NOTE(review): tcp_accept() keeps the queued SYN skb when this is nonzero -- presumably the SYN's data length; confirm */ + /* * Callbacks */ diff -urN linux-2.0.32/include/net/tcp.h linux/include/net/tcp.h --- linux-2.0.32/include/net/tcp.h Fri Nov 14 00:14:12 1997 +++ linux/include/net/tcp.h Fri Apr 17 15:45:26 1998 @@ -21,6 +21,14 @@ #include #include +#ifdef CONFIG_RFC1644 +/* + * This macro is used to increment the global CC value for TTCP; + * the value is not allowed to be zero, so a wrap-around skips over 0 + */ +#define CC_INC(c) (++(c) == 0 ? ++(c) : (c)) +#endif + /* This is for all connections with a full identity, no wildcards. 
*/ #define TCP_HTABLE_SIZE 256 @@ -120,9 +128,9 @@ /* * 40 is maximal IP options size - * 4 is TCP option size (MSS) + * 20 is TCP option size (MSS, CC, CCECHO) */ -#define MAX_SYN_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + 4 + MAX_HEADER + 15) +#define MAX_SYN_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + 20 + MAX_HEADER + 15) #define MAX_FIN_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) #define MAX_ACK_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) #define MAX_RESET_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) @@ -185,6 +193,23 @@ #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ +/* + * Options for the implementation of TTCP + */ +#define TCPOPT_CC 11 +#define TCPOPT_CCNEW 12 +#define TCPOPT_CCECHO 13 + +/* + * TCP option lengths + */ + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_CCOPT 6 + /* * The next routines deal with comparing 32 bit unsigned ints @@ -239,6 +264,10 @@ extern void tcp_v4_unhash(struct sock *sk); +extern inline void ttcp_insert_skb(struct sk_buff *, struct sk_buff_head *); +extern int tcp_calc_cc_options(struct sock *, struct tcphdr *); +extern void tcp_build_cc_options(struct sock *, unsigned char *, int); + extern void tcp_read_wakeup(struct sock *); extern void tcp_write_xmit(struct sock *); extern void tcp_time_wait(struct sock *); @@ -315,12 +344,23 @@ * problem. Thanks to Stephen A. 
Wood -FvK */ +#ifdef CONFIG_RFC1644 +extern __inline const int tcp_connected(const int state) +{ + return (state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || + state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || + state == TCP_SYN_RECV || state == TCP_ESTABLISHED_TTCP || + state == TCP_CLOSE_WAIT_TTCP || state == TCP_FIN_WAIT1_TTCP || + state == TCP_LAST_ACK_TTCP || state == TCP_SYN_RECV_TTCP); +} +#else extern __inline const int tcp_connected(const int state) { - return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || - state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || - state == TCP_SYN_RECV); + return (state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || + state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || + state == TCP_SYN_RECV); } +#endif /* * Calculate(/check) TCP checksum @@ -333,11 +373,14 @@ #undef STATE_TRACE -#ifdef STATE_TRACE +#if defined(STATE_TRACE) || defined(CONFIG_RFC1644_DEBUG) +/* extra states added to accommodate TTCP; indexed by state value, so the order must match the TCP_* enum */ static char *statename[]={ - "Unused","Established","Syn Sent","Syn Recv", - "Fin Wait 1","Fin Wait 2","Time Wait", "Close", - "Close Wait","Last ACK","Listen","Closing" + "Unused","Established", "Established*", "Syn Sent", + "Syn Sent*", "Syn Recv", "Syn Recv*", "Fin Wait 1", + "Fin Wait 1*", "Fin Wait 2","Time Wait", "Close", + "Close Wait","Close Wait*","Last ACK","Last ACK*", + "Listen","Closing", "Closing*" }; #endif @@ -351,9 +394,15 @@ if(sk->debug) printk("TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); #endif +#ifdef CONFIG_RFC1644_DEBUG + printk("TTCP State %s -> %s\n", statename[oldstate],statename[state]); +#endif switch (state) { case TCP_ESTABLISHED: +#ifdef CONFIG_RFC1644 + case TCP_ESTABLISHED_TTCP: +#endif if (oldstate != TCP_ESTABLISHED) { tcp_statistics.TcpCurrEstab++; } @@ -366,7 +415,11 @@ reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME)); /* fall through */ default: +#ifdef CONFIG_RFC1644 + if (oldstate==TCP_ESTABLISHED || 
oldstate==TCP_ESTABLISHED_TTCP) +#else if (oldstate==TCP_ESTABLISHED) +#endif tcp_statistics.TcpCurrEstab--; } } diff -urN linux-2.0.32/net/ipv4/Config.in linux/net/ipv4/Config.in --- linux-2.0.32/net/ipv4/Config.in Tue Aug 12 18:30:25 1997 +++ linux/net/ipv4/Config.in Fri Apr 3 17:11:02 1998 @@ -48,3 +48,9 @@ #bool 'IP: Disable NAGLE algorithm (normally enabled)' CONFIG_TCP_NAGLE_OFF bool 'IP: Drop source routed frames' CONFIG_IP_NOSR bool 'IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB_LARGE +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool 'TCP: Enable RFC1644 support (T/TCP protocol) (EXPERIMENTAL)' CONFIG_RFC1644 + if [ "$CONFIG_RFC1644" = "y" ]; then + bool 'TCP: Enable RFC1644 debugging' CONFIG_RFC1644_DEBUG + fi +fi diff -urN linux-2.0.32/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c --- linux-2.0.32/net/ipv4/af_inet.c Fri Aug 15 19:23:23 1997 +++ linux/net/ipv4/af_inet.c Sun Apr 5 18:09:43 1998 @@ -111,6 +111,7 @@ #define min(a,b) ((a)<(b)?(a):(b)) +extern int tcp_do_rfc1644; extern struct proto packet_prot; extern int raw_get_info(char *, char **, off_t, int, int); extern int snmp_get_info(char *, char **, off_t, int, int); @@ -448,6 +449,26 @@ } sk->socket = sock; + +#ifdef CONFIG_RFC1644 + /* initialise the TTCP parts */ + sk->PRU_SEND_EOF = 0; + sk->TF_REQ_CC = tcp_do_rfc1644; + sk->TF_SENDSYN = 0; + sk->TF_SENDFIN = 0; + sk->TF_RCVD_CC = 0; + sk->TOF_CC = 0; + sk->TOF_CCNEW = 0; + sk->TOF_CCECHO = 0; + sk->tao_test = 0; + sk->sent_mss = 0; + sk->ccecho_rcv = 0; + sk->cc_send = 0; + sk->cc_recv = 0; + sk->t_duration = 0; + sk->ttcp_datalen = 0; +#endif + #ifdef CONFIG_TCP_NAGLE_OFF sk->nonagle = 1; #endif @@ -800,7 +821,12 @@ } sti(); +#ifdef CONFIG_RFC1644 + if ((sk2->state != TCP_ESTABLISHED && sk2->state != TCP_ESTABLISHED_TTCP) && /* in neither established state */ + sk2->err > 0) { +#else if (sk2->state != TCP_ESTABLISHED && sk2->err > 0) { +#endif err = sock_error(sk2); destroy_sock(sk2); newsock->data = NULL; @@ -854,7 +880,6 @@ int flags, int *addr_len ) 
{ struct sock *sk = (struct sock *) sock->data; - if (sk->prot->recvmsg == NULL) return(-EOPNOTSUPP); if(sk->err) diff -urN linux-2.0.32/net/ipv4/proc.c linux/net/ipv4/proc.c --- linux-2.0.32/net/ipv4/proc.c Tue Apr 8 15:47:47 1997 +++ linux/net/ipv4/proc.c Fri Apr 3 16:22:32 1998 @@ -232,6 +232,19 @@ icmp_statistics.IcmpOutTimestamps, icmp_statistics.IcmpOutTimestampReps, icmp_statistics.IcmpOutAddrMasks, icmp_statistics.IcmpOutAddrMaskReps); +#ifdef CONFIG_RFC1644 + len += sprintf (buffer + len, + "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs TaoOk TaoFail BadCCecho CCdrop\n" + "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + tcp_statistics.TcpRtoAlgorithm, tcp_statistics.TcpRtoMin, + tcp_statistics.TcpRtoMax, tcp_statistics.TcpMaxConn, + tcp_statistics.TcpActiveOpens, tcp_statistics.TcpPassiveOpens, + tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets, + tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs, + tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs, + tcp_statistics.TcpTaoOk, tcp_statistics.TcpTaoFail, + tcp_statistics.TcpBadCCecho, tcp_statistics.TcpCCdrop); +#else len += sprintf (buffer + len, "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs\n" "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", @@ -241,6 +254,7 @@ tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets, tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs, tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs); +#endif len += sprintf (buffer + len, "Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n", diff -urN linux-2.0.32/net/ipv4/route.c linux/net/ipv4/route.c --- linux-2.0.32/net/ipv4/route.c Wed Sep 17 19:00:48 1997 +++ linux/net/ipv4/route.c Fri Apr 3 16:20:50 1998 @@ -844,7 +844,11 @@ if (offset<128) { +#ifdef CONFIG_RFC1644 + 
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\tCCrecv\tCCsent"); +#else sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP"); +#endif len = 128; } @@ -868,10 +872,18 @@ continue; } +#ifdef CONFIG_RFC1644 + sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d\t%d\t%d", + r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, + r->rt_flags, r->rt_refcnt, r->rt_use, 0, + (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0, + (int)r->rt_tao_cc, (int)r->rt_tao_ccsent); +#else sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d", r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, r->rt_flags, r->rt_refcnt, r->rt_use, 0, (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0); +#endif sprintf(buffer+len,"%-127s\n",temp); len += 128; if (pos >= offset+length) @@ -1492,6 +1504,11 @@ rth->rt_irtt = fi->fib_irtt; rth->rt_tos = f->fib_tos; rth->rt_flags = fi->fib_flags | RTF_HOST; +#ifdef CONFIG_RFC1644 + rth->rt_tao_cc = 0; + rth->rt_tao_ccsent = 0; +#endif + if (local) rth->rt_flags |= RTF_LOCAL; diff -urN linux-2.0.32/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- linux-2.0.32/net/ipv4/tcp.c Sun Aug 17 20:35:10 1997 +++ linux/net/ipv4/tcp.c Tue Apr 7 10:39:36 1998 @@ -441,6 +441,13 @@ unsigned long seq_offset; struct tcp_mib tcp_statistics; +#ifdef CONFIG_RFC1644 +/* global declarations for TTCP */ +int tcp_do_rfc1644 = 1; +unsigned long tcp_ccgen = 0; +static int ttcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len); +#endif + /* This is for sockets with full identity only. 
Sockets here will always * be without wildcards and will have the following invariant: * TCP_ESTABLISHED <= sk->state < TCP_CLOSE @@ -664,11 +671,17 @@ static struct sk_buff *tcp_find_established(struct sock *s) { struct sk_buff *p=skb_peek(&s->receive_queue); + if(p==NULL) return NULL; do { +#ifdef CONFIG_RFC1644 + if(p->sk->state == TCP_ESTABLISHED || p->sk->state == TCP_ESTABLISHED_TTCP + || p->sk->state >= TCP_FIN_WAIT1) +#else if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1) +#endif return p; p=p->next; } @@ -795,10 +808,21 @@ if(code<=NR_ICMP_UNREACH) { +#ifdef CONFIG_RFC1644 + if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || + sk->state == TCP_SYN_SENT_TTCP || sk->state == TCP_SYN_RECV || + sk->state == TCP_SYN_RECV_TTCP) +#else if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) +#endif { sk->err = icmp_err_convert[code].errno; +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV || + sk->state == TCP_SYN_SENT_TTCP || sk->state == TCP_SYN_RECV_TTCP) +#else if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) +#endif { tcp_statistics.TcpAttemptFails++; tcp_set_state(sk,TCP_CLOSE); @@ -924,7 +948,12 @@ case SEL_IN: if (sk->err) return 1; +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV || + sk->state == TCP_SYN_SENT_TTCP || sk->state == TCP_SYN_RECV_TTCP) +#else if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) +#endif break; if (sk->shutdown & RCV_SHUTDOWN) @@ -944,7 +973,12 @@ return 1; if (sk->shutdown & SEND_SHUTDOWN) return 0; +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV || + sk->state == TCP_SYN_SENT_TTCP || sk->state == TCP_SYN_RECV_TTCP) +#else if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) +#endif break; if (sk->wmem_alloc*2 > sk->sndbuf) break; @@ -1022,14 +1056,14 @@ void tcp_send_check(struct tcphdr *th, unsigned long saddr, unsigned long 
daddr, int len, struct sk_buff *skb) { -#ifdef DEBUG_TCP_CHECK +#if defined(DEBUG_TCP_CHECK) || defined(CONFIG_RFC1644_DEBUG) u16 check; #endif th->check = 0; th->check = tcp_check(th, len, saddr, daddr, csum_partial((char *)th,sizeof(*th),skb->csum)); -#ifdef DEBUG_TCP_CHECK +#if defined(DEBUG_TCP_CHECK) || defined(CONFIG_RFC1644_DEBUG) check = th->check; th->check = 0; th->check = tcp_check(th, len, saddr, daddr, @@ -1068,7 +1102,13 @@ { release_sock(sk); cli(); +#ifdef CONFIG_RFC1644 + if ((sk->state != TCP_ESTABLISHED && sk->state != TCP_ESTABLISHED_TTCP) && + (sk->state != TCP_CLOSE_WAIT && sk->state != TCP_CLOSE_WAIT_TTCP) && + sk->err == 0) /* sleep only while unconnected (plain or T/TCP) and error-free */ +#else if (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT && sk->err == 0) +#endif { interruptible_sleep_on(sk->sleep); } @@ -1122,30 +1162,45 @@ { int copied = 0; struct device *dev = NULL; +#ifdef CONFIG_RFC1644 + unsigned char *ptr = NULL; + int ttcp_check = 0; +#endif /* * Wait for a connection to finish. */ - while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) +#ifdef CONFIG_RFC1644 + if (!(sk->PRU_SEND_EOF)) { - if (sk->err) - return sock_error(sk); - - if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) + /* + * If the MSG_EOF flag is set, we don't need to be in a + * connected state to start sending, much like UDP + */ +#endif + while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) { - if (sk->keepopen) - send_sig(SIGPIPE, current, 0); - return -EPIPE; + if (sk->err) + return sock_error(sk); + + if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) + { + if (sk->keepopen) + send_sig(SIGPIPE, current, 0); + return -EPIPE; + } + + if (nonblock) + return -EAGAIN; + + if (current->signal & ~current->blocked) + return -ERESTARTSYS; + + wait_for_tcp_connect(sk); } - - if (nonblock) - return -EAGAIN; - - if (current->signal & ~current->blocked) - return -ERESTARTSYS; - - wait_for_tcp_connect(sk); +#ifdef CONFIG_RFC1644 } +#endif /* * Ok commence sending @@ -1304,7 
+1359,6 @@ printk(KERN_CRIT "TCP: **bug**: copy=%d, sk->mss=%d\n", copy, sk->mss); return -EFAULT; } - /* * We should really check the window here also. */ @@ -1381,12 +1435,175 @@ return(tmp); } +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_CLOSE) { + /* + * This will be the first segment of the TAO test + * the SYN is set and ACK is switched off. + */ + sk->TF_SENDSYN = 1; + skb->h.th->ack = 0; + + tcp_set_state(sk, TCP_SYN_SENT); + /* Socket identity change complete, no longer + * in TCP_CLOSE, so rehash. + */ + tcp_v4_rehash(sk); + } + + if (sk->TF_SENDSYN) { + sk->TF_SENDSYN = 0; + skb->h.th->syn = 1; + sk->write_seq += 1; + } + + /* + * This section looks after the setting of the SYN and FIN + * for segments that are greater than the MTU + */ + if (sk->PRU_SEND_EOF) { + if (len < sk->mtu) { + /* + * This is the last segment of data in this transaction + * we are sending, therefore we can switch on the FIN + */ + if(!sk->multi_pkts) + sk->TF_SENDFIN = 1; + else + sk->multi_pkts = 0; + + skb->h.th->fin = 1; + sk->write_seq += 1; + + if(sk->state == TCP_SYN_SENT) { + skb->h.th->ack = 0; + tcp_set_state(sk, TCP_SYN_SENT_TTCP); + tcp_v4_rehash(sk); + } + else if(sk->state == TCP_CLOSE_WAIT_TTCP) { + tcp_set_state(sk, TCP_LAST_ACK_TTCP); + tcp_v4_rehash(sk); + if(skb->h.th->syn) + skb->h.th->ack = 1; + } else if(sk->state == TCP_CLOSE_WAIT) { + tcp_set_state(sk, TCP_LAST_ACK); + tcp_v4_rehash(sk); + if(skb->h.th->syn) + skb->h.th->ack = 1; + } + } else { + /* + * If sending a SYN in one of the server states, + * we need to switch on the ACK bit. 
+ */ + if (skb->h.th->syn && ((sk->state == TCP_LAST_ACK_TTCP) || + (sk->state == TCP_CLOSE_WAIT_TTCP) || + (sk->state == TCP_ESTABLISHED_TTCP))) { + skb->h.th->ack = 1; + skb->csum = csum_partial((void *)(skb->h.th+1), + skb->h.th->doff*4-sizeof(*skb->h.th),0); + } + else + skb->h.th->ack = 0; + + sk->window_seq += copy + 2; + /* + * If we are sending multiple packets, we need to check + * if the host can handle T/TCP, else we must wait for + * a SYN, ACK before proceeding. If it does understand + * T/TCP, open up the congestion window to allow multiple + * packets to be sent, else the above code will look after + * sending a FIN. + */ + if(sk->ip_route_cache->rt_tao_cc > 0) + sk->cong_window++; + else + sk->multi_pkts = 1; + } + } + + /* + * We need to test that TF_REQ_CC is switched on, + * but also that cc_send has been incremented to allow + * for the case where this isn't the first segment, and + * the peer doesn't understand TTCP + */ + if (sk->TF_REQ_CC && (sk->cc_send != 0)) + { + int cc_opts = tcp_calc_cc_options(sk,skb->h.th); + + switch(cc_opts) { + case 0: + break; + case 1: + case 2: + ttcp_check = 1; + if (!sk->sent_mss) { + /* + * if we haven't sent an MSS yet, do so now + */ + ptr = skb_put(skb,12); + skb->h.th->doff = (sizeof(struct tcphdr) + 12)/4; + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = (sk->mtu) >> 8; + ptr[3] = (sk->mtu) & 0xff; + + tcp_build_cc_options(sk,(unsigned char *)(ptr+4),cc_opts); + sk->sent_mss = 1; + sk->mss += TCPOLEN_MSS; + skb->csum = 0; + skb->csum = csum_partial(ptr, 12, 0); + } else { + ptr = skb_put(skb,8); + skb->h.th->doff = (sizeof(struct tcphdr) + 8)/4; + tcp_build_cc_options(sk,(unsigned char *)(ptr),cc_opts); + skb->csum = 0; + skb->csum = csum_partial(ptr, 8, 0); + } + + break; + case 3: + ttcp_check = 1; + if (!sk->sent_mss) { + ptr = skb_put(skb,20); + skb->h.th->doff += 5; + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = (sk->mtu) >> 8; + ptr[3] = (sk->mtu) & 0xff; + + tcp_build_cc_options(sk,(unsigned char 
*)(ptr+4),cc_opts); + skb->csum = csum_partial(ptr, 20, skb->csum); + sk->sent_mss = 1; + sk->mss += TCPOLEN_MSS + TCPOLEN_CCOPT + 2; + } else { + ptr = skb_put(skb,16); + skb->h.th->doff += 4; + tcp_build_cc_options(sk,(unsigned char *)(ptr),cc_opts); + skb->csum = csum_partial(ptr, 16, skb->csum); + } + + break; + } + } +#endif + if (flags & MSG_OOB) { skb->h.th->urg = 1; skb->h.th->urg_ptr = ntohs(copy); } +#ifdef CONFIG_RFC1644 + /* + * We added in extra options, recalculate the checksum + */ + if (ttcp_check == 1) + skb->csum = csum_partial_copy_fromuser(from, + skb_put(skb,copy), copy, skb->csum); + else +#endif skb->csum = csum_partial_copy_fromuser(from, skb_put(skb,copy), copy, 0); @@ -1396,12 +1613,17 @@ seglen -= copy; skb->free = 0; sk->write_seq += copy; - +#ifdef CONFIG_RFC1644 + /* we don't want it delayed if using MSG_EOF */ + if(delay && sk->PRU_SEND_EOF) + delay = 0; +#endif if (delay) { tcp_enqueue_partial(skb, sk); continue; } + tcp_send_skb(sk, skb); } } @@ -1410,6 +1632,113 @@ return copied; } +int tcp_calc_cc_options(struct sock *sk, struct tcphdr *th) +{ + int send_cc = 0; + + /* need to check the conditions for sending CC options */ + if (th->ack && !th->syn) + { + /* + * This is a normal ACK (no SYN) + * send CC option + */ + send_cc = 1; + } + else if (!th->ack && !th->syn) + { + /* + * We can only get here in T/TCP's SYN_SENT* state, when + * we're sending a non-SYN segment without waiting for + * the ack of our SYN. Send a CC option + */ + send_cc = 1; + } + else if (!th->ack && th->syn) + { + /* + * This is the initial SYN (i.e. 
client active open) + * Check whether to send CC or CCnew + */ + if (sk->ip_route_cache->rt_tao_ccsent != 0) + { + /* already talked with this host, send normal CC */ + /* send just a CC option */ + send_cc = 1; + } + else + { + /* don't know this host, send CC new */ + /* send just a CCnew option */ + send_cc = 2; + } + } + else if (th->syn && th->ack) + { + /* + * this is a SYN, ACK (server response to client active open). + * Send CC and CCecho. + */ + send_cc = 3; + } + + return send_cc; +} + +void tcp_build_cc_options(struct sock *sk, unsigned char *optr, int send_cc) +{ + unsigned char *ptr = optr; + + switch (send_cc) + { + case 1: + /* send just a CC option */ + + ptr[0] = TCPOPT_NOP; + ptr[1] = TCPOPT_NOP; + ptr[2] = TCPOPT_CC; + ptr[3] = TCPOLEN_CCOPT; + ptr[4] = (sk->cc_send) >> 24; + ptr[5] = ((sk->cc_send) << 8) >> 24; + ptr[6] = ((sk->cc_send) << 16) >> 24; + ptr[7] = ((sk->cc_send) << 24) >> 24; + break; + case 2: + /* send just a CCnew option */ + + ptr[0] = TCPOPT_NOP; + ptr[1] = TCPOPT_NOP; + ptr[2] = TCPOPT_CCNEW; + ptr[3] = TCPOLEN_CCOPT; + ptr[4] = (sk->cc_send) >> 24; + ptr[5] = ((sk->cc_send) << 8) >> 24; + ptr[6] = ((sk->cc_send) << 16) >> 24; + ptr[7] = ((sk->cc_send) << 24) >> 24; + break; + case 3: + /* send a CC and a CCecho option */ + + ptr[0] = TCPOPT_NOP; + ptr[1] = TCPOPT_NOP; + ptr[2] = TCPOPT_CC; + ptr[3] = TCPOLEN_CCOPT; + ptr[4] = (sk->cc_send) >> 24; + ptr[5] = ((sk->cc_send) << 8) >> 24; + ptr[6] = ((sk->cc_send) << 16) >> 24; + ptr[7] = ((sk->cc_send) << 24) >> 24; + ptr[8] = TCPOPT_NOP; + ptr[9] = TCPOPT_NOP; + ptr[10] = TCPOPT_CCECHO; + ptr[11] = TCPOLEN_CCOPT; + ptr[12] = (sk->cc_recv) >> 24; + ptr[13] = ((sk->cc_recv) << 8) >> 24; + ptr[14] = ((sk->cc_recv) << 16) >> 24; + ptr[15] = ((sk->cc_recv) << 24) >> 24; + } + + /* update ccsent cache */ + sk->ip_route_cache->rt_tao_ccsent = sk->cc_send; +} static int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int nonblock, int flags) @@ -1420,8 +1749,20 @@ * Do 
sanity checking for sendmsg/sendto/send */ +#ifdef CONFIG_RFC1644 + /* MSG_EOF support for TTCP */ + if (flags & ~(MSG_OOB|MSG_DONTROUTE|MSG_EOF)) +#else if (flags & ~(MSG_OOB|MSG_DONTROUTE)) +#endif goto out; + +#ifdef CONFIG_RFC1644 + /* for TTCP, we can connect from a closed state, if MSG_EOF is set */ + if (flags & MSG_EOF) + sk->PRU_SEND_EOF = 1; +#endif + if (msg->msg_name) { struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name; @@ -1430,16 +1771,40 @@ if (addr->sin_family && addr->sin_family != AF_INET) goto out; retval = -ENOTCONN; + +#ifdef CONFIG_RFC1644 + /* doesn't have to be connected if using TTCP */ + if(!(sk->PRU_SEND_EOF) && sk->state == TCP_CLOSE) +#else if(sk->state == TCP_CLOSE) +#endif goto out; retval = -EISCONN; +#ifdef CONFIG_RFC1644 + if(sk->PRU_SEND_EOF) + { + if ((addr->sin_port != sk->dummy_th.dest) || + (addr->sin_addr.s_addr != sk->daddr)) + /* Implied connect: pass the sockaddr length, not the payload length (result is still ignored) */ + ttcp_connect(sk, addr, msg->msg_namelen); + } + else + { + if (addr->sin_port != sk->dummy_th.dest) + goto out; + if (addr->sin_addr.s_addr != sk->daddr) + goto out; + } +#else if (addr->sin_port != sk->dummy_th.dest) goto out; if (addr->sin_addr.s_addr != sk->daddr) goto out; +#endif } lock_sock(sk); + retval = do_tcp_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, len, nonblock, flags); /* @@ -1583,8 +1948,15 @@ /* * Tell the world if we raised the window. - */ + * But we don't want to send an ack yet if we are + * only half synchronised. 
This allows us to piggy + * back the data on the SYN, ACK + */ +#ifdef CONFIG_RFC1644 + if (tcp_raise_window(sk) && !sk->TF_SENDSYN) +#else if (tcp_raise_window(sk)) +#endif tcp_send_ack(sk); } @@ -1661,6 +2033,7 @@ current->state = TASK_INTERRUPTIBLE; skb = sk->receive_queue.next; + while (skb != (struct sk_buff *)&sk->receive_queue) { if (before(*seq, skb->seq)) @@ -1785,6 +2158,7 @@ if (after(sk->copied_seq,sk->urg_seq)) sk->urg_data = 0; + if (used + offset < skb->len) continue; @@ -1848,11 +2222,21 @@ { int ns=TCP_CLOSE; int send_fin=0; + switch(sk->state) { case TCP_SYN_SENT: /* No SYN back, no FIN needed */ +#ifdef CONFIG_RFC1644 + case TCP_SYN_SENT_TTCP: +#endif break; case TCP_SYN_RECV: +#ifdef CONFIG_RFC1644 + /* XXX verify this??? TTCP */ + ns=TCP_SYN_RECV_TTCP; + send_fin=1; + break; +#endif case TCP_ESTABLISHED: /* Closedown begin */ ns=TCP_FIN_WAIT1; send_fin=1; @@ -1865,6 +2249,21 @@ case TCP_CLOSE: case TCP_LISTEN: break; +#ifdef CONFIG_RFC1644 + /* XXX where is TCP_FIN_WAIT1_TTCP */ + case TCP_ESTABLISHED_TTCP: + ns=TCP_FIN_WAIT1_TTCP; + send_fin=1; + break; + case TCP_CLOSE_WAIT_TTCP: + /* XXX verify this??? TTCP */ + ns=TCP_LAST_ACK_TTCP; + send_fin=1; + break; + case TCP_LAST_ACK_TTCP: + ns=TCP_LAST_ACK; + break; +#endif case TCP_LAST_ACK: /* Could have shutdown() then close(). Be careful not to send double fin. 
*/ ns=TCP_LAST_ACK; @@ -1918,6 +2317,19 @@ * If we've already sent a FIN, or it's a closed state */ +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_FIN_WAIT1 || + sk->state == TCP_FIN_WAIT1_TTCP || + sk->state == TCP_FIN_WAIT2 || + sk->state == TCP_CLOSING || + sk->state == TCP_CLOSING_TTCP || + sk->state == TCP_LAST_ACK || + sk->state == TCP_LAST_ACK_TTCP || + sk->state == TCP_TIME_WAIT || + sk->state == TCP_CLOSE || + sk->state == TCP_LISTEN + ) +#else if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING || @@ -1926,6 +2338,7 @@ sk->state == TCP_CLOSE || sk->state == TCP_LISTEN ) +#endif { return; } @@ -1965,6 +2378,11 @@ case TCP_FIN_WAIT1: case TCP_CLOSING: case TCP_LAST_ACK: +#ifdef CONFIG_RFC1644 + case TCP_FIN_WAIT1_TTCP: + case TCP_CLOSING_TTCP: + case TCP_LAST_ACK_TTCP: +#endif return 1; } return 0; @@ -1974,7 +2392,6 @@ static void tcp_close(struct sock *sk, unsigned long timeout) { struct sk_buff *skb; - /* * We need to grab some memory, and put together a FIN, * and then put it into the queue to be sent. @@ -2021,9 +2438,7 @@ */ if (tcp_close_state(sk,1)==1) - { tcp_send_fin(sk); - } if (timeout) { cli(); @@ -2071,7 +2486,6 @@ { struct wait_queue wait = { current, NULL }; struct sk_buff * skb = NULL; - add_wait_queue(sk->sleep, &wait); for (;;) { current->state = TASK_INTERRUPTIBLE; @@ -2099,7 +2513,6 @@ int error; struct sk_buff *skb; struct sock *newsk = NULL; - /* * We need to make sure that this socket is listening, * and that it has something pending. 
@@ -2114,11 +2527,32 @@ skb = tcp_find_established(sk); if (skb) { got_new_connect: - __skb_unlink(skb, &sk->receive_queue); - newsk = skb->sk; - kfree_skb(skb, FREE_READ); - sk->ack_backlog--; - error = 0; +#ifdef CONFIG_RFC1644 + if (!skb->sk->ttcp_datalen) { +#endif + __skb_unlink(skb, &sk->receive_queue); + newsk = skb->sk; + kfree_skb(skb, FREE_READ); + sk->ack_backlog--; + error = 0; +#ifdef CONFIG_RFC1644 + } else { + /* + * Since we need the SYN packet that has data in it + * we can't let tcp_accept() munch it yet + */ + __skb_unlink(skb, &sk->receive_queue); + newsk = skb->sk; + ttcp_insert_skb(skb, &newsk->receive_queue); + + /* XXX this is where we can stop the retransmission in + * multiple segments */ + newsk->copied_seq = skb->seq + skb->h.th->syn; + newsk->ttcp_datalen = 0; + sk->ack_backlog--; + error = 0; + } +#endif out: release_sock(sk); no_listen: @@ -2164,6 +2598,151 @@ } +#ifdef CONFIG_RFC1644 +/* + * This will initialise necessary structures for TAO without sending data + */ + +static int ttcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) +{ + int atype; + struct rtable *rt; + struct device *dev=NULL; + struct sk_buff *buff; + int tmp; +#ifdef CONFIG_RFC1644_DEBUG + /* XXX do we really need the buff stuff now??? TTCP */ +printk("\n"); +#endif + sk->state = TCP_CLOSE; + + /* + * Don't allow a double connect. + */ + + if(sk->daddr) + return -EINVAL; + + if (addr_len < 8) + return(-EINVAL); + + if (usin->sin_family && usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + + /* + * connect() to INADDR_ANY means loopback (BSD'ism).
+ */ + + if (usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); + + /* + * Don't want a TCP connection going to a broadcast address + */ + + if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) + return -ENETUNREACH; + + if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr, usin->sin_port)) + return -EADDRNOTAVAIL; + + lock_sock(sk); + sk->daddr = usin->sin_addr.s_addr; + + sk->rcv_ack_cnt = 1; + sk->err = 0; + sk->dummy_th.dest = usin->sin_port; + + buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL); + if (buff == NULL) { + release_sock(sk); + return(-ENOMEM); + } + buff->sk = sk; + buff->free = 0; + buff->localroute = sk->localroute; + dev = sk->bound_device; + tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, MAX_SYN_SIZE, sk->ip_tos, sk->ip_ttl, + &sk->ip_route_cache); + /* + * We can free it straight away, as we only needed it for the + * routing and device stuff. NOTE(review): tmp (the build_header() result) is never checked in this function -- confirm that ignoring a failure here is intended + */ + sock_wfree(sk, buff); + + /* This was how it was originally...
TTCP + * sk->ip_route_cache = ip_rt_route(sk->daddr, sk->localroute, dev);*/ + if ((rt = sk->ip_route_cache) != NULL && !sk->saddr) + sk->saddr = rt->rt_src; + sk->rcv_saddr = sk->saddr; + + /* + * Set up our outgoing TCP sequence number + */ + sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, + sk->dummy_th.source, + usin->sin_port); + sk->window_seq = sk->write_seq; + sk->rcv_ack_seq = sk->write_seq -1; + + sk->sent_seq = sk->write_seq; + + if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) + sk->window_clamp=rt->rt_window; + else + sk->window_clamp=0; + + if (sk->user_mss) + sk->mtu = sk->user_mss; + else if (rt) + sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr); + else + sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr); + + /* + * but not bigger than device MTU + */ + + if(sk->bound_device != NULL) + sk->mtu = min(sk->mtu, sk->bound_device->mtu - + sizeof(struct iphdr) - sizeof(struct tcphdr)); + + /* Must check it here, just to be absolutely safe. If we end up + * with an sk->mtu of zero, we can thus end up with an sk->mss + * of zero, which causes us to bomb out in tcp_do_sendmsg. -DaveM + */ + if(sk->mtu < 32) + sk->mtu = 32; /* Sanity limit */ + + sk->mss = sk->mtu - (TCPOLEN_CCOPT+TCPOLEN_MSS+2); /*max size of TTCP opts*/ + sk->window_seq = sk->mss + sk->write_seq; + + sk->cc_send = CC_INC ( tcp_ccgen ); + + if(rt&&rt->rt_flags&RTF_IRTT) + sk->rto = rt->rt_irtt; + else + sk->rto = TCP_TIMEOUT_INIT; + + sk->idletime = jiffies; + + /* For TIME WAIT truncation */ + sk->t_duration = sk->idletime; + + sk->delack_timer.function = tcp_delack_timer; + sk->delack_timer.data = (unsigned long) sk; + sk->retransmit_timer.function = tcp_retransmit_timer; + sk->retransmit_timer.data = (unsigned long)sk; + sk->retransmits = 0; + tcp_statistics.TcpActiveOpens++; + tcp_statistics.TcpOutSegs++; + + release_sock(sk); + return(0); +} +#endif + /* * This will initiate an outgoing connection. 
*/ @@ -2329,6 +2908,7 @@ tcp_statistics.TcpOutSegs++; release_sock(sk); + return(0); } diff -urN linux-2.0.32/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- linux-2.0.32/net/ipv4/tcp_input.c Fri Oct 31 19:34:12 1997 +++ linux/net/ipv4/tcp_input.c Fri Apr 17 15:17:41 1998 @@ -357,7 +357,12 @@ * connect again and it will work (with luck). */ +#ifdef CONFIG_RFC1644 + if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV || + sk->state==TCP_SYN_SENT_TTCP || sk->state==TCP_SYN_RECV_TTCP) +#else if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) +#endif { tcp_send_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev,0,255); return; @@ -409,9 +414,16 @@ case TCP_TIME_WAIT: break; case TCP_SYN_SENT: +#ifdef CONFIG_RFC1644 + case TCP_SYN_SENT_TTCP: +#endif sk->err = ECONNREFUSED; break; case TCP_CLOSE_WAIT: +#ifdef CONFIG_RFC1644 + case TCP_CLOSE_WAIT_TTCP: +#endif + sk->err = EPIPE; break; default: @@ -460,11 +472,17 @@ int mss_seen = 0; ptr = (unsigned char *)(th + 1); + + sk->TF_RCVD_CC = 0; + sk->TOF_CC = 0; + sk->TOF_CCNEW = 0; + sk->TOF_CCECHO = 0; while(length>0) { int opcode=*ptr++; int opsize=*ptr++; + switch(opcode) { case TCPOPT_EOL: @@ -480,12 +498,41 @@ switch(opcode) { case TCPOPT_MSS: - if(opsize==4 && th->syn) + if(opsize==TCPOLEN_MSS && th->syn) { sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); mss_seen = 1; } break; +#ifdef CONFIG_RFC1644 + case TCPOPT_CC: + if(opsize==TCPOLEN_CCOPT && sk->TF_REQ_CC) { + sk->TOF_CC = 1; + sk->cc_recv = ntohl(*(unsigned long *)ptr); + sk->TF_RCVD_CC = 1; + } + break; + + case TCPOPT_CCNEW: + if(opsize==TCPOLEN_CCOPT && sk->TF_REQ_CC) { + sk->TOF_CCNEW = 1; + sk->cc_recv = ntohl(*(unsigned long *)ptr); + sk->TF_RCVD_CC = 1; + } + break; + + case TCPOPT_CCECHO: + if(opsize==TCPOLEN_CCOPT && th->syn && th->ack) { + /* + * CC echo options should only be received + * in SYN, ACK segments. 
+ */ + sk->TOF_CCECHO = 1; + sk->ccecho_rcv = ntohl(*(unsigned long *)ptr); + } + break; +#endif + /* Add other options here as people feel the urge to implement stuff like large windows */ } ptr+=opsize-2; @@ -523,12 +570,22 @@ #ifdef CONFIG_SYN_COOKIES int send_cookie = 0; #endif - +#ifdef CONFIG_RFC1644 + unsigned long data_len = sk->ttcp_datalen; +#endif th = skb->h.th; /* If the socket is dead, don't accept the connection. */ if (!sk->dead) { +#ifdef CONFIG_RFC1644 + if (data_len <= 0) + /* + * If we have received data in the SYN segment, + * we don't want to wake up the socket yet, need + * to do a bit of processing first. + */ +#endif sk->data_ready(sk,0); } else @@ -646,7 +703,7 @@ sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC); if (!sk->opt) { - kfree_s(newsk, sizeof(struct sock)); + kfree_s(newsk, sizeof(struct sock)); tcp_statistics.TcpAttemptFails++; kfree_skb(skb, FREE_READ); return; @@ -840,6 +897,128 @@ } #endif +#ifdef CONFIG_RFC1644 + if (newsk->TOF_CC) { + if ((newsk->ip_route_cache->rt_tao_cc != 0) && + (after(newsk->cc_recv, newsk->ip_route_cache->rt_tao_cc))) { + /* + * Now that the TAO test passed, we don't want to send + * a SYN, ACK straight away. We can hold on and see if + * we can piggy back data on the return. 
We have to set + * up some of the variables that would otherwise have + * been set up in the normal flow of TCP/IP + */ + u32 window_seq; + extern unsigned long tcp_ccgen; + + tcp_statistics.TcpTaoOk++; + tcp_statistics.TcpPassiveOpens++; + + /* + * Work out the state changes: + * move to ESTABLISHED* since we are half synchronised + */ + tcp_set_state(newsk, TCP_ESTABLISHED_TTCP); + tcp_v4_rehash(newsk); + + /* + * We can update the last CC received from this host + */ + newsk->ip_route_cache->rt_tao_cc = newsk->cc_recv; + + /* + * We can ack all the data in the SYN + */ + newsk->acked_seq = skb->end_seq; + + /* Set up the right sequence numbers */ + newsk->write_seq = seq; + newsk->window_seq = newsk->write_seq; + newsk->rcv_ack_seq = newsk->write_seq; + + /* we miss out on this in tcp_data */ + skb_pull(skb,th->doff*4); + + /* we miss out on this in tcp_ack */ + window_seq = ntohs(th->window); + if (window_seq > sk->max_window) + { + newsk->max_window = window_seq; + newsk->mss = min(window_seq, newsk->mtu - + ((TCPOLEN_CCOPT*2)+TCPOLEN_MSS+4)); + newsk->window_seq = newsk->mss + newsk->write_seq; + } + + if(th->fin) { + /* + * If we received the FIN, we can move into the + * CLOSE_WAIT* state, we don't send a syn/ack + * because we can wait and piggyback our data + * on the response. + */ + tcp_set_state(newsk, TCP_CLOSE_WAIT_TTCP); + tcp_v4_rehash(newsk); + + /* this is the tcp_send_synack stuff that we miss out on */ + newsk->sent_seq = newsk->write_seq + 1; + skb->sk = newsk; + newsk->tao_test = 1; + sk->ack_backlog++; + newsk->cc_send = CC_INC ( tcp_ccgen ); + + atomic_sub(skb->truesize, &sk->rmem_alloc); + atomic_add(skb->truesize, &newsk->rmem_alloc); + /* + * Need to queue the data for later. + */ + skb_queue_tail(&sk->receive_queue, skb); + + newsk->TF_SENDSYN = 1; /* turn on the SENDSYN flag */ + + /* + * Wake up the socket since we have all the + * data from the peer. 
This will wake up the + * tcp_accept() function + */ + if(!sk->dead) + sk->state_change(sk); + } else { + /* + * In this situation there is data in the packet + * but no fin, this means that there is more data to receive + * from the client. + */ + newsk->ttcp_datalen = data_len; + sk->data_ready(sk,0); + /* and we have to send a SYN|ACK */ + tcp_send_synack(newsk, sk, skb, 0); + newsk->TF_SENDSYN = 0; /* turn off the SENDSYN flag */ + newsk->mtu -= (TCPOLEN_CCOPT+2); + newsk->sent_mss = 1; + } + + return; + } else if (newsk->ip_route_cache->rt_tao_cc != 0) { + /* + * The tao test failed, we will queue the data later on, now + * we continue with the three way handshake, sending a SYN, ACK + */ + tcp_statistics.TcpTaoFail++; + sk->tao_test = 0; + } + } else { + /* + * No CC option, but maybe CCnew: + * invalidate cached value. + */ + newsk->ip_route_cache->rt_tao_cc = 0; + newsk->tao_test = 0; + if(newsk->TF_REQ_CC && newsk->TF_RCVD_CC) + newsk->mss -= TCPOLEN_CCOPT+4; + } + newsk->sent_mss = 1; +#endif + /* Set up the right sequence numbers */ newsk->write_seq = seq; newsk->window_seq = newsk->write_seq; @@ -850,6 +1029,7 @@ #else tcp_send_synack(newsk, sk, skb, 0); #endif + } @@ -975,7 +1155,7 @@ newsk->copied_seq = skb->seq; newsk->fin_seq = skb->seq-1; newsk->syn_seq = skb->seq-1; - newsk->state = TCP_SYN_RECV; + tcp_set_state(newsk, TCP_SYN_RECV); newsk->timeout = 0; newsk->ip_xmit_timeout = 0; newsk->urg_data = 0; @@ -1139,7 +1319,6 @@ { int flag = 0; u32 window_seq; - /* * 1 - there was data in packet as well as ack or new data is sent or * in shutdown state @@ -1163,7 +1342,7 @@ * then we can probably ignore it. */ - if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) + if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) goto uninteresting_ack; /* @@ -1317,6 +1496,14 @@ sk->window_seq = window_seq; sk->rcv_ack_seq = ack; sk->rcv_ack_cnt = 1; +#ifdef CONFIG_RFC1644 + /* + * We've received an ack, and its ok. 
+ * Switch off the SENDSYN flag. + */ + if(sk->TF_SENDSYN) + sk->TF_SENDSYN = 0; +#endif } /* @@ -1391,6 +1578,7 @@ for (;;) { int was_locked; struct sk_buff * skb = sk->send_head; + if (!skb) break; @@ -1583,7 +1771,11 @@ * Move to TCP_CLOSE on success. */ +#ifdef CONFIG_RFC1644 + if (sk->state == TCP_LAST_ACK || sk->state == TCP_LAST_ACK_TTCP) +#else if (sk->state == TCP_LAST_ACK) +#endif { if (!sk->dead) sk->state_change(sk); @@ -1607,7 +1799,6 @@ if (sk->state == TCP_FIN_WAIT1) { - if (!sk->dead) sk->state_change(sk); if (sk->rcv_ack_seq == sk->write_seq) @@ -1643,11 +1834,25 @@ /* * Final ack of a three way shake */ - +#ifdef CONFIG_RFC1644 + if (sk->state==TCP_SYN_RECV || sk->state==TCP_ESTABLISHED_TTCP) +#else if (sk->state==TCP_SYN_RECV) +#endif { tcp_set_state(sk, TCP_ESTABLISHED); +#ifdef CONFIG_RFC1644 + /* + * TTCP: Now that the three way handshake has passed + * we can send this onto the user. We need to update the + * sk->acked_seq so that the tcp_queue will recognise + * that there is data available. + */ + sk->ip_route_cache->rt_tao_cc = sk->cc_recv; + sk->acked_seq += sk->ttcp_datalen; +#endif + /* Must check for peer advertising zero sized window * or else we get a sk->{mtu,mss} of zero and thus bomb out * in tcp_do_sendmsg. -DaveM @@ -1675,6 +1880,37 @@ sk->mdev = TCP_TIMEOUT_INIT; } +#ifdef CONFIG_RFC1644 + if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_SENT_TTCP) { + struct sk_buff * skb = sk->send_head; + + tcp_options(sk,th); + + /* + * We have received an Ack for our SYN. We pull the segment + * off the retransmission queue, switch off its SYN and + * update its sequence number since the other end is expecting + * a higher sequence. 
+ */ + /* XXX added in TF_RCVD_CC */ + if (skb != NULL && !sk->TF_RCVD_CC) { + int size = skb->len - ((unsigned char *)skb->h.th - skb->data); + unsigned int seq; + + /* switch off the syn */ + skb->h.th->syn = 0; + /* update the sequence */ + seq = ntohl(skb->h.th->seq); + seq++; + skb->h.th->seq = htonl(seq); + /* reduce the write sequence by one */ + sk->write_seq--; + /* recalculate the checksum */ + tcp_send_check(skb->h.th, sk->saddr, sk->daddr, size, skb); + } + } +#endif + /* * The following code has been greatly simplified from the * old hacked up stuff. The wonders of properly setting the @@ -1710,9 +1946,7 @@ */ if (after(ack, sk->sent_seq)) - { return 0; - } /* * Restart the keepalive timer. @@ -1758,6 +1992,7 @@ case TCP_SYN_RECV: case TCP_SYN_SENT: case TCP_ESTABLISHED: + /* * move to CLOSE_WAIT, tcp_data() already handled * sending the ack. @@ -1767,6 +2002,28 @@ sk->shutdown = SHUTDOWN_MASK; break; +#ifdef CONFIG_RFC1644 + /* incorrect state transitions XXX */ + /* this may be where the server stuff is + * causing trouble */ + case TCP_SYN_RECV_TTCP: + case TCP_SYN_SENT_TTCP: + case TCP_FIN_WAIT1_TTCP: + tcp_set_state(sk,TCP_CLOSING_TTCP); + break; + case TCP_CLOSING_TTCP: + tcp_set_state(sk,TCP_CLOSING); + break; + case TCP_ESTABLISHED_TTCP: + /* + * move to CLOSE_WAIT*, tcp_data() already handled + * sending the ack. 
+ */ + tcp_set_state(sk,TCP_CLOSE_WAIT_TTCP); + if (th->rst) + sk->shutdown = SHUTDOWN_MASK; + break; +#endif case TCP_CLOSE_WAIT: case TCP_CLOSING: /* @@ -1861,6 +2118,13 @@ __skb_insert(skb, prev, next, list); } +#ifdef CONFIG_RFC1644 +inline void ttcp_insert_skb(struct sk_buff * skb, struct sk_buff_head * list) +{ + tcp_insert_skb(skb, list); +} +#endif + /* * Called for each packet when we find a new ACK endpoint sequence in it */ @@ -1887,7 +2151,6 @@ */ ack_seq = sk->acked_seq; - if (!after(skb->seq, ack_seq)) { if (after(skb->end_seq, ack_seq)) { /* the packet straddles our window end */ @@ -1929,7 +2192,13 @@ * anything more to come. */ if (!sk->delay_acks || th->fin) { +#ifdef CONFIG_RFC1644 + if(!sk->TF_SENDSYN && sk->state != TCP_LAST_ACK_TTCP) { +#endif tcp_send_ack(sk); +#ifdef CONFIG_RFC1644 + } +#endif } else { /* * If psh is set we assume it's an @@ -1968,7 +2237,7 @@ * [We now do this.] * */ - + if (!skb->acked) { if(sk->debug) @@ -2023,6 +2292,24 @@ return(0); } +#ifdef CONFIG_RFC1644 + if (th->syn && th->fin && th->ack && !sk->TF_RCVD_CC) { + /* + * We have encountered a broken implementation of TCP. + * We need to free this skb, and wait for the next one + * or the socket will be woken up with no valid data. + * What's happening is that in the 2.0.x series of Linux + * kernels, when setting up the tcp header in tcp_send_synack + * it memcpys the received skb's header, and then sets all + * the flags to their appropriate values, except for the FIN + * flag. Which means that the other host sent a FIN without + * realising it and causes havoc for us. + */ + kfree_skb(skb, FREE_READ); + return(0); + } +#endif + /* * We no longer have anyone receiving data on this connection. 
@@ -2261,10 +2548,12 @@ struct tcphdr *th; struct sock *sk; __u32 seq; +#ifdef CONFIG_RFC1644 + short int data_len = len-(skb->h.th->doff*4); +#endif #ifdef CONFIG_IP_TRANSPARENT_PROXY int r; #endif - /* * "redo" is 1 if we have already seen this skb but couldn't * use it at that time (the socket was locked). In that case @@ -2343,9 +2632,8 @@ * exist so should cause resets as if the port was unreachable. */ - if (sk->zapped || sk->state==TCP_CLOSE) { + if (sk->zapped || sk->state==TCP_CLOSE) goto no_tcp_socket; - } if (!sk->prot) { @@ -2353,7 +2641,6 @@ return(0); } - /* * Charge the memory to the socket. */ @@ -2379,11 +2666,52 @@ if(sk->state!=TCP_ESTABLISHED) /* Skip this lot for normal flow */ { - /* * Now deal with unusual cases. */ +#ifdef CONFIG_RFC1644 + if(sk->state == TCP_ESTABLISHED_TTCP) { + /* + * We're in the situation where there are multiple segments + * of data coming in. + * A new socket will already have been set up in tcp_conn_request() + * so now we need to queue the data. + */ + tcp_options(sk,th); + if (after(sk->cc_recv, sk->ip_route_cache->rt_tao_cc) || + before(sk->cc_recv, sk->ip_route_cache->rt_tao_cc)) { + tcp_statistics.TcpCCdrop++; + /* XXX send a reset here? NOTE(review): skb is not freed before this return, unlike the other discard paths in this file that call kfree_skb() first -- confirm this is not a leak */ + return 0; + } + if (!th->fin) { + /* + * This isn't the last segment, we just queue it. + */ + sk->acked_seq = skb->end_seq; + skb_queue_tail(&sk->receive_queue, skb); + } else { + /* + * This will be the last segment, we can now move + * to TCP_CLOSE_WAIT* + */ + tcp_set_state(sk, TCP_CLOSE_WAIT_TTCP); + tcp_v4_rehash(sk); + sk->acked_seq = skb->end_seq; + skb_queue_tail(&sk->receive_queue, skb); + /* + * Since we've received all the data, we can + * wake up the socket. This will wake up the + * tcp_recv() function. + */ + if(!sk->dead) + sk->state_change(sk); + } + return 0; + } +#endif + if(sk->state==TCP_LISTEN) { if (th->ack) { /* These use the socket TOS..
might want to be the received TOS */ @@ -2438,6 +2766,7 @@ */ if(th->rst || !th->syn || th->ack || (r = ip_chk_addr(daddr)) == IS_BROADCAST || r == IS_MULTICAST) #else + if (skb->sk->tao_test == 0) if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR) #endif { @@ -2451,8 +2780,15 @@ seq = secure_tcp_sequence_number(saddr, daddr, skb->h.th->dest, skb->h.th->source); +#ifdef CONFIG_RFC1644 + /* we set this so that tcp_accept won't kill the skb */ + sk->ttcp_datalen = (unsigned long)data_len; +#endif tcp_conn_request(sk, skb, daddr, saddr, opt, dev, seq); - +#ifdef CONFIG_RFC1644 + sk->ttcp_datalen = 0; +#endif + /* * Now we have several options: In theory there is nothing else * in the frame. KA9Q has an option to send data with the syn, @@ -2472,7 +2808,12 @@ * then it's a new connection */ +#ifdef CONFIG_RFC1644 + if ((sk->state == TCP_SYN_RECV || sk->state == TCP_SYN_RECV_TTCP) && + th->syn && skb->seq+1 == sk->acked_seq) +#else if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq) +#endif { kfree_skb(skb, FREE_READ); return 0; @@ -2484,7 +2825,11 @@ * not be in line code. [AC] */ +#ifdef CONFIG_RFC1644 + if(sk->state==TCP_SYN_SENT || sk->state == TCP_SYN_SENT_TTCP) +#else if(sk->state==TCP_SYN_SENT) +#endif { /* Crossed SYN or previous junk segment */ if(th->ack) @@ -2512,9 +2857,8 @@ { /* A valid ack from a different connection start. Shouldn't happen but cover it */ - tcp_statistics.TcpAttemptFails++; - tcp_send_reset(daddr, saddr, th, - sk->prot, opt,dev,0,255); + tcp_statistics.TcpAttemptFails++; + tcp_send_reset(daddr, saddr, th, sk->prot, opt,dev,0,255); kfree_skb(skb, FREE_READ); return 0; } @@ -2541,9 +2885,113 @@ sk->acked_seq = skb->seq+1; sk->lastwin_seq = skb->seq+1; sk->fin_seq = skb->seq; +#ifdef CONFIG_RFC1644 + if (sk->TOF_CCECHO == 1) { + /* + * This is the receipt of the SYN, ACK from the server. 
+ * We need to check that we have received the correct + * CCecho value so that we know if we're on the right + * connection. + */ + if (sk->cc_send != sk->ccecho_rcv) { + /* Ops, wrong ccecho segment! */ + if (sk->ip_route_cache->rt_tao_ccsent != 0) { + tcp_statistics.TcpBadCCecho++; + goto discard_it; + } else { + tcp_statistics.TcpAttemptFails++; + tcp_send_reset(daddr, saddr, th, sk->prot, opt,dev,0,255); + kfree_skb(skb, FREE_READ); + return 0; + } + } else { + + /* + * the next few state changes can be combined + * its like this for debugging purposes XXX + */ + + /* + * We have the correct CCecho value, the TAO test passes + * move to the next state + */ + if (sk->state == TCP_SYN_SENT) + tcp_set_state(sk, TCP_ESTABLISHED_TTCP); + else + tcp_set_state(sk, TCP_FIN_WAIT1_TTCP); + + tcp_statistics.TcpTaoOk++; + + /* + * received at least an ACK of SYN, move to next state + */ + if (sk->state == TCP_ESTABLISHED_TTCP) + tcp_set_state(sk, TCP_ESTABLISHED); + else + tcp_set_state(sk, TCP_FIN_WAIT1); + + if ((sk->state == TCP_FIN_WAIT1) && (skb->ack_seq == sk->write_seq)) { + /* + * received ACK of FIN, move to FIN_WAIT2 + */ + tcp_set_state(sk, TCP_FIN_WAIT2); + } + + /* segment is acceptable, update cache */ + sk->ip_route_cache->rt_tao_ccsent = sk->ccecho_rcv; + sk->ip_route_cache->rt_tao_cc = sk->cc_recv; + + /* + * If there's data, delay ACK; if not send an ack + */ + if (data_len > 0 ) { + tcp_delack_estimator(sk); + } else { + if(!th->fin) + /* + * In this case, the three way handshake is + * in progress, we need to resend our FIN. + * Reduce the sequence number by one + * since we're resending it. 
+ */ + sk->sent_seq--; + + tcp_send_ack(sk); + } + + if (sk->TF_SENDFIN) + sk->TF_SENDFIN = 0; + + /* XXX added this in */ + tcp_v4_rehash(sk); + } + } else { + /* + * No CCecho, treat as normal TCP/IP + */ + /* invalidate cache */ + sk->ip_route_cache->rt_tao_cc = 0; + if (sk->state == TCP_SYN_SENT) { + tcp_set_state(sk, TCP_ESTABLISHED); + } + else if (sk->state == TCP_SYN_SENT_TTCP) { + tcp_set_state(sk, TCP_FIN_WAIT1); + sk->TF_SENDFIN = 1; + /* we'll be resending the FIN */ + sk->sent_seq--; + } + + tcp_send_ack(sk); + tcp_options(sk,th); + if (sk->TF_SENDFIN) + sk->TF_SENDFIN = 0; + } + +#else tcp_send_ack(sk); tcp_set_state(sk, TCP_ESTABLISHED); tcp_options(sk,th); +#endif #if 0 sk->dummy_th.dest=th->source; @@ -2579,8 +3027,42 @@ tcp_statistics.TcpAttemptFails++; return tcp_reset(sk,skb); } +#ifdef CONFIG_RFC1644 + if (sk->TOF_CC) { + /* + * We're now in a simultaneous open situation + */ + if ((sk->ip_route_cache->rt_tao_cc != 0) && + (after(sk->cc_recv, sk->ip_route_cache->rt_tao_cc))) { + /* + * TAO test passes + * update cache and make transition: + * SYN-SENT -> ESTABLISHED + * SYN-SENT* -> FIN-WAIT-1* + */ + tcp_statistics.TcpTaoOk++; + sk->ip_route_cache->rt_tao_cc = sk->cc_recv; + /* XXX this looks very dodgy! */ + if(sk->TF_SENDFIN) { + tcp_set_state(sk,TCP_FIN_WAIT1); + sk->TF_SENDFIN = 0; + } else { + tcp_set_state(sk,TCP_ESTABLISHED); + } + sk->TF_SENDSYN = 1; + } else { + tcp_set_state(sk,TCP_SYN_RECV); + if (sk->ip_route_cache->rt_tao_cc != 0) + tcp_statistics.TcpTaoFail++; + } + } else { + /* CCnew or no option => invalidate cache */ + sk->ip_route_cache->rt_tao_cc = 0; + tcp_set_state(sk,TCP_SYN_RECV); + } +#else tcp_set_state(sk,TCP_SYN_RECV); - +#endif /* * FIXME: * Must send SYN|ACK here @@ -2606,60 +3088,98 @@ * checking "sk->users" for the new socket as well as doing all * the normal tests on the packet. 
+ */ - +#ifdef _TTCP + if (sk->state == TCP_TIME_WAIT || + sk->state == TCP_LAST_ACK || sk->state == TCP_CLOSING) { + /* + * If segment contains a SYN and CC [not CCnew] option + * and peer understands T/TCP (cc_recv != 0): + * if state == TIME_WAIT and connection duration > MSL, + * drop packet and send RST; + * + * if CC > CCrecv then is new SYN, and can implicitly + * ack the FIN (and data) in retransmission queue. + * Complete close and delete TCPCB. Then reprocess + * segment, hoping to find new TCPCB in LISTEN state; + * + * else must be old SYN; drop it. + * else do normal processing. NOTE(review): this block is guarded by _TTCP, not the CONFIG_RFC1644 guard used elsewhere in this patch; it reads rt_tao_ccrecv, which is not among the rtable fields this patch adds (rt_tao_cc, rt_tao_ccsent), and the old-SYN branch frees skb without returning -- confirm before enabling. + */ + if ((th->syn) && (sk->TOF_CC) && + (sk->ip_route_cache->rt_tao_cc != 0)) { + if (sk->state == TCP_TIME_WAIT && + (sk->t_duration - jiffies)/100 > MSL) { + tcp_statistics.TcpAttemptFails++; + tcp_send_reset(daddr, saddr, th, sk->prot, opt,dev,0,255); + kfree_skb(skb, FREE_READ); + return 0; + } + if (after(sk->ip_route_cache->rt_tao_cc, + sk->ip_route_cache->rt_tao_ccrecv)) { + sk->ttcp_datalen = (unsigned long)data_len; + tcp_conn_request(sk, skb, daddr, saddr, opt, dev, seq); + sk->ttcp_datalen = 0; + return 0; + } else { + kfree_skb(skb, FREE_READ); + } + } + } +#else #define BSD_TIME_WAIT #ifdef BSD_TIME_WAIT - if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && - after(skb->seq, sk->acked_seq) && !th->rst) - { - u32 seq = sk->write_seq; - if(sk->debug) - printk("Doing a BSD time wait\n"); - tcp_statistics.TcpEstabResets++; - atomic_sub(skb->truesize, &sk->rmem_alloc); - skb->sk = NULL; - sk->err=ECONNRESET; - tcp_set_state(sk, TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; + if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && + after(skb->seq, sk->acked_seq) && !th->rst) + { + u32 seq = sk->write_seq; + if(sk->debug) + printk("Doing a BSD time wait\n"); + tcp_statistics.TcpEstabResets++; + atomic_sub(skb->truesize, &sk->rmem_alloc); + skb->sk = NULL; + sk->err=ECONNRESET; + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; #ifdef
CONFIG_IP_TRANSPARENT_PROXY - /* What to do here? - * For the non-proxy case, this code is effectively almost a no-op, - * due to the sk = NULL. Is that intentional? If so, why shouldn't we - * do the same for the proxy case and get rid of some useless code? - */ - if (skb->redirport) - sk = tcp_v4_proxy_lookup(saddr, th->source, daddr, th->dest, - dev->pa_addr, skb->redirport, dev); - else -#endif - sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest, dev); - /* this is not really correct: we should check sk->users */ - if (sk && sk->state==TCP_LISTEN) - { - skb->sk = sk; - atomic_add(skb->truesize, &sk->rmem_alloc); - /* FIXME: Is the sequence number addition - * of 128000 here enough for fast networks? - * Also, does this reduce the security of - * our tcp sequence numbers? - */ - tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); - return 0; - } - kfree_skb(skb, FREE_READ); - return 0; - } -#endif - } + /* What to do here? + * For the non-proxy case, this code is effectively almost a no-op, + * due to the sk = NULL. Is that intentional? If so, why shouldn't we + * do the same for the proxy case and get rid of some useless code? + */ + if (skb->redirport) + sk = tcp_v4_proxy_lookup(saddr, th->source, daddr, th->dest, + dev->pa_addr, skb->redirport, dev); + else +#endif + sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest, dev); + /* this is not really correct: we should check sk->users */ + if (sk && sk->state==TCP_LISTEN) + { + skb->sk = sk; + atomic_add(skb->truesize, &sk->rmem_alloc); + /* FIXME: Is the sequence number addition + * of 128000 here enough for fast networks? + * Also, does this reduce the security of + * our tcp sequence numbers? + */ + tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); + return 0; + } + kfree_skb(skb, FREE_READ); + return 0; + } +#endif +#endif + } - /* - * We are now in normal data flow (see the step list in the RFC) - * Note most of these are inline now. 
I'll inline the lot when - * I have time to test it hard and look at what gcc outputs - */ + /* + * We are now in normal data flow (see the step list in the RFC) + * Note most of these are inline now. I'll inline the lot when + * I have time to test it hard and look at what gcc outputs + */ - if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn)) - { + if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn)) + { bad_tcp_sequence(sk, th, skb->end_seq-th->syn, dev); kfree_skb(skb, FREE_READ); return 0; @@ -2700,16 +3220,19 @@ * Our three way handshake failed. */ +#ifdef CONFIG_RFC1644 + if(sk->state==TCP_SYN_RECV || sk->state == TCP_ESTABLISHED_TTCP) +#else if(sk->state==TCP_SYN_RECV) +#endif { tcp_send_reset(daddr, saddr, th,sk->prot, opt, dev,0,255); } kfree_skb(skb, FREE_READ); return 0; } - -rfc_step6: /* I'll clean this up later */ +rfc_step6: /* I'll clean this up later */ /* * If the accepted buffer put us over our queue size we * now drop it (we must process the ack first to avoid @@ -2752,7 +3275,7 @@ /* * And done */ - + return 0; no_tcp_socket: diff -urN linux-2.0.32/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c --- linux-2.0.32/net/ipv4/tcp_output.c Wed Oct 15 21:47:56 1997 +++ linux/net/ipv4/tcp_output.c Mon Apr 6 21:12:23 1998 @@ -164,6 +164,7 @@ * anything for a long time, in which case we have no reason to * believe that our congestion window is still correct. */ + if (sk->send_head == 0 && (jiffies - sk->idletime) > sk->rto) { sk->cong_window = 1; sk->cong_count = 0; @@ -221,7 +222,7 @@ * by the ip layer. This causes half the problems with * unroutable FIN's and other things. 
*/ - + sk->prot->queue_xmit(sk, skb->dev, skb, 0); /* @@ -269,6 +270,7 @@ if (sk == NULL) return; + while ((skb = tcp_dequeue_partial(sk)) != NULL) tcp_send_skb(sk, skb); } @@ -546,8 +548,13 @@ struct sk_buff *skb2 = sk->write_queue.next; while (skb2 && skb2->dev == skb->dev) { skb2->raddr=rt->rt_gateway; - if (sk->state == TCP_SYN_SENT && sysctl_ip_dynaddr) - ip_rewrite_addrs (sk, skb2, dev); +#ifdef CONFIG_RFC1644 + if ((sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_SENT_TTCP) + && sysctl_ip_dynaddr) +#else + if (sk->state == TCP_SYN_SENT && sysctl_ip_dynaddr) +#endif + ip_rewrite_addrs (sk, skb2, dev); skb_pull(skb2,((unsigned char *)skb2->ip_hdr)-skb2->data); skb2->dev = dev; skb2->arp=1; @@ -572,8 +579,12 @@ } } skb->raddr=rt->rt_gateway; - if (skb->dev !=dev && sk->state == TCP_SYN_SENT && sysctl_ip_dynaddr) - ip_rewrite_addrs(sk, skb, dev); +#ifdef CONFIG_RFC1644 + if (skb->dev !=dev && (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_SENT_TTCP) && sysctl_ip_dynaddr) +#else + if (skb->dev !=dev && sk->state == TCP_SYN_SENT && sysctl_ip_dynaddr) +#endif + ip_rewrite_addrs(sk, skb, dev); skb->dev=dev; skb->arp=1; #ifdef CONFIG_FIREWALL @@ -840,7 +851,7 @@ struct sk_buff *buff; struct device *dev=NULL; int tmp; - + buff = sock_wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); if (buff == NULL) @@ -911,7 +922,27 @@ t1->ack_seq = htonl(sk->acked_seq); t1->window = htons(tcp_select_window(sk)); t1->fin = 1; - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); + +#ifdef CONFIG_RFC1644 + if (sk->TF_REQ_CC && sk->TF_RCVD_CC) + { + /* + * Set up our good old CC options + */ + unsigned char *ptr = skb_put(buff,8); + int cc_opts = 0; + + cc_opts = tcp_calc_cc_options(sk,t1); + + tcp_build_cc_options(sk,ptr,cc_opts); + + t1->doff = sizeof(*t1)/4 + 2; + buff->csum = csum_partial(ptr, 8, 0); + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1) + 8, buff); + } + else +#endif + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); /* * If there is data in 
the write queue, the fin must be appended to @@ -930,7 +961,7 @@ } else { - sk->sent_seq = sk->write_seq; + sk->sent_seq = sk->write_seq; sk->prot->queue_xmit(sk, dev, buff, 0); tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); } @@ -944,6 +975,8 @@ struct sk_buff * buff; struct device *ndev=NULL; int tmp; + int cc_opts = 0; + int ttcp_check = 0; buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); if (buff == NULL) @@ -1001,12 +1034,45 @@ t1->psh = 0; t1->ack_seq = htonl(newsk->acked_seq); t1->doff = sizeof(*t1)/4+1; + +#ifdef CONFIG_RFC1644 + t1->fin = 0; + + if(newsk->TF_REQ_CC && newsk->TF_RCVD_CC) + { + /* + * Set up our good old CC options + */ + extern unsigned long tcp_ccgen; + + newsk->cc_send = CC_INC ( tcp_ccgen ); + + cc_opts = tcp_calc_cc_options(newsk,t1); + + ptr = skb_put(buff,20); + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = ((newsk->mtu) >> 8) & 0xff; + ptr[3] =(newsk->mtu) & 0xff; + + tcp_build_cc_options(newsk,(unsigned char *)(ptr+4),cc_opts); + t1->doff += 4; + ttcp_check = 1; + buff->csum = 0; + buff->csum = csum_partial(ptr, 20, 0); + } + else { +#endif ptr = skb_put(buff,4); ptr[0] = 2; ptr[1] = 4; ptr[2] = ((newsk->mtu) >> 8) & 0xff; ptr[3] =(newsk->mtu) & 0xff; buff->csum = csum_partial(ptr, 4, 0); +#ifdef CONFIG_RFC1644 + } +#endif + #ifdef CONFIG_SYN_COOKIES /* Don't save buff on the newsk chain if we are going to destroy * newsk anyway in a second, it just delays getting rid of newsk. 
@@ -1021,6 +1087,15 @@ atomic_sub(buff->truesize, &newsk->wmem_alloc); } #endif +#ifdef CONFIG_RFC1644 + /* + * If ttcp_check is set, then there is a larger buffer + * to checksum + */ + if (ttcp_check) + tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+20, buff); + else +#endif tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, buff); if (destroy) newsk->prot->queue_xmit(NULL, ndev, buff, 1); @@ -1177,7 +1252,65 @@ t1->seq = htonl(sk->sent_seq); t1->ack_seq = htonl(sk->acked_seq); t1->window = htons(tcp_select_window(sk)); +#ifdef CONFIG_RFC1644 + if (sk->TF_SENDSYN) { + t1->syn = 1; + sk->TF_SENDSYN = 0; + sk->sent_seq++; + } + + if (sk->TF_SENDFIN) { + t1->fin = 1; + sk->sent_seq++; + /* XXX added in following lines */ + if(!sk->TF_RCVD_CC) + sk->write_seq++; + } + /* + * XXX, I'll probably have to stick in the mtu stuff here + */ + if (sk->TF_REQ_CC && sk->TF_RCVD_CC) + { + /* + * Set up our good old CC options + */ + unsigned char *ptr = NULL; + int cc_opts = tcp_calc_cc_options(sk,t1); + + switch(cc_opts) { + case 0: + default: + /* + * No CC options to add on this ACK: the bare header still + * needs its checksum, nothing else on this path computes it. + */ + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); + break; + case 1: + case 2: + ptr = skb_put(buff,8); + t1->doff = (sizeof(struct tcphdr) + 8) / 4; + + tcp_build_cc_options(sk,ptr,cc_opts); + + buff->csum = 0; + buff->csum = csum_partial(ptr, 8, 0); + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1) + 8, buff); + break; + case 3: + ptr = skb_put(buff,16); + t1->doff = (sizeof(struct tcphdr) + 16) / 4; + + tcp_build_cc_options(sk,ptr,cc_opts); + buff->csum = 0; + buff->csum = csum_partial(ptr, 16, 0); + tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1) + 16, buff); + break; + } + } + else +#endif tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); if (sk->debug) printk(KERN_ERR "\rtcp_ack: seq %x ack %x\n", sk->sent_seq, sk->acked_seq); @@ -1206,12 +1339,26 @@ * [listen/close will never occur here anyway] */ +#ifdef CONFIG_RFC1644 + if (sk->state != TCP_ESTABLISHED && + sk->state != TCP_ESTABLISHED_TTCP && + sk->state != TCP_CLOSE_WAIT && + sk->state != 
TCP_CLOSE_WAIT_TTCP && + sk->state != TCP_FIN_WAIT1 && + sk->state != TCP_FIN_WAIT1_TTCP && + sk->state != TCP_LAST_ACK && + sk->state != TCP_LAST_ACK_TTCP && + sk->state != TCP_CLOSING && + sk->state != TCP_CLOSING_TTCP + ) +#else if (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT && sk->state != TCP_FIN_WAIT1 && sk->state != TCP_LAST_ACK && sk->state != TCP_CLOSING ) +#endif { return; }