diff -u /sys/netinet/tcp_input.c ./netinet_eifel/tcp_input.c
--- /sys/netinet/tcp_input.c	Tue Apr 20 12:09:15 1999
+++ ./netinet_eifel/tcp_input.c	Thu Apr 27 12:38:37 2000
@@ -97,7 +97,12 @@
 static void	 tcp_pulloutofband __P((struct socket *,
	     struct tcpiphdr *, struct mbuf *));
 static int	 tcp_reass __P((struct tcpcb *, struct tcpiphdr *,
	     struct mbuf *));
+
+#ifdef EIFEL_RTO
+static void	 tcp_xmit_timer __P((struct tcpcb *, short, short));
+#else
 static void	 tcp_xmit_timer __P((struct tcpcb *, int));
+#endif
 
 /*
@@ -557,11 +562,23 @@
 				 */
 				++tcpstat.tcps_predack;
 				if ((to.to_flag & TOF_TS) != 0)
+#ifdef EIFEL_RTO
+					tcp_xmit_timer(tp,
+					    tcp_now - to.to_tsecr + 1,
+					    TCP_RTT_SAMPL_EVERY);
+#else
 					tcp_xmit_timer(tp,
 					    tcp_now - to.to_tsecr + 1);
+#endif
 				else if (tp->t_rtt &&
					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
+#ifdef EIFEL_RTO
+					tcp_xmit_timer(tp,
+					    tp->t_rtt,
+					    TCP_RTT_SAMPL_ONCE);
+#else
 					tcp_xmit_timer(tp, tp->t_rtt);
+#endif
 				acked = ti->ti_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
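The fast-path hunk above shows the two RTT sampling modes of the EIFEL_RTO patch: with the timestamp option, every ACK yields a sample computed as tcp_now - to.to_tsecr + 1 (TCP_RTT_SAMPL_EVERY); without timestamps, only one timed segment per flight yields a sample via t_rtt (TCP_RTT_SAMPL_ONCE). A minimal user-space sketch of the timestamp-based sample, with illustrative names that are not taken from the patch:

#include <stdint.h>
#include <stdio.h>

/*
 * One RTT sample per ACK, derived from the echoed timestamp.
 * Mirrors "tcp_now - to.to_tsecr + 1" in the hunk above; the "+ 1"
 * keeps a same-tick echo from producing a zero sample, and the
 * unsigned subtraction handles timestamp wrap-around.
 */
static uint32_t
rtt_sample_from_tsecr(uint32_t tcp_now, uint32_t tsecr)
{
	return tcp_now - tsecr + 1;	/* in slow-timer ticks */
}

int
main(void)
{
	/* segment sent at tick 1000, its echo returns at tick 1003 */
	printf("sample = %u ticks\n", rtt_sample_from_tsecr(1003, 1000));
	return 0;
}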
@@ -1351,6 +1368,71 @@
 		    ti->ti_ack != tp->snd_una)
 			tp->t_dupacks = 0;
 		else if (++tp->t_dupacks == tcprexmtthresh) {
+#ifdef EIFEL_ALG
+			if (tp->t_rxtshift == 0) {
+				/*
+				 * Count this as the first retrans-
+				 * mission.
+				 * This ensures that old_snd_cwnd and
+				 * old_snd_ssthresh won't get over-
+				 * written in tcp_output() should
+				 * another retransmission (caused by
+				 * timeout) occur for this segment.
+				 * Also this provides for the code
+				 * that handles the response to a
+				 * detected spurious timeout to be
+				 * identical to the code that handles
+				 * the response to a detected spurious
+				 * fast retransmit (caused by packet
+				 * re-ordering). I.e., both cases are
+				 * detected and responded to in the
+				 * same way.
+				 */
+				tp->t_rxtshift++;
+
+				tp->old_snd_cwnd =
+					tp->snd_cwnd;
+				tp->old_snd_ssthresh =
+					tp->snd_ssthresh;
+
+				tp->t_timer[TCPT_REXMT] = 0;
+				tp->t_rtt = 0;
+
+				/* ti->ti_ack == tp->snd_una */
+				tp->snd_nxt = ti->ti_ack;
+				tp->snd_cwnd = tp->t_maxseg;
+				(void) tcp_output(tp);
+			} else {
+				/*
+				 * At least one timeout has
+				 * occurred before we got the 3rd
+				 * DUPACK. Thus, cwnd and ssthresh have
+				 * already been saved (we don't have
+				 * to save them again) but also
+				 * modified (slow start). Now that we
+				 * got the 3rd DUPACK we know better.
+				 * We can do congestion avoidance
+				 * instead. Therefore, we need to
+				 * first restore cwnd and ssthresh.
+				 */
+				tp->snd_cwnd =
+					tp->old_snd_cwnd;
+				tp->snd_ssthresh =
+					tp->old_snd_ssthresh;
+			}
+
+			tp->snd_nxt = tp->snd_max;
+			{
+				u_int win =
+				    min(tp->snd_wnd, tp->snd_cwnd)
+				    / 2 / tp->t_maxseg;
+				if (win < 2)
+					win = 2;
+				tp->snd_ssthresh = win * tp->t_maxseg;
+				tp->snd_cwnd = tp->snd_ssthresh +
+				    tp->t_maxseg * tp->t_dupacks;
+			}
+#else
 			tcp_seq onxt = tp->snd_nxt;
 			u_int win =
 			    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
@@ -1368,6 +1450,8 @@
 			    tp->t_maxseg * tp->t_dupacks;
 			if (SEQ_GT(onxt, tp->snd_nxt))
 				tp->snd_nxt = onxt;
+#endif /* EIFEL_ALG */
+
 			goto drop;
 		} else if (tp->t_dupacks > tcprexmtthresh) {
 			tp->snd_cwnd += tp->t_maxseg;
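The EIFEL_ALG hunk above introduces the save-and-restore bookkeeping around the congestion-control state: old_snd_cwnd and old_snd_ssthresh are written exactly once, on the first retransmission of a segment, and restored wholesale if that retransmission later proves spurious. A compilable sketch of just that bookkeeping; the struct and function names are stand-ins, not the patch's:

#include <stdint.h>

/* Illustrative stand-in for the few tcpcb fields involved. */
struct cb {
	uint32_t snd_cwnd, snd_ssthresh;
	uint32_t old_snd_cwnd, old_snd_ssthresh;
	int	 t_rxtshift;
};

/* On the FIRST retransmission only: remember the clamp values. */
static void
save_clamp(struct cb *tp)
{
	if (tp->t_rxtshift == 1) {
		tp->old_snd_cwnd = tp->snd_cwnd;
		tp->old_snd_ssthresh = tp->snd_ssthresh;
	}
}

/* If that retransmission turns out spurious, undo the reduction. */
static void
restore_clamp(struct cb *tp)
{
	tp->snd_cwnd = tp->old_snd_cwnd;
	tp->snd_ssthresh = tp->old_snd_ssthresh;
}

int
main(void)
{
	struct cb tp = { 10240, 65535, 0, 0, 0 };

	tp.t_rxtshift = 1;		/* first retransmission */
	save_clamp(&tp);
	tp.snd_cwnd = 512;		/* timeout response: slow start */
	tp.snd_ssthresh = 5120;
	restore_clamp(&tp);		/* spurious: undo the reduction */
	return tp.snd_cwnd == 10240 ? 0 : 1;
}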
@@ -1413,6 +1497,77 @@
 	}
 
 process_ACK:
+
+#ifdef EIFEL_REXMT
+		/*
+		 * We don't have to remember the timestamps of acked
+		 * segments anymore. However, segments can be acked
+		 * partially so we have to account for that case, too.
+		 */
+		if (tp->ts_list) {
+			while ((tp->ts_list + tp->ts_snd_una)->len &&
+			    SEQ_GEQ(ti->ti_ack,
+			    ((tp->ts_list + tp->ts_snd_una)->seq +
+			    (tp->ts_list + tp->ts_snd_una)->len))) {
+				(tp->ts_list + tp->ts_snd_una)->len = 0;
+				tp->ts_snd_una++;
+				if (tp->ts_snd_una >= tp->ts_list_max)
+					tp->ts_snd_una = 0;
+			}
+		}
+		if (tp->ts_list && !(tp->ts_list + tp->ts_snd_max)->len) {
+			bzero((caddr_t)tp->ts_list, tp->ts_list_max
+			    * sizeof(struct ts_list_entry));
+			tp->ts_snd_una = tp->ts_snd_max = 0;
+		}
+#endif
+
+#ifdef EIFEL_ALG
+		/*
+		 * If timestamps are used and we are waiting for
+		 * an ACK after a retransmission and we detect
+		 * that the retransmission was spurious (caused
+		 * either by a spurious timeout or a spurious
+		 * fast retransmit) then we resume transmission
+		 * off the top.
+		 * In addition, if only a single spurious
+		 * retransmission occurred we restore ssthresh
+		 * and cwnd to their original values. If two
+		 * spurious timeouts occurred we cut cwnd in half,
+		 * and if more than two spurious timeouts occurred
+		 * we do nothing (cwnd will remain set to
+		 * tp->t_maxseg). I.e., the more spurious timeouts
+		 * occurred for that packet, the more trouble the
+		 * connection is in and the more conservative we
+		 * need to be.
+		 *
+		 * Because timestamps can wrap, we use the macro
+		 * that is used for sequence number comparisons.
+		 */
+		if (to.to_flag & TOF_TS &&
+		    SEQ_LT(tp->snd_nxt, tp->snd_max) &&
+		    SEQ_GT(tp->ts_first_rexmit, to.to_tsecr)) {
+
+			tp->snd_nxt = tp->snd_max;
+
+			if (tp->t_rxtshift == 1) {
+				tp->snd_cwnd = tp->old_snd_cwnd;
+				tp->snd_ssthresh = tp->old_snd_ssthresh;
+			} else if (tp->t_rxtshift == 2) {
+				u_int win = min(tp->snd_wnd,
+				    tp->old_snd_cwnd)
+				    / 2 / tp->t_maxseg;
+				if (win < 2) {
+					tp->snd_cwnd = tp->t_maxseg;
+					tp->snd_ssthresh = 2 * tp->t_maxseg;
+				} else {
+					tp->snd_cwnd = tp->snd_ssthresh =
+					    win * tp->t_maxseg;
+				}
+			}
+		}
+#endif /* EIFEL_ALG */
+
 		acked = ti->ti_ack - tp->snd_una;
 		tcpstat.tcps_rcvackpack++;
 		tcpstat.tcps_rcvackbyte += acked;
@@ -1427,9 +1582,19 @@
 		 * Recompute the initial retransmit timer.
 		 */
 		if (to.to_flag & TOF_TS)
+#ifdef EIFEL_RTO
+			tcp_xmit_timer(tp,
+			    tcp_now - to.to_tsecr + 1,
+			    TCP_RTT_SAMPL_EVERY);
+#else
 			tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
+#endif
 		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+#ifdef EIFEL_RTO
+			tcp_xmit_timer(tp, tp->t_rtt, TCP_RTT_SAMPL_ONCE);
+#else
 			tcp_xmit_timer(tp, tp->t_rtt);
+#endif
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
@@ -1441,7 +1606,26 @@
 			tp->t_timer[TCPT_REXMT] = 0;
 			needoutput = 1;
 		} else if (tp->t_timer[TCPT_PERSIST] == 0)
+#ifdef EIFEL_REXMT
+		{
+			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+			/*
+			 * Restart the REXMT with RTO minus the age of
+			 * the oldest unacked sequence number.
+			 */
+			if (tp->ts_list &&
+			    (tp->ts_list + tp->ts_snd_una)->len &&
+			    (tcp_now - (tp->ts_list + tp->ts_snd_una)->ts > 0))
+				tp->t_timer[TCPT_REXMT] -=
+				    (tcp_now - (tp->ts_list + tp->ts_snd_una)->ts);
+
+			if (tp->t_timer[TCPT_REXMT] < 1)
+				tp->t_timer[TCPT_REXMT] = 1;
+		}
+#else
 			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+#endif
 
 		/*
 		 * If no data (only SYN) was ACK'd,
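The test in the EIFEL_ALG hunk above is the heart of the algorithm: if an ACK arrives while snd_nxt is still below snd_max (i.e., during a retransmission episode) and it echoes a timestamp older than the one sent with the first retransmission, the ACK must have been triggered by the original transmission, so the retransmission was spurious. A self-contained user-space rendering of the wrap-safe predicate; the function name and driver values are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a > b" for 32-bit timestamps, like the SEQ_GT macro. */
#define TS_GT(a, b)	((int32_t)((a) - (b)) > 0)

/*
 * The Eifel test: an ACK that echoes a timestamp OLDER than the
 * timestamp of the first retransmission was sent in response to
 * the original transmission; the retransmission was spurious.
 */
static int
retransmit_was_spurious(uint32_t ts_first_rexmit, uint32_t tsecr)
{
	return TS_GT(ts_first_rexmit, tsecr);
}

int
main(void)
{
	/* original sent at tick 100, retransmitted at tick 160 */
	printf("%d\n", retransmit_was_spurious(160, 100)); /* 1: spurious */
	printf("%d\n", retransmit_was_spurious(160, 160)); /* 0: genuine  */
	return 0;
}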
@@ -1950,6 +2134,244 @@
 /*
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  */
+#ifdef EIFEL_RTO
+void
+tcp_xmit_timer(tp, rtt, sample)
+	register struct tcpcb *tp;
+	short rtt, sample;
+{
+	/*
+	 * We want to calculate as precisely as possible and
+	 * are using large scales. Thus, we need the shifter in
+	 * case t_srtt and t_rttvar are only 16-bit values.
+	 */
+	register int32_t shifter;
+	register u_short smooth_gain;
+	register u_short rttvar_weight = 3;
+
+#if (PR_SLOWHZ > 2)
+	register int32_t delta;
+#else
+	register int delta;
+#endif
+
+	tcpstat.tcps_rttupdated++;
+
+	/*
+	 * In the following we explain the new features of the
+	 * Eifel-Xmit-Timer:
+	 *
+	 * 1. We don't use the magic numbers (g = 1/8 and h = 1/4) as GAINs.
+	 * These constants cause SRTT and RTTVAR to scale poorly and cause
+	 * the RTO predictor to "fall into" the RTT quickly when the pipe
+	 * capacity gets large. Instead we use 1/(SSTHRESH + 1) (SSTHRESH
+	 * in multiples of the maximum segment size, and "+1" because the
+	 * first flight of packets in congestion avoidance is not SSTHRESH
+	 * but (SSTHRESH + 1)) as the GAIN for both moving averages as
+	 * long as we are network-limited. The motivation for this is that
+	 * we want to weigh the new DELTA proportionally to the number of
+	 * RTT samples we get per flight. Thus, (SSTHRESH + 1) is a good
+	 * (conservative) approximation for that (actually, (SSTHRESH + 1)
+	 * can be seen as a smoothed-cwnd signal). We do not want to use
+	 * the exact number of packets (samples) in flight, which is
+	 * (snd_max - snd_una). That signal is too noisy. So far we have
+	 * assumed that every packet is timed. If delayed ACKs are used we
+	 * multiply the GAIN by 2. However, at this point we have
+	 * not implemented a "delayed-ACK detection mechanism". Thus,
+	 * even if we receive delayed ACKs we do not multiply the GAIN
+	 * by 2 as described above. That is, however, how it should
+	 * eventually be done.
+	 * If we only get one sample per flight, we use the magic GAIN
+	 * of 1/3.
+	 *
+	 * For the same reasons given for the choice of GAIN, the factor
+	 * with which RTTVAR is weighed in the RTO calculation should be
+	 * the inverse of that GAIN.
+	 *
+	 * 2. We filter out negative DELTAs when calculating RTTVAR. The
+	 * original calculation uses the absolute value of DELTA, which
+	 * causes the RTO predictor to fire up when the signal (RTT) goes
+	 * down. This is not what we want and it contradicts basic control
+	 * theory. Instead, we are still conservative and assume a constant
+	 * RTTVAR as long as SRTT is above the RTT.
+	 *
+	 * 3. We put in "shock absorbers" to ensure that the RTO reacts
+	 * quickly to RTT increases but slowly when the RTT decreases. In
+	 * the latter case we use the square of the gain instead of just
+	 * the gain. The motivation for squaring is again that we want to
+	 * slow the RTO decrease with respect to the number of packets in
+	 * flight.
+	 *
+	 * 4. We always restart REXMT correctly, i.e., with RTO minus the
+	 * "age" of the oldest outstanding packet. Without that feature
+	 * the REXMT is always off by roughly one RTT, depending on various
+	 * factors (e.g., delayed ACKs, and interactive vs. bulk traffic).
+	 * We have implemented this feature as an independent patch called
+	 * EIFEL_REXMT.
+	 *
+	 * INTEGER ARITHMETIC AND SCALES:
+	 * In all our calculations we assume we are running on a machine
+	 * with a 10 ms timer granularity. In that case, however, t_srtt
+	 * and t_rttvar need to be at least 32 bits long. With a 500 ms
+	 * granularity 16 bits are sufficient because we only consider RTTs
+	 * of at most 64 seconds, which is 128 x 500 ms ticks.
+	 * We want the GAIN for RTTVAR and SRTT to scale to an SSTHRESH of
+	 * about 50 x MSS, i.e., more than 100 packets in flight. Beyond
+	 * that the RTO predictor will get more aggressive. Because we are
+	 * using the square of the gain at one point in the formula, we
+	 * need a GAIN_SCALE of 2^16. That way (1/50)^2 is still different
+	 * from (1/49)^2 in the integer arithmetic we are using below.
+	 *
+	 * More detail can be found in the Eifel-Xmit-Timer paper.
+	 */
+
+	/*
+	 * RTT == (rtt - 1):
+	 * There is no point in considering RTTs beyond TCPTV_REXMTMAX.
+	 * Such RTTs are possible! E.g., on wireless links that implement
+	 * very persistent link layer ARQ ;-)
+	 */
+	if ((rtt - 1) > TCPTV_REXMTMAX)
+		rtt = TCPTV_REXMTMAX + 1;
+
+	if ((sample == TCP_RTT_SAMPL_EVERY) ||
+	    (sample == TCP_RTT_SAMPL_EVERY_2)) {
+		/*
+		 * As long as snd_ssthresh has not been touched from its
+		 * initial value, we use snd_cwnd instead. The same is true
+		 * when cwnd grows beyond 2 * ssthresh.
+		 */
+		if (tp->snd_ssthresh == (TCP_MAXWIN << TCP_MAX_WINSHIFT))
+			rttvar_weight = min(tp->snd_wnd, tp->snd_cwnd)
+			    / 2 / tp->t_maxseg + 1;
+		else
+			rttvar_weight = max(min(tp->snd_wnd, tp->snd_cwnd)
+			    / 2 / tp->t_maxseg + 1,
+			    min(tp->snd_wnd, tp->snd_ssthresh)
+			    / tp->t_maxseg + 1);
+
+		/*
+		 * We have to smooth changes of t_wght. Otherwise it
+		 * can happen that the RTO skyrockets when t_wght
+		 * suddenly increases a lot. The idea here is to change
+		 * t_wght by one (unscaled) per RTT sample we get.
+		 * Actually, this should be made dependent on the RTT
+		 * sampling rate (once per flight, every packet, or
+		 * every 2nd packet) but ... it shouldn't matter too
+		 * much.
+		 */
+		if ((tp->t_wght >> TCP_RTT_SHIFT) < rttvar_weight)
+			tp->t_wght += TCP_RTT_SCALE;
+		else if ((tp->t_wght >> TCP_RTT_SHIFT) > rttvar_weight)
+			tp->t_wght -= TCP_RTT_SCALE;
+		rttvar_weight = tp->t_wght >> TCP_RTT_SHIFT;
+
+		/*
+		 * This has never been tested because we never
+		 * implemented a sender-side delayed-ACK
+		 * detection mechanism.
+		 */
+		/* if (sample == TCP_RTT_SAMPL_EVERY_2)
+			rttvar_weight >>= 1; */
+	}
+
+	/*
+	 * We also need magic numbers. "3" turns out to be a good
+	 * value with which we are safe against spurious timeouts.
+	 */
+	if ((rttvar_weight < 3) || (sample == TCP_RTT_SAMPL_ONCE))
+		rttvar_weight = 3;
+
+	/*
+	 * With a TCP_GAIN_SCALE of 2^16 and a minimum rttvar_weight
+	 * of 3, the maximum smooth_gain is 21845. This is
+	 * important to remember to make sure that we don't get
+	 * overflows.
+	 */
+	smooth_gain = TCP_GAIN_SCALE / rttvar_weight;
+
+	if (tp->t_srtt != 0) {
+		/*
+		 * DELTA = RTT - SRTT
+		 */
+		delta = ((rtt - 1) << TCP_RTT_SHIFT) - tp->t_srtt;
+
+		/*
+		 * SRTT += GAIN * DELTA
+		 */
+		shifter = smooth_gain * delta;
+		tp->t_srtt += shifter >> TCP_GAIN_SHIFT;
+
+		/*
+		 * t_srtt cannot be 0 because that is used to indicate
+		 * that no rtt measurement is available yet.
+		 */
+		if (tp->t_srtt <= 0)
+			tp->t_srtt = 1;
+
+		/*
+		 * RTTVAR += GAIN * (DELTA - RTTVAR)
+		 *
+		 * BUT:
+		 * A. Filter out negative DELTAs, leaving RTTVAR constant
+		 *    in that case.
+		 * B. Make the RTO come down slowly
+		 *    (the "shock absorber").
+		 */
+		if (delta >= 0) {
+			if (delta - tp->t_rttvar < 0)
+				smooth_gain = TCP_GAIN_SCALE
+				    / (rttvar_weight * rttvar_weight);
+			shifter = smooth_gain * (delta - tp->t_rttvar);
+			tp->t_rttvar += shifter >> TCP_GAIN_SHIFT;
+		}
+
+		if (tp->t_rttvar < 0)
+			tp->t_rttvar = 0;	/* no minimum */
+
+		/*
+		 * RTO = SRTT + 1/GAIN * RTTVAR
+		 */
+		shifter = (tp->t_srtt + (rttvar_weight * tp->t_rttvar))
+		    >> TCP_RTT_SHIFT;
+		tp->t_rto = shifter;
+	} else {
+		/*
+		 * Let's start conservatively. The first RTT we get
+		 * is most likely the SYN's RTT. On bandwidth-dominated
+		 * paths that RTT can be VERY different from that of
+		 * a data segment!
+		 * SRTT = RTT and RTTVAR = 4 * max(RTT, 1 tick)
+		 */
+		tp->t_srtt = (rtt - 1) << TCP_RTT_SHIFT;
+		tp->t_rttvar = max((rtt - 1), 1) << (TCP_RTTVAR_SHIFT + 2);
+	}
+	tp->t_rtt = 0;
+	tp->t_rxtshift = 0;
+
+	/*
+	 * The minimum for RTO should be RTT + 2 ticks.
+	 * If the RTT was zero (which it often is with
+	 * a timer resolution of 500 ms) we then get
+	 * a minimum of 500 to 1000 ms because of the
+	 * heartbeat timer which expires somewhere
+	 * between 0 and 500 ms.
+	 *
+	 * Remember: RTT = (rtt - 1)
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+	    (rtt + 1), TCPTV_REXMTMAX);
+
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently. This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+#else /* EIFEL_RTO */
+
 static void
 tcp_xmit_timer(tp, rtt)
 	register struct tcpcb *tp;
@@ -2024,6 +2446,8 @@
 	tp->t_softerror = 0;
 }
 
+#endif /* EIFEL_RTO */
+
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
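The GAIN_SCALE choice motivated in the comment above can be checked with plain integer arithmetic: with TCP_GAIN_SCALE = 2^16, the squared gains for neighboring weights such as 1/50 and 1/49 still map to distinct integers, which is exactly what the "shock absorber" needs. A quick stand-alone check, using only the patch's own constant:

#include <stdio.h>

#define TCP_GAIN_SCALE	65536	/* 2^16, as in the patch */

int
main(void)
{
	/* first-order gains 1/50 and 1/49, scaled */
	printf("gain(50)   = %d\n", TCP_GAIN_SCALE / 50);	 /* 1310 */
	printf("gain(49)   = %d\n", TCP_GAIN_SCALE / 49);	 /* 1337 */
	/* squared ("shock absorber") gains 1/50^2 and 1/49^2 */
	printf("gain(50^2) = %d\n", TCP_GAIN_SCALE / (50 * 50)); /* 26 */
	printf("gain(49^2) = %d\n", TCP_GAIN_SCALE / (49 * 49)); /* 27 */
	return 0;
}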
@@ -2186,6 +2610,26 @@
 	}
 	tp->t_maxseg = mss;
 
+#ifdef EIFEL_REXMT
+	/*
+	 * This array holds the timestamps of every unacked segment.
+	 */
+	tp->ts_list_max = so->so_snd.sb_hiwat / tp->t_maxseg;
+	if (tp->ts_list)
+		free((void *)tp->ts_list, M_TEMP);
+	tp->ts_list =
+	    (struct ts_list_entry *)malloc(tp->ts_list_max
+	    * sizeof(struct ts_list_entry),
+	    M_TEMP,
+	    M_DONTWAIT);
+	if (tp->ts_list)
+		bzero((caddr_t)tp->ts_list, tp->ts_list_max
+		    * sizeof(struct ts_list_entry));
+	else
+		tp->ts_list_max = 0;
+	tp->ts_snd_una = tp->ts_snd_max = 0;
+#endif
+
 #ifdef RTV_RPIPE
 	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
 #endif
@@ -2229,3 +2673,4 @@
 
 	return rt->rt_ifp->if_mtu - sizeof(struct tcpiphdr);
 }
+
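The allocation above sizes one ts_list entry per potentially outstanding segment (send-buffer size divided by MSS). The list is used as a ring: ts_snd_una and ts_snd_max index the oldest and newest live entries, and len == 0 marks a free slot. Below is a user-space model of the ring's ACK-driven advance, mirroring the process_ACK loop earlier in this file; the driver values are made up:

#include <stdint.h>
#include <stdio.h>

struct ts_list_entry {		/* as declared in tcp_var.h below */
	uint32_t seq;		/* first sequence number of the segment */
	long	 len;		/* segment length; 0 == entry unused */
	uint32_t ts;		/* send time in ticks */
};

#define LIST_MAX 4

/* Drop entries fully covered by a cumulative ACK (cf. process_ACK). */
static unsigned
advance_una(struct ts_list_entry *list, unsigned una, uint32_t ack)
{
	while (list[una].len &&
	    (int32_t)(ack - (list[una].seq + list[una].len)) >= 0) {
		list[una].len = 0;	/* mark the entry free */
		una = (una + 1) % LIST_MAX;
	}
	return una;
}

int
main(void)
{
	struct ts_list_entry list[LIST_MAX] = {
		{ 1000, 512, 10 }, { 1512, 512, 11 }, { 2024, 512, 12 },
	};
	unsigned una = advance_una(list, 0, 2024);	/* acks first two */

	printf("ts_snd_una -> %u, oldest ts = %u\n", una, list[una].ts);
	return 0;
}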
diff -u /sys/netinet/tcp_output.c ./netinet_eifel/tcp_output.c
--- /sys/netinet/tcp_output.c	Wed Apr  7 15:25:52 1999
+++ ./netinet_eifel/tcp_output.c	Wed Oct 13 06:28:22 1999
@@ -62,6 +62,9 @@
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
+#ifdef EIFEL_REXMT
+#include <sys/malloc.h>
+#endif
 
 #ifdef notyet
 extern struct mbuf *m_copypack();
@@ -356,6 +359,17 @@
 	    (tp->t_flags & TF_RCVD_TSTMP))) {
 		u_int32_t *lp = (u_int32_t *)(opt + optlen);
 
+#ifdef EIFEL_ALG
+		/*
+		 * Remember the timestamp of the FIRST retransmission.
+		 * This allows us to detect spurious retransmissions
+		 * later.
+		 */
+		if (SEQ_LT(tp->snd_nxt, tp->snd_max) && tp->t_rxtshift == 1) {
+			tp->ts_first_rexmit = tcp_now;
+		}
+#endif
+
 		/* Form timestamp option as shown in appendix A of RFC 1323. */
 		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
 		*lp++ = htonl(tcp_now);
@@ -567,6 +581,143 @@
 		ti->ti_seq = htonl(tp->snd_nxt);
 	else
 		ti->ti_seq = htonl(tp->snd_max);
+
+#ifdef EIFEL_REXMT
+	/*
+	 * Remember the exact time at which each segment is sent.
+	 * This is needed when we *re-* start the REXMT timer for
+	 * every ACK for new data in tcp_input().
+	 *
+	 * We do this remembering in an array. If this is a
+	 * retransmission we do not need to create a new entry
+	 * but have to update the appropriate list entry. If we
+	 * don't find that entry, something weird happened
+	 * (repacketization?) and we mark the entire ts_list
+	 * as invalid. It will then be rebuilt with the next
+	 * new segments. Until then we have to do without the
+	 * feature of restarting REXMT precisely, i.e., REXMT
+	 * will be more conservative, which is not so bad. The same
+	 * happens if we don't get memory.
+	 */
+	if (tp->ts_list && len) {
+		/*
+		 * The last malloc() for tp->ts_list succeeded, and
+		 * the segment carries data, i.e., it is a segment
+		 * that might need to be retransmitted.
+		 */
+		if (SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
+			/*
+			 * This is not a retransmission.
+			 */
+			if (((tp->ts_snd_max + 1) % tp->ts_list_max) ==
+			    tp->ts_snd_una) {
+				/*
+				 * tp->ts_list is full. So make it twice as
+				 * large and keep the timestamps of those
+				 * packets that are currently unacked.
+				 */
+				struct ts_list_entry *ts_new_list, *tle;
+				size_t size = 2 * tp->ts_list_max * sizeof(*tle);
+
+				tle = ts_new_list = (struct ts_list_entry *)
+				    malloc(size, M_TEMP, M_DONTWAIT);
+				if (ts_new_list == NULL) {
+					free(tp->ts_list, M_TEMP);
+					tp->ts_list = NULL;
+					tp->ts_list_max = 0;
+					tp->ts_snd_una = tp->ts_snd_max = 0;
+				} else {
+					bzero((caddr_t)ts_new_list, size);
+					while (tp->ts_snd_una !=
+					    ((tp->ts_snd_max + 1) % tp->ts_list_max)) {
+						*tle++ = tp->ts_list[tp->ts_snd_una++];
+						if (tp->ts_snd_una >= tp->ts_list_max)
+							tp->ts_snd_una = 0;
+					}
+					free(tp->ts_list, M_TEMP);
+					tp->ts_list = ts_new_list;
+					tp->ts_snd_una = 0;
+					tp->ts_snd_max = tp->ts_list_max - 1;
+					tp->ts_list_max = 2 * tp->ts_list_max;
+				}
+			}
+
+			if (tp->ts_list) {
+				/*
+				 * Doubling the size of tp->ts_list went OK
+				 * and/or we have a free entry above
+				 * ts_snd_max.
+				 */
+				if ((tp->ts_list + tp->ts_snd_max)->len) {
+					/*
+					 * tp->ts_list is not empty.
+					 */
+					tp->ts_snd_max++;
+					if (tp->ts_snd_max >= tp->ts_list_max)
+						tp->ts_snd_max = 0;
+				} else
+					/*
+					 * This is the first valid entry in
+					 * tp->ts_list.
+					 */
+					tp->ts_snd_una = tp->ts_snd_max = 0;
+				(tp->ts_list + tp->ts_snd_max)->seq =
+				    ntohl(ti->ti_seq);
+				(tp->ts_list + tp->ts_snd_max)->len = len;
+				(tp->ts_list + tp->ts_snd_max)->ts = tcp_now;
+			}
+		} else {
+			/*
+			 * This is a retransmission and tp->ts_list is not
+			 * NULL. Find the entry that holds the timestamp of
+			 * the segment we are about to retransmit. Usually,
+			 * this is *(tp->ts_list + tp->ts_snd_una) but TCP
+			 * can do a go-back-N (e.g., after a spurious timeout
+			 * when the Eifel Algorithm is not used) so in that
+			 * case we have to look for the right entry and
+			 * update it.
+			 */
+			if ((tp->ts_list + tp->ts_snd_una)->len) {
+				/*
+				 * Do this only when tp->ts_list was not
+				 * marked as invalid on a pass through this
+				 * branch before.
+				 */
+				u_short idx = tp->ts_snd_una;
+				while ((idx != ((tp->ts_snd_max + 1)
+				    % tp->ts_list_max)) &&
+				    SEQ_LT((tp->ts_list + idx)->seq,
+				    tp->snd_nxt)) {
+					idx++;
+					if (idx >= tp->ts_list_max)
+						idx = 0;
+				}
+
+				if ((idx == ((tp->ts_snd_max + 1)
+				    % tp->ts_list_max)) ||
+				    (((tp->ts_list + idx)->seq !=
+				    tp->snd_nxt) ||
+				    ((tp->ts_list + idx)->len != len))) {
+					/*
+					 * If the entry was not found or does
+					 * not match the sequence number and
+					 * length of the segment we are going
+					 * to retransmit, something weird
+					 * happened (repacketization?) and we
+					 * mark tp->ts_list as invalid. It
+					 * will get rebuilt with the next new
+					 * packets.
+					 */
+					bzero((caddr_t)tp->ts_list,
+					    tp->ts_list_max
+					    * sizeof(struct ts_list_entry));
+					tp->ts_snd_una = tp->ts_snd_max = 0;
+				} else if ((tp->ts_list + idx)->len)
+					(tp->ts_list + idx)->ts = tcp_now;
+			}
+		}
+	}
+#endif
+
 	ti->ti_ack = htonl(tp->rcv_nxt);
 	if (optlen) {
 		bcopy(opt, ti + 1, optlen);
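When the ring fills, the hunk above doubles it and compacts the live entries to the front, so ts_snd_una restarts at 0; if the allocation fails, the whole list is dropped and the precise-restart feature simply switches itself off. A user-space sketch of that grow step under the same rules, with calloc in place of the kernel malloc and stand-in names:

#include <stdlib.h>
#include <stdint.h>

struct ts_list_entry { uint32_t seq; long len; uint32_t ts; };

/*
 * Grow a full timestamp ring to twice its size, copying the live
 * entries (una..max, in ring order) to the front of the new array.
 * Returns the new array, or NULL when memory is short (the caller
 * then runs without the ring), and rewrites *unap, *maxp, *nmaxp,
 * just like the tcp_output() hunk above.
 */
static struct ts_list_entry *
grow_ring(struct ts_list_entry *old, unsigned *unap, unsigned *maxp,
    unsigned *nmaxp)
{
	unsigned n = *nmaxp, una = *unap, i = 0;
	struct ts_list_entry *new = calloc(2 * n, sizeof(*new));

	if (new == NULL) {
		free(old);
		*unap = *maxp = *nmaxp = 0;
		return NULL;
	}
	while (una != (*maxp + 1) % n) {	/* copy the live entries */
		new[i++] = old[una];
		una = (una + 1) % n;
	}
	free(old);
	*unap = 0;
	*maxp = n - 1;		/* n live entries occupy slots 0..n-1 */
	*nmaxp = 2 * n;
	return new;
}

int
main(void)
{
	unsigned una = 2, max = 1, nmax = 4;	/* full: (1 + 1) % 4 == 2 */
	struct ts_list_entry *l = calloc(nmax, sizeof(*l));
	unsigned i;

	if (l == NULL)
		return 1;
	for (i = 0; i < nmax; i++)
		l[(una + i) % nmax] =
		    (struct ts_list_entry){ 1000 + 512 * i, 512, i };
	l = grow_ring(l, &una, &max, &nmax);
	return (l && nmax == 8 && una == 0 && max == 3) ? 0 : 1;
}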
@@ -754,4 +905,9 @@
 	    TCPTV_PERSMIN, TCPTV_PERSMAX);
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
+#ifdef EIFEL_FOREVER
+	if (tp->t_rxtshift == TCP_MAXRXTSHIFT)
+		tp->t_rxtshift--;
+#endif
 }
+
diff -u /sys/netinet/tcp_subr.c ./netinet_eifel/tcp_subr.c
--- /sys/netinet/tcp_subr.c	Wed Feb  3 22:40:28 1999
+++ ./netinet_eifel/tcp_subr.c	Wed Oct 13 05:58:36 1999
@@ -311,6 +311,12 @@
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+
+#ifdef EIFEL_RTO
+	tp->t_wght = 3 << TCP_RTT_SHIFT;
+	tp->t_rto = TCPTV_RTOBASE;
+#endif
+
 	tp->t_rttmin = TCPTV_MIN;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
@@ -361,6 +367,11 @@
 	register struct rtentry *rt;
 	int dosavessthresh;
 
+#ifdef EIFEL_REXMT
+	if (tp->ts_list)
+		free(tp->ts_list, M_TEMP);
+#endif
+
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
@@ -759,3 +770,4 @@
 tcp_cleartaocache()
 {
 }
+
diff -u /sys/netinet/tcp_timer.c ./netinet_eifel/tcp_timer.c
--- /sys/netinet/tcp_timer.c	Fri Apr 24 02:25:35 1998
+++ ./netinet_eifel/tcp_timer.c	Wed Oct 13 06:14:35 1999
@@ -226,6 +226,10 @@
 	 * to a longer retransmit interval and retransmit one segment.
 	 */
 	case TCPT_REXMT:
+#ifdef EIFEL_FOREVER
+		if (tp->t_rxtshift == TCP_MAXRXTSHIFT)
+			tp->t_rxtshift--;
+#endif
 		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 			tp->t_rxtshift = TCP_MAXRXTSHIFT;
 			tcpstat.tcps_timeoutdrop++;
@@ -251,6 +255,12 @@
 			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 			tp->t_srtt = 0;
 		}
+#ifdef EIFEL_ALG
+		if (tp->t_rxtshift == 1) {
+			tp->old_snd_cwnd = tp->snd_cwnd;
+			tp->old_snd_ssthresh = tp->snd_ssthresh;
+		}
+#endif
 		tp->snd_nxt = tp->snd_una;
 		/*
 		 * Force a segment to be sent.
@@ -308,6 +318,10 @@
 	 * (no responses to probes) reaches the maximum
 	 * backoff that we would use if retransmitting.
 	 */
+#ifdef EIFEL_FOREVER
+	if (tp->t_rxtshift == TCP_MAXRXTSHIFT)
+		tp->t_rxtshift--;
+#endif
 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
 	    (tp->t_idle >= tcp_maxpersistidle ||
 	    tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
@@ -369,3 +383,4 @@
 	}
 	return (tp);
 }
+
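EIFEL_FOREVER, used in tcp_setpersist() above and twice in the timer code, keeps t_rxtshift pinned one below TCP_MAXRXTSHIFT so that neither the retransmit nor the persist path ever reaches the give-up threshold: the connection backs off to the maximum interval but is never dropped. A toy rendering of the retransmit-timer step; the constant value and function shape are illustrative:

#include <stdio.h>

#define TCP_MAXRXTSHIFT 12

/* Backoff step as in the REXMT timer, with the EIFEL_FOREVER clamp. */
static int
backoff_step(int t_rxtshift, int eifel_forever)
{
	if (eifel_forever && t_rxtshift == TCP_MAXRXTSHIFT)
		t_rxtshift--;	/* never reach the drop threshold */
	if (++t_rxtshift > TCP_MAXRXTSHIFT)
		t_rxtshift = TCP_MAXRXTSHIFT;	/* would drop conn here */
	return t_rxtshift;
}

int
main(void)
{
	int s = 11, i;

	for (i = 0; i < 3; i++) {
		s = backoff_step(s, 1);
		printf("t_rxtshift = %d (connection kept alive)\n", s);
	}
	return 0;
}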
diff -u /sys/netinet/tcp_var.h ./netinet_eifel/tcp_var.h
--- /sys/netinet/tcp_var.h	Wed Jan 20 09:32:00 1999
+++ ./netinet_eifel/tcp_var.h	Wed Oct 13 05:58:36 1999
@@ -40,7 +40,15 @@
  * Kernel variables for tcp.
  */
 
-/*
+#ifdef EIFEL_REXMT
+struct ts_list_entry {
+	tcp_seq	seq;
+	long	len;
+	u_long	ts;
+};
+#endif
+
+/*
  * Tcp control block, one per tcp; fields:
  * Organized for 16 byte cacheline efficiency.
  */
@@ -96,6 +104,12 @@
 					 * for slow start exponential to
 					 * linear switch
 					 */
+#ifdef EIFEL_ALG
+	u_long	old_snd_cwnd;
+	u_long	old_snd_ssthresh;
+	u_long	ts_first_rexmit;
+#endif
+
 	u_int	t_maxopd;		/* mss plus options */
 	u_int	t_idle;			/* inactivity time */
@@ -105,8 +119,19 @@
 	int	t_rxtcur;		/* current retransmit value */
 	u_int	t_maxseg;		/* maximum segment size */
-	int	t_srtt;			/* smoothed round-trip time */
+
+#ifdef EIFEL_RTO
+#if (PR_SLOWHZ > 2)
+	int32_t	t_srtt;			/* smoothed round-trip time */
+	int32_t	t_rttvar;		/* variance in round-trip time */
+#else
+	int	t_srtt;			/* smoothed round-trip time */
 	int	t_rttvar;		/* variance in round-trip time */
+#endif
+#else
+	int	t_srtt;			/* smoothed round-trip time */
+	int	t_rttvar;		/* variance in round-trip time */
+#endif
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rttmin;		/* minimum rtt allowed */
@@ -114,6 +139,32 @@
 	u_long	max_sndwnd;		/* largest window peer has offered */
 
 	int	t_softerror;		/* possible error not yet reported */
+
+#ifdef EIFEL_REXMT
+	struct ts_list_entry *ts_list;
+	u_short	ts_snd_una;
+	u_short	ts_snd_max;
+	u_short	ts_list_max;
+#endif
+
+#ifdef EIFEL_RTO
+/*
+ * We remember the RTO of the connection even though that is redundant
+ * info, as it can be recomputed from t_srtt, t_rttvar, and t_wght.
+ * However, it makes the code more readable!
+ */
+	short	t_rto;
+
+/*
+ * The weight factor for RTTVAR in the calculation of RTO:
+ */
+#if (PR_SLOWHZ > 2)
+	int32_t	t_wght;
+#else
+	short	t_wght;
+#endif
+#endif
+
 /* out-of-band data */
 	char	t_oobflags;		/* have some */
 	char	t_iobc;			/* input character */
@@ -171,6 +222,23 @@
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
 #define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
 
+#ifdef EIFEL_RTO
+
+#define	TCP_RTT_SAMPL_ONCE	0	/* RTT is measured once per flight */
+#define	TCP_RTT_SAMPL_EVERY	1	/* RTT is measured for every packet */
+#define	TCP_RTT_SAMPL_EVERY_2	2	/* with delayed ACKs */
+
+#define	TCP_RTT_SCALE		64
+#define	TCP_RTT_SHIFT		6
+#define	TCP_RTTVAR_SCALE	64
+#define	TCP_RTTVAR_SHIFT	6
+#define	TCP_GAIN_SCALE		65536
+#define	TCP_GAIN_SHIFT		16
+
+#define	TCP_REXMTVAL(tp)	(tp)->t_rto
+
+#else
+
 /*
  * The smoothed round-trip time and estimated variance
  * are stored as fixed point numbers scaled by the values below.
@@ -206,6 +274,8 @@
 	max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
 	    + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
 
+#endif /* EIFEL_RTO */
+
 /*
  * TCP statistics.
  * Many of these should be kept per connection,
@@ -369,3 +439,4 @@
 #endif /* KERNEL */
 
 #endif /* _NETINET_TCP_VAR_H_ */
+
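With the scales defined above (TCP_RTT_SHIFT = 6, so t_srtt and t_rttvar are kept multiplied by 64), the Eifel RTO computed in tcp_xmit_timer() is RTO = (SRTT + weight * RTTVAR) >> TCP_RTT_SHIFT, and TCP_REXMTVAL(tp) simply reads the cached t_rto. A worked example with made-up tick values:

#include <stdio.h>
#include <stdint.h>

#define TCP_RTT_SHIFT	6	/* SRTT/RTTVAR scaled by 64 */

int
main(void)
{
	/* say SRTT = 8 ticks and RTTVAR = 2 ticks, stored scaled */
	int32_t t_srtt = 8 << TCP_RTT_SHIFT;	/* 512 */
	int32_t t_rttvar = 2 << TCP_RTT_SHIFT;	/* 128 */
	int rttvar_weight = 3;			/* the floor used above */

	/* RTO = SRTT + weight * RTTVAR, descaled (cf. tcp_xmit_timer) */
	int t_rto = (t_srtt + rttvar_weight * t_rttvar) >> TCP_RTT_SHIFT;
	printf("t_rto = %d ticks\n", t_rto);	/* 8 + 3*2 = 14 */
	return 0;
}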