/*
 * tcp_var.h
 *
 * Derived from:
 *
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this notice is preserved and that due credit is given
 * to the University of California at Berkeley. The name of the University
 * may not be used to endorse or promote products derived from this
 * software without specific prior written permission. This software
 * is provided ``as is'' without express or implied warranty.
 *
 *	@(#)tcp_var.h	7.6 (Berkeley) 12/7/87
 *
 * Modified for x-kernel v3.3
 * Modifications Copyright (c) 1991,1996  Arizona Board of Regents
 *
 * $Revision: 1.3 $
 * $Date: 1996/02/01 15:30:57 $
 */

/*
 * xtcp configuration:  This is a half-assed attempt to make xtcp
 * self-configure for a few varieties of 4.2 and 4.3-based unixes.
 * If you don't have a) a 4.3bsd vax or b) a 3.x Sun (x<6), check
 * this carefully (it's probably not right).  Please send me mail
 * if you run into configuration problems.
 *  - Van Jacobson (van@lbl-csam.arpa)
 */
#ifndef tcp_var_h
#define tcp_var_h

#include "tcp_timer.h"

/* No BSD release level supplied by the build: pretend we are 4.2. */
#if !defined(BSD)
#define BSD 42
#endif

/*
 * Interoperability with 4.2 systems is switched on for Sun builds and
 * for any release level below 4.3.
 */
#if sun || BSD < 43
#define TCP_COMPAT_42
#endif

/*
 * Provide SB_MAX when the system headers did not: reuse SB_MAXCOUNT if
 * that exists (Sun's name for it), otherwise fall back to 65535.
 * The labels after #endif are written as comments; bare tokens after
 * #endif are not valid ISO C and trigger "extra tokens" diagnostics.
 */
#ifndef SB_MAX
#ifdef SB_MAXCOUNT
#define	SB_MAX SB_MAXCOUNT	/* Sun has to be a little bit different... */
#else
#define SB_MAX 65535		/* XXX */
#endif /* SB_MAXCOUNT */
#endif /* SB_MAX */

/*
 * Bill Nowicki pointed out that the page size (CLBYTES) has
 * nothing to do with the mbuf cluster size.  So, we followed
 * Sun's lead and made the new define MCLBYTES stand for the mbuf
 * cluster size.  The following define makes us backwards compatible
 * with 4.3 and 4.2.  If CLBYTES is >1024 on your machine, check
 * this against the mbuf cluster definitions in /usr/include/sys/mbuf.h.
 */
/* Fall back to the page-size constant when no mbuf cluster size is given. */
#if !defined(MCLBYTES)
#define	MCLBYTES CLBYTES	/* XXX */
#endif

/*
 * The routine in_localaddr is broken in Sun's 3.4.  We redefine ours
 * (in tcp_input.c) so we can use it but won't have a name conflict.
 */
/* On Sun builds every use of in_localaddr is redirected to our own copy. */
#if defined(sun)
#define in_localaddr tcp_in_localaddr
#endif

/* --------------- end of xtcp config ---------------- */

/*
 * Kernel variables for tcp.
 */

/*
 * One element of a doubly linked list of segments; struct tcpcb keeps
 * such a list as its sequencing queue (seg_next / seg_prev).
 */
struct reass {
  struct reass *next;	/* following element */
  struct reass *prev;	/* preceding element */
  struct tcphdr th;	/* TCP header stored with this element */
  Msg m;		/* x-kernel message stored with this element */
};

/*
 * Tcp control block, one per tcp (connection).
 *
 * The members fall into these groups, in declaration order: reassembly
 * queue, connection state and timers, flags (the TF_* bits are defined
 * inline next to t_flags), the send/receive sequence variables of the
 * protocol specification, implementation-specific window and congestion
 * variables, round-trip timing, out-of-band data, RFC 1323 state, and
 * finally a long run of experimental "Vegas" fields (v_*).
 *
 * Member order and types determine the in-memory layout; do not reorder.
 */
struct tcpcb {
	struct	reass *seg_next;	/* sequencing queue */
	struct	reass *seg_prev;
	short	t_state;		/* state of this connection */
	short	t_timer[TCPT_NTIMERS];	/* tcp timers */
	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
	short	t_rxtcur;		/* current retransmit value */
	short	t_dupacks;		/* consecutive dup acks recd */
        int     t_rexmtthresh;          /* number of dupacks before
                                           retransmit */
        int     t_slowstart;            /* number of packets to send
                                           after slowstart */
	u_short	t_maxseg;		/* maximum segment size */
	char	t_force;		/* 1 if forcing out a byte */
	u_short	t_flags;		/* TF_* bits below; values reach 0x0200,
					 * so more than 8 bits are needed */
#define	TF_ACKNOW	0x01		/* ack peer immediately */
#define	TF_DELACK	0x02		/* ack, but try to delay it */
#define	TF_NODELAY	0x04		/* don't delay packets to coalesce */
#define	TF_NOOPT	0x08		/* don't use tcp options */
#define	TF_SENTFIN	0x10		/* have sent FIN */
#define	TF_REQ_SCALE	0x0020		/* have/will request window scaling */
#define	TF_RCVD_SCALE	0x0040		/* other side has requested scaling */
#define	TF_REQ_TSTMP	0x0080		/* have/will request timestamps */
#define	TF_RCVD_TSTMP	0x0100		/* a timestamp was received in SYN */
#define	TF_SACK_PERMIT	0x0200		/* other side said I could SACK */
	struct	tcpiphdr *t_template;	/* skeletal packet for transmit */
	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
/*
 * The following fields are used as in the protocol specification.
 * See RFC 793 (TCP).  The original note read "RFC783, Dec. 1981,
 * page 21", which appears to be a typo for RFC 793.
 */
/* send sequence variables */
	tcp_seq	snd_una;		/* send unacknowledged */
	tcp_seq	snd_nxt;		/* send next */
	tcp_seq	snd_up;			/* send urgent pointer */
	tcp_seq	snd_wl1;		/* window update seg seq number */
	tcp_seq	snd_wl2;		/* window update seg ack number */
	tcp_seq	iss;			/* initial send sequence number */
	u_int	snd_wnd;		/* send window */
/* receive sequence variables */
	u_int  rcv_wnd;		/* receive window */
	tcp_seq	rcv_nxt;		/* receive next */
	tcp_seq	rcv_up;			/* receive urgent pointer */
	tcp_seq	irs;			/* initial receive sequence number */
/*
 * Additional variables for this implementation.
 */
/* receive variables */
	tcp_seq	rcv_adv;		/* advertised window */
/* retransmit variables */
	tcp_seq	snd_max;		/* highest sequence number sent
					 * used to recognize retransmits
					 */
/* congestion control (for slow start, source quench, retransmit after loss) */
	u_int	snd_cwnd;		/* congestion-controlled window */
	u_int  snd_ssthresh;		/* snd_cwnd size threshold for
					 * slow start exponential to
					 * linear switch */
/*
 * transmit timing stuff.
 * srtt and rttvar are stored as fixed point; for convenience in smoothing,
 * srtt has 3 bits to the right of the binary point, rttvar has 2.
 * "Variance" is actually smoothed difference.
 */
	short	t_idle;			/* inactivity time */
	short	t_rtt;			/* round trip time */
	tcp_seq	t_rtseq;		/* sequence number being timed */
	short	t_srtt;			/* smoothed round-trip time */
	short	t_rttvar;		/* variance in round-trip time */
        u_short t_rttmin;               /* minimum rtt allowed */
	u_short max_rcvd;		/* most peer has sent into window */
	u_int	max_sndwnd;		/* largest window peer has offered */
/* out-of-band data */
	char	t_oobflags;		/* have some (TCPOOB_* bits below) */
	char	t_iobc;			/* input character */
#define	TCPOOB_HAVEDATA	0x01
#define	TCPOOB_HADDATA	0x02
	short	t_softerror;		/* possible error not yet reported */

/* LSB: from 4.4 */
/* RFC 1323 variables */
	u_char	snd_scale;		/* window scaling for send window */
	u_char	rcv_scale;		/* window scaling for recv window */
	u_char	request_r_scale;	/* pending window scaling */
	u_char	requested_s_scale;	/* NOTE(review): presumably the scale the
					 * peer requested -- confirm in users */
	u_int	ts_recent;		/* timestamp echo data */
	u_int	ts_recent_age;		/* when last updated */
	tcp_seq	last_ack_sent;		/* NOTE(review): undocumented here; by name,
					 * the last ack value sent -- confirm */

	/* Added to test delayed acks vs. immediate acks */
	int	delack;
	int	win_incr2;

        /* Vegas timer stuff */
	/* NOTE(review): the five fields below carry no description in this
	 * header; see the Vegas code that uses them for their meaning. */
        int     v_rtt;
        int     v_sa;
        int     v_sd;
        int     v_timeout;
        int     v_worried;		/* While > 0, check for packet
					 * timeout */
        int     v_time_cwin_chg;	/* Time when congestion window
					 * was last decreased. Used to prevent
					 * decreasing cwin more than once
					 * for losses which occurred at
					 * same cwin level */
        int     v_synsent;		/* Time when SYN was sent. Used to
					 * set v_rtt at beg of connection */
	int	v_newcwnd;		/* Stores new uninflated cwnd, used
					 * to reset cwnd later. Same function
					 * as snd_ssthresh, but now
					 * snd_ssthresh is only used during
					 * slow-start */

        /* Vegas exp decrease in rate. Option to decrease window exponentially
	 * (sends on every other ACK) rather than waiting 1/2 RTT to start
	 * sending. NOT USED anymore (better to be conservative)
	 */
        int     v_send_toggle;
        int     v_use_sndwnd;
        int     v_max_send;

	/* New Vegas stuff (12/14/93) */
	/* Protect from spikes. Limits output rate to at most twice the
	 * current rate.
	 * It helps for fairness, and results in a few fewer losses,
	 * but there is a problem when the receiver doesn't ACK every
	 * packet immediately (delayed ACKs) resulting in the sender
	 * not being able to keep up with the congestion window.
	 * Therefore it is OFF by default, and may be removed in the final
	 * version.
	 */
	int	v_spike_do;
	int	v_spike_nextTime;
	int	v_spike_timeInc;
	int	v_spike_byteCnt;

	/* Exp increase every other during slow-start. */
	int	v_exp_inc_do;
	int	v_exp_inc_flag;
	int	v_exp_inc_cnt;
	int	v_exp_inc_isless;
	int	v_exp_inc_nseg;

	/* Congestion detection (or more correctly avoidance ) variables.
	 * The mechanism checks once per RTT. Current RTT began with sequence
	 * v_cong_detect_begseq
	 */
	int	v_cong_detect_do;	      	/* Whether to do it */
	tcp_seq	v_cong_detect_begseq;		/* Sequence number at which the
						 * current RTT began */
	int	v_cong_detect_begtime;		/* When RTT began */
	int	v_cong_detect_predict_do;	/* Whether to predict the
						 * available bw during
						 * slow-start */
	int	v_cong_detect_last_sendtime;
	int	v_cong_detect_baseRtt;		/* Current base RTT, usually
						 * the minimum */
	int	v_cong_detect_sumRTT;		/* Used to get average */
	int	v_cong_detect_cntRTT;		/* RTT during a RTT interval */
	/* The congestion window should stay within band. Going above
	 * it means that not enough buffers are being used at the
	 * bottleneck, so cwnd should increase. Going below the band means
	 * that too many buffers are being used, so cwnd should decrease.
	 */
	int	v_cong_detect_top_isless;	/* => cwnd < top of band */
	int	v_cong_detect_top_nseg;
	int	v_cong_detect_bot_isless;	/* => cwnd < bot of band */
	int	v_cong_detect_bot_nseg;
	int	v_incr;				/* How much to increase cwnd
						 * when an ACK is received
						 */

	/*
	 * Spare slots: experiments can use these without changing the
	 * size or layout of the structure.
	 */
	int 	v_other1;	/* For testing new ideas, so don't need    */
	int	v_other2;	/* to recompile everything due to changing */
	int	v_other3;	/* this .h file */
	int	v_other4;
	int	v_other5;
	int	v_other6;
	int	v_other7;
	int	v_other8;
	int	v_other9;
	int	v_other10;
	int	v_other11;
	int	v_other12;
	int	v_other13;
	int	v_other14;
};

/* Internet PCB -> TCP control block: the pointer is kept in inp_ppcb. */
#define	intotcpcb(inp)	((struct tcpcb *)(inp)->inp_ppcb)
/* Socket -> TCP control block, going through the socket's internet PCB. */
#define	sototcpcb(sock)	(intotcpcb(sotoinpcb(sock)))

/* LSB: from 4.4 */
/*
 * The smoothed round-trip time and estimated variance
 * are stored as fixed point numbers scaled by the values below.
 * For convenience, these scales are also used in smoothing the average
 * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
 * With these scales, srtt has 3 bits to the right of the binary point,
 * and thus an "ALPHA" of 0.875.  rttvar has 2 bits to the right of the
 * binary point, and is smoothed with an ALPHA of 0.75.
 */
/* Each scale is 2^shift, so it is derived from the shift it pairs with. */
#define	TCP_RTT_SHIFT		3	/* shift for srtt; 3 bits frac. */
#define	TCP_RTT_SCALE		(1 << TCP_RTT_SHIFT)	/* multiplier for srtt (8) */
#define	TCP_RTTVAR_SHIFT	2	/* shift for rttvar; 2 bits frac. */
#define	TCP_RTTVAR_SCALE	(1 << TCP_RTTVAR_SHIFT)	/* multiplier for rttvar (4) */
/*
 * Retransmit value: rtt + 4 * rttvar.
 *
 * t_srtt is unscaled by TCP_RTT_SHIFT; t_rttvar is used as stored,
 * which is correct only while the rttvar scale factor
 * (TCP_RTTVAR_SCALE) equals the multiplier 4 in the formula.
 *
 * The smoothing leaves srtt and rttvar with an average bias of +1/2
 * tick each.  The timer needs 1/2 tick of rounding plus 1 tick for the
 * +-1/2 tick uncertainty in when it fires, and the bias supplies
 * exactly that 1.5 ticks.  Because the bias is only statistical,
 * callers must still check that the result does not drop below the
 * minimum feasible timer (2 ticks).
 *
 * The argument is evaluated twice.
 */
#define	TCP_REXMTVAL(tcb) \
	((tcb)->t_rttvar + ((tcb)->t_srtt >> TCP_RTT_SHIFT))

/*
 * TCP statistics.
 * Many of these should be kept per connection,
 * but that's inconvenient at the moment.
 *
 * The counters come in groups: connection life-cycle and timers, then
 * send-side, then receive-side, followed by the additions taken from
 * 4.4 and those added for Vegas.  When BSD <= 43 (BSD defaults to 42 in
 * this header when the build does not set it), five older int counters
 * are placed in front, which shifts every later member.
 */
struct	tcpstat {
#if BSD<=43
       /* NOTE(review): pre-4.3 counters, undocumented here; meanings are
        * suggested only by their names -- confirm against the users. */
       int     tcps_badsum;
       int     tcps_badoff;
       int     tcps_hdrops;
       int     tcps_badsegs;
       int     tcps_unack;
       /* 4.3+ BSD stats start here */
#endif
	u_int	tcps_connattempt;	/* connections initiated */
	u_int	tcps_accepts;		/* connections accepted */
	u_int	tcps_connects;		/* connections established */
	u_int	tcps_drops;		/* connections dropped */
	u_int	tcps_conndrops;		/* embryonic connections dropped */
	u_int	tcps_closed;		/* conn. closed (includes drops) */
	u_int	tcps_segstimed;		/* segs where we tried to get rtt */
	u_int	tcps_rttupdated;	/* times we succeeded */
	u_int	tcps_delack;		/* delayed acks sent */
	u_int	tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
	u_int	tcps_rexmttimeo;	/* retransmit timeouts */
	u_int	tcps_persisttimeo;	/* persist timeouts */
	u_int	tcps_keeptimeo;		/* keepalive timeouts */
	u_int	tcps_keepprobe;		/* keepalive probes sent */
	u_int	tcps_keepdrops;		/* connections dropped in keepalive */

	u_int	tcps_sndtotal;		/* total packets sent */
	u_int	tcps_sndpack;		/* data packets sent */
	u_int	tcps_sndbyte;		/* data bytes sent */
	u_int	tcps_sndrexmitpack;	/* data packets retransmitted */
	u_int	tcps_sndrexmitbyte;	/* data bytes retransmitted */
	u_int	tcps_sndacks;		/* ack-only packets sent */
	u_int	tcps_sndprobe;		/* window probes sent */
	u_int	tcps_sndurg;		/* packets sent with URG only */
	u_int	tcps_sndwinup;		/* window update-only packets sent */
	u_int	tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */

	u_int	tcps_rcvtotal;		/* total packets received */
	u_int	tcps_rcvpack;		/* packets received in sequence */
	u_int	tcps_rcvbyte;		/* bytes received in sequence */
	u_int	tcps_rcvbadsum;		/* packets received with cksum errs */
	u_int	tcps_rcvbadoff;		/* packets received with bad offset */
	u_int	tcps_rcvshort;		/* packets received too short */
	u_int	tcps_rcvduppack;	/* duplicate-only packets received */
	u_int	tcps_rcvdupbyte;	/* duplicate-only bytes received */
	u_int	tcps_rcvpartduppack;	/* packets with some duplicate data */
	u_int	tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
	u_int	tcps_rcvoopack;		/* out-of-order packets received */
	u_int	tcps_rcvoobyte;		/* out-of-order bytes received */
	u_int	tcps_rcvpackafterwin;	/* packets with data after window */
	u_int	tcps_rcvbyteafterwin;	/* bytes rcvd after window */
	u_int	tcps_rcvafterclose;	/* packets rcvd after "close" */
	u_int	tcps_rcvwinprobe;	/* rcvd window probe packets */
	u_int	tcps_rcvdupack;		/* rcvd duplicate acks */
	u_int	tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
	u_int	tcps_rcvackpack;	/* rcvd ack packets */
	u_int	tcps_rcvackbyte;	/* bytes acked by rcvd acks */
	u_int	tcps_rcvwinupd;		/* rcvd window update packets */
/* LSB: from 4.4 */
	u_int	tcps_pawsdrop;		/* segments dropped due to PAWS */
	u_int	tcps_predack;		/* times hdr predict ok for acks */
	u_int	tcps_preddat;		/* times hdr predict ok for data pkts */
	u_int	tcps_pcbcachemiss;	/* NOTE(review): undocumented; by name, a
					 * count of pcb cache misses -- confirm */
/* LSB: for Vegas */
	u_int  tcps_dupftimeo;		/* Fine grained timeout on dup ACK */
	u_int  tcps_worryftimeo;	/* Fine grained timeout on worry */

};


/*struct	inpcb tcb;*/		/* head of queue of active tcpcb's */
/*struct	tcpstat tcpstat;*/	/* tcp statistics */
/*
 * Routines that operate on a TCP control block; their definitions are
 * not in this header.  One declarator per declaration.
 */
struct tcpiphdr *tcp_template(struct tcpcb *tp);
struct tcpcb *tcp_destroy(struct tcpcb *tp);
struct tcpcb *tcp_drop(struct tcpcb *tp, int);
/* struct	tcpcb *tcp_timers(); */
struct tcpcb *tcp_disconnect(struct tcpcb *tp);
struct tcpcb *tcp_usrclosed(struct tcpcb *tp);

#endif
