1 /*
2  $Log:	rdp_prim.c,v $
3  * Revision 2.15  85/03/12  08:41:13  walsh
4  * Don't advise the user about ENOBUFS errors passed back from device drivers+
5  * ip_send() when the protocol has a retransmission strategy.
6  *
7  * Revision 2.14  85/03/06  10:06:16  walsh
8  * Corrected some error handling and reporting.
9  *
10  * Revision 2.13  85/02/26  08:26:56  walsh
11  * First pass at using IP source routing information to establish connections
12  * (possibly with hosts not known by the Internet gateways.)  The hooks with
13  * TCP could be better done - particularly dealing with IP addresses in the
14  * header for checksums and tcpdb lookups.
15  *
16  * Revision 2.12  84/11/15  09:56:04  walsh
17  * redid how we deal with compiler padding in the RDP header structure.
18  *
19  * Revision 2.11  84/11/08  16:11:51  walsh
20  * Added code to gather statistics on RDP traffic.  This makes the RDPCB
21  * too big unless you make mbufs 512 bytes large.  RDP_CS should be turned off
22  * unless you do.
23  *
24  * Revision 2.10  84/11/06  15:24:26  walsh
25  * *** empty log message ***
26  *
27  * Revision 2.9  84/11/06  14:30:23  walsh
28  * introduced RDP_HLSHIFT
29  *
30  * Revision 2.8  84/11/06  09:09:16  walsh
31  * added missing include.
32  *
33  * Revision 2.7  84/11/05  15:55:28  walsh
34  * update_nulltimer() macro began to look inappropriate with recent
35  * changes, so it's been stripped out and put in-line.
36  *
37  * Revision 2.6  84/11/05  15:23:23  walsh
38  * *** empty log message ***
39  *
40  * Revision 2.5  84/11/05  15:17:53  walsh
41  * added comments on acknowledgement strategy.
42  *
43  * Revision 2.4  84/11/05  11:05:20  walsh
44  * comment and adjust number for rdp_iss in a mathematically correct way
45  * as a result of benchmarks (cf. operationally correct).
46  *
47  * Revision 2.3  84/11/02  18:24:09  walsh
48  * Protocol specifiers want NULL message to have own sequence number in
49  * case of slow (t>NULL msg timeout) packets.  I don't see this as a problem,
50  * and even if happened (dubious) would only delay discovery, but I
51  * didn't win this one.  Initially not designed for this, but fixes are
52  * in almost neatly.
53  *
54  * Revision 2.2  84/11/02  15:28:56  walsh
55  * Allow for RDP header fields not on natural boundaries.  (Protocol
56  * specifiers say will be part of next version in 6-12 months).
57  * Until then, there goes the speed...  Yucho modifications.
58  *
59  * Revision 2.1  84/11/02  10:13:48  walsh
60  * Fixed to include RCS comments in checked out source.
61  *
62  *
63  * description:
64  * some primitives for RDP.
65  *
66  * revision 1.10
67  * date: 84/07/19 10:21:35;  author: walsh;  state: Exp;  lines added/del: 1/0
68  * Organized macros and classified their definitions in rdp_macros.h.
69  *
70  * revision 1.9
71  * date: 84/07/18 18:50:44;  author: walsh;  state: Exp;  lines added/del: 5/0
72  * Added provision for sending of NULL messages.  These are sent on an idle
73  * connection to determine that the other side still exists.
74  *
75  * revision 1.8
76  * date: 84/07/12 20:04:16;  author: walsh;  state: Exp;  lines added/del: 1/2
77  * *** empty log message ***
78  *
79  * revision 1.7
80  * date: 84/07/12 13:48:14;  author: walsh;  state: Exp;  lines added/del: 31/16
81  * Rather than in-line stuffing of IP/RDP headers, at least half of which are
82  * constant, copy headers in from a template of what the headers are like.  The
83  * bcopy() call is turned into a movc3 instruction on the VAX by a sed script
84  * run over the assembler output of the C compiler.  Marginal speed-up.
85  *
86  * revision 1.6
87  * date: 84/07/12 09:29:48;  author: walsh;  state: Exp;  lines added/del: 6/9
88  * Found:
89  * 1.  If MGET saves anything over mget, it's down in the noise, so skip it.
90  * 2.  stuff_eacks as macro DOES save time.
91  * 3.  Optimized stuff_eacks by timing and looking at a lot of assembler output
92  *	from the C compiler.
93  *
94  * revision 1.5
95  * date: 84/07/10 09:41:52;  author: walsh;  state: Exp;  lines added/del: 43/41
96  * Corrected problem with conversion of stuff_eacks from a function to a
97  * macro.
98  *
99  * Changed rdp_sendpkt to avoid unnecessary allocation and deallocation of
100  * an mbuf for syn and eack options.
101  *
102  * revision 1.4
103  * date: 84/07/09 14:39:54;  author: walsh;  state: Exp;  lines added/del: 1/0
104  * Part of ACK-delay algorithm.  Whenever send a packet with an ACK, set
105  * ACK-delay timer to zero.
106  *
107  * revision 1.3
108  * date: 84/07/06 14:10:35;  author: wjacobso;  state: Exp;  lines added/del: 34/34
109  * stuff_eacks made into macro; added register var definitions
110  *
111  * revision 1.2
112  * date: 84/07/06 09:49:37;  author: root;  state: Exp;  lines added/del: 11/4
113  * This version seems to run bug-free.
114  *
115  * revision 1.1
116  * date: 84/06/26 14:17:39;  author: walsh;  state: Exp;
117  * Initial revision
118  *
119  */
120 
121 
122 #ifdef RDP
123 #include "../h/param.h"
124 #include "../h/dir.h"
125 #include "../h/user.h"
126 #include "../h/kernel.h"
127 #include "../h/inode.h"
128 #include "../h/mbuf.h"
129 #include "../h/socket.h"
130 #include "../h/socketvar.h"
131 #include "../h/protosw.h"
132 
133 #include "../net/if.h"
134 #include "../net/route.h"
135 
136 #include "../bbnnet/in.h"
137 #include "../bbnnet/in_var.h"
138 #include "../bbnnet/net.h"
139 #include "../bbnnet/in_pcb.h"
140 #include "../bbnnet/ip.h"
141 #include "../bbnnet/rdp.h"
142 #include "../bbnnet/rdp_macros.h"
143 
144 extern struct inpcb rdp;
145 
146 rdp_template(rdpcb)
147 RDPCB	*rdpcb;
148 {
149     register struct ip	*ip;
150     register RDPHDR		*pkt;
151     register INPCB		*inp;
152 
153     ip	= (struct ip *) rdpcb->r_template;
154     pkt	= (RDPHDR *) (ip+1);
155     inp	= rdpcb->r_inpcb;
156 
157     ip->ip_p	= IPPROTO_RDP;
158     ip->ip_tos	= 0;
159     ip->ip_src	= inp->inp_laddr;
160     ip->ip_dst	= inp->inp_faddr;
161 
162     pkt->rh_ver	= RDP_VERSION;
163     pkt->rh_hdrlen	= RDPHDRSZ >> RDP_HLSHIFT;
164     pkt->rh_sport	= inp->inp_lport;
165     pkt->rh_dport	= inp->inp_fport;
166     pkt->rh_flags	= 0;
167     RDP_CKSUM(pkt)	= 0;
168 }
169 
170 /***********************************************************************
171  Comments on the Acknowledgement Strategy
172 
173  I.  It would be desirable to save network, CPU, and buffering resources
174 	by not retransmitting packets which have arrived at the destination
175 	host.  One might think of doing this by using the cumulative ack
176 	(ACK) for flow control and the extended ack (EACK) for reliability.
177 	[EACK upon reception by protocol module, ACK upon reception by
178 	application]
179 
180  You can't do this.
181 
182  1.  The protocol specifiers state that an implementation may choose
183 	to treat ACK#1 EACK#2 EACK#3 as ACK#3.  We want to avoid receiving
184 	packets we can't buffer.  (see goals above)
185  2.  NULL packets are sent on idle connections (all packets acknowledged)
186 	and occupy sequence number space.  We want to avoid situations
187 	where a NULL packet goes out of our window, doesn't get acked,
188 	and the connection gets dropped.  (connection would be idle since
189 	no transmissions/retransmissions would be going on if packets
190 	received by RDP, but destination process busy doing something
191 	else.)
192  3.  Relying on ACKs to be sent to move the window's edge after the
193 	datagram is picked up by the user process has to deal with the
194 	fact that that ACK may be lost.  In TCP, one uses a persistence
195 	timer to deal with this.  Use of ACK/EACK above implies that
196 	there is no data with which to perform persistence (if buffers
197 	reclaimed at EACK and not at ACK.)  One could think of using
198 	NULL messages to reopen the window, but a) NULL messages only
199 	start up after LONG delays,  b) implementations are only required
200 	to respond to, not initiate, NULL messages.
201 
202  2 would be a correctness error.  1+3 would be stylistic problems.
203  Therefore, the acknowledgement strategy implemented here is to
204  ACK or EACK messages only after they have been received by the destination
205  process.  This means that some unnecessary retransmissions might occur,
206  but such is life.  (Unnecessary from the point of view that the
207  information made it through the network a-o.k.)
208 
209  If the application is slow to pick up messages from the protocol,
210  then that will be reflected in the round trip time estimate (and
211  retransmission time) for the other end.  So, maybe this isn't all
212  that bad.  Especially since most processes will be fairly interested
213  and responsive to network input.
214 
215  ***********************************************************************/
216 
/*
 * stuff_eacks(rdpcb, optm) - gather extended acknowledgements.
 *
 * Visits each of the rq_maxqlen slots of rdpcb's receive queue once,
 * starting at rq_front and wrapping around.  For every slot marked
 * RDP_DELIVERED an EACKOPTIONS entry is appended to the mbuf optm,
 * holding (in network byte order) rq_baseseq plus the slot's distance
 * from the front of the queue.  optm->m_len grows by
 * sizeof(EACKOPTIONS) for each entry.
 *
 * optm must be an lvalue of type MBUF *:
 *	- if it is NULL, an mbuf is obtained with
 *	  m_get(M_DONTWAIT, MT_HEADER) when the first entry is needed.
 *	  If that fails the scan stops and optm is left NULL.  If nothing
 *	  needs acknowledging optm also stays NULL.
 *	- if it is not NULL, entries are appended after the m_len bytes
 *	  already in it.  (Previously the write pointer was left
 *	  uninitialized in this case.)
 *
 * No check is made against the capacity of the mbuf:
 * rdp_init ensures max # of options fit in mbuf
 *
 * Both arguments are evaluated more than once, so pass plain variables.
 * The se_ prefix keeps the macro's temporaries from capturing names used
 * in the arguments.  The do { } while (0) wrapper makes the expansion a
 * single statement; the break below leaves only the inner scan loop.
 */
#define stuff_eacks(rdpcb, optm) \
do { \
	register int		 se_pass;				\
	register int		 se_index;				\
		 EACKOPTIONS	*se_eopt;				\
 \
	se_pass		= 0;						\
	se_index	= (rdpcb)->r_rcvq.rq_front;			\
	se_eopt		= ((optm) == NULL) ? (EACKOPTIONS *) NULL :	\
	    (EACKOPTIONS *) (mtod((optm), caddr_t) + (optm)->m_len);	\
 \
	do {								\
		if ((rdpcb)->r_rcvq.rq_msgs[se_index] == RDP_DELIVERED){ \
			if ((optm) == NULL){				\
				(optm) = m_get(M_DONTWAIT, MT_HEADER);	\
				if ((optm) == NULL)			\
					break;				\
				(optm)->m_len = 0;			\
				se_eopt = mtod((optm), EACKOPTIONS *);	\
			}						\
			se_eopt->rh_eackno =				\
			    htonl((rdpcb)->r_rcvq.rq_baseseq + se_pass); \
			se_eopt ++;					\
			(optm)->m_len += sizeof(EACKOPTIONS);		\
		}							\
		se_index = (se_index + 1) % (rdpcb)->r_rcvq.rq_maxqlen;	\
	} while (++se_pass < (rdpcb)->r_rcvq.rq_maxqlen);		\
} while (0)
246 
247 rdp_sendpkt (rdpcb, data, datalen, seqnum)
248 register RDPCB	*rdpcb;
249 MBUF	*data;
250 int datalen;	/* length of data mbuf chain in bytes */
251 rdpsequence seqnum;	/* host order.  seq# of this packet */
252 {
253     register MBUF	*m;
254     register RDPHDR	*pkt;
255 
256     m = m_get(M_DONTWAIT, MT_HEADER);
257     if (m == NULL)
258     {
259 	m_freem(data);
260 	return (ENOBUFS);
261     }
262 
263     m->m_len = sizeof(struct ip) + RDPHDRSZ;
264     m->m_off = MMAXOFF
265 	- (sizeof(struct ip) + RDPHDRSZ + sizeof(SYNOPTIONS));
266     m->m_next = data;
267 
268     pkt = (RDPHDR *) (mtod(m, caddr_t) + sizeof(struct ip));
269     /*
270      * Fills in most IP and RDP header fields
271      */
272     bcopy (rdpcb->r_template, mtod(m, caddr_t), (unsigned)RDP_TEMPLSIZE);
273 
274     pkt->rh_dlen	= htons((u_short)datalen);
275     RDP_SEQNO(pkt)	= htonl((u_long)seqnum);
276 
277     if (rdpcb->r_sendrst)
278     {
279 	pkt->rh_flags |= RDP_fRST;
280 #ifdef RDP_CS
281 	rdpcb->r_sent.r_rstpkts ++;
282 #endif
283     }
284     else
285     {
286 	if (! rdpcb->r_synacked)
287 	{
288 	    register SYNOPTIONS *synopt;
289 
290 	    pkt->rh_flags	|= RDP_fSYN;
291 	    pkt->rh_hdrlen	+= sizeof(SYNOPTIONS) >> RDP_HLSHIFT;
292 	    m->m_len	+= sizeof(SYNOPTIONS);
293 	    synopt	= RDP_OPT(pkt, SYNOPTIONS *);
294 	    synopt->rh_nbuf = htons((u_short)rdpcb->r_ournbuf);
295 	    synopt->rh_maxlen = htons((u_short)rdpcb->r_ourmaxlen + HDRSLOP);
296 	    synopt->rh_options = 0;
297 	    if (rdpcb->r_sequential)
298 		synopt->rh_options |= RDP_oSEQUENTIAL;
299 	    synopt->rh_options = htons(synopt->rh_options);
300 #ifdef RDP_CS
301 	    rdpcb->r_sent.r_synpkts ++;
302 #endif
303 	}
304 	else
305 	{
306 	    register MBUF *optm;
307 
308 	    /* possible EACK */
309 	    optm = NULL;
310 	    stuff_eacks(rdpcb, optm);
311 
312 	    if (optm)
313 	    {
314 #define OKSZ (datalen+RDPHDRSZ+sizeof(struct ip)+optm->m_len <= rdpcb->r_hismaxlen)
315 		if (OKSZ)
316 		{
317 		    pkt->rh_flags |= RDP_fEACK;
318 		    pkt->rh_hdrlen += optm->m_len >> RDP_HLSHIFT;
319 		    optm->m_next = m->m_next;
320 		    m->m_next = optm;
321 		}
322 		else
323 		    m_free(optm);
324 #undef OKSZ
325 	    }
326 
327 	    if (rdpcb->r_sendnull)
328 	    {
329 		rdpcb->r_sendnull = FALSE;
330 		pkt->rh_flags |= RDP_fNULL;
331 #ifdef RDP_CS
332 		rdpcb->r_sent.r_nullpkts ++;
333 #endif
334 	    }
335 	}
336 
337 	if (rdpcb->r_synrcvd)
338 	{
339 	    rdpcb->r_timers[RDP_tACKDELAY] = 0;
340 	    pkt->rh_flags |= RDP_fACK;
341 	    RDP_ACKNO(pkt) = htonl(rdpcb->r_rcvq.rq_baseseq -1);
342 	}
343     }
344 
345     RDP_CKSUM(pkt) = rdp_cksum (m);
346 
347 #ifdef RDP_CS
348     rdpcb->r_sent.r_total ++;
349 #endif
350 
351     if (debug_rdpcb(rdpcb))
352 	rdp_debug (rdpcb, mtod(m, caddr_t), -1, RDP_sSAME);
353 
354     /*
355      * and ship packet off via IP.  Remember that since this protocol
356      * involves retransmissions, errors can occur asynchronous to a
357      * (write) system call, and that therefore we can not send the
358      * error all the way back up through subroutine return values.  We
359      * must also post it back via advise_user() at some point, and this
360      * looks like a good point to try it.
361      */
362     {
363 	register int	error;
364 
365 	error = ip_send (rdpcb->r_inpcb, m,(int)( datalen + hdrlen(pkt)),FALSE);
366 
367 	if (error)
368 	    /*
369 	     * Since we use retransmissions, don't need to tell user
370 	     * process about this.  (Can be as simple as interface
371 	     * or host structure queues are too long due to current
372 	     * heavy traffic.  Backing off will take care of that.)
373 	     */
374 	    if (error != ENOBUFS)
375 		advise_user(rdpcbtoso(rdpcb), error);
376 	return (error);
377     }
378 }
379 
380 rdp_timeo()
381 {
382     register struct inpcb *inp, *next;
383     register RDPCB	*rdpcb;
384     register int	 timer;
385     int s;
386     register rdpstate newstate;
387 
388     s = splnet();
389     rdp_iss += RDP_ISSINCR;
390     /*
391      * Remember, if CLOSEWAIT timer goes off, lose rdpcb's mbuf and
392      * and next pointer in it.
393      */
394     inp = rdp.inp_next;
395     while (inp != &rdp)
396     {
397 	next = inp->inp_next;
398 	if (rdpcb = inptordpcb(inp))
399 	{
400 	    rdpcb->r_rtt ++;
401 
402 	    for (timer = 0; timer < RDP_NTIMERS; timer++)
403 	    {
404 		if (rdpcb->r_timers[timer])
405 		{
406 		    rdpcb->r_timers[timer] --;
407 		    if (rdpcb->r_timers[timer] == 0)
408 		    {
409 			RDP_ACTION (RDP_iTIMER, rdpcb, timer, newstate)
410 			if (newstate == RDP_sCLOSED)
411 			    /* next rdpcb */
412 			    break;
413 		    }
414 		}
415 	    }
416 	}
417 	inp = next;
418     }
419     splx(s);
420 }
421 #endif
422