1 /*      randseq.c         May, 1995
2 
3         copyright (c) 1995    William R. Pearson
4 */
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <ctype.h>
10 
11 char *verstr="version 2.0, May, 1995";
12 
13 #define TRUE 1
14 #define FALSE 0
15 
16 #ifndef BIGMEM
17 #define QFILE_SIZE 40
18 #define MAXTST 2000	/* longest test sequence */
19 #define MAXLIB 10000
20 #else
21 #define QFILE_SIZE 256
22 #define MAXTST 10000
23 #define MAXLIB 50000
24 #endif
25 
26 #define MAXHIST 201	/* number of histogram divisions */
27 
28 #include "upam.gbl"
29 
30 FILE *outfd;		/* fd for output file */
31 
32 /* globals for matching */
33 
34 
35 char *aa0, *aa10;	/* amino acid sequence data */
36 int maxn;		/* max space for lib sequence (MAXDIAG-n0) */
37 int n0;			/* length of aa0, length of aa1, n0+n1,
38 				diagonal offset */
39 
40 int dnaseq = 0;	/* true if DNA query sequence */
41 
42 FILE *outfd;
43 char rline[20],sline[20];
44 char resfile[QFILE_SIZE];
45 
46 extern int optind;
47 char *getenv(), *smptr, *cptr;		/* scoring matrix env */
48 
49 extern int outtty;
50 char smstr[QFILE_SIZE];
51 long sq0off=1;
52 int wflag = -1;
53 int wsiz, iframe=0;
54 
main(argc,argv)55 main(argc, argv)
56      int argc; char **argv;
57 {
58   char tname[40], lname[40], qline[40];
59   char *bp;
60   int i,ic,icnt=1;
61   char info[256];
62 
63 #ifdef UNIX
64   outtty=isatty(1);
65 #endif
66 
67 #ifdef __MWERKS__
68   SIOUXSettings.asktosaveonclose=TRUE;
69   SIOUXSettings.showstatusline=FALSE;
70   SIOUXSettings.autocloseonquit=TRUE;
71 
72   argc = ccommand(&argv);
73   if (GetResource('DLOG',IntroDID)==0L && OpenResFile("\pFASTA.rsrc")<0) {
74     SysBeep(100); fprintf(stderr," WARNING FASTA.rsrc file could not be found\n");
75   }
76   InitEvent();
77   GetVol((unsigned char *)prompt,&ouvRef);
78   wpos.h=50; wpos.v=100;
79 #endif
80 
81   if ((aa0=calloc(MAXTST+MAXLIB,sizeof(char)))==0) {
82     printf(" cannot allocate sequence array\n");
83     exit(1);
84   }
85   maxn = MAXTST+MAXLIB;
86   if ((aa10=calloc(MAXLIB,sizeof(char)))==0) {
87     printf(" cannot allocate sequence array\n");
88     exit(1);
89   }
90 
91   initenv(argc,argv);
92 
93   if (argc-optind < 2) {
94     printf(" randseq 2.0 [May, 1995] produces a shuffled sequence\n");
95   l1: printf(" seed sequence file name: ");
96     fgets(tname,40,stdin);
97     if (tname[strlen(tname)-1]=='\n') tname[strlen(tname)-1]='\0';
98     if (strlen(tname)==0) goto l1;
99     if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
100       printf(" %s : sequence not found\n",tname);
101       goto l1;
102     }
103 
104     if (wflag<=0) {
105       printf(" local (window) (w), codon (c) or uniform (u) shuffle [u]? ");
106       fgets(qline,40,stdin);
107       if ((bp=strchr(qline,'\n'))!=NULL) *bp='\0';
108     }
109     else qline[0]='\0';
110     if (tolower(qline[0])=='w' || wflag==1) {
111       wflag = 1;
112       wsiz = 10;
113       printf(" local shuffle window size [10] ");
114       fgets(qline,40,stdin);
115       if (qline[0]!='\0' && qline[0]!='\n') {
116 	sscanf(qline,"%d",&wsiz);
117 	if (wsiz <= 2) wsiz = 10;
118       }
119     }
120     else if (tolower(qline[0])=='c' || wflag==3) {
121       wflag = 3;
122       iframe = 1;
123       printf(" codon frame (1-3)[1]");
124       fgets(qline,40,stdin);
125       if (qline[0]!='\0' && qline[0]!='\n') {
126 	sscanf(qline,"%d",&iframe);
127       }
128     }
129     else wflag=0;
130 
131     printf(" number of shuffles: [%d]: ",icnt);
132     if (fgets(qline,40,stdin)!=NULL) {
133       sscanf(qline,"%d",&icnt);
134       if (icnt < 1) icnt = 1;
135     }
136   }
137   else {
138     strncpy(tname,argv[optind+1],40);
139     if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
140       printf(" %s : sequence not found\n",tname);
141       exit(1);
142     }
143     if (argc - optind > 2) {
144       sscanf(argv[optind+2],"%d",&icnt);
145       if (icnt < 1) icnt = 1;
146     }
147   }
148 
149   if (wsiz > n0) wsiz = n0;
150 
151   if (iframe > 3) {
152     iframe = (iframe-1)%3 +1;
153   }
154   else if (iframe <= 0) iframe = 1;
155 
156   for (i=0; i<n0; i++) aa10[i]=aa0[i];
157   aa10[n0]= -1;
158 
159   irand();	/* seed the random number generator */
160 
161 #ifdef __MWERKS__
162   SetVol("\p",ouvRef);
163 #endif
164 
165   outfd = stdout;
166 
167   if (outtty && resfile[0]=='\0') {
168     printf(" Enter filename for results : ");
169     fgets(rline,sizeof(rline),stdin);
170     if ((bp=strchr(rline,'\n'))!=NULL) *bp='\0';
171     if (rline[0]!='\0') strncpy(resfile,rline,sizeof(resfile));
172   }
173   if (resfile[0]!='\0')
174     if ((outfd=fopen(resfile,"w"))==NULL) exit(1);
175 
176   for (i=0; i<n0; i++) {
177     if (sq[aa0[i]] <= 0) fprintf(stderr,"error0 aa0[%d]: %d %d %c\n",
178 				 i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
179   }
180 
181   for (ic=0; ic<icnt; ic++) {
182     if (wflag==1) wshuffle(aa10,aa0,n0,wsiz);
183     else if (wflag==3) shuffle3(aa0,n0,iframe-1);
184     else shuffle(aa0,n0);
185 
186   for (i=0; i<n0; i++) {
187     if (sq[aa0[i]] <= 0) fprintf(stderr,"error1 aa0[%d]: %d %d %c\n",
188 				 i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
189   }
190 
191   if (wflag == 1) sprintf(info,"window: %d",wsiz);
192   else if (wflag == 3) sprintf(info,"codon frame: %d",iframe);
193   else info[0]='\0';
194 
195     fprintf(outfd, ">%s_%d shuffled %s\n",tname,ic,info);
196     for (i=0; i<n0; i++) {
197       if (sq[aa0[i]] > 0) fputc(sq[aa0[i]],outfd);
198       else fprintf(stderr,"error aa0[%d]: %d %d %c\n",
199 		   i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
200       if (i%60 == 59) fputc('\n',outfd);
201       else if (i%10 == 9) fputc(' ',outfd);
202     }
203     fputc('\n',outfd);
204   }
205 
206 #ifdef __MWERKS__
207     SIOUXSettings.asktosaveonclose=FALSE;
208     SIOUXSettings.autocloseonquit=TRUE;
209 #endif
210 
211 }
212 
213 extern int *sascii, nascii[], aascii[];
214 
initenv(argc,argv)215 initenv(argc,argv)
216      int argc; char **argv;
217 {
218   char *cptr, *getenv();
219   int copt, getopt();
220   extern char *optarg;
221 
222   sascii = aascii;
223   sq = aa;
224   hsq = haa;
225   nsq = naa;
226   dnaseq = 0;
227 
228   while ((copt=getopt(argc,argv,"nw:O:c:"))!=EOF)
229     switch(copt) {
230     case 'n': dnaseq=1;
231       sascii = nascii;
232       sq = nt;
233       nsq = nnt;
234       hsq = hnt;
235       pam = npam;
236       strcpy(sqnam,"nt");
237       strcpy(sqtype,"DNA");
238       break;
239     case 'O': strncpy(resfile,optarg,sizeof(resfile));
240       break;
241     case 'w': wflag = 1; sscanf(optarg,"%d",&wsiz);
242       break;
243     case 'c': wflag = 3; sscanf(optarg,"%d",&iframe);
244       dnaseq=1;
245       sascii = nascii;
246       sq = nt;
247       nsq = nnt;
248       hsq = hnt;
249       pam = npam;
250       strcpy(sqnam,"nt");
251       strcpy(sqtype,"DNA");
252       break;
253     default : fprintf(stderr," illegal option -%c\n",copt);
254     }
255   optind--;
256 }
257 
258 int ieven = 1;
wshuffle(from,to,n,wsiz)259 wshuffle(from,to,n,wsiz)	/* copies from from to from shuffling */
260 	char  *from, *to; int n, wsiz;
261 {
262   int i,j, k, mm; char tmp, *top;
263 
264   memcpy(to,from,n);
265 
266   mm = n%wsiz;
267 
268   if (ieven) {
269     for (k=0; k<(n-wsiz); k += wsiz) {
270       top = &to[k];
271       for (i=wsiz; i>0; i--) {
272 	j = nrand(i);
273 	tmp = top[j];
274 	top[j] = top[i-1];
275 	top[i-1] = tmp;
276       }
277       for (i=wsiz; i>0; i--) {
278 	j = nrand(i);
279 	tmp = top[j];
280 	top[j] = top[i-1];
281 	top[i-1] = tmp;
282       }
283       for (i=wsiz; i>0; i--) {
284 	j = nrand(i);
285 	tmp = top[j];
286 	top[j] = top[i-1];
287 	top[i-1] = tmp;
288       }
289       for (i=wsiz; i>0; i--) {
290 	j = nrand(i);
291 	tmp = top[j];
292 	top[j] = top[i-1];
293 	top[i-1] = tmp;
294       }
295     }
296     top = &to[n-mm];
297     for (i=mm; i>0; i--) {
298       j = nrand(i);
299       tmp = top[j];
300       top[j] = top[i-1];
301       top[i-1] = tmp;
302     }
303     for (i=mm; i>0; i--) {
304       j = nrand(i);
305       tmp = top[j];
306       top[j] = top[i-1];
307       top[i-1] = tmp;
308     }
309     for (i=mm; i>0; i--) {
310       j = nrand(i);
311       tmp = top[j];
312       top[j] = top[i-1];
313       top[i-1] = tmp;
314     }
315     for (i=mm; i>0; i--) {
316       j = nrand(i);
317       tmp = top[j];
318       top[j] = top[i-1];
319       top[i-1] = tmp;
320     }
321     ieven = 0;
322   }
323   else {
324     for (k=n; k>=wsiz; k -= wsiz) {
325       top = &to[k-wsiz];
326       for (i=wsiz; i>0; i--) {
327 	j = nrand(i);
328 	tmp = top[j];
329 	top[j] = top[i-1];
330 	top[i-1] = tmp;
331       }
332       for (i=wsiz; i>0; i--) {
333 	j = nrand(i);
334 	tmp = top[j];
335 	top[j] = top[i-1];
336 	top[i-1] = tmp;
337       }
338       for (i=wsiz; i>0; i--) {
339 	j = nrand(i);
340 	tmp = top[j];
341 	top[j] = top[i-1];
342 	top[i-1] = tmp;
343       }
344       for (i=wsiz; i>0; i--) {
345 	j = nrand(i);
346 	tmp = top[j];
347 	top[j] = top[i-1];
348 	top[i-1] = tmp;
349       }
350     }
351   }
352   top = &to[0];
353   for (i=mm; i>0; i--) {
354     j = nrand(i);
355     tmp = top[j];
356     top[j] = top[i-1];
357     top[i-1] = tmp;
358   }
359   for (i=mm; i>0; i--) {
360     j = nrand(i);
361     tmp = top[j];
362     top[j] = top[i-1];
363     top[i-1] = tmp;
364   }
365   for (i=mm; i>0; i--) {
366     j = nrand(i);
367     tmp = top[j];
368     top[j] = top[i-1];
369     top[i-1] = tmp;
370   }
371   for (i=mm; i>0; i--) {
372     j = nrand(i);
373     tmp = top[j];
374     top[j] = top[i-1];
375     top[i-1] = tmp;
376   }
377   ieven = 1;
378   to[n] = -1;
379 }
380 
shuffle(from,n)381 shuffle(from,n)	/* copies from from to from shuffling */
382      char  *from; int n;
383 {
384   int i,j; char tmp;
385 
386   for (i=n; i>0; i--) {
387     j = nrand(i);
388     tmp = from[j];
389     from[j] = from[i-1];
390     from[i-1] = tmp;
391   }
392   from[n] = 0;
393 }
394 
shuffle3(char * from,int n,int frame)395 shuffle3(char *from, int n, int frame)	/* copies from to from shuffling 3's*/
396 {
397 	int i,j, i3, j3; char tmp;
398 /*
399 	for (i=0; i<n; i++)
400 	  if (sq[from[i]] > 0) {fputc(sq[from[i]],stderr); if (i%3 == 2) fputc(' ',stderr);}
401 	  else fprintf(stderr," %d ",from[i]);
402 	fputc('\n',stderr);
403 */
404 	for (i3=n/3; i3>1; i3--) {
405 	  j3 = nrand(i3);
406 	  i = (i3-1)*3+frame; j=j3*3+frame;
407   /*	  fprintf(stderr,"i3: %d j3: %d i: %d j: %d\n",i3,j3,i,j); */
408 	  if (i+2 < n && j+2 < n && i != j) {
409 	    tmp = from[j];
410 	    from[j] = from[i];
411 	    from[i] = tmp;
412 
413 	    tmp = from[j+1];
414 	    from[j+1] = from[i+1];
415 	    from[i+1] = tmp;
416 
417 	    tmp = from[j+2];
418 	    from[j+2] = from[i+2];
419 	    from[i+2] = tmp;
420 	  }
421   /*
422 	  for (i=0; i<n; i++)
423 	    if (sq[from[i]] > 0) {fputc(sq[from[i]],stderr); if (i%3 == 2) fputc(' ',stderr);}
424 	    else fprintf(stderr," %d ",from[i]);
425 	  fputc('\n',stderr);
426   */
427 	}
428 	from[n] = 0;
429 }
430 /*  stubs for linking */
431 int llen;
432 
aancpy()433 aancpy()
434 {}
435 
436 int markx;
disgraph()437 disgraph()
438 {}
439 
min(v0,v1)440 min(v0,v1)
441 	int v0, v1;
442 {
443 	return (v0>v1) ? v1 : v0;
444 }
445 
ALIGN()446 ALIGN()
447 {}
448 
discons()449 discons()
450 {}
451 
452