1 /* randseq.c May, 1995
2
3 copyright (c) 1995 William R. Pearson
4 */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <ctype.h>
10
11 char *verstr="version 2.0, May, 1995";
12
13 #define TRUE 1
14 #define FALSE 0
15
16 #ifndef BIGMEM
17 #define QFILE_SIZE 40
18 #define MAXTST 2000 /* longest test sequence */
19 #define MAXLIB 10000
20 #else
21 #define QFILE_SIZE 256
22 #define MAXTST 10000
23 #define MAXLIB 50000
24 #endif
25
26 #define MAXHIST 201 /* number of histogram divisions */
27
28 #include "upam.gbl"
29
30 FILE *outfd; /* fd for output file */
31
32 /* globals for matching */
33
34
35 char *aa0, *aa10; /* amino acid sequence data */
36 int maxn; /* max space for lib sequence (MAXDIAG-n0) */
37 int n0; /* length of aa0, length of aa1, n0+n1,
38 diagonal offset */
39
40 int dnaseq = 0; /* true if DNA query sequence */
41
42 FILE *outfd;
43 char rline[20],sline[20];
44 char resfile[QFILE_SIZE];
45
46 extern int optind;
47 char *getenv(), *smptr, *cptr; /* scoring matrix env */
48
49 extern int outtty;
50 char smstr[QFILE_SIZE];
51 long sq0off=1;
52 int wflag = -1;
53 int wsiz, iframe=0;
54
main(argc,argv)55 main(argc, argv)
56 int argc; char **argv;
57 {
58 char tname[40], lname[40], qline[40];
59 char *bp;
60 int i,ic,icnt=1;
61 char info[256];
62
63 #ifdef UNIX
64 outtty=isatty(1);
65 #endif
66
67 #ifdef __MWERKS__
68 SIOUXSettings.asktosaveonclose=TRUE;
69 SIOUXSettings.showstatusline=FALSE;
70 SIOUXSettings.autocloseonquit=TRUE;
71
72 argc = ccommand(&argv);
73 if (GetResource('DLOG',IntroDID)==0L && OpenResFile("\pFASTA.rsrc")<0) {
74 SysBeep(100); fprintf(stderr," WARNING FASTA.rsrc file could not be found\n");
75 }
76 InitEvent();
77 GetVol((unsigned char *)prompt,&ouvRef);
78 wpos.h=50; wpos.v=100;
79 #endif
80
81 if ((aa0=calloc(MAXTST+MAXLIB,sizeof(char)))==0) {
82 printf(" cannot allocate sequence array\n");
83 exit(1);
84 }
85 maxn = MAXTST+MAXLIB;
86 if ((aa10=calloc(MAXLIB,sizeof(char)))==0) {
87 printf(" cannot allocate sequence array\n");
88 exit(1);
89 }
90
91 initenv(argc,argv);
92
93 if (argc-optind < 2) {
94 printf(" randseq 2.0 [May, 1995] produces a shuffled sequence\n");
95 l1: printf(" seed sequence file name: ");
96 fgets(tname,40,stdin);
97 if (tname[strlen(tname)-1]=='\n') tname[strlen(tname)-1]='\0';
98 if (strlen(tname)==0) goto l1;
99 if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
100 printf(" %s : sequence not found\n",tname);
101 goto l1;
102 }
103
104 if (wflag<=0) {
105 printf(" local (window) (w), codon (c) or uniform (u) shuffle [u]? ");
106 fgets(qline,40,stdin);
107 if ((bp=strchr(qline,'\n'))!=NULL) *bp='\0';
108 }
109 else qline[0]='\0';
110 if (tolower(qline[0])=='w' || wflag==1) {
111 wflag = 1;
112 wsiz = 10;
113 printf(" local shuffle window size [10] ");
114 fgets(qline,40,stdin);
115 if (qline[0]!='\0' && qline[0]!='\n') {
116 sscanf(qline,"%d",&wsiz);
117 if (wsiz <= 2) wsiz = 10;
118 }
119 }
120 else if (tolower(qline[0])=='c' || wflag==3) {
121 wflag = 3;
122 iframe = 1;
123 printf(" codon frame (1-3)[1]");
124 fgets(qline,40,stdin);
125 if (qline[0]!='\0' && qline[0]!='\n') {
126 sscanf(qline,"%d",&iframe);
127 }
128 }
129 else wflag=0;
130
131 printf(" number of shuffles: [%d]: ",icnt);
132 if (fgets(qline,40,stdin)!=NULL) {
133 sscanf(qline,"%d",&icnt);
134 if (icnt < 1) icnt = 1;
135 }
136 }
137 else {
138 strncpy(tname,argv[optind+1],40);
139 if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
140 printf(" %s : sequence not found\n",tname);
141 exit(1);
142 }
143 if (argc - optind > 2) {
144 sscanf(argv[optind+2],"%d",&icnt);
145 if (icnt < 1) icnt = 1;
146 }
147 }
148
149 if (wsiz > n0) wsiz = n0;
150
151 if (iframe > 3) {
152 iframe = (iframe-1)%3 +1;
153 }
154 else if (iframe <= 0) iframe = 1;
155
156 for (i=0; i<n0; i++) aa10[i]=aa0[i];
157 aa10[n0]= -1;
158
159 irand(); /* seed the random number generator */
160
161 #ifdef __MWERKS__
162 SetVol("\p",ouvRef);
163 #endif
164
165 outfd = stdout;
166
167 if (outtty && resfile[0]=='\0') {
168 printf(" Enter filename for results : ");
169 fgets(rline,sizeof(rline),stdin);
170 if ((bp=strchr(rline,'\n'))!=NULL) *bp='\0';
171 if (rline[0]!='\0') strncpy(resfile,rline,sizeof(resfile));
172 }
173 if (resfile[0]!='\0')
174 if ((outfd=fopen(resfile,"w"))==NULL) exit(1);
175
176 for (i=0; i<n0; i++) {
177 if (sq[aa0[i]] <= 0) fprintf(stderr,"error0 aa0[%d]: %d %d %c\n",
178 i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
179 }
180
181 for (ic=0; ic<icnt; ic++) {
182 if (wflag==1) wshuffle(aa10,aa0,n0,wsiz);
183 else if (wflag==3) shuffle3(aa0,n0,iframe-1);
184 else shuffle(aa0,n0);
185
186 for (i=0; i<n0; i++) {
187 if (sq[aa0[i]] <= 0) fprintf(stderr,"error1 aa0[%d]: %d %d %c\n",
188 i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
189 }
190
191 if (wflag == 1) sprintf(info,"window: %d",wsiz);
192 else if (wflag == 3) sprintf(info,"codon frame: %d",iframe);
193 else info[0]='\0';
194
195 fprintf(outfd, ">%s_%d shuffled %s\n",tname,ic,info);
196 for (i=0; i<n0; i++) {
197 if (sq[aa0[i]] > 0) fputc(sq[aa0[i]],outfd);
198 else fprintf(stderr,"error aa0[%d]: %d %d %c\n",
199 i,aa0[i],sq[aa0[i]],sq[aa0[i]]);
200 if (i%60 == 59) fputc('\n',outfd);
201 else if (i%10 == 9) fputc(' ',outfd);
202 }
203 fputc('\n',outfd);
204 }
205
206 #ifdef __MWERKS__
207 SIOUXSettings.asktosaveonclose=FALSE;
208 SIOUXSettings.autocloseonquit=TRUE;
209 #endif
210
211 }
212
213 extern int *sascii, nascii[], aascii[];
214
initenv(argc,argv)215 initenv(argc,argv)
216 int argc; char **argv;
217 {
218 char *cptr, *getenv();
219 int copt, getopt();
220 extern char *optarg;
221
222 sascii = aascii;
223 sq = aa;
224 hsq = haa;
225 nsq = naa;
226 dnaseq = 0;
227
228 while ((copt=getopt(argc,argv,"nw:O:c:"))!=EOF)
229 switch(copt) {
230 case 'n': dnaseq=1;
231 sascii = nascii;
232 sq = nt;
233 nsq = nnt;
234 hsq = hnt;
235 pam = npam;
236 strcpy(sqnam,"nt");
237 strcpy(sqtype,"DNA");
238 break;
239 case 'O': strncpy(resfile,optarg,sizeof(resfile));
240 break;
241 case 'w': wflag = 1; sscanf(optarg,"%d",&wsiz);
242 break;
243 case 'c': wflag = 3; sscanf(optarg,"%d",&iframe);
244 dnaseq=1;
245 sascii = nascii;
246 sq = nt;
247 nsq = nnt;
248 hsq = hnt;
249 pam = npam;
250 strcpy(sqnam,"nt");
251 strcpy(sqtype,"DNA");
252 break;
253 default : fprintf(stderr," illegal option -%c\n",copt);
254 }
255 optind--;
256 }
257
258 int ieven = 1;
wshuffle(from,to,n,wsiz)259 wshuffle(from,to,n,wsiz) /* copies from from to from shuffling */
260 char *from, *to; int n, wsiz;
261 {
262 int i,j, k, mm; char tmp, *top;
263
264 memcpy(to,from,n);
265
266 mm = n%wsiz;
267
268 if (ieven) {
269 for (k=0; k<(n-wsiz); k += wsiz) {
270 top = &to[k];
271 for (i=wsiz; i>0; i--) {
272 j = nrand(i);
273 tmp = top[j];
274 top[j] = top[i-1];
275 top[i-1] = tmp;
276 }
277 for (i=wsiz; i>0; i--) {
278 j = nrand(i);
279 tmp = top[j];
280 top[j] = top[i-1];
281 top[i-1] = tmp;
282 }
283 for (i=wsiz; i>0; i--) {
284 j = nrand(i);
285 tmp = top[j];
286 top[j] = top[i-1];
287 top[i-1] = tmp;
288 }
289 for (i=wsiz; i>0; i--) {
290 j = nrand(i);
291 tmp = top[j];
292 top[j] = top[i-1];
293 top[i-1] = tmp;
294 }
295 }
296 top = &to[n-mm];
297 for (i=mm; i>0; i--) {
298 j = nrand(i);
299 tmp = top[j];
300 top[j] = top[i-1];
301 top[i-1] = tmp;
302 }
303 for (i=mm; i>0; i--) {
304 j = nrand(i);
305 tmp = top[j];
306 top[j] = top[i-1];
307 top[i-1] = tmp;
308 }
309 for (i=mm; i>0; i--) {
310 j = nrand(i);
311 tmp = top[j];
312 top[j] = top[i-1];
313 top[i-1] = tmp;
314 }
315 for (i=mm; i>0; i--) {
316 j = nrand(i);
317 tmp = top[j];
318 top[j] = top[i-1];
319 top[i-1] = tmp;
320 }
321 ieven = 0;
322 }
323 else {
324 for (k=n; k>=wsiz; k -= wsiz) {
325 top = &to[k-wsiz];
326 for (i=wsiz; i>0; i--) {
327 j = nrand(i);
328 tmp = top[j];
329 top[j] = top[i-1];
330 top[i-1] = tmp;
331 }
332 for (i=wsiz; i>0; i--) {
333 j = nrand(i);
334 tmp = top[j];
335 top[j] = top[i-1];
336 top[i-1] = tmp;
337 }
338 for (i=wsiz; i>0; i--) {
339 j = nrand(i);
340 tmp = top[j];
341 top[j] = top[i-1];
342 top[i-1] = tmp;
343 }
344 for (i=wsiz; i>0; i--) {
345 j = nrand(i);
346 tmp = top[j];
347 top[j] = top[i-1];
348 top[i-1] = tmp;
349 }
350 }
351 }
352 top = &to[0];
353 for (i=mm; i>0; i--) {
354 j = nrand(i);
355 tmp = top[j];
356 top[j] = top[i-1];
357 top[i-1] = tmp;
358 }
359 for (i=mm; i>0; i--) {
360 j = nrand(i);
361 tmp = top[j];
362 top[j] = top[i-1];
363 top[i-1] = tmp;
364 }
365 for (i=mm; i>0; i--) {
366 j = nrand(i);
367 tmp = top[j];
368 top[j] = top[i-1];
369 top[i-1] = tmp;
370 }
371 for (i=mm; i>0; i--) {
372 j = nrand(i);
373 tmp = top[j];
374 top[j] = top[i-1];
375 top[i-1] = tmp;
376 }
377 ieven = 1;
378 to[n] = -1;
379 }
380
shuffle(from,n)381 shuffle(from,n) /* copies from from to from shuffling */
382 char *from; int n;
383 {
384 int i,j; char tmp;
385
386 for (i=n; i>0; i--) {
387 j = nrand(i);
388 tmp = from[j];
389 from[j] = from[i-1];
390 from[i-1] = tmp;
391 }
392 from[n] = 0;
393 }
394
shuffle3(char * from,int n,int frame)395 shuffle3(char *from, int n, int frame) /* copies from to from shuffling 3's*/
396 {
397 int i,j, i3, j3; char tmp;
398 /*
399 for (i=0; i<n; i++)
400 if (sq[from[i]] > 0) {fputc(sq[from[i]],stderr); if (i%3 == 2) fputc(' ',stderr);}
401 else fprintf(stderr," %d ",from[i]);
402 fputc('\n',stderr);
403 */
404 for (i3=n/3; i3>1; i3--) {
405 j3 = nrand(i3);
406 i = (i3-1)*3+frame; j=j3*3+frame;
407 /* fprintf(stderr,"i3: %d j3: %d i: %d j: %d\n",i3,j3,i,j); */
408 if (i+2 < n && j+2 < n && i != j) {
409 tmp = from[j];
410 from[j] = from[i];
411 from[i] = tmp;
412
413 tmp = from[j+1];
414 from[j+1] = from[i+1];
415 from[i+1] = tmp;
416
417 tmp = from[j+2];
418 from[j+2] = from[i+2];
419 from[i+2] = tmp;
420 }
421 /*
422 for (i=0; i<n; i++)
423 if (sq[from[i]] > 0) {fputc(sq[from[i]],stderr); if (i%3 == 2) fputc(' ',stderr);}
424 else fprintf(stderr," %d ",from[i]);
425 fputc('\n',stderr);
426 */
427 }
428 from[n] = 0;
429 }
430 /* stubs for linking */
431 int llen;
432
aancpy()433 aancpy()
434 {}
435
436 int markx;
disgraph()437 disgraph()
438 {}
439
min(v0,v1)440 min(v0,v1)
441 int v0, v1;
442 {
443 return (v0>v1) ? v1 : v0;
444 }
445
ALIGN()446 ALIGN()
447 {}
448
discons()449 discons()
450 {}
451
452