1 /* Quadruple Statistics on an arbitrary alphabet
2
3 Vienna RNA Package --- Peter F Stadler 1993
4
5 */
6
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <strings.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include "ViennaRNA/utils/basic.h"
17 #include "PS3D.h"
18 #include "distance_matrix.h"
19
20 #define PUBLIC
21 #define PRIVATE static
22
23 #define MIN2(A, B) ((A) < (B) ? (A) : (B))
24 #define MIN4(A, B, C, D) MIN2( (MIN2((A),(B))), (MIN2((C),(D))) )
25 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
26 #define MAX4(A, B, C, D) MAX2( (MAX2((A),(B))), (MAX2((C),(D))) )
27
28 PRIVATE int IBox[16];
29
30
31 PUBLIC float *statgeom(char **seqs, int n_of_seqs);
32 PUBLIC float *statgeom4(char **ss[4], int nn[4]);
33 PUBLIC void printf_stg(float *B);
34
35 PRIVATE void SingleBox(char *x1, char *x2, char *x3, char *x4);
36 PRIVATE void SortSingleBox(void);
37
38 /* ----------------------------------------------------------------------- */
39
statgeom(char ** seqs,int n_of_seqs)40 PUBLIC float *statgeom(char **seqs, int n_of_seqs)
41 {
42 int i,j,k,l;
43 int i1;
44
45 float *B;
46 float temp;
47
48 if(n_of_seqs < 4) {
49 fprintf(stderr,"Less than 4 sequences for statistical geometry.\n");
50 return NULL;
51 }
52
53 B = (float *) vrna_alloc(16*sizeof(float));
54
55 for(i=3; i<n_of_seqs; i++) {
56 for(j=2; j<i; j++) {
57 for(k=1; k<j; k++) {
58 for(l=0; l<k; l++) {
59 SingleBox(seqs[i], seqs[j], seqs[k], seqs[l]);
60 SortSingleBox();
61 B[0] = (float) IBox[0]; /* transfer length */
62 for(i1=1;i1<=15;i1++) B[i1] += ( (float) IBox[i1] );
63 }
64 }
65 }
66 }
67
68 temp = (float) n_of_seqs;
69 temp = temp*(temp-1.)*(temp-2.)*(temp-3.)/24.;
70 for(i1=1;i1<=15;i1++) B[i1] /= (temp*B[0]);
71
72 return B;
73 }
74
75 /* ----------------------------------------------------------------------- */
76
statgeom4(char ** ss[4],int nn[4])77 PUBLIC float *statgeom4(char **ss[4], int nn[4])
78 {
79 int i,j,k,l;
80 int i1;
81
82 float *B;
83 float temp;
84
85 B = (float *) vrna_alloc(16*sizeof(float));
86
87 for(i=0; i<nn[0]; i++) {
88 for(j=0; j<nn[1]; j++) {
89 for(k=0; k<nn[2]; k++) {
90 for(l=0; l<nn[3]; l++) {
91 SingleBox(ss[0][i], ss[1][j], ss[2][k], ss[3][l]);
92 B[0] = (float) IBox[0]; /* transfer length */
93 for(i1=1;i1<=15;i1++) B[i1] += ( (float) IBox[i1] );
94 }
95 }
96 }
97 }
98
99 for (temp = 1, i=0;i<4;i++) temp *= ((float) nn[i]) ;
100 for(i1=1;i1<=15;i1++) B[i1] /= (temp*B[0]);
101
102 return B;
103 }
104 /* ------------------------------------------------------------------------- */
105
printf_stg(float * B)106 PUBLIC void printf_stg(float *B)
107 {
108 printf("> Statistical Geometry.\n");
109 printf("> %d (sequence length)\n", (int) B[0]);
110 printf("> AAAA\n");
111 printf(" %7.5f\n", B[1]);
112 printf("> BAAA ABAA AABA AAAB\n");
113 printf(" %7.5f %7.5f %7.5f %7.5f\n", B[2],B[3],B[4],B[5]);
114 printf("> AABB ABAB ABBA\n");
115 printf(" %7.5f %7.5f %7.5f\n", B[6],B[7],B[8]);
116 printf("> AABC ABAC ABCA BAAC BACA BCAA\n");
117 printf(" %7.5f %7.5f %7.5f %7.5f %7.5f %7.5f\n"
118 ,B[9],B[10],B[11],B[12],B[13],B[14]);
119 printf("> ABCD\n");
120 printf(" %7.5f\n",B[15]);
121 }
122
123 /* ------------------------------------------------------------------------- */
124
125
SimplifiedBox(float * B,char * filename)126 PUBLIC void SimplifiedBox(float *B, char *filename)
127 {
128 char *tmp, temp[50];
129 float X,Y,Z;
130 float T,P;
131 float t1, t2, t3, t4;
132 float p1, p2, p3, p4;
133 float x1, x2, y1, y2,z1,z2;
134 FILE *fp;
135 float view[3] = {0.3, 1.5, 0.1};
136 float axis[3] = {1.0, 0.0, 0.0};
137
138 t1 = B[9]+B[10]+B[11]+B[15];
139 t2 = B[9]+B[12]+B[13]+B[15];
140 t3 = B[10]+B[12]+B[14]+B[15];
141 t4 = B[11]+B[13]+B[14]+B[15];
142
143 p1 = B[2]; p2 = B[3]; p3 = B[4]; p4 = B[5];
144
145 x1 = B[6] + B[14];
146 x2 = B[6] + B[9];
147 y1 = B[7] + B[13];
148 y2 = B[7] + B[10];
149 z1 = B[8] + B[12];
150 z2 = B[8] + B[11];
151
152 X = (x1+x2)/2.; Y = (y1+y2)/2.; Z = (z1+z2)/2;
153 T = (t1+t2+t3+t4)/4.;
154 P = (p1+p2+p3+p4)/4.;
155
156 printf("> X = %7.5f \n",X);
157 printf("> Y = %7.5f \n",Y);
158 printf("> Z = %7.5f \n",Z);
159 printf("> T = %7.5f \n",T);
160 printf("> P = %7.5f \n",P);
161
162 tmp = get_taxon_label(-1); /* retrieve the dataset identifier */
163
164 temp[0]='\0';
165 if(tmp) { strcat(temp,tmp);strcat(temp,"_"); free(tmp); }
166 strcat(temp,filename);
167
168 fp = fopen(temp,"w");
169 if (fp!=NULL) {
170 ps3d_Preambel(fp, view, axis, "N");
171 PS_DrawSimplifiedBox(X,Y,Z,T,P, fp);
172 fclose(fp);
173 } else fprintf(stderr,"couldn't open %s -- not drawing box\n", temp);
174 }
175
176
177
178 /* ------------------------------------------------------------------------- */
179
SingleBox(char * x1,char * x2,char * x3,char * x4)180 PRIVATE void SingleBox(char *x1, char *x2, char *x3, char *x4)
181 {
182 int len,i1,j1,k1,i,M,m;
183 int d[4];
184 char t[4];
185
186 len=strlen(x1);
187 if(strlen(x2)!=len) vrna_message_error("Sequences of unequal length in 'SingleBox'");
188 if(strlen(x3)!=len) vrna_message_error("Sequences of unequal length in 'SingleBox'");
189 if(strlen(x4)!=len) vrna_message_error("Sequences of unequal length in 'SingleBox'");
190
191 IBox[0] = len;
192 for(i=1; i<=15; i++) IBox[i] = 0;
193
194 for(i1=0;i1<len;i1++) {
195 t[0] = x1[i1];
196 t[1] = x2[i1];
197 t[2] = x3[i1];
198 t[3] = x4[i1];
199 for(j1=0;j1<4;j1++) {
200 d[j1]=4;
201 for(k1=0;k1<4;k1++) d[j1] -=(t[j1]!=t[k1]);
202 }
203 M=MAX4(d[0],d[1],d[2],d[3]);
204 switch(M) {
205 case 4 : /* Four of a kind */
206 IBox[1]++; /* A A A A */
207 break;
208 case 3 : /* Three of a kind */
209 if(d[0] == 1) IBox[2]++; /* B A A A */
210 else if(d[1] == 1) IBox[3]++; /* A B A A */
211 else if(d[2] == 1) IBox[4]++; /* A A B A */
212 else IBox[5]++; /* A A A B */
213 break;
214 case 2 :
215 m=MIN4(d[0],d[1],d[2],d[3]);
216 if(m==2){ /* Two Pairs */
217 if(t[1]==t[0]) IBox[6]++; /* A A B B */
218 else if(t[2]==t[0]) IBox[7]++; /* A B A B */
219 else IBox[8]++; /* A B B A */
220 }
221 else { /* One Pair */
222 if(d[0]==2){ /* 0 is in the pair */
223 if(t[1]==t[0]) IBox[9]++; /* A A B C */
224 else if(t[2]==t[0]) IBox[10]++; /* A B A C */
225 else IBox[11]++ ;/* A B C A */
226 }
227 else if(d[1]==2){ /* 1 is in the pair */
228 if(t[2]==t[1]) IBox[12]++; /* B A A C */
229 else IBox[13]++; /* B A C A */
230 }
231 else IBox[14]++; /* B C A A */
232 }
233 break;
234 case 1 : /* No Pair */
235 IBox[15]++; /* A B C D */
236 break;
237 default:
238 vrna_message_error("This can't happen.");
239 }
240 }
241 }
242
243 /* ----------------------------------------------------------------------- */
244
SortSingleBox(void)245 PRIVATE void SortSingleBox(void)
246 {
247 int i;
248 int M;
249 int IBB[16];
250 int s[4];
251
252 M = MAX2(MAX2( IBox[6], IBox[7] ), IBox[8] );
253
254 if( M== IBox[6] ) { /* 12|34 */
255 IBB[6] = IBox[6];
256 if( IBox[9] >= IBox[14] ) { /* 1,2 > 3,4 */
257 IBB[9] = IBox[9];
258 IBB[14] = IBox[14];
259 if( IBox[7] >= IBox[8] ) { /* 13|24 */
260 IBB[7] = IBox[7];
261 IBB[8] = IBox[8];
262 if( IBox[10] >= IBox[13] ) { /* 1>2>3>4 */
263 IBB[10] = IBox[10];
264 IBB[13] = IBox[13];
265 IBB[11] = IBox[11];
266 IBB[12] = IBox[12];
267 s[0]=0; s[1]=1; s[2]=2; s[3]=3; /* 1 2 3 4 */
268 }
269 else { /* 2>1>4>3 */
270 IBB[10] = IBox[13];
271 IBB[13] = IBox[10];
272 IBB[11] = IBox[12];
273 IBB[12] = IBox[11];
274 s[0]=1; s[1]=0; s[2]=3; s[3]=2; /* 2 1 4 3 */
275 }
276 }
277 else { /* 14|23 */
278 IBB[7] = IBox[8];
279 IBB[8] = IBox[7];
280 if( IBox[11] >= IBox[12]) { /* 1>2>4>3 */
281 IBB[10] = IBox[11];
282 IBB[13] = IBox[12];
283 IBB[11] = IBox[10];
284 IBB[12] = IBox[13];
285 s[0]=0; s[1]=1; s[2]=3; s[3]=2; /* 1 2 4 3 */
286 }
287 else { /* 2>1>3>4 */
288 IBB[10] = IBox[12];
289 IBB[13] = IBox[11];
290 IBB[11] = IBox[13];
291 IBB[12] = IBox[10];
292 s[0]=1; s[1]=0; s[2]=2; s[3]=3; /* 2 1 3 4 */
293 }
294 }
295 }
296 else { /* 3,4 > 1,2 */
297 IBB[9] = IBox[14];
298 IBB[14]= IBox[9];
299 if( IBox[7] >= IBox[8] ) { /* 31|42 */
300 IBB[7] = IBox[7];
301 IBB[8] = IBox[8];
302 if(IBox[10] >= IBox[13]) { /* 3>4>1>2 */
303 IBB[10] = IBox[10];
304 IBB[13] = IBox[13];
305 IBB[11] = IBox[12];
306 IBB[12] = IBox[11];
307 s[0]=2; s[1]=3; s[2]=0; s[3]=1; /* 3 4 1 2 */
308 }
309 else { /* */
310 IBB[10] = IBox[13];
311 IBB[13] = IBox[10];
312 IBB[11] = IBox[11];
313 IBB[12] = IBox[12];
314 s[0]=3; s[1]=2; s[2]=1; s[3]=0; /* 4 3 2 1 */
315 }
316 }
317 else { /* 32|14 */
318 IBB[7] = IBox[8];
319 IBB[8] = IBox[7];
320 if( IBox[11] >= IBox[12]) { /* 3>4>2>1 */
321 IBB[10] = IBox[12];
322 IBB[13] = IBox[11];
323 IBB[11] = IBox[10];
324 IBB[12] = IBox[13];
325 s[0]=2; s[1]=3; s[2]=1; s[3]=0; /* 3 4 2 1 */
326 }
327 else { /* 2>1>3>4 */
328 IBB[10] = IBox[10];
329 IBB[13] = IBox[13];
330 IBB[11] = IBox[12];
331 IBB[12] = IBox[11];
332 s[0]=2; s[1]=3; s[2]=0; s[3]=1; /* 3 4 1 2 */
333 }
334 }
335 }
336 }
337 else if (M == IBox[7] ) { /* 13|24 */
338 IBB[6] = IBox[7];
339 if( IBox[10] >= IBox[13] ) { /* 1,3 > 2,4 */
340 IBB[9] = IBox[10];
341 IBB[14] = IBox[13];
342 if( IBox[6] >= IBox[8]) { /* 12|34 */
343 IBB[7] = IBox[6];
344 IBB[8] = IBox[8];
345 if( IBox[9] >= IBox[14] ) { /* 1,2>3,4 */
346 IBB[10] = IBox[9];
347 IBB[13] = IBox[14];
348 IBB[11] = IBox[11];
349 IBB[12] = IBox[12];
350 s[0]=0; s[1]=2; s[2]=1; s[3]=3; /* 1 3 2 4 */
351 }
352 else {
353 IBB[10] = IBox[14];
354 IBB[13] = IBox[9];
355 IBB[11] = IBox[12];
356 IBB[12] = IBox[11];
357 s[0]=2; s[1]=0; s[2]=3; s[3]=1; /* 3 1 4 2 */
358 }
359 }
360 else { /* 14|23 */
361 IBB[7] = IBox[8];
362 IBB[8] = IBox[6];
363 if( IBox[11] >= IBox[12] ){ /* 1,4 > 2,3 */
364 IBB[10] = IBox[11];
365 IBB[13] = IBox[12];
366 IBB[11] = IBox[9];
367 IBB[12] = IBox[14];
368 s[0]=0; s[1]=2; s[2]=3; s[3]=1; /* 1 3 4 2 */
369 }
370 else {
371 IBB[10] = IBox[12];
372 IBB[13] = IBox[11];
373 IBB[11] = IBox[14];
374 IBB[12] = IBox[9];
375 s[0]=2; s[1]=0; s[2]=1; s[3]=3; /* 3 1 2 4 */
376 }
377 }
378 }
379 else { /* 2,4 > 1,3 */
380 IBB[9] = IBox[13];
381 IBB[14] = IBox[10];
382 if( IBox[6] >= IBox[8]) { /* 21|43 */
383 IBB[7] = IBox[6];
384 IBB[8] = IBox[8];
385 if( IBox[9] >= IBox[14] ) { /* 2,1>4,3 */
386 IBB[10] = IBox[9];
387 IBB[13] = IBox[14];
388 IBB[11] = IBox[12];
389 IBB[12] = IBox[11];
390 s[0]=1; s[1]=3; s[2]=0; s[3]=2; /* 2 4 1 3 */
391 }
392 else {
393 IBB[10] = IBox[14];
394 IBB[13] = IBox[9];
395 IBB[11] = IBox[11];
396 IBB[12] = IBox[12];
397 s[0]=3; s[1]=1; s[2]=2; s[3]=0; /* 4 2 3 1 */
398 }
399 }
400 else { /* 14|23 */
401 IBB[7] = IBox[8];
402 IBB[8] = IBox[6];
403 if( IBox[11] >= IBox[12] ){ /* 1,4 > 2,3 */
404 IBB[10] = IBox[11];
405 IBB[13] = IBox[12];
406 IBB[11] = IBox[14];
407 IBB[12] = IBox[9];
408 s[0]=3; s[1]=1; s[2]=0; s[3]=2; /* 4 2 1 3 */
409 }
410 else {
411 IBB[10] = IBox[12];
412 IBB[13] = IBox[11];
413 IBB[11] = IBox[9];
414 IBB[12] = IBox[14];
415 s[0]=1; s[1]=3; s[2]=2; s[3]=0; /* 2 4 3 1 */
416 }
417 }
418 }
419 }
420 else { /* 14 | 23 */
421 IBB[6] = IBox[8];
422 if( IBox[11] >= IBox[12] ) { /* 1,4 > 2,3 */
423 IBB[9] = IBox[11];
424 IBB[14] = IBox[12];
425 if(IBox[6] >= IBox[7]) { /* 12|34 */
426 IBB[7] = IBox[6];
427 IBB[8] = IBox[7];
428 if(IBox[9] >= IBox[14]) { /* 1,2>3,4 */
429 IBB[10] = IBox[9];
430 IBB[13] = IBox[14];
431 IBB[11] = IBox[10];
432 IBB[12] = IBox[13];
433 s[0]=0; s[1]=3; s[2]=1; s[3]=2; /* 1 4 2 3 */
434 }
435 else { /* 4,3>2,1 */
436 IBB[10] = IBox[14];
437 IBB[13] = IBox[9];
438 IBB[11] = IBox[13];
439 IBB[12] = IBox[10];
440 s[0]=3; s[1]=0; s[2]=2; s[3]=1; /* 4 1 3 2 */
441 }
442 }
443 else { /* 13|24 */
444 IBB[7] = IBox[7];
445 IBB[8] = IBox[6];
446 if( IBox[10] >= IBox[13]) { /* 1,3 > 2,4 */
447 IBB[10] = IBox[10];
448 IBB[13] = IBox[13];
449 IBB[11] = IBox[9];
450 IBB[12] = IBox[14];
451 s[0]=0; s[1]=3; s[2]=2; s[3]=1; /* 1 4 3 2 */
452 }
453 else {
454 IBB[10] = IBox[13];
455 IBB[13] = IBox[10];
456 IBB[11] = IBox[14];
457 IBB[12] = IBox[9];
458 s[0]=3; s[1]=0; s[2]=1; s[3]=2; /* 4 1 2 3 */
459 }
460 }
461 }
462 else { /* 2,3 > 1,4 */
463 IBB[9] = IBox[12];
464 IBB[14] = IBox[11];
465 if(IBox[6] >= IBox[7]) { /* 34|12 */
466 IBB[7] = IBox[6];
467 IBB[8] = IBox[7];
468 if(IBox[9] >= IBox[14]) { /* 2,1>4,3 */
469 IBB[10] = IBox[9];
470 IBB[13] = IBox[14];
471 IBB[11] = IBox[13];
472 IBB[12] = IBox[10];
473 s[0]=1; s[1]=2; s[2]=0; s[3]=3; /* 2 3 1 4 */
474 }
475 else { /* 4,3>2,1 */
476 IBB[10] = IBox[14];
477 IBB[13] = IBox[9];
478 IBB[11] = IBox[10];
479 IBB[12] = IBox[13];
480 s[0]=2; s[1]=1; s[2]=3; s[3]=0; /* 3 2 4 1 */
481 }
482 }
483 else { /* 31|42 */
484 IBB[7] = IBox[7];
485 IBB[8] = IBox[6];
486 if( IBox[10] >= IBox[13]) { /* 1,3 > 2,4 */
487 IBB[10] = IBox[10];
488 IBB[13] = IBox[13];
489 IBB[11] = IBox[14];
490 IBB[12] = IBox[9];
491 s[0]=2; s[1]=1; s[2]=0; s[3]=4; /* 3 2 1 4 */
492 }
493 else {
494 IBB[10] = IBox[13];
495 IBB[13] = IBox[10];
496 IBB[11] = IBox[9];
497 IBB[12] = IBox[14];
498 s[0]=1; s[1]=2; s[2]=3; s[3]=0; /* 2 3 4 1 */
499 }
500 }
501 }
502 }
503 /* HEUREKA */
504 IBB[0] = IBox[0];
505 IBB[1] = IBox[1];
506 IBB[2] = IBox[2+s[0]];
507 IBB[3] = IBox[2+s[1]];
508 IBB[4] = IBox[2+s[2]];
509 IBB[5] = IBox[2+s[3]];
510 /*
511 IBB[6...14] see above :-)
512 */
513 IBB[15] = IBox[15];
514
515 for(i=0;i<=15;i++) IBox[i] = IBB[i];
516 }
517
518 /* ----------------------------------------------------------------------- */
519
520