1 #include <objres.h>
2 #include <objseq.h>
3
4 #include <picture.h>
5 #include <viewer.h>
6
7 #include <seqport.h>
8 #include <seqgraph.h>
9 #include <seqmtrx.h>
10
11 /* defines */
12 #define FILTS 32
13 #define SQRDBF 8192
14
15 /* functions */
16
ComMatNew(ComMatPtr curcmtp)17 static ComMatPtr ComMatNew (ComMatPtr curcmtp)
18 {
19 ComMatPtr cmtp;
20 Int4 i;
21
22 cmtp = (ComMatPtr) MemNew (sizeof (ComMat));
23 cmtp->next = NULL;
24 cmtp->min = 0;
25 cmtp->max = 0;
26 for (i=0; i<FILTS; i++)
27 cmtp->res[i] = 0;
28
29 if (curcmtp != NULL)
30 {
31 while (curcmtp->next != NULL)
32 curcmtp = curcmtp->next;
33 curcmtp->next = cmtp;
34 }
35 return cmtp;
36 }
37
ComMatFree(ComMatPtr headcmtp)38 extern ComMatPtr ComMatFree (ComMatPtr headcmtp)
39 {
40 ComMatPtr cmtp;
41
42 while (headcmtp != NULL)
43 {
44 cmtp = headcmtp->next;
45 headcmtp->next = NULL;
46 MemFree (headcmtp);
47 headcmtp = cmtp;
48 }
49 return headcmtp;
50 }
51
CompileMatrix(FloatHi scr[FILTS][FILTS],Int4 len,Int4 maxval)52 extern ComMatPtr CompileMatrix (FloatHi scr[FILTS][FILTS], Int4 len,
53 Int4 maxval)
54 {
55 Int4 i, j, tmp;
56 Char k[FILTS] = {'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M',
57 'N', 'R', 'S', 'T', 'V', 'W', 'Y', 'N',
58 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
59 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};
60
61 ComMatPtr cmtphead = NULL, cmtp = NULL;
62
63 for (i = 0; i < len; i++)
64 {
65 cmtp = ComMatNew (cmtp);
66 cmtp->max = maxval;
67 if (cmtphead == NULL)
68 cmtphead = cmtp;
69 for (j = 0; j < FILTS; j++)
70 cmtp->res[(Uint1) k[j] - (Uint1) 'A'] = (Int4) scr[j][i];
71 tmp = 0;
72 for (j = 0; j < FILTS; j++)
73 {
74 if (cmtp->res[j] > tmp)
75 tmp = cmtp->res[j];
76 cmtp->min = tmp;
77 }
78 }
79 return cmtphead;
80 }
81
InvertMatrix(ComMatPtr cmtp,Int4 window)82 extern void InvertMatrix (ComMatPtr cmtp, Int4 window)
83 {
84 Int4 i, j, k;
85 Int4 ftemp;
86 ComMatPtr cmtphead, cmtptail;
87
88 /* complement */
89 cmtphead = cmtp;
90 while (cmtp != NULL)
91 {
92 /* A <-> T */
93 ftemp = cmtp->res[19];
94 cmtp->res[19] = cmtp->res[0];
95 cmtp->res[0] = ftemp;
96 /* B <-> V */
97 ftemp = cmtp->res[21];
98 cmtp->res[21] = cmtp->res[1];
99 cmtp->res[1] = ftemp;
100 /* C <-> G */
101 ftemp = cmtp->res[6];
102 cmtp->res[6] = cmtp->res[2];
103 cmtp->res[2] = ftemp;
104 /* D <-> H */
105 ftemp = cmtp->res[7];
106 cmtp->res[7] = cmtp->res[3];
107 cmtp->res[3] = ftemp;
108 /* K <-> M */
109 ftemp = cmtp->res[12];
110 cmtp->res[12] = cmtp->res[10];
111 cmtp->res[10] = ftemp;
112 /* R <-> Y */
113 ftemp = cmtp->res[17];
114 cmtp->res[17] = cmtp->res[24];
115 cmtp->res[24] = ftemp;
116 /* N ; S ; W */
117 cmtp = cmtp->next;
118 }
119
120 /* reverse */
121 cmtp = cmtphead;
122 j = window - 1;
123 for (i = 0; i < window/2; i++)
124 {
125 cmtptail = cmtphead;
126 for (k = 0; k < j; k++)
127 cmtptail = cmtptail->next;
128 j--;
129
130 for (k = 0; k < FILTS; k++)
131 {
132 ftemp = cmtptail->res[k];
133 cmtptail->res[k] = cmtp->res[k];
134 cmtp->res[k] = ftemp;
135 }
136 k = cmtptail->min;
137 cmtptail->min = cmtp->min;
138 cmtp->min = k;
139 cmtp = cmtp->next;
140 }
141
142 return;
143 }
144
ReadMatrix(CharPtr res,FloatHi scr[FILTS][FILTS],CharPtr filename)145 extern Int4 ReadMatrix (CharPtr res, FloatHi scr[FILTS][FILTS],
146 CharPtr filename)
147 {
148 FILE *fin;
149 Int2 i, n, val;
150 Int4 type, numcol;
151 long int ltype, lscore, lnumcol;
152 Char buf[PATH_MAX], buff[PATH_MAX];
153 CharPtr bptr;
154
155 static Char c[] =
156 {'A','B','C','D','G','H','K','M','N','R','S','T','V','W','Y'};
157
158 for (i = 0; i < FILTS; i++)
159 res[i] = '\0';
160 for (i = 0; i < FILTS; i++)
161 for (n = 0; n < FILTS; n++)
162 scr[i][n] = 0.0;
163
164 if (!FindPath ("ncbi", "ncbi", "data", buf, sizeof (buf)))
165 return 0;
166
167 FileBuildPath (buf, NULL, filename);
168 if ((fin = FileOpen (buf, "r")) == NULL)
169 {
170 if ((fin = FileOpen (filename, "r")) == NULL)
171 {
172 return 0;
173 }
174 }
175
176 val = 0;
177 type = -1;
178 numcol = 0;
179 while ((FileGets (buff, sizeof (buff), fin)) != NULL)
180 {
181 if (buff[0] == ';')
182 continue;
183 if (type == -1)
184 {
185 FileGets (buff, sizeof (buff), fin);
186 sscanf (buff, "%ld", <ype);
187 type = (Int4) ltype;
188 if (type != 1 && type != 2)
189 {
190 FileClose (fin);
191 return 0;
192 }
193 FileGets (buff, sizeof (buff), fin);
194 sscanf (buff, "%ld", &lnumcol);
195 numcol = (Int4) lnumcol;
196 FileGets (buff, sizeof (buff), fin);
197 continue;
198 }
199 res[val] = c[val];
200 bptr = buff;
201 for (i = 0; i < numcol; i++)
202 {
203 sscanf (bptr, "%ld", &lscore);
204 scr[val][i] = (FloatHi) lscore;
205 bptr += 4;
206 }
207 val++;
208 }
209 FileClose (fin);
210 return numcol;
211 }
212
GridMatch(CharPtr seqhead,Int4 cur,Int4 end,Int4 matlen,ComMatPtr cmtphead,Int4Ptr matval,Int4 maxval)213 extern Int4 GridMatch (CharPtr seqhead, Int4 cur, Int4 end, Int4 matlen,
214 ComMatPtr cmtphead, Int4Ptr matval, Int4 maxval)
215 {
216 CharPtr seq;
217 ComMatPtr cmtp;
218 Int4 cutval, newval;
219
220 end = end - matlen + 1;
221 while (cur < end)
222 {
223 seq = seqhead;
224 cmtp = cmtphead;
225 *matval = 0;
226 cutval = maxval;
227 while (cmtp != NULL)
228 {
229 newval = cmtp->res[(((Uint1) *seq) - ((Int2) 'A'))];
230 cutval = cutval - cmtp->min + newval;
231 if (cutval < 0)
232 break;
233 *matval += newval;
234 cmtp = cmtp->next;
235 if (cmtp == NULL)
236 return cur;
237 seq++;
238 }
239 seqhead++;
240 cur++;
241 }
242 return cur;
243 }
244
GridMatchSetUp(CharPtr seq)245 extern Int4 GridMatchSetUp (CharPtr seq)
246 {
247 Int4 i;
248
249 i = 0;
250 while (*seq != '\0')
251 {
252 seq++;
253 i++;
254 }
255 return i;
256 }
257
GetSeqChunk(SeqPortPtr spp,Int4 start,Int4 chunk,Int4 len)258 static CharPtr GetSeqChunk (SeqPortPtr spp, Int4 start, Int4 chunk, Int4 len)
259 {
260 CharPtr seqhead, sequence;
261 Int4 i, size;
262
263 if ((start + chunk) > len)
264 size = len - start;
265 else
266 size = chunk;
267
268 seqhead = sequence = MemNew (sizeof (Char) * (size_t) (size+1));
269 if (seqhead == NULL)
270 return seqhead;
271
272 SeqPortSeek (spp, start, SEEK_SET);
273 for (i = 0; i < size; i++)
274 *sequence++ = SeqPortGetResidue (spp);
275 *sequence = '\0';
276
277 return seqhead;
278 }
279
MatrixMatch(SeqPortPtr spp,Int4 len,ComMatPtr cmtp,Int4 matlen,FloatHiPtr fptr)280 static void MatrixMatch (SeqPortPtr spp, Int4 len, ComMatPtr cmtp,
281 Int4 matlen, FloatHiPtr fptr)
282 {
283 Int4 i, n, length, chunk = SQRDBF;
284 Int4 matval, cutoff = 0;
285 CharPtr seqhead, seq;
286 FloatHiPtr fptrhead, fptrtemp;
287
288 fptrhead = fptr;
289 for (i = 0; i < len; i+=(chunk-matlen))
290 {
291 fptr = fptrhead;
292 fptr += i;
293 fptrtemp = fptr;
294 seqhead = seq = GetSeqChunk (spp, i, chunk, len);
295
296 n = 0;
297 length = GridMatchSetUp (seqhead);
298 while (n < length)
299 {
300 n = GridMatch (seq, n, length, matlen, cmtp, &matval, cmtp->max-cutoff);
301 if (n < length)
302 {
303 fptr = fptrtemp + n;
304 *fptr = (FloatHi) matval;
305 *fptr = *fptr / cmtp->max * 100;
306 n++;
307 seq = seqhead + n;
308 }
309 }
310 MemFree (seqhead);
311 }
312 return;
313 }
314
315 /* functions - external */
316
MatrixSeq(BioseqPtr bsp,SeqGraphPtr sgptr,ComMatPtr cmtp,Int4 window)317 extern SeqGraphPtr MatrixSeq (BioseqPtr bsp, SeqGraphPtr sgptr,
318 ComMatPtr cmtp, Int4 window)
319 {
320 FloatHiPtr fptr;
321 Int4 i, start, end;
322 Int4 gwidth = 500;
323 SeqPortPtr spp;
324 SeqGraphPtr sgp;
325
326 if (bsp != NULL)
327 {
328 if (sgptr != NULL) /* should only be called once for aa's */
329 {
330 InvertMatrix (cmtp, window);
331 }
332
333 if ((sgp = SeqGraphNew ()) != NULL)
334 {
335 /* type and number of values and compression */
336 sgp->numval = bsp->length;
337 sgp->compr = (Int4) (bsp->length / gwidth);
338 if ((bsp->length%gwidth) != 0)
339 sgp->compr += 1;
340 /* graph type */
341 sgp->flags[2] = 1;
342 sgp->values = (Pointer) MemNew ((sizeof (FloatHi)) * sgp->numval);
343 /* min/max */
344 sgp->max.realvalue = 100.0;
345 sgp->min.realvalue = 0.0;
346 sgp->axis.realvalue = 0.0;
347 /* scaling */
348 sgp->flags[1] = 1;
349 sgp->a = 2;
350 sgp->b = 0;
351 }
352 else
353 {
354 return sgptr;
355 }
356 /* do it */
357 fptr = (FloatHiPtr) sgp->values;
358 for (i = 0; i < sgp->numval; i++)
359 {
360 *fptr = 0.0;
361 fptr++;
362 }
363 fptr = (FloatHiPtr) sgp->values;
364 start = 0;
365 end = bsp->length - 1;
366
367 if (ISA_na (bsp->mol))
368 {
369 spp = SeqPortNew (bsp, start, end, 0, Seq_code_iupacna);
370 }
371 else
372 {
373 spp = SeqPortNew (bsp, start, end, 0, Seq_code_iupacaa);
374 }
375
376 MatrixMatch (spp, sgp->numval, cmtp, window, fptr);
377
378 SeqPortFree (spp);
379 if (sgptr != NULL)
380 sgptr->next = sgp;
381 else
382 sgptr = sgp;
383 }
384 return sgptr;
385 }
386
MatrixSearch(BioseqPtr bsp,Int2 matrixtype)387 static SeqGraphPtr MatrixSearch (BioseqPtr bsp, Int2 matrixtype)
388 {
389 Int4 i, n;
390 Int4 window;
391 Int2 moltype;
392 Char res[FILTS];
393 FloatHi scr[FILTS][FILTS];
394 FloatHi maxvalue, maxtemp;
395 SeqGraphPtr sgp;
396 ComMatPtr cmtp;
397
398 switch (matrixtype)
399 {
400 case NA_MATRIX_DONOR1:
401 window = ReadMatrix (res, scr, "KSdonor1.mat");
402 break;
403 case NA_MATRIX_DONOR2:
404 window = ReadMatrix (res, scr, "KSdonor2.mat");
405 break;
406 case NA_MATRIX_ACCEPT:
407 window = ReadMatrix (res, scr, "KSaccept.mat");
408 break;
409 case NA_MATRIX_BRANCH:
410 window = ReadMatrix (res, scr, "KSbranch.mat");
411 break;
412 case NA_MATRIX_HR:
413 window = ReadMatrix (res, scr, "KShr30bi.mat");
414 break;
415 default:
416 return NULL;
417 }
418 if (window == 0)
419 return NULL;
420
421 maxvalue = 0.0;
422 for (i = 0; i < window; i++)
423 {
424 maxtemp = 0.0;
425 for (n = 0; n < FILTS; n++)
426 {
427 if (maxtemp < scr[n][i])
428 maxtemp = scr[n][i];
429 }
430 maxvalue += maxtemp;
431 }
432
433 if (ISA_na (bsp->mol))
434 moltype = 0;
435 else
436 moltype = 1;
437
438 cmtp = CompileMatrix (scr, window, (Int4) maxvalue);
439
440 sgp = NULL;
441 sgp = MatrixSeq (bsp, sgp, cmtp, window);
442 if (sgp == NULL)
443 {
444 ComMatFree (cmtp);
445 return sgp;
446 }
447 if (moltype == 0)
448 {
449 sgp = MatrixSeq (bsp, sgp, cmtp, window);
450 if (sgp->next == NULL)
451 sgp = SeqGraphFree (sgp);
452 }
453 ComMatFree (cmtp);
454 return sgp;
455 }
456