1 #include <objres.h>
2 #include <objseq.h>
3 
4 #include <picture.h>
5 #include <viewer.h>
6 
7 #include <seqport.h>
8 #include <seqgraph.h>
9 #include <seqmtrx.h>
10 
11 /* defines */
12 #define FILTS  32
13 #define SQRDBF 8192
14 
15 /* functions */
16 
ComMatNew(ComMatPtr curcmtp)17 static ComMatPtr ComMatNew (ComMatPtr curcmtp)
18 {
19   ComMatPtr   cmtp;
20   Int4        i;
21 
22   cmtp = (ComMatPtr) MemNew (sizeof (ComMat));
23   cmtp->next = NULL;
24   cmtp->min = 0;
25   cmtp->max = 0;
26   for (i=0; i<FILTS; i++)
27     cmtp->res[i] = 0;
28 
29   if (curcmtp != NULL)
30   {
31     while (curcmtp->next != NULL)
32       curcmtp = curcmtp->next;
33     curcmtp->next = cmtp;
34   }
35   return cmtp;
36 }
37 
ComMatFree(ComMatPtr headcmtp)38 extern ComMatPtr ComMatFree (ComMatPtr headcmtp)
39 {
40   ComMatPtr   cmtp;
41 
42   while (headcmtp != NULL)
43   {
44     cmtp = headcmtp->next;
45     headcmtp->next = NULL;
46     MemFree (headcmtp);
47     headcmtp = cmtp;
48   }
49   return headcmtp;
50 }
51 
CompileMatrix(FloatHi scr[FILTS][FILTS],Int4 len,Int4 maxval)52 extern ComMatPtr CompileMatrix (FloatHi scr[FILTS][FILTS], Int4 len,
53                                 Int4 maxval)
54 {
55   Int4      i, j, tmp;
56   Char      k[FILTS] = {'A', 'B', 'C', 'D', 'G', 'H', 'K', 'M',
57                         'N', 'R', 'S', 'T', 'V', 'W', 'Y', 'N',
58                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
59                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};
60 
61   ComMatPtr cmtphead = NULL, cmtp = NULL;
62 
63   for (i = 0; i < len; i++)
64   {
65     cmtp = ComMatNew (cmtp);
66     cmtp->max = maxval;
67     if (cmtphead == NULL)
68       cmtphead = cmtp;
69     for (j = 0; j < FILTS; j++)
70       cmtp->res[(Uint1) k[j] - (Uint1) 'A'] = (Int4) scr[j][i];
71     tmp = 0;
72     for (j = 0; j < FILTS; j++)
73     {
74       if (cmtp->res[j] > tmp)
75         tmp = cmtp->res[j];
76       cmtp->min = tmp;
77     }
78   }
79   return cmtphead;
80 }
81 
InvertMatrix(ComMatPtr cmtp,Int4 window)82 extern void InvertMatrix (ComMatPtr cmtp, Int4 window)
83 {
84   Int4      i, j, k;
85   Int4      ftemp;
86   ComMatPtr cmtphead, cmtptail;
87 
88 /* complement */
89   cmtphead = cmtp;
90   while (cmtp != NULL)
91   {
92 /* A <-> T */
93     ftemp = cmtp->res[19];
94     cmtp->res[19] = cmtp->res[0];
95     cmtp->res[0] = ftemp;
96 /* B <-> V */
97     ftemp = cmtp->res[21];
98     cmtp->res[21] = cmtp->res[1];
99     cmtp->res[1] = ftemp;
100 /* C <-> G */
101     ftemp = cmtp->res[6];
102     cmtp->res[6] = cmtp->res[2];
103     cmtp->res[2] = ftemp;
104 /* D <-> H */
105     ftemp = cmtp->res[7];
106     cmtp->res[7] = cmtp->res[3];
107     cmtp->res[3] = ftemp;
108 /* K <-> M */
109     ftemp = cmtp->res[12];
110     cmtp->res[12] = cmtp->res[10];
111     cmtp->res[10] = ftemp;
112 /* R <-> Y */
113     ftemp = cmtp->res[17];
114     cmtp->res[17] = cmtp->res[24];
115     cmtp->res[24] = ftemp;
116 /* N ; S ; W */
117     cmtp = cmtp->next;
118   }
119 
120 /* reverse */
121   cmtp = cmtphead;
122   j = window - 1;
123   for (i = 0; i < window/2; i++)
124   {
125     cmtptail = cmtphead;
126     for (k = 0; k < j; k++)
127       cmtptail = cmtptail->next;
128     j--;
129 
130     for (k = 0; k < FILTS; k++)
131     {
132       ftemp = cmtptail->res[k];
133       cmtptail->res[k] = cmtp->res[k];
134       cmtp->res[k] = ftemp;
135     }
136     k = cmtptail->min;
137     cmtptail->min = cmtp->min;
138     cmtp->min = k;
139     cmtp = cmtp->next;
140   }
141 
142   return;
143 }
144 
ReadMatrix(CharPtr res,FloatHi scr[FILTS][FILTS],CharPtr filename)145 extern Int4 ReadMatrix (CharPtr res, FloatHi scr[FILTS][FILTS],
146                         CharPtr filename)
147 {
148   FILE     *fin;
149   Int2     i, n, val;
150   Int4     type, numcol;
151   long int ltype, lscore, lnumcol;
152   Char     buf[PATH_MAX], buff[PATH_MAX];
153   CharPtr  bptr;
154 
155   static Char c[] =
156     {'A','B','C','D','G','H','K','M','N','R','S','T','V','W','Y'};
157 
158   for (i = 0; i < FILTS; i++)
159     res[i] = '\0';
160   for (i = 0; i < FILTS; i++)
161     for (n = 0; n < FILTS; n++)
162       scr[i][n] = 0.0;
163 
164   if (!FindPath ("ncbi", "ncbi", "data", buf, sizeof (buf)))
165     return 0;
166 
167   FileBuildPath (buf, NULL, filename);
168   if ((fin = FileOpen (buf, "r")) == NULL)
169   {
170     if ((fin = FileOpen (filename, "r")) == NULL)
171     {
172       return 0;
173     }
174   }
175 
176   val = 0;
177   type = -1;
178   numcol = 0;
179   while ((FileGets (buff, sizeof (buff), fin)) != NULL)
180   {
181     if (buff[0] == ';')
182       continue;
183     if (type == -1)
184     {
185       FileGets (buff, sizeof (buff), fin);
186       sscanf (buff, "%ld", &ltype);
187       type = (Int4) ltype;
188       if (type != 1 && type != 2)
189       {
190         FileClose (fin);
191         return 0;
192       }
193       FileGets (buff, sizeof (buff), fin);
194       sscanf (buff, "%ld", &lnumcol);
195       numcol = (Int4) lnumcol;
196       FileGets (buff, sizeof (buff), fin);
197       continue;
198     }
199     res[val] = c[val];
200     bptr = buff;
201     for (i = 0; i < numcol; i++)
202     {
203       sscanf (bptr, "%ld", &lscore);
204       scr[val][i] = (FloatHi) lscore;
205       bptr += 4;
206     }
207     val++;
208   }
209   FileClose (fin);
210   return numcol;
211 }
212 
GridMatch(CharPtr seqhead,Int4 cur,Int4 end,Int4 matlen,ComMatPtr cmtphead,Int4Ptr matval,Int4 maxval)213 extern Int4 GridMatch (CharPtr seqhead, Int4 cur, Int4 end, Int4 matlen,
214                        ComMatPtr cmtphead, Int4Ptr matval, Int4 maxval)
215 {
216   CharPtr   seq;
217   ComMatPtr cmtp;
218   Int4      cutval, newval;
219 
220   end = end - matlen + 1;
221   while (cur < end)
222   {
223     seq = seqhead;
224     cmtp = cmtphead;
225     *matval = 0;
226     cutval = maxval;
227     while (cmtp != NULL)
228     {
229       newval = cmtp->res[(((Uint1) *seq) - ((Int2) 'A'))];
230       cutval = cutval - cmtp->min + newval;
231       if (cutval < 0)
232         break;
233       *matval += newval;
234       cmtp = cmtp->next;
235       if (cmtp == NULL)
236         return cur;
237       seq++;
238     }
239     seqhead++;
240     cur++;
241   }
242   return cur;
243 }
244 
GridMatchSetUp(CharPtr seq)245 extern Int4 GridMatchSetUp (CharPtr seq)
246 {
247   Int4 i;
248 
249   i = 0;
250   while (*seq != '\0')
251   {
252     seq++;
253     i++;
254   }
255   return i;
256 }
257 
GetSeqChunk(SeqPortPtr spp,Int4 start,Int4 chunk,Int4 len)258 static CharPtr GetSeqChunk (SeqPortPtr spp, Int4 start, Int4 chunk, Int4 len)
259 {
260   CharPtr seqhead, sequence;
261   Int4    i, size;
262 
263   if ((start + chunk) > len)
264     size = len - start;
265   else
266     size = chunk;
267 
268   seqhead = sequence = MemNew (sizeof (Char) * (size_t) (size+1));
269   if (seqhead == NULL)
270     return seqhead;
271 
272   SeqPortSeek (spp, start, SEEK_SET);
273   for (i = 0; i < size; i++)
274     *sequence++ = SeqPortGetResidue (spp);
275   *sequence = '\0';
276 
277   return seqhead;
278 }
279 
MatrixMatch(SeqPortPtr spp,Int4 len,ComMatPtr cmtp,Int4 matlen,FloatHiPtr fptr)280 static void MatrixMatch (SeqPortPtr spp, Int4 len, ComMatPtr cmtp,
281                          Int4 matlen, FloatHiPtr fptr)
282 {
283   Int4       i, n, length, chunk = SQRDBF;
284   Int4       matval, cutoff = 0;
285   CharPtr    seqhead, seq;
286   FloatHiPtr fptrhead, fptrtemp;
287 
288   fptrhead = fptr;
289   for (i = 0; i < len; i+=(chunk-matlen))
290   {
291     fptr = fptrhead;
292     fptr += i;
293     fptrtemp = fptr;
294     seqhead = seq = GetSeqChunk (spp, i, chunk, len);
295 
296     n = 0;
297     length = GridMatchSetUp (seqhead);
298     while (n < length)
299     {
300       n = GridMatch (seq, n, length, matlen, cmtp, &matval, cmtp->max-cutoff);
301       if (n < length)
302       {
303         fptr = fptrtemp + n;
304         *fptr = (FloatHi) matval;
305         *fptr = *fptr / cmtp->max * 100;
306         n++;
307         seq = seqhead + n;
308       }
309     }
310     MemFree (seqhead);
311   }
312   return;
313 }
314 
315 /* functions - external */
316 
MatrixSeq(BioseqPtr bsp,SeqGraphPtr sgptr,ComMatPtr cmtp,Int4 window)317 extern SeqGraphPtr MatrixSeq (BioseqPtr bsp, SeqGraphPtr sgptr,
318                               ComMatPtr cmtp, Int4 window)
319 {
320   FloatHiPtr    fptr;
321   Int4          i, start, end;
322   Int4          gwidth = 500;
323   SeqPortPtr    spp;
324   SeqGraphPtr   sgp;
325 
326   if (bsp != NULL)
327   {
328     if (sgptr != NULL)       /* should only be called once for aa's */
329     {
330       InvertMatrix (cmtp, window);
331     }
332 
333     if ((sgp = SeqGraphNew ()) != NULL)
334     {
335 /* type and number of values and compression */
336       sgp->numval = bsp->length;
337       sgp->compr = (Int4) (bsp->length / gwidth);
338       if ((bsp->length%gwidth) != 0)
339         sgp->compr += 1;
340 /* graph type */
341       sgp->flags[2] = 1;
342       sgp->values = (Pointer) MemNew ((sizeof (FloatHi)) * sgp->numval);
343 /* min/max */
344       sgp->max.realvalue = 100.0;
345       sgp->min.realvalue = 0.0;
346       sgp->axis.realvalue = 0.0;
347 /* scaling */
348       sgp->flags[1] = 1;
349       sgp->a = 2;
350       sgp->b = 0;
351     }
352     else
353     {
354       return sgptr;
355     }
356 /* do it */
357     fptr = (FloatHiPtr) sgp->values;
358     for (i = 0; i < sgp->numval; i++)
359     {
360       *fptr = 0.0;
361       fptr++;
362     }
363     fptr = (FloatHiPtr) sgp->values;
364     start = 0;
365     end = bsp->length - 1;
366 
367     if (ISA_na (bsp->mol))
368     {
369       spp = SeqPortNew (bsp, start, end, 0, Seq_code_iupacna);
370     }
371     else
372     {
373       spp = SeqPortNew (bsp, start, end, 0, Seq_code_iupacaa);
374     }
375 
376     MatrixMatch (spp, sgp->numval, cmtp, window, fptr);
377 
378     SeqPortFree (spp);
379     if (sgptr != NULL)
380       sgptr->next = sgp;
381     else
382       sgptr = sgp;
383   }
384   return sgptr;
385 }
386 
MatrixSearch(BioseqPtr bsp,Int2 matrixtype)387 static SeqGraphPtr MatrixSearch (BioseqPtr bsp, Int2 matrixtype)
388 {
389   Int4         i, n;
390   Int4         window;
391   Int2         moltype;
392   Char         res[FILTS];
393   FloatHi      scr[FILTS][FILTS];
394   FloatHi      maxvalue, maxtemp;
395   SeqGraphPtr  sgp;
396   ComMatPtr    cmtp;
397 
398   switch (matrixtype)
399   {
400    case NA_MATRIX_DONOR1:
401     window = ReadMatrix (res, scr, "KSdonor1.mat");
402     break;
403    case NA_MATRIX_DONOR2:
404     window = ReadMatrix (res, scr, "KSdonor2.mat");
405     break;
406    case NA_MATRIX_ACCEPT:
407     window = ReadMatrix (res, scr, "KSaccept.mat");
408     break;
409    case NA_MATRIX_BRANCH:
410     window = ReadMatrix (res, scr, "KSbranch.mat");
411     break;
412    case NA_MATRIX_HR:
413     window = ReadMatrix (res, scr, "KShr30bi.mat");
414     break;
415    default:
416     return NULL;
417   }
418   if (window == 0)
419     return NULL;
420 
421   maxvalue = 0.0;
422   for (i = 0; i < window; i++)
423   {
424     maxtemp = 0.0;
425     for (n = 0; n < FILTS; n++)
426     {
427       if (maxtemp < scr[n][i])
428         maxtemp = scr[n][i];
429     }
430     maxvalue += maxtemp;
431   }
432 
433   if (ISA_na (bsp->mol))
434     moltype = 0;
435   else
436     moltype = 1;
437 
438   cmtp = CompileMatrix (scr, window, (Int4) maxvalue);
439 
440   sgp = NULL;
441   sgp = MatrixSeq (bsp, sgp, cmtp, window);
442   if (sgp == NULL)
443   {
444     ComMatFree (cmtp);
445     return sgp;
446   }
447   if (moltype == 0)
448   {
449     sgp = MatrixSeq (bsp, sgp, cmtp, window);
450     if (sgp->next == NULL)
451       sgp = SeqGraphFree (sgp);
452   }
453   ComMatFree (cmtp);
454   return sgp;
455 }
456