1 /* $Id: thrdcpll.c 345908 2011-12-01 14:12:00Z thiessen $
2 *===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  thrdcpll.c
27 *
28 * Author:  Stephen Bryant
29 *
30 * Initial Version Creation Date: 08/16/2000
31 *
32 * $Revision: 345908 $
33 *
34 * File Description: threader
35 */
36 
37 
38 
39 #include <algo/structure/threader/thrdatd.h>
40 #include <algo/structure/threader/thrddecl.h>
41 
42 /* Given the current alignment, construct by-segment contact lists for */
43 /* location sampling.  These lists contain the same information as the */
44 /* largest-extent or reference contact lists, but some contacts may be */
45 /* omitted, if loop length limits or sequence length constrain the possible */
46 /* n- or c-terminal offsets of a segment.  They contain pointers to the */
47 /* energies of each contact, as well as distance intervals.  */
48 
cpll(Cor_Def * cdf,Rcx_Ptl * pmf,Qry_Seq * qsq,Cxl_Los ** cpr,Cur_Aln * sai,Cxl_Los ** cpl)49 /*int*/ void cpll(Cor_Def* cdf, Rcx_Ptl* pmf, Qry_Seq* qsq, Cxl_Los** cpr,
50          Cur_Aln* sai, Cxl_Los** cpl) {
51 /*-----------------------------------------------------*/
52 /* cdf:  Core segment locations and loop length limits */
53 /* pmf:  Potential of mean force as a 3-d lookup table */
54 /* qsq:  Sequence to thread with alignment contraints  */
55 /* cpr:  Contacts by segment, largest possible set     */
56 /* sai:  Current alignment of query sequence with core */
57 /* cpl:  Contacts by segment, given current alignment  */
58 /*-----------------------------------------------------*/
59 
60 int	nmt;		/* Number of motif residue positions */
61 int	nsc;		/* Number of threaded core segments */
62 int	ppi; 		/* Index of peptide group in potential */
63 /*int	nrt;*/		/* Number of residue types */
64 int	nqi;		/* Number of residues in query sequence */
65 int	i,j,k;		/* Counters */
66 int	t1,t2;		/* Motif residue types */
67 int	r1,r2=0;	/* Motif residue positions */
68 int	s1,s2;		/* Core segment indices */
69 int	d;		/* Distance inteva */
70 Cxl_Los *cr;	/* Pointer to segment reference contact lists */
71 Cxl_Los *cl;	/* Pointer to segment location sampling contact lists */
72 int	*cf;		/* Flags residues possibly within the core. */
73 int	le;		/* Explicit limit on segment extent, query index */
74 int	la;		/* Alignment-derived limit on segment extent */
75 int	mn,mx;		/* Range */
76 
77 /* Parameters */
78 
79 nsc=sai->nsc;
80 nmt=sai->nmt;
81 ppi=pmf->ppi;
82 /*nrt=pmf->nrt;*/
83 nqi=qsq->n;
84 
85 
86 /* printf("nsc %d\n",nsc);
87 printf("nmt %d\n",nmt);
88 printf("ppi %d\n",ppi);
89 printf("nrt %d\n",nrt);
90 printf("nqi %d\n",nqi); */
91 
92 
93 
94 /* Flag residues which may fall in the core, given the current alignment */
95 
96 cf=sai->cf;
97 for(i=0;i<nmt;i++) cf[i]=(-1);
98 /* for(i=0; i<nmt; i++) printf("%d ",cf[i]); printf("cf\n"); */
99 
100 for(i=0; i<nsc; i++) {
101 
102 	/* Identify maximum n-terminal extent of this segment */
103 	le=sai->al[i]-cdf->sll.nomx[i];
104 	la=(i==0) ? cdf->lll.llmn[0]:
105 		sai->al[i-1]+cdf->sll.comn[i-1]+cdf->lll.llmn[i]+1;
106 	/* printf("nt-le:%d nt-la:%d\n",le,la); */
107 	le=(la>le) ? la : le;
108 	mn=cdf->sll.rfpt[i]-(sai->al[i]-le);
109 	/* printf("nt-le:%d nt-mn:%d\n",le,mn); */
110 
111 	/* Identify maximum c-terminal extent of this segment */
112 	le=sai->al[i]+cdf->sll.comx[i];
113 	la=(i==(nsc-1)) ? nqi-1-cdf->lll.llmn[nsc]:
114 		sai->al[i+1]-cdf->sll.nomn[i+1]-cdf->lll.llmn[i+1]-1;
115 	/* printf("ct-le:%d ct-la:%d\n",le,la); */
116 	le=(la<le) ? la : le;
117 	mx=cdf->sll.rfpt[i]+(le-sai->al[i]);
118 	/* printf("ct-le:%d ct-mx:%d\n",le,mx); */
119 
120 	/* Flag possible core residues */
121 	/* printf("mn:%d mx:%d\n",mn,mx); */
122 	for(j=mn; j<=mx; j++) cf[j]=i; }
123 
124 /* for(i=0; i<nmt; i++) printf("%d ",cf[i]); printf("cf\n"); */
125 
126 
127 /* Zero pair counts */
128 
129 for(i=0; i<nsc; i++) { cl=cpl[i]; cl->rr.n=0; cl->rp.n=0; cl->rf.n=0;}
130 
131 
132 /* Loop over core segments */
133 
134 for(i=0; i<nsc; i++ ) {
135 	cl=cpl[i];
136 	cr=cpr[i];
137 
138 	/* Loop over residue-residue contacts in the reference list */
139 
140 	for(j=0; j<cr->rr.n; j++) {
141 
142 		/* Test that contact is within the allowed extent range */
143 		r1=cr->rr.r1[j];
144 		s1=cf[r1];
145 		if(s1<0) continue;
146 		t1=qsq->sq[sai->al[s1]-(cdf->sll.rfpt[s1]-r1)];
147 		if(t1<0) continue;
148 		r2=cr->rr.r2[j];
149 		s2=cf[r2];
150 		if(s2<0) continue;
151 		t2=qsq->sq[sai->al[s2]-(cdf->sll.rfpt[s2]-r2)];
152 		if(t2<0) continue;
153 		d=cr->rr.d[j];
154 
155 		/* Copy contact to the location-sampling pair list */
156 		k=cl->rr.n;
157 		cl->rr.r1[k]=r1;
158 		cl->rr.r2[k]=r2;
159 		cl->rr.d[k]=d;
160 		cl->rr.e[k]=pmf->rrt[d][t1][t2];
161 		cl->rr.n++;
162 	/* printf("j:%d k:%d s1:%d s2:%d r1:%d r2:%d t1:%d t2:%d d:%d e:%d\n",
163 		j,k,s1,s2,r1,r2,t1,t2,d,cl->rr.e[k]); */
164 
165 		}
166 
167 
168 	/* Loop over residue-peptide contacts in the reference list */
169 	for(j=0; j<cr->rp.n; j++) {
170 
171 		/* Test that the contact is present in the current core */
172 		r1=cr->rp.r1[j];
173 		s1=cf[r1];
174 		if(s1<0) continue;
175 		t1=qsq->sq[sai->al[s1]-(cdf->sll.rfpt[s1]-r1)];
176 		if(t1<0) continue;
177 		r2=cr->rp.p2[j];
178 		s2=cf[r2];
179 		if(s2<0) continue;
180 		d=cr->rp.d[j];
181 
182 		/* Copy contact to the location-sampling pair list */
183 		k=cl->rp.n;
184 		cl->rp.r1[k]=r1;
185 		cl->rp.p2[k]=r2;
186 		cl->rp.d[k]=d;
187 		cl->rp.e[k]=pmf->rrt[d][t1][ppi];
188 		cl->rp.n++;
189 	/* printf("j:%d k:%d s1:%d s2:%d r1:%d r2:%d t1:%d t2:%d d:%d e:%d\n",
190 		j,k,s1,s2,r1,r2,t1,ppi,d,cl->rp.e[k]); */
191 		}
192 
193 
194 	/* Loop over residue-fixed contacts in the reference list */
195 	for(j=0; j<cr->rf.n; j++) {
196 
197 		/* Test that the contact is present in the current core */
198 		r1=cr->rf.r1[j];
199 		s1=cf[r1];
200 		if(s1<0) continue;
201 		t1=qsq->sq[sai->al[s1]-(cdf->sll.rfpt[s1]-r1)];
202 		if(t1<0) continue;
203 		t2=cr->rf.t2[j];
204 		d=cr->rf.d[j];
205 
206 		/* Copy contact to the location-sampling pair list */
207 		k=cl->rf.n;
208 		cl->rf.r1[k]=r1;
209 		cl->rf.t2[k]=r2;
210 		cl->rf.d[k]=d;
211 		cl->rf.e[k]=pmf->rrt[d][t1][t2];
212 		cl->rf.n++;
213 	/* printf("j:%d k:%d s1:%d s2:%d r1:%d r2:%d t1:%d t2:%d d:%d e:%d\n",
214 		j,k,s1,s2,r1,r2,t1,ppi,d,cl->rf.e[k]); */
215 		}
216 	}
217 
218 }
219