1 /* $Id: url_subs.c $ */
2 
3 /* copyright (c) 1998, 1999, 2014 by William R. Pearson and the
4    The Rector & Visitors of the University of Virginia */
5 
6 /* Licensed under the Apache License, Version 2.0 (the "License");
7    you may not use this file except in compliance with the License.
8    You may obtain a copy of the License at
9 
10    http://www.apache.org/licenses/LICENSE-2.0
11 
12    Unless required by applicable law or agreed to in writing,
13    software distributed under this License is distributed on an "AS
   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
15    express or implied.  See the License for the specific language
16    governing permissions and limitations under the License.
17 */
18 
/* 30 Dec 2004 - modify REF_URL to accommodate current Entrez */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include "defs.h"
26 #include "structs.h"
27 #include "param.h"
28 
29 #ifndef DEF_PROT_LIB
30 #define DEF_PROT_LIB "q"
31 #endif
32 
33 #ifndef FASTA_HOST
34 #define FASTA_HOST "your.fasta.host.here/fasta/cgi"
35 #endif
36 
37 extern int seq_pos(int pos, int rev, int off);
38 
39 char *display_domains(char, struct annot_entry **s_annot_arr_p, int n_domains);
40 char *web_encode(const char *);
41 
/* encode_json_str() - write one JSON object member with a string value:
 *
 *    "label": "value"
 *
 * fp     output stream
 * label  member name; written as-is (callers pass literal identifiers)
 * value  member value; '"', '\\' and control characters are escaped so
 *        the emitted text remains a valid JSON string.  A NULL value is
 *        written as the empty string.
 * first  non-zero for the first member of the enclosing object; every
 *        later member is preceded by ",\n"
 */
void encode_json_str(FILE *fp, const char *label, const char *value, int first) {
  const unsigned char *cp;

  if (!first) {fprintf(fp, ",\n");}
  fprintf(fp, " \"%s\": \"", label);

  if (value == NULL) { value = ""; }

  /* unsigned char so that bytes >= 0x80 are never mistaken for control characters */
  for (cp = (const unsigned char *)value; *cp; cp++) {
    switch (*cp) {
    case '"':  fputs("\\\"", fp); break;
    case '\\': fputs("\\\\", fp); break;
    case '\n': fputs("\\n", fp); break;
    case '\r': fputs("\\r", fp); break;
    case '\t': fputs("\\t", fp); break;
    default:
      if (*cp < 0x20) { fprintf(fp, "\\u%04x", (unsigned int)*cp); }
      else { fputc(*cp, fp); }
      break;
    }
  }
  fputc('"', fp);
}
46 
/* encode_json_long() - write one JSON object member with an integer value:
 *
 *    "label": value
 *
 * When first is zero the member is preceded by ",\n" so that it can
 * follow an earlier member of the same object.
 */
void encode_json_long(FILE *fp, const char *label, long value, int first) {
  if (first == 0) {
    fputs(",\n", fp);
  }
  fprintf(fp, " \"%s\": %ld", label, value);
}
51 
/* encode_json_dfmt() - write one JSON object member with a floating point
 * value, using a caller supplied printf() format.
 *
 * fp     output stream
 * label  member name, consumed by the first conversion in fmt
 * value  member value, consumed by the second conversion in fmt
 * fmt    printf() format taking (const char *, double) in that order,
 *        e.g. " \"%s\": %.3f".  It is passed to fprintf() unchanged, so
 *        it must come from trusted code.
 * first  non-zero for the first member of the enclosing object; every
 *        later member is preceded by ",\n", as in encode_json_str() and
 *        encode_json_long()
 */
void encode_json_dfmt(FILE *fp, const char *label, double value, char *fmt, int first) {
  if (!first) {fprintf(fp, ",\n");}
  fprintf(fp, fmt, label, value);
}
55 
/* encode_json_aln() - placeholder for writing the alignment boundaries
 * as a JSON object member.
 *
 * The body is empty: nothing is written to fp and none of the arguments
 * are read, so the separator requested through 'first' is not emitted
 * either.
 *
 * NOTE(review): presumably intended to emit the aln_p coordinates
 * adjusted by q_offset/l_offset -- confirm before relying on an "aln"
 * member in the JSON output.
 */
void encode_json_aln(FILE *fp, const struct a_struct *aln_p, long q_offset, long l_offset, int first) {
}
58 
/* encode_json_lines() - write a JSON object member whose value is an
 * array of strings, one element per line of annot_s:
 *
 *    "label": [
 *    "line 1",
 *    "line 2"
 *    ]
 *
 * fp       output stream
 * label    member name
 * annot_s  text split on '\n'; a final line without a trailing '\n' is
 *          emitted as well.  The line text is written as-is (it is not
 *          JSON-escaped).
 * first    non-zero for the first member of the enclosing object; every
 *          later member is preceded by ",\n"
 *
 * annot_s is scanned in place, so no temporary copy is allocated.
 */
void encode_json_lines(FILE *fp, const char *label, const char *annot_s, int first) {
  const char *line_p, *nl_p;
  int n_out = 0;

  if (!first) {fprintf(fp, ",\n");}
  fprintf(fp, " \"%s\": [\n",label);

  /* every '\n' terminated line becomes one array element */
  for (line_p = annot_s; (nl_p = strchr(line_p,'\n')) != NULL; line_p = nl_p+1) {
    if (n_out++ > 0) { fprintf(fp, ",\n"); }
    fputs(" \"", fp);
    fwrite(line_p, sizeof(char), (size_t)(nl_p - line_p), fp);
    fputc('"', fp);
  }

  /* text after the last '\n' (if any) is the final element */
  if (*line_p != '\0') {
    if (n_out++ > 0) { fprintf(fp, ",\n"); }
    fprintf(fp, " \"%s\"", line_p);
  }

  fprintf(fp, "\n ]");
}
87 
encode_json_domains(FILE * fp,const char * label,const struct annot_str * annot_p,int first)88 void encode_json_domains(FILE *fp, const char *label, const struct annot_str *annot_p, int first) {
89   int i;
90 
91   if (!first) {fprintf(fp, ",\n");}
92   fprintf(fp, "\"%s\": [\n",label);
93   for (i=0; i < annot_p->n_annot; i++) {
94     if (annot_p->s_annot_arr_p[i]->label != '-') continue;
95     if (i != 0) fprintf(fp, ",\n");
96     fprintf(fp, "  { \"start\":%ld, \"stop\":%ld, \"description\":\"%s\" }",
97 	    annot_p->s_annot_arr_p[i]->pos+1,annot_p->s_annot_arr_p[i]->end+1,annot_p->s_annot_arr_p[i]->comment);
98   }
99   fprintf(fp,"\n  ]");
100 }
101 
do_url1(FILE * fp,const struct mngmsg * m_msp,const struct pstruct * ppst,char * l_name,int n1,const struct a_struct * aln_p,const char * annot_var_s,const struct annot_str * q_annot_p,const struct annot_str * l_annot_p)102 void do_url1(FILE *fp, const struct mngmsg *m_msp, const struct pstruct *ppst,
103 	     char *l_name, int n1,
104 	     const struct a_struct *aln_p, const char *annot_var_s,
105 	     const struct annot_str *q_annot_p,
106 	     const struct annot_str *l_annot_p )
107 {
108   char my_q_name[200], my_l_name[200], json_l_name[200];
109   char *db, *bp;
110   char pgm[10], o_pgm[10], lib[MAX_LSTR];
111   char *tmp_annot_s, *q_domain_s, *l_domain_s, *tmp_domain_s, *etmp_domain_s;
112   int  n_tmp_annot_s, n_tmp_domain;
113   long q_offset, l_offset;
114   char *ref_url, *lbp=NULL;
115   char *srch_url, *srch_url1, *dom_url;
116 
117   /* set the database */
118   if (m_msp->ldb_info.ldnaseq==SEQT_DNA) db="nucleotide";
119   else db="Protein";
120 
121   /* set the program type */
122   if (strncmp(m_msp->f_id0,"rss",3)==0) {
123     strncpy(pgm,"fa",sizeof(pgm));
124   }
125   else if (strncmp(m_msp->f_id0,"rfx",3)==0) {
126     strncpy(pgm,"fx",sizeof(pgm));
127   }
128   else { strncpy(pgm,m_msp->f_id0,sizeof(pgm)); }
129 
130   SAFE_STRNCPY(o_pgm, pgm, sizeof(o_pgm));
131 
132   /* get a library name (probably does not work for %, + abbreviations */
133   if (m_msp->lname[0]!='%') {
134     SAFE_STRNCPY(lib,m_msp->lname,sizeof(lib));
135   }
136   else {
137     SAFE_STRNCPY(lib,"%25",sizeof(lib));
138     SAFE_STRNCAT(lib,&m_msp->lname[1],sizeof(lib));
139   }
140   lib[sizeof(lib)-1]='\0';
141 
142   if ((lbp = strchr(l_name,'|'))==NULL) {
143     lbp = l_name;
144   }
145   else {
146     lbp++;
147   }
148 
149   SAFE_STRNCPY(my_q_name,m_msp->qtitle,sizeof(my_q_name));
150   if ((bp=strchr(my_q_name,' '))!=NULL) *bp='\0';
151 
152   SAFE_STRNCPY(my_l_name,lbp,sizeof(my_l_name));
153 
154   if (pgm[0]=='t' || !strcmp(pgm,"fx") || !strcmp(pgm,"fy")==0 ) {
155     if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
156     lbp = &my_l_name[strlen(my_l_name)-2];
157     if ( *lbp == '_' ) *lbp = '\0';
158   }
159 
160   /* change the program name for fastx, tfastx, tfasta */
161   /* fastx returns proteins */
162   if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
163   else if (strcmp(pgm,"ff")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
164   else if (pgm[0]=='t') {
165     SAFE_STRNCPY(pgm,"fx",sizeof(pgm));
166     SAFE_STRNCPY(lib,DEF_PROT_LIB,sizeof(lib));
167   }
168 
169   fflush(fp);
170 
171   q_offset = aln_p->q_offset;
172   l_offset = aln_p->l_offset;
173 
174   /* set up ref_url, srch_url, srch_url1, dom_url */
175 
176   fflush(fp);
177 
178   ref_url = getenv("REF_URL");
179   srch_url = getenv("SRCH_URL");
180   srch_url1 = getenv("SRCH_URL1");
181   dom_url = NULL;
182   dom_url = getenv("DOMAIN_PLOT_URL");
183 
184   if (ref_url || srch_url || srch_url1 || dom_url) {
185     fprintf(fp,"<!-- LINK_START %s -->",l_name);
186 
187   /* REF_URL should provide */
188   /* "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A>&nbsp;&nbsp;" */
189   if (ref_url != NULL) {fprintf(fp,ref_url,db,my_l_name);}
190 
191   /* SRCH_URL should provide */
192   /* "<A HREF=\"http://localhost/fasta_www2/searchfa.cgi?query=%s&db=fasta_www.cgi&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">Re-search database</A>&nbsp;&nbsp;" */
193   if (srch_url != NULL) {
194     fprintf(fp,srch_url,my_l_name,db,lib,pgm,
195 	    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
196   }
197 
198   /* SRCH_URL1 should provide: */
199   /*  "<A HREF=\"http://localhost/fasta_www2/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">General re-search</A>\n" */
200 
201   if (srch_url1 != NULL) {
202     fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
203 	    l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
204   }
205 
206   if (dom_url!=NULL) {
207     if (annot_var_s && annot_var_s[0]) {
208       tmp_annot_s = web_encode(annot_var_s);
209     }
210     else tmp_annot_s = "";
211 
212     q_domain_s = l_domain_s = NULL;
213 
214     if (q_annot_p && q_annot_p->n_domains > 0 &&
215 	(q_domain_s = display_domains('q',q_annot_p->s_annot_arr_p, q_annot_p->n_annot))!=NULL) {
216     }
217     if (l_annot_p && l_annot_p->n_domains > 0 &&
218 	(l_domain_s = display_domains('l',l_annot_p->s_annot_arr_p, l_annot_p->n_annot))!=NULL) {
219     }
220 
221     /* combine domain strings */
222     n_tmp_domain = 0;
223     if (q_domain_s) n_tmp_domain += strlen(q_domain_s)+1;
224     if (l_domain_s) n_tmp_domain += strlen(l_domain_s)+1;
225     etmp_domain_s = "";
226     if (n_tmp_domain > 0) {
227       if ((tmp_domain_s=(char *)calloc(n_tmp_domain,sizeof(char)))==NULL) {
228 	fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_domain_s[%d]\n",
229 		__FILE__, __LINE__,n_tmp_domain);
230       }
231       else {
232 	tmp_domain_s[0] = '\0';
233 	if (q_domain_s) SAFE_STRNCAT(tmp_domain_s, q_domain_s, n_tmp_domain);
234 	if (l_domain_s) SAFE_STRNCAT(tmp_domain_s, l_domain_s, n_tmp_domain);
235 	etmp_domain_s = web_encode(tmp_domain_s);
236       }
237     }
238 
239     /* appropriate format string: */
240     /*
241        pgm=%s	    -- program abbrev that created alignment
242        q_name=%s     -- query info
243        q_cstart=%ld
244        q_cstop=%ld
245        q_astart=%ld
246        q_astop=%ld
247        l_name=%s     -- library info
248        l_cstart=%ld
249        l_cstop=%ld
250        l_astart=%ld
251        l_astop=%ld
252        region=%s       -- aligned domain and variant information
253        doms=%s
254 
255        DOMAIN_PLOT_URL = "pgm=%s;q_name=%s;q_cstart=%ld;q_cstop=%ld&q_astart=%ld&q_astop=%ld&l_name=%s&l_cstart=%ld&l_cstop=%ld&l_astart=%ld&l_astop=%ld&regions=%s&doms=%s"
256     */
257 
258     /* think about the alternative of running a script
259        rather than embedding it */
260 
261     fprintf(fp,dom_url,o_pgm,
262 	    my_q_name, q_offset+seq_pos(1,aln_p->qlrev,2),q_offset+seq_pos(m_msp->n0,aln_p->qlrev,2),
263 	    q_offset+seq_pos(aln_p->amin0+1,aln_p->qlrev,1), q_offset+seq_pos(aln_p->amax0, aln_p->qlrev,2),
264 	    my_l_name, l_offset+seq_pos(1,aln_p->llrev,2), l_offset+seq_pos(n1,aln_p->llrev,2),
265 	    l_offset+seq_pos(aln_p->amin1+1,aln_p->llrev,1),l_offset+seq_pos(aln_p->amax1,aln_p->llrev,2),
266 	    tmp_annot_s, etmp_domain_s);
267 
268     if (n_tmp_domain>0 && tmp_domain_s) {
269       free(tmp_domain_s);
270       free(etmp_domain_s);
271     }
272     if (l_annot_p && l_annot_p->n_domains && l_domain_s) {
273       free(l_domain_s);
274     }
275     if (q_annot_p && q_annot_p->n_domains && q_domain_s) {
276       free(q_domain_s);
277     }
278     if (annot_var_s && annot_var_s[0] && tmp_annot_s) free(tmp_annot_s);
279   }
280 
281   fprintf(fp,"\n<!-- LINK_STOP -->");
282   fflush(fp);
283   }
284 
285   /*
286     if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
287     fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
288     else
289     fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
290     l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
291   */
292 
293 
294   if (getenv("JSON_HTML")) {
295 
296     /* replace '|' with '_' */
297     SAFE_STRNCPY(json_l_name, l_name, sizeof(json_l_name));
298     for (bp=strchr(json_l_name,'|'); bp; bp=strchr(bp+1,'|')) { *bp = '_'; }
299 
300     /* replace '.' with '_' */
301     for (bp=strchr(json_l_name,'.'); bp; bp=strchr(bp+1,'.')) { *bp = '_'; }
302 
303     fprintf(fp,"\n<script type=\"text/javascript\">\n//<![CDATA[\n var json_%s = {\n",json_l_name);
304     encode_json_str(fp, "db", db, 1);
305     encode_json_str(fp, "l_acc", l_name, 0);
306     encode_json_str(fp, "acc", my_l_name, 0);
307     encode_json_str(fp, "lib", lib, 0);
308     encode_json_str(fp, "pgm", pgm, 0);
309     encode_json_str(fp, "o_pgm", m_msp->f_id0, 0);
310     encode_json_aln(fp, aln_p, q_offset, l_offset, 0);
311     if (annot_var_s && annot_var_s[0]) { encode_json_lines(fp, "annot", annot_var_s, 0); }
312     if (q_annot_p && q_annot_p->n_domains > 0) { encode_json_domains(fp, "q_domains", q_annot_p, 0); }
313     if (l_annot_p && l_annot_p->n_domains > 0) { encode_json_domains(fp, "l_domains", l_annot_p, 0); }
314 
315     fprintf(fp, "\n}\n//]]>\n</script>");
316     fflush(fp);
317   }
318 }
319 
display_domains(char target,struct annot_entry ** annot_arr_p,int n_annots)320 char *display_domains(char target, struct annot_entry **annot_arr_p, int n_annots) {
321   char *domain_s;
322   char line[MAX_STR];
323   int i, i_doms, n_domain_s = MAX_LSTR;
324 
325   /* since (currently) annot_var_s is MAX_LSOTR, do the same for domain_s */
326   if ((domain_s = (char *)calloc(n_domain_s, sizeof(char)))==NULL) {
327     fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,n_domain_s);
328     return NULL;
329   }
330 
331   for (i=0; i < n_annots; i++) {
332     /* annot_arr_p[] has both domains and non domains, but n_domains only counts domains */
333     if (annot_arr_p[i]->label != '-') continue;
334     sprintf(line, "%cDomain:\t%ld-%ld\t%s\n",
335 	    target, annot_arr_p[i]->pos+1, annot_arr_p[i]->end+1, annot_arr_p[i]->comment);
336     if (strlen(domain_s) + strlen(line)+1 > n_domain_s) {
337       n_domain_s += n_domain_s/2;
338       domain_s = realloc(domain_s, n_domain_s);
339     }
340     SAFE_STRNCAT(domain_s, line, n_domain_s);
341   }
342 
343   domain_s = realloc(domain_s, (n_domain_s=strlen(domain_s))+1);
344   domain_s[n_domain_s]='\0';
345 
346   return domain_s;
347 }
348 
349 /* take an annotation string *annot_var_s and convert problematic characters to their web encoding */
350 /* ' ' (space) %20 */
351 /* '|' 	    %7C */
352 /* ';'	    %3B */
353 /* '='	    %3D */
354 /* '\n'	    %0A */
355 
356 static char bad_chars[] = "\n =;|";
357 
/* web_encode() - return a percent-encoded copy of annot_var_s.
 *
 * ASCII letters and digits are copied unchanged; every other byte is
 * replaced by "%xx", where xx is the byte value as two lower-case
 * hexadecimal digits (' ' -> %20, '\n' -> %0a, '|' -> %7c, ...).
 *
 * Returns a heap allocated, '\0' terminated string that the caller must
 * free(), or NULL when memory cannot be allocated.
 */
char *web_encode(const char *annot_var_s) {
  static const char hex_digits[] = "0123456789abcdef";
  size_t n_tmp_annot_s;
  char *tmp_annot_s, *new_annot_s, *dp;
  const unsigned char *sp;

  /* make string largest possible size: every byte may become 3 characters */
  n_tmp_annot_s = strlen(annot_var_s)*3 + 1;
  if ((tmp_annot_s = (char *)calloc(n_tmp_annot_s,sizeof(char)))==NULL) {
    fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_annot_s[%lu]\n",
            __FILE__, __LINE__,(unsigned long)n_tmp_annot_s);
    return NULL;
  }

  dp = tmp_annot_s;
  /* scan as unsigned char so that a byte >= 0x80 always encodes as
     exactly two hex digits and stays inside the 3 character budget */
  for (sp = (const unsigned char *)annot_var_s; *sp ; sp++) {
    if ((*sp >= '0' && *sp <= '9') ||
        (*sp >= 'A' && *sp <= 'Z') ||
        (*sp >= 'a' && *sp <= 'z')) {
      *dp++ = (char)*sp;
    }
    else {
      *dp++ = '%';
      *dp++ = hex_digits[*sp >> 4];
      *dp++ = hex_digits[*sp & 0x0f];
    }
  }
  *dp = '\0';

  /* give back the unused space; if shrinking fails the larger buffer is still valid */
  n_tmp_annot_s = (size_t)(dp - tmp_annot_s);
  if ((new_annot_s = (char *)realloc(tmp_annot_s, n_tmp_annot_s+1))!=NULL) {
    tmp_annot_s = new_annot_s;
  }

  return tmp_annot_s;
}
388