1 /* $Id: url_subs.c $ */
2
/* copyright (c) 1998, 1999, 2014 by William R. Pearson and
   The Rector & Visitors of the University of Virginia */
5
6 /* Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing,
13 software distributed under this License is distributed on an "AS
   IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
15 express or implied. See the License for the specific language
16 governing permissions and limitations under the License.
17 */
18
/* 30 Dec 2004 - modify REF_URL to accommodate current Entrez */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "defs.h"
26 #include "structs.h"
27 #include "param.h"
28
/* Default protein library abbreviation; do_url1() switches the library to
   this value when the program name begins with 't'.  May be overridden at
   compile time. */
#ifndef DEF_PROT_LIB
#define DEF_PROT_LIB "q"
#endif

/* Placeholder host/path for the FASTA CGI services; not referenced by the
   code visible in this file.  May be overridden at compile time. */
#ifndef FASTA_HOST
#define FASTA_HOST "your.fasta.host.here/fasta/cgi"
#endif

/* Defined in another translation unit; do_url1() uses it to compute the
   coordinates substituted into the DOMAIN_PLOT_URL link. */
extern int seq_pos(int pos, int rev, int off);

/* Forward declarations of helpers defined later in this file. */
char *display_domains(char, struct annot_entry **s_annot_arr_p, int n_domains);
char *web_encode(const char *);
41
/*
 * encode_json_str() -- write one string-valued JSON member: "label": "value".
 *
 *   fp     output stream
 *   label  member name
 *   value  member value (written verbatim, no escaping is applied)
 *   first  non-zero for the first member of an object; every later member
 *          is preceded by ",\n"
 */
void encode_json_str(FILE *fp, const char *label, const char *value, int first) {
  if (first == 0) {
    /* separate this member from the one written before it */
    fputs(",\n", fp);
  }

  fprintf(fp, " \"%s\": \"%s\"", label, value);
}
46
/*
 * encode_json_long() -- write one integer-valued JSON member: "label": value.
 *
 *   fp     output stream
 *   label  member name
 *   value  member value, printed with %ld
 *   first  non-zero for the first member of an object; every later member
 *          is preceded by ",\n"
 */
void encode_json_long(FILE *fp, const char *label, long value, int first) {
  if (first == 0) {
    /* separate this member from the one written before it */
    fputs(",\n", fp);
  }

  fprintf(fp, " \"%s\": %ld", label, value);
}
51
/*
 * encode_json_dfmt() -- write one floating-point JSON member using a
 * caller-supplied format.
 *
 *   fp     output stream
 *   label  member name
 *   value  member value
 *   fmt    printf-style format that must consume exactly a (char *) label
 *          followed by a (double) value, e.g. " \"%s\": %.3f".  It is used
 *          as the format string itself, so it must come from trusted code.
 *   first  non-zero for the first member of an object; every later member
 *          is preceded by ",\n", matching encode_json_str()/encode_json_long()
 */
void encode_json_dfmt(FILE *fp, const char *label, double value, char *fmt, int first) {
  /* honour 'first' like the sibling encoders so members stay comma-separated */
  if (!first) {fprintf(fp, ",\n");}
  fprintf(fp, fmt, label, value);
}
55
/*
 * encode_json_aln() -- placeholder for writing alignment coordinates as JSON.
 *
 * The function currently writes nothing and reads none of its arguments;
 * do_url1() calls it between the "o_pgm" member and the annotation members.
 */
void encode_json_aln(FILE *fp, const struct a_struct *aln_p, long q_offset, long l_offset, int first) {
  /* intentionally empty: mark every parameter as deliberately unused */
  (void)fp;
  (void)aln_p;
  (void)q_offset;
  (void)l_offset;
  (void)first;
}
58
/*
 * encode_json_lines() -- write a JSON member whose value is an array of
 * strings, one array element per line of annot_s.
 *
 *   fp       output stream
 *   label    member name
 *   annot_s  text made of '\n'-separated lines; NULL is treated as empty.
 *            A final line that lacks a terminating '\n' is also emitted.
 *   first    non-zero for the first member of an object; every later member
 *            is preceded by ",\n"
 *
 * Line text is written verbatim (no JSON escaping).  The input is scanned in
 * place, so no temporary copy is allocated and the function cannot fail on
 * memory exhaustion.
 */
void encode_json_lines(FILE *fp, const char *label, const char *annot_s, int first) {
  const char *line_start, *line_end;
  int n_written = 0;

  if (!first) {fprintf(fp, ",\n");}
  fprintf(fp, " \"%s\": [\n", label);

  line_start = (annot_s != NULL) ? annot_s : "";

  while (*line_start != '\0') {
    line_end = strchr(line_start, '\n');
    if (line_end == NULL) {
      /* unterminated last line: runs to the end of the string */
      line_end = line_start + strlen(line_start);
    }

    if (n_written++ > 0) fprintf(fp, ",\n");
    fputs(" \"", fp);
    fwrite(line_start, sizeof(char), (size_t)(line_end - line_start), fp);
    fputc('"', fp);

    /* step over the newline, or stop on the terminating NUL */
    line_start = (*line_end == '\n') ? line_end + 1 : line_end;
  }

  fprintf(fp, "\n ]");
}
87
encode_json_domains(FILE * fp,const char * label,const struct annot_str * annot_p,int first)88 void encode_json_domains(FILE *fp, const char *label, const struct annot_str *annot_p, int first) {
89 int i;
90
91 if (!first) {fprintf(fp, ",\n");}
92 fprintf(fp, "\"%s\": [\n",label);
93 for (i=0; i < annot_p->n_annot; i++) {
94 if (annot_p->s_annot_arr_p[i]->label != '-') continue;
95 if (i != 0) fprintf(fp, ",\n");
96 fprintf(fp, " { \"start\":%ld, \"stop\":%ld, \"description\":\"%s\" }",
97 annot_p->s_annot_arr_p[i]->pos+1,annot_p->s_annot_arr_p[i]->end+1,annot_p->s_annot_arr_p[i]->comment);
98 }
99 fprintf(fp,"\n ]");
100 }
101
do_url1(FILE * fp,const struct mngmsg * m_msp,const struct pstruct * ppst,char * l_name,int n1,const struct a_struct * aln_p,const char * annot_var_s,const struct annot_str * q_annot_p,const struct annot_str * l_annot_p)102 void do_url1(FILE *fp, const struct mngmsg *m_msp, const struct pstruct *ppst,
103 char *l_name, int n1,
104 const struct a_struct *aln_p, const char *annot_var_s,
105 const struct annot_str *q_annot_p,
106 const struct annot_str *l_annot_p )
107 {
108 char my_q_name[200], my_l_name[200], json_l_name[200];
109 char *db, *bp;
110 char pgm[10], o_pgm[10], lib[MAX_LSTR];
111 char *tmp_annot_s, *q_domain_s, *l_domain_s, *tmp_domain_s, *etmp_domain_s;
112 int n_tmp_annot_s, n_tmp_domain;
113 long q_offset, l_offset;
114 char *ref_url, *lbp=NULL;
115 char *srch_url, *srch_url1, *dom_url;
116
117 /* set the database */
118 if (m_msp->ldb_info.ldnaseq==SEQT_DNA) db="nucleotide";
119 else db="Protein";
120
121 /* set the program type */
122 if (strncmp(m_msp->f_id0,"rss",3)==0) {
123 strncpy(pgm,"fa",sizeof(pgm));
124 }
125 else if (strncmp(m_msp->f_id0,"rfx",3)==0) {
126 strncpy(pgm,"fx",sizeof(pgm));
127 }
128 else { strncpy(pgm,m_msp->f_id0,sizeof(pgm)); }
129
130 SAFE_STRNCPY(o_pgm, pgm, sizeof(o_pgm));
131
132 /* get a library name (probably does not work for %, + abbreviations */
133 if (m_msp->lname[0]!='%') {
134 SAFE_STRNCPY(lib,m_msp->lname,sizeof(lib));
135 }
136 else {
137 SAFE_STRNCPY(lib,"%25",sizeof(lib));
138 SAFE_STRNCAT(lib,&m_msp->lname[1],sizeof(lib));
139 }
140 lib[sizeof(lib)-1]='\0';
141
142 if ((lbp = strchr(l_name,'|'))==NULL) {
143 lbp = l_name;
144 }
145 else {
146 lbp++;
147 }
148
149 SAFE_STRNCPY(my_q_name,m_msp->qtitle,sizeof(my_q_name));
150 if ((bp=strchr(my_q_name,' '))!=NULL) *bp='\0';
151
152 SAFE_STRNCPY(my_l_name,lbp,sizeof(my_l_name));
153
154 if (pgm[0]=='t' || !strcmp(pgm,"fx") || !strcmp(pgm,"fy")==0 ) {
155 if ((lbp=strchr(my_l_name,':'))!=NULL) *lbp='\0';
156 lbp = &my_l_name[strlen(my_l_name)-2];
157 if ( *lbp == '_' ) *lbp = '\0';
158 }
159
160 /* change the program name for fastx, tfastx, tfasta */
161 /* fastx returns proteins */
162 if (strcmp(pgm,"fx")==0 || strcmp(pgm,"fy")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
163 else if (strcmp(pgm,"ff")==0) {SAFE_STRNCPY(pgm,"fa",sizeof(pgm));}
164 else if (pgm[0]=='t') {
165 SAFE_STRNCPY(pgm,"fx",sizeof(pgm));
166 SAFE_STRNCPY(lib,DEF_PROT_LIB,sizeof(lib));
167 }
168
169 fflush(fp);
170
171 q_offset = aln_p->q_offset;
172 l_offset = aln_p->l_offset;
173
174 /* set up ref_url, srch_url, srch_url1, dom_url */
175
176 fflush(fp);
177
178 ref_url = getenv("REF_URL");
179 srch_url = getenv("SRCH_URL");
180 srch_url1 = getenv("SRCH_URL1");
181 dom_url = NULL;
182 dom_url = getenv("DOMAIN_PLOT_URL");
183
184 if (ref_url || srch_url || srch_url1 || dom_url) {
185 fprintf(fp,"<!-- LINK_START %s -->",l_name);
186
187 /* REF_URL should provide */
188 /* "<A HREF=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=%s&fcmd=Search&doptcmd1=DocSum&term=%s\">Entrez lookup</A> " */
189 if (ref_url != NULL) {fprintf(fp,ref_url,db,my_l_name);}
190
191 /* SRCH_URL should provide */
192 /* "<A HREF=\"http://localhost/fasta_www2/searchfa.cgi?query=%s&db=fasta_www.cgi&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">Re-search database</A> " */
193 if (srch_url != NULL) {
194 fprintf(fp,srch_url,my_l_name,db,lib,pgm,
195 l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
196 }
197
198 /* SRCH_URL1 should provide: */
199 /* "<A HREF=\"http://localhost/fasta_www2/searchxf.cgi?query=%s&db=%s&lib=%s&pgm=%s&start=%ld&stop=%ld&n1=%d&o_pgm=%s\">General re-search</A>\n" */
200
201 if (srch_url1 != NULL) {
202 fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
203 l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1,m_msp->f_id0);
204 }
205
206 if (dom_url!=NULL) {
207 if (annot_var_s && annot_var_s[0]) {
208 tmp_annot_s = web_encode(annot_var_s);
209 }
210 else tmp_annot_s = "";
211
212 q_domain_s = l_domain_s = NULL;
213
214 if (q_annot_p && q_annot_p->n_domains > 0 &&
215 (q_domain_s = display_domains('q',q_annot_p->s_annot_arr_p, q_annot_p->n_annot))!=NULL) {
216 }
217 if (l_annot_p && l_annot_p->n_domains > 0 &&
218 (l_domain_s = display_domains('l',l_annot_p->s_annot_arr_p, l_annot_p->n_annot))!=NULL) {
219 }
220
221 /* combine domain strings */
222 n_tmp_domain = 0;
223 if (q_domain_s) n_tmp_domain += strlen(q_domain_s)+1;
224 if (l_domain_s) n_tmp_domain += strlen(l_domain_s)+1;
225 etmp_domain_s = "";
226 if (n_tmp_domain > 0) {
227 if ((tmp_domain_s=(char *)calloc(n_tmp_domain,sizeof(char)))==NULL) {
228 fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_domain_s[%d]\n",
229 __FILE__, __LINE__,n_tmp_domain);
230 }
231 else {
232 tmp_domain_s[0] = '\0';
233 if (q_domain_s) SAFE_STRNCAT(tmp_domain_s, q_domain_s, n_tmp_domain);
234 if (l_domain_s) SAFE_STRNCAT(tmp_domain_s, l_domain_s, n_tmp_domain);
235 etmp_domain_s = web_encode(tmp_domain_s);
236 }
237 }
238
239 /* appropriate format string: */
240 /*
241 pgm=%s -- program abbrev that created alignment
242 q_name=%s -- query info
243 q_cstart=%ld
244 q_cstop=%ld
245 q_astart=%ld
246 q_astop=%ld
247 l_name=%s -- library info
248 l_cstart=%ld
249 l_cstop=%ld
250 l_astart=%ld
251 l_astop=%ld
252 region=%s -- aligned domain and variant information
253 doms=%s
254
255 DOMAIN_PLOT_URL = "pgm=%s;q_name=%s;q_cstart=%ld;q_cstop=%ld&q_astart=%ld&q_astop=%ld&l_name=%s&l_cstart=%ld&l_cstop=%ld&l_astart=%ld&l_astop=%ld®ions=%s&doms=%s"
256 */
257
258 /* think about the alternative of running a script
259 rather than embedding it */
260
261 fprintf(fp,dom_url,o_pgm,
262 my_q_name, q_offset+seq_pos(1,aln_p->qlrev,2),q_offset+seq_pos(m_msp->n0,aln_p->qlrev,2),
263 q_offset+seq_pos(aln_p->amin0+1,aln_p->qlrev,1), q_offset+seq_pos(aln_p->amax0, aln_p->qlrev,2),
264 my_l_name, l_offset+seq_pos(1,aln_p->llrev,2), l_offset+seq_pos(n1,aln_p->llrev,2),
265 l_offset+seq_pos(aln_p->amin1+1,aln_p->llrev,1),l_offset+seq_pos(aln_p->amax1,aln_p->llrev,2),
266 tmp_annot_s, etmp_domain_s);
267
268 if (n_tmp_domain>0 && tmp_domain_s) {
269 free(tmp_domain_s);
270 free(etmp_domain_s);
271 }
272 if (l_annot_p && l_annot_p->n_domains && l_domain_s) {
273 free(l_domain_s);
274 }
275 if (q_annot_p && q_annot_p->n_domains && q_domain_s) {
276 free(q_domain_s);
277 }
278 if (annot_var_s && annot_var_s[0] && tmp_annot_s) free(tmp_annot_s);
279 }
280
281 fprintf(fp,"\n<!-- LINK_STOP -->");
282 fflush(fp);
283 }
284
285 /*
286 if ((srch_url2 = getenv("SRCH_URL2"))==NULL)
287 fprintf(fp,"<A HREF=\"http://fasta.bioch.virginia.edu/fasta/cgi/lalignx.cgi?seq1=\"%s\"&in_seq1=\"FASTA\"&seq2=\"%s\"&in_seq2=\"Accession\"&ssr2=%ld:%ld\">lalign</A>\n<p>\n",my_l_name,db,lib,pgm,l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
288 else
289 fprintf(fp,srch_url1,my_l_name,db,lib,pgm,
290 l_offset+aln_p->amin1+1,l_offset+aln_p->amax1,n1);
291 */
292
293
294 if (getenv("JSON_HTML")) {
295
296 /* replace '|' with '_' */
297 SAFE_STRNCPY(json_l_name, l_name, sizeof(json_l_name));
298 for (bp=strchr(json_l_name,'|'); bp; bp=strchr(bp+1,'|')) { *bp = '_'; }
299
300 /* replace '.' with '_' */
301 for (bp=strchr(json_l_name,'.'); bp; bp=strchr(bp+1,'.')) { *bp = '_'; }
302
303 fprintf(fp,"\n<script type=\"text/javascript\">\n//<![CDATA[\n var json_%s = {\n",json_l_name);
304 encode_json_str(fp, "db", db, 1);
305 encode_json_str(fp, "l_acc", l_name, 0);
306 encode_json_str(fp, "acc", my_l_name, 0);
307 encode_json_str(fp, "lib", lib, 0);
308 encode_json_str(fp, "pgm", pgm, 0);
309 encode_json_str(fp, "o_pgm", m_msp->f_id0, 0);
310 encode_json_aln(fp, aln_p, q_offset, l_offset, 0);
311 if (annot_var_s && annot_var_s[0]) { encode_json_lines(fp, "annot", annot_var_s, 0); }
312 if (q_annot_p && q_annot_p->n_domains > 0) { encode_json_domains(fp, "q_domains", q_annot_p, 0); }
313 if (l_annot_p && l_annot_p->n_domains > 0) { encode_json_domains(fp, "l_domains", l_annot_p, 0); }
314
315 fprintf(fp, "\n}\n//]]>\n</script>");
316 fflush(fp);
317 }
318 }
319
display_domains(char target,struct annot_entry ** annot_arr_p,int n_annots)320 char *display_domains(char target, struct annot_entry **annot_arr_p, int n_annots) {
321 char *domain_s;
322 char line[MAX_STR];
323 int i, i_doms, n_domain_s = MAX_LSTR;
324
325 /* since (currently) annot_var_s is MAX_LSOTR, do the same for domain_s */
326 if ((domain_s = (char *)calloc(n_domain_s, sizeof(char)))==NULL) {
327 fprintf(stderr,"*** error [%s:%d] *** cannot allocate domain_s[%d]\n",__FILE__, __LINE__,n_domain_s);
328 return NULL;
329 }
330
331 for (i=0; i < n_annots; i++) {
332 /* annot_arr_p[] has both domains and non domains, but n_domains only counts domains */
333 if (annot_arr_p[i]->label != '-') continue;
334 sprintf(line, "%cDomain:\t%ld-%ld\t%s\n",
335 target, annot_arr_p[i]->pos+1, annot_arr_p[i]->end+1, annot_arr_p[i]->comment);
336 if (strlen(domain_s) + strlen(line)+1 > n_domain_s) {
337 n_domain_s += n_domain_s/2;
338 domain_s = realloc(domain_s, n_domain_s);
339 }
340 SAFE_STRNCAT(domain_s, line, n_domain_s);
341 }
342
343 domain_s = realloc(domain_s, (n_domain_s=strlen(domain_s))+1);
344 domain_s[n_domain_s]='\0';
345
346 return domain_s;
347 }
348
/* take an annotation string *annot_var_s and convert problematic characters to their
   web (percent) encoding; hex digits are written in lower case, for example: */
/*   ' ' (space)  %20 */
/*   '|'  %7c */
/*   ';'  %3b */
/*   '='  %3d */
/*   '\n' %0a */
355
/* newline, space, '=', ';' and '|', stored as a NUL-terminated string;
   not referenced by the code visible in this file */
static char bad_chars[] = { '\n', ' ', '=', ';', '|', '\0' };
357
/*
 * web_encode() -- percent-encode a string for use inside a URL.
 *
 *   annot_var_s  NUL-terminated input; NULL yields NULL
 *
 * ASCII letters and digits are copied unchanged; every other byte is written
 * as '%' followed by two lower-case hex digits of its unsigned value
 * (' ' -> "%20", '\n' -> "%0a", '|' -> "%7c").
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(), or NULL if memory cannot be obtained.
 */
char *web_encode(const char *annot_var_s) {
  static const char hex_digits[] = "0123456789abcdef";
  size_t n_tmp_annot_s;
  char *tmp_annot_s, *trimmed, *dp;
  const char *sp;
  unsigned char ch;

  if (annot_var_s == NULL) return NULL;

  /* make string largest possible size: every byte may expand to 3 */
  n_tmp_annot_s = strlen(annot_var_s)*3 + 1;
  if ((tmp_annot_s = (char *)calloc(n_tmp_annot_s,sizeof(char)))==NULL) {
    fprintf(stderr,"*** error [%s:%d] *** cannot allocate tmp_annot_s[%d]\n",__FILE__, __LINE__,(int)n_tmp_annot_s);
    return NULL;
  }

  dp = tmp_annot_s;
  for (sp = annot_var_s; *sp ; sp++) {
    /* work on the unsigned value so bytes >= 0x80 give exactly two hex
       digits whatever the signedness of plain char */
    ch = (unsigned char)*sp;

    if ((ch >= '0' && ch <= '9') ||
	(ch >= 'A' && ch <= 'Z') ||
	(ch >= 'a' && ch <= 'z')) {
      *dp++ = (char)ch;
    }
    else {
      *dp++ = '%';
      *dp++ = hex_digits[ch >> 4];
      *dp++ = hex_digits[ch & 0x0f];
    }
  }
  *dp = '\0';

  /* trim to the size actually used; if shrinking fails the original
     buffer is still valid and is returned unchanged */
  n_tmp_annot_s = (size_t)(dp - tmp_annot_s);
  if ((trimmed = (char *)realloc(tmp_annot_s, n_tmp_annot_s+1)) != NULL) {
    tmp_annot_s = trimmed;
  }

  return tmp_annot_s;
}
388