1 static char const rcsid[] = "$Id: vecscrn.c,v 6.137 2005/12/27 14:46:44 madden Exp $";
2 
3 /* ===========================================================================
4 *
5 *                            PUBLIC DOMAIN NOTICE
6 *               National Center for Biotechnology Information
7 *
8 *  This software/database is a "United States Government Work" under the
9 *  terms of the United States Copyright Act.  It was written as part of
10 *  the author's official duties as a United States Government employee and
11 *  thus cannot be copyrighted.  This software/database is freely available
12 *  to the public for use. The National Library of Medicine and the U.S.
13 *  Government have not placed any restriction on its use or reproduction.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 *  Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 
27 /*****************************************************************************
28 
29 File name: vecscrn.c
30 
31 Author: Tom Madden
32 
33 Contents: functions for Vector screening.
34 
35 ******************************************************************************
36  * $Revision: 6.137 $
37  *
38  * $Log: vecscrn.c,v $
39  * Revision 6.137  2005/12/27 14:46:44  madden
40  * Fix buffer overflow in Print62Notation
41  *
42  * Revision 6.136  2004/07/27 14:26:54  madden
43  * New location for tunf.cgi
44  *
45  * Revision 6.135  2003/05/30 17:25:38  coulouri
46  * add rcsid
47  *
48  * Revision 6.134  2002/07/24 21:11:39  kans
49  * reverted ncbi URL
50  *
51  * Revision 6.133  2002/07/23 16:50:04  kans
52  * changed www.ncbi.nlm.nih.gov to www.ncbi.nih.gov
53  *
54  * Revision 6.132  2001/01/09 17:30:03  madden
55  * Fix memory leaks
56  *
57  * Revision 6.131  2000/09/01 18:29:13  dondosha
58  * Removed calls to ReadDBFreeSharedInfo and ReadDBCloseMHdrAndSeqFiles
59  *
60  * Revision 6.130  2000/08/07 20:38:11  madden
61  * Proper casting of int to long for printf
62  *
63  * Revision 6.129  2000/05/19 20:40:34  kitts
64  * 1. Fixed bug in VSMakeCombinedSeqLoc preventing "No hits" being
65  *    reported when none of the blast hits were significant.
66  * 2. Fixed bug in VSCombineSeqLoc causing overlaps to be missed
67  *    when alignment filtering is off.
68  * 3. Fixed bug in CombineSeqLoc to allow all SeqLocs in a linked
69  *    list to be processed.
70  * 4. Added checks to several functions to prevent "No hits"
71  *    being reported when the search fails.
72  * 5. Other minor changes.
73  *
74  * Revision 6.128  2000/05/09 21:43:48  kitts
75  * Removed unused parameter from VSPrintListFromSeqLocs
76  * Moved "No hits" output from VSPrintListFromSeqLocs to vecscreen.c
77  *
78  * Revision 6.127  2000/05/05 20:11:09  madden
79  * Add VSScreenSequenceByLoc
80  *
81  * Revision 6.126  2000/05/01 16:58:42  kitts
82  *
83  * Added function VSPrintListFromSeqLocs
84  * Added function VSPrintListIdLine
85  *
86  * Revision 6.125  2000/03/30 21:04:16  madden
87  * Added function VSMakeCombinedSeqLoc
88  *
89  * Revision 6.124  2000/01/21 16:47:49  kans
90  * use ValNodeSort instead of original jz SortValNode
91  *
92  * Revision 6.123  2000/01/20 18:32:36  madden
93  * Removed VSMakeDisplaySeqLoc
94  *
95  * Revision 6.122  2000/01/20 14:40:36  madden
96  * Added VSMakeDisplaySeqLoc
97  *
98  * Revision 6.121  2000/01/19 21:53:21  madden
99  * Files for vector screening
100  *
101 ******************************************************************************/
102 
103 #include <vecscrn.h>
104 #include <salpacc.h>
105 #include <salpstat.h>
106 #include <urkbias.h>
107 
108 /* Used for the '62' notation stuff. */
109 #define DEFINE_62 62
110 
111 /***********************************************************
112 
113 CoordTo62Notation converts a position into the '62' notation.
114 The '62' notation decomposes a number (X) into two parts:
115 
116 1.) the largest multiple of 62 that is still less than X. Let
117 this number be A.
118 
119 2.) the difference between the number found in 1.) and X.  Let
120 this number be B.
121 
122 The original number can be reconstructed from:
123 
124 X = 62*A + B
125 
126 In CoordTo62Notation the first Int4 is the position that should be decomposed.
127 The two CharPtr's are (respectively) for A and B.  These CharPtr's should be
128 allocated with at least two spaces and will be filled in with A and B.
129 
130 ****************************************************************/
131 static Boolean
CoordTo62Notation(Int4 position,CharPtr major_part,CharPtr minor_part)132 CoordTo62Notation (Int4 position, CharPtr major_part, CharPtr minor_part)
133 
134 {
135 	Int4 major_num, minor_num;
136 
137 	if (major_part == NULL || minor_part == NULL)
138 		return FALSE;
139 
140 	major_num = position/DEFINE_62;
141 	minor_num = position%DEFINE_62;
142 
143 
144 	if (major_num < 10)
145 		sprintf(major_part, "%ld", (long) major_num);
146 	else if (major_num < 36)
147 		*major_part = 'A' + major_num - 10;
148 	else
149 		*major_part = 'a' + major_num - 36;
150 
151 
152 	if (minor_num < 10)
153 		sprintf(minor_part, "%ld", (long) minor_num);
154 	else if (minor_num < 36)
155 		*minor_part = 'A' + minor_num - 10;
156 	else
157 		*minor_part = 'a' + minor_num - 36;
158 
159 	return TRUE;
160 }
161 
162 #define ADD_TO_BUFFER_FOR_BLAST 100
163 
164 /*
165 	Given a SeqLocPtr (for a query of length query_length), fill in
166 	buffer with starts and stops in '62' notation (see above doc.).
167 */
168 
169 static Boolean
Print62Notation(SeqLocPtr seqloc,Int4 query_length,CharPtr * buffer)170 Print62Notation(SeqLocPtr seqloc, Int4 query_length, CharPtr *buffer)
171 {
172 	Char a1[2], a2[2], b1[2], b2[2];
173 	CharPtr ptr, my_buffer;
174 	Int4 buffer_size, position, start, stop;
175 	Nlm_FloatHi block_size;
176         const int kScreenWidth = 600;
177 
178 	if (seqloc == NULL)
179 		return FALSE;
180 
181 	block_size = ((Nlm_FloatHi) query_length)/kScreenWidth;
182 
183 	if (seqloc->choice == SEQLOC_PACKED_INT)
184 		seqloc = (ValNodePtr) seqloc->data.ptrvalue;
185 
186 	position = 0;
187 	buffer_size = ADD_TO_BUFFER_FOR_BLAST;
188 	my_buffer = (CharPtr) MemNew(buffer_size*sizeof(Char));
189 	ptr = my_buffer;
190 	while (seqloc)
191 	{
192                 /* We check for +5 as we need to allow addition of four characters within
193                    this loop as well as setting the final byte to NULLB outside of the loop. */
194 		if (position+5 > buffer_size)
195 		{
196                         CharPtr my_new_buffer = NULL;
197 			buffer_size += ADD_TO_BUFFER_FOR_BLAST;
198 			my_new_buffer = (CharPtr) MemNew(buffer_size*sizeof(Char));
199 			MemCpy(my_new_buffer, my_buffer, position);
200 			my_buffer = (CharPtr) MemFree(my_buffer);
201 			my_buffer = my_new_buffer;
202 			ptr = my_buffer + position;
203 		}
204 		if (SeqLocStrand(seqloc) == Seq_strand_minus)
205 		{
206 			start = SeqLocStop(seqloc)/block_size;
207 			stop = (SeqLocStart(seqloc)+1)/block_size;
208 		}
209 		else
210 		{
211 			start = SeqLocStart(seqloc)/block_size;
212 			stop = (SeqLocStop(seqloc)+1)/block_size;
213 		}
214 		CoordTo62Notation(start, a1, b1);
215 		CoordTo62Notation(stop, a2, b2);
216 		*ptr = *a1; ptr++;
217 		*ptr = *b1; ptr++;
218 		*ptr = *a2; ptr++;
219 		*ptr = *b2; ptr++;
220 		position += 4;
221 		seqloc = seqloc->next;
222 	}
223 	*ptr = NULLB;
224 
225 	*buffer = my_buffer;
226 
227 	return TRUE;
228 }
229 
230 /*
231 	Gets the labels for a given class.
232 	The CharPtr's will be allocated by the function.
233 	It is up to the caller to deallocate this memory.
234 	The 'color' buffer will hold the color in the format of RRGGBB (e.g., "00FF00").
235 */
236 
237 Boolean LIBCALL
VSGetLabels(Int2 class,CharPtr PNTR short_desc,CharPtr PNTR long_desc,CharPtr PNTR color)238 VSGetLabels (Int2 class, CharPtr PNTR short_desc, CharPtr PNTR long_desc, CharPtr PNTR color)
239 
240 {
241 	Boolean retval;
242 
243 	switch (class) {
244 		case 1:
245 			if (long_desc)
246 				*long_desc = StringSave("Strong match");
247 			if (short_desc)
248 				*short_desc = StringSave("Strong");
249 			if (color)
250 				*color = StringSave("FF0000");
251 			retval = TRUE;
252 			break;
253 		case 2:
254 			if (long_desc)
255 				*long_desc = StringSave("Moderate match");
256 			if (short_desc)
257 				*short_desc = StringSave("Moderate");
258 			if (color)
259 				*color = StringSave("FF00FF");
260 			retval = TRUE;
261 			break;
262 		case 3:
263 			if (long_desc)
264 				*long_desc = StringSave("Weak match");
265 			if (short_desc)
266 				*short_desc = StringSave("Weak");
267 			if (color)
268 				*color = StringSave("00FF00");
269 			retval = TRUE;
270 			break;
271 		case 4:
272 			if (long_desc)
273 				*long_desc = StringSave("Suspect origin");
274 			if (short_desc)
275 				*short_desc = StringSave("Suspect");
276 			if (color)
277 				*color = StringSave("FFFF00");
278 			retval = TRUE;
279 			break;
280 		default:
281 			retval = FALSE;
282 	}
283 
284 	return retval;
285 }
286 
287 /*
288 	BlastOptionNew function used for VecScreen screening.
289 	These options were specified by Paul Kitts.
290 */
291 
292 
293 BLAST_OptionsBlkPtr LIBCALL
VSBlastOptionNew(void)294 VSBlastOptionNew(void)
295 
296 {
297 	BLAST_OptionsBlkPtr options;
298 
299 
300 	options = BLASTOptionNew("blastn", TRUE);
301 	if (options == NULL)
302 		return NULL;
303 
304 	options->gap_open = 3;
305 	options->gap_extend = 3;
306 	options->filter_string = StringSave("m D");
307 	options->penalty = -5;
308 	options->expect_value = 700;
309 	options->searchsp_eff = 1.75e12;
310 
311 	return options;
312 }
313 
314 
315 /*
316 	Deletes the VSOptions structure.
317 */
318 
319 VSOptionsPtr LIBCALL
VSOptionsFree(VSOptionsPtr options)320 VSOptionsFree(VSOptionsPtr options)
321 
322 {
323 	if (options == NULL)
324 		return NULL;
325 
326 	options->classes = MemFree(options->classes);
327 
328 
329 	MemFree(options);
330 
331 	return NULL;
332 
333 }
334 /*
335 	Makes up the structure for VSOptions, with default values
336 	filled in.
337 */
338 
339 VSOptionsPtr LIBCALL
VSOptionsNew(void)340 VSOptionsNew(void)
341 
342 {
343 	VSOptionsPtr options;
344 
345 	options = MemNew(sizeof(VSOptions));
346 	if (options == NULL)
347 		return NULL;
348 
349 	options->number_of_classes = 3;
350 	options->classes = MemNew(2*(options->number_of_classes)*sizeof(Int4));
351 	if (options->classes == NULL)
352 	{
353 		MemFree(options);
354 		return NULL;
355 	}
356 	options->classes[0] = 24;
357 	options->classes[1] = 30;
358 	options->classes[2] = 19;
359 	options->classes[3] = 25;
360 	options->classes[4] = 16;
361 	options->classes[5] = 23;
362 	options->distance_to_end = 24;
363 	options->distance_between_segments = 50;
364 	options->filter_by_coverage  = TRUE;
365 
366 	return options;
367 }
368 
369 /*
370 	Changes the SeqLoc to the PLUS strand.  Only works for SeqInt.
371 */
372 
373 static void
SeqLocToPlusStrand(SeqLocPtr seqloc)374 SeqLocToPlusStrand(SeqLocPtr seqloc)
375 
376 {
377 	Int4 start, stop;
378 	SeqIntPtr seq_int;
379 
380 	if (seqloc == NULL || seqloc->choice != SEQLOC_INT)
381 		return;
382 
383 	while (seqloc)
384 	{
385 		if (SeqLocStrand(seqloc) == Seq_strand_minus)
386 		{
387 			seq_int = seqloc->data.ptrvalue;
388 
389 			start = seq_int->from;
390 			stop = seq_int->to;
391 
392 			seq_int->from = MIN(start, stop);
393 			seq_int->to = MAX(start, stop);
394 			seq_int->strand = Seq_strand_plus;
395 		}
396 		seqloc = seqloc->next;
397 	}
398 
399 	return;
400 }
401 
402 /*
403 Check for overlap between seqloc1 and seqloc2.
404 Keep only seqloc1 if there is an overlap.
405 Return a new SeqLocPtr with these SeqLoc's.
406 */
407 static SeqLocPtr
CheckForOverlap(SeqLocPtr seqloc1,SeqLocPtr seqloc2)408 CheckForOverlap(SeqLocPtr seqloc1, SeqLocPtr seqloc2)
409 
410 {
411 	SeqLocPtr seqloc, next_seqloc;
412 	SeqLocPtr new_seqloc=NULL, retval=NULL, current;
413 
414 	if (seqloc1 == NULL || seqloc1->choice != SEQLOC_PACKED_INT)
415 		return seqloc2;
416 
417 	if (seqloc2 == NULL || seqloc2->choice != SEQLOC_PACKED_INT)
418 		return NULL;
419 
420 	seqloc = seqloc2->data.ptrvalue;
421 
422 	while (seqloc)
423 	{
424 		if (SeqLocAinB(seqloc, seqloc1) >= 0)
425 		{
426 			seqloc = seqloc->next;
427 		}
428 		else
429 		{
430 			if (!new_seqloc)
431 			{
432 				new_seqloc = seqloc;
433 				current = new_seqloc;
434 			}
435 			else
436 			{
437 				current->next = seqloc;
438 				current = current->next;
439 			}
440 			next_seqloc = seqloc->next;
441 			seqloc->next = NULL;
442 			seqloc = next_seqloc;
443 		}
444 	}
445 	if (new_seqloc)
446 		ValNodeAddPointer(&retval, SEQLOC_PACKED_INT, new_seqloc);
447 
448 	return retval;
449 }
450 
MySeqLocSortByStartPosition(VoidPtr vp1,VoidPtr vp2)451 static int LIBCALLBACK MySeqLocSortByStartPosition(VoidPtr vp1, VoidPtr vp2)
452 
453 {
454 	SeqLocPtr slp1, slp2;
455 
456 	slp1 = *((SeqLocPtr PNTR) vp1);
457 	slp2 = *((SeqLocPtr PNTR) vp2);
458 
459 	if (SeqLocStart(slp1) < SeqLocStart(slp2))
460 		return -1;
461 	else if (SeqLocStart(slp1) > SeqLocStart(slp2))
462 		return 1;
463 	else
464 		return 0;
465 }
466 
467 /*
468 	Combine two SeqLoc's that overlap.
469 */
470 
471 static SeqLocPtr
CombineSeqLoc(SeqLocPtr seqloc)472 CombineSeqLoc (SeqLocPtr seqloc)
473 
474 {
475 	Int4 start, stop;
476 	SeqLocPtr retval=NULL, seqloc_var, new_seqloc=NULL;
477 	SeqLocPtr my_seqloc=NULL, tmp_seqloc;
478 	SeqIntPtr seq_int;
479 
480 	if (seqloc == NULL || seqloc->choice != SEQLOC_PACKED_INT)
481 		return NULL;
482 
483 	while (seqloc)
484 	{
485 		tmp_seqloc = seqloc->data.ptrvalue;
486 		while (tmp_seqloc)
487 		{
488 			ValNodeLink(&my_seqloc, (SeqLocPtr) AsnIoMemCopy ((Pointer) tmp_seqloc,
489                                      (AsnReadFunc) SeqLocAsnRead,
490                                      (AsnWriteFunc) SeqLocAsnWrite));
491 			tmp_seqloc = tmp_seqloc->next;
492 		}
493 		seqloc = seqloc->next;
494 	}
495 
496 	my_seqloc = (SeqLocPtr) ValNodeSort ((ValNodePtr) my_seqloc, MySeqLocSortByStartPosition);
497 
498 	start = SeqLocStart(my_seqloc);
499 	stop = SeqLocStop(my_seqloc);
500 	seqloc_var = my_seqloc;
501 
502 	while (seqloc_var)
503 	{
504 		if (seqloc_var->next && stop+1 >= SeqLocStart(seqloc_var->next))
505 		{
506 			stop = MAX(stop, SeqLocStop(seqloc_var->next));
507 		}
508 		else
509 		{
510 			seq_int = SeqIntNew();
511 			seq_int->from = start;
512 			seq_int->to = stop;
513 			seq_int->strand = Seq_strand_plus;
514 			seq_int->id = SeqIdDup(SeqIdFindBest(SeqLocId(seqloc_var), SEQID_GI));
515                 	ValNodeAddPointer(&new_seqloc, SEQLOC_INT, seq_int);
516 			if (seqloc_var->next)
517 			{
518 				start = SeqLocStart(seqloc_var->next);
519 				stop = SeqLocStop(seqloc_var->next);
520 			}
521 		}
522 		seqloc_var = seqloc_var->next;
523 	}
524 
525 	if (new_seqloc)
526 		ValNodeAddPointer(&retval, SEQLOC_PACKED_INT, new_seqloc);
527 
528 	return retval;
529 }
530 
531 /* Get intervening SeqLoc's if the two SeqLoc's are within distance_permitted.  */
532 
533 static SeqLocPtr
GetInterveningSeqLoc(SeqLocPtr seqloc,Int4 distance_permitted,Int4 query_length)534 GetInterveningSeqLoc (SeqLocPtr seqloc, Int4 distance_permitted, Int4 query_length)
535 
536 {
537 	SeqLocPtr retval=NULL, seqloc_var, new_seqloc=NULL;
538 	SeqLocPtr my_seqloc=NULL, tmp_seqloc;
539 	SeqIntPtr seq_int;
540 
541 	if (seqloc == NULL)
542 		return NULL;
543 
544 	if (seqloc->choice != SEQLOC_PACKED_INT)
545 		return NULL;
546 
547 	while (seqloc)
548 	{
549 		tmp_seqloc = seqloc->data.ptrvalue;
550 		while (tmp_seqloc)
551 		{
552 			ValNodeLink(&my_seqloc, (SeqLocPtr) AsnIoMemCopy ((Pointer) tmp_seqloc,
553                                      (AsnReadFunc) SeqLocAsnRead,
554                                      (AsnWriteFunc) SeqLocAsnWrite));
555 			tmp_seqloc = tmp_seqloc->next;
556 		}
557 		seqloc = seqloc->next;
558 	}
559 
560 	my_seqloc = (SeqLocPtr) ValNodeSort ((ValNodePtr) my_seqloc, MySeqLocSortByStartPosition);
561 
562 	seqloc_var = my_seqloc;
563 
564 	if (SeqLocStart(seqloc_var) < distance_permitted &&
565 		SeqLocStart(seqloc_var) > 0)
566 	{
567 			seq_int = SeqIntNew();
568 			seq_int->from = 0;
569 			seq_int->to = SeqLocStart(seqloc_var) - 1;
570 			seq_int->strand = Seq_strand_plus;
571 			seq_int->id = SeqIdDup(SeqIdFindBest(SeqLocId(seqloc_var), SEQID_GI));
572                 	ValNodeAddPointer(&new_seqloc, SEQLOC_INT, seq_int);
573 	}
574 
575 	while (seqloc_var)
576 	{
577 		if (seqloc_var->next && SeqLocStop(seqloc_var)+1 < SeqLocStart(seqloc_var->next) && SeqLocStop(seqloc_var)+distance_permitted >= SeqLocStart(seqloc_var->next))
578 		{
579 			seq_int = SeqIntNew();
580 			seq_int->from = SeqLocStop(seqloc_var) + 1;
581 			seq_int->to = SeqLocStart(seqloc_var->next) - 1;
582 			seq_int->strand = Seq_strand_plus;
583 			seq_int->id = SeqIdDup(SeqIdFindBest(SeqLocId(seqloc_var), SEQID_GI));
584                 	ValNodeAddPointer(&new_seqloc, SEQLOC_INT, seq_int);
585 		}
586 		else if ((seqloc_var->next == NULL || SeqLocStop(seqloc_var->next) < SeqLocStop(seqloc_var)) && query_length - (SeqLocStop(seqloc_var)+1) < distance_permitted && query_length - (SeqLocStop(seqloc_var)+1) > 0) /* Check distance to end. */
587 		{
588 			seq_int = SeqIntNew();
589 			seq_int->from = SeqLocStop(seqloc_var) + 1;
590 			seq_int->to = query_length - 1;
591 			seq_int->strand = Seq_strand_plus;
592 			seq_int->id = SeqIdDup(SeqIdFindBest(SeqLocId(seqloc_var), SEQID_GI));
593                 	ValNodeAddPointer(&new_seqloc, SEQLOC_INT, seq_int);
594 		}
595 		seqloc_var = seqloc_var->next;
596 	}
597 
598 	if (new_seqloc)
599 		ValNodeAddPointer(&retval, SEQLOC_PACKED_INT, new_seqloc);
600 
601 	return retval;
602 }
603 
604 /*
605 	Scans the SeqAlignPtr's, producing SeqLocPtr's as specified.
606 	Each ValNodePtr points to a list of SeqLoc's.
607 
608 	This function:
609 
610 	- categorizes hits
611 	- eliminates redundancy
612 	- calculates SeqLocs from the SeqAlign
613 
614 	if options are NULL, defaults will be used.
615 
616 */
617 
618 Boolean LIBCALL
VSMakeSeqLoc(SeqAlignPtr PNTR seqalign_ptr,ValNodePtr PNTR vnp,Int4 length,VSOptionsPtr options)619 VSMakeSeqLoc (SeqAlignPtr PNTR seqalign_ptr, ValNodePtr PNTR vnp, Int4 length, VSOptionsPtr options)
620 
621 {
622 	Boolean own_options=FALSE;
623 	Int4 start, stop, score, num, tmp;
624 	Nlm_FloatHi bit_score, evalue;
625 	SeqLocPtr seqloc1=NULL, seqloc2=NULL, seqloc3=NULL, seqloc, seqloc_tmp;
626 	SeqAlignPtr seqalign, head=NULL, next;
627 	SeqAlignPtr sap1=NULL, sap2=NULL, sap3=NULL;
628 	Uint2Ptr HitsPerBase=NULL;
629 	Int4 last_start=INT4_MAX, last_stop=-1;
630 
631 	if (seqalign_ptr == NULL || vnp == NULL)
632 	        return FALSE;
633 
634 	if (options == NULL)
635 	{
636 		options = VSOptionsNew();
637 		if (options == NULL)
638 		        return FALSE;
639 		own_options = TRUE;
640 	}
641 
642 	seqalign = *seqalign_ptr;
643 	while (seqalign)
644 	{
645 		next = seqalign->next;
646 		GetScoreAndEvalue(seqalign, &score, &bit_score, &evalue, &num);
647 		start = SeqAlignStart(seqalign, 0);
648 		stop = SeqAlignStop(seqalign, 0);
649 		if (start > stop)
650 		{ /* reverse for minus strand. */
651 			tmp = start;
652 			start = stop;
653 			stop = tmp;
654 		}
655 		if (score >= options->classes[1] || (score >= options->classes[0] && (start <= options->distance_to_end || length-(stop+1) <= options->distance_to_end)))
656 		{
657 			sap1 = SeqAlignLink(sap1, seqalign);
658 			seqalign->next = NULL;
659 		}
660 		else if (score >= options->classes[3] || (score >= options->classes[2] && (start <= options->distance_to_end || length-(stop+1) <= options->distance_to_end)))
661 		{
662 			sap2 =SeqAlignLink(sap2, seqalign);
663 			seqalign->next = NULL;
664 		}
665 		else if (score >= options->classes[5] || (score >= options->classes[4] && (start <= options->distance_to_end || length-(stop+1) <= options->distance_to_end)))
666 		{
667 			sap3 = SeqAlignLink(sap3, seqalign);
668 			seqalign->next = NULL;
669 		}
670 		else
671 		{
672 			SeqAlignFree(seqalign);
673 		}
674 		seqalign = next;
675 	}
676 
677 	if (sap1)
678 	{
679 		if (options->filter_by_coverage)
680 			sap1 = SeqAlignFilterByCoverage(sap1, NULL, NULL, 1, &HitsPerBase, &last_start, &last_stop);
681 		seqalign = sap1;
682 		while (seqalign)
683 		{
684 			seqloc_tmp = SeqLocFromSeqAlign(seqalign, NULL);
685 			SeqLocToPlusStrand(seqloc_tmp);
686 			ValNodeLink(&seqloc1, seqloc_tmp);
687 			seqalign = seqalign->next;
688 		}
689 		head = SeqAlignLink(head, sap1);
690 	}
691 	if (sap2)
692 	{
693 		if (options->filter_by_coverage)
694 			sap2 = SeqAlignFilterByCoverage(sap2, NULL, NULL, 1, &HitsPerBase, &last_start, &last_stop);
695 		seqalign = sap2;
696 		while (seqalign)
697 		{
698 			seqloc_tmp = SeqLocFromSeqAlign(seqalign, NULL);
699 			SeqLocToPlusStrand(seqloc_tmp);
700 			ValNodeLink(&seqloc2, seqloc_tmp);
701 			seqalign = seqalign->next;
702 		}
703 		head = SeqAlignLink(head, sap2);
704 	}
705 	if (sap3)
706 	{
707 		if (options->filter_by_coverage)
708 			sap3 = SeqAlignFilterByCoverage(sap3, NULL, NULL, 1, &HitsPerBase, &last_start, &last_stop);
709 		seqalign = sap3;
710 		while (seqalign)
711 		{
712 			seqloc_tmp = SeqLocFromSeqAlign(seqalign, NULL);
713 			SeqLocToPlusStrand(seqloc_tmp);
714 			ValNodeLink(&seqloc3, seqloc_tmp);
715 			seqalign = seqalign->next;
716 		}
717 		head = SeqAlignLink(head, sap3);
718 	}
719 
720 	/* Set for return. */
721 	*seqalign_ptr = head;
722 
723 	HitsPerBase = MemFree(HitsPerBase);
724 
725 	*vnp = NULL;
726 
727 	if (seqloc1)
728 	{
729 		seqloc = NULL;
730 		ValNodeAddPointer(&seqloc, SEQLOC_PACKED_INT, seqloc1);
731 		ValNodeAddPointer(vnp, 1, seqloc);
732 	}
733 
734 	if (seqloc2)
735 	{
736 		seqloc = NULL;
737 		ValNodeAddPointer(&seqloc, SEQLOC_PACKED_INT, seqloc2);
738 		ValNodeAddPointer(vnp, 2, seqloc);
739 	}
740 
741 	if (seqloc3)
742 	{
743 		seqloc = NULL;
744 		ValNodeAddPointer(&seqloc, SEQLOC_PACKED_INT, seqloc3);
745 		ValNodeAddPointer(vnp, 3, seqloc);
746 	}
747 
748 
749 	if (own_options)
750 		options = VSOptionsFree(options);
751 
752 	return TRUE;
753 }
754 
755 /*
756 
757 	This function checks SeqLocs for redundancy.  It:
758 
759 	- Combines SeqLoc's in each category
760 	- checks for overlap
761 	- produces a fourth SeqLoc for intervening sequences.
762 */
763 
764 Boolean LIBCALL
VSCombineSeqLoc(ValNodePtr PNTR vnp,Int4 query_length,VSOptionsPtr options)765 VSCombineSeqLoc(ValNodePtr PNTR vnp, Int4 query_length, VSOptionsPtr options)
766 {
767 	SeqLocPtr seqloc1=NULL, seqloc2=NULL, seqloc3=NULL, seqloc4=NULL;
768 	SeqLocPtr n_seqloc1=NULL, n_seqloc2=NULL, n_seqloc3=NULL;
769 	SeqLocPtr tmp_slp=NULL, combo_slp=NULL;
770 	ValNodePtr var;
771 
772 	if (vnp == NULL || options == NULL)
773 		return FALSE;
774 
775 	if (*vnp == NULL)
776 		return TRUE;
777 
778 	var = *vnp;
779 	while (var)
780 	{
781 		if (var->choice == 1)
782 		{
783 			seqloc1 = var->data.ptrvalue;
784 		}
785 		else if (var->choice == 2)
786 		{
787 			seqloc2 = var->data.ptrvalue;
788 		}
789 		else if (var->choice == 3)
790 		{
791 			seqloc3 = var->data.ptrvalue;
792 		}
793 		var = var->next;
794 	}
795 
796 	*vnp = ValNodeFree(*vnp);
797 
798 	if (seqloc1)
799 	{
800 		n_seqloc1 = CombineSeqLoc(seqloc1);
801 		seqloc1 = SeqLocSetFree(seqloc1);
802 		ValNodeAddPointer(vnp, 1, n_seqloc1);
803 	}
804 
805 	if (seqloc2)
806 	{
807 		seqloc2 = CheckForOverlap(n_seqloc1, seqloc2);
808 		n_seqloc2 = CombineSeqLoc(seqloc2);
809 		seqloc2 = SeqLocSetFree(seqloc2);
810 		ValNodeAddPointer(vnp, 2, n_seqloc2);
811 	}
812 
813 	if (seqloc3)
814 	{
815 		tmp_slp = SeqLocDupAll(n_seqloc1);
816 		tmp_slp = SeqLocLink(&tmp_slp, SeqLocDupAll(n_seqloc2));
817 		combo_slp = CombineSeqLoc(tmp_slp);
818 		tmp_slp = SeqLocSetFree(tmp_slp);
819 		seqloc3 = CheckForOverlap(combo_slp, seqloc3);
820 		n_seqloc3 = CombineSeqLoc(seqloc3);
821 		seqloc3 = SeqLocSetFree(seqloc3);
822 		ValNodeAddPointer(vnp, 3, n_seqloc3);
823 	}
824 
825 	combo_slp = SeqLocSetFree(combo_slp);
826 
827 	tmp_slp = NULL;
828 	tmp_slp = SeqLocDupAll(n_seqloc1);
829 	tmp_slp = SeqLocLink(&tmp_slp, SeqLocDupAll(n_seqloc2));
830 	tmp_slp = SeqLocLink(&tmp_slp, SeqLocDupAll(n_seqloc3));
831 
832 	seqloc4 = GetInterveningSeqLoc(tmp_slp, options->distance_between_segments, query_length);
833 	ValNodeAddPointer(vnp, 4, seqloc4);
834 /*
835 	if (seqloc4)
836 	{
837 		seqloc = NULL;
838 		ValNodeAddPointer(&seqloc, SEQLOC_PACKED_INT, seqloc4);
839 	}
840 */
841 
842 
843 	tmp_slp = SeqLocSetFree(tmp_slp);
844 
845 
846 	return TRUE;
847 }
848 
849 /*
850 	For a given SeqAlignPtr, make and merge SeqLoc's corresponding to these.
851 
852 	This function calls VSMakeSeqLoc and VSCombineSeqLoc.
853 
854 	Note: if 'options' is NULL, default values will be used.  This is STRONGLY recommended.
855 */
856 
857 Int2 LIBCALL
VSMakeCombinedSeqLoc(SeqAlignPtr PNTR seqalign_ptr,ValNodePtr PNTR vnpp,Int4 length,VSOptionsPtr options)858 VSMakeCombinedSeqLoc(SeqAlignPtr PNTR seqalign_ptr, ValNodePtr PNTR vnpp, Int4 length, VSOptionsPtr options)
859 {
860 	Boolean delete_options=FALSE;
861 	Boolean not_failed;
862 	Int2 retval=0;
863 	ValNodePtr var;
864 
865 	if (seqalign_ptr == NULL || vnpp == NULL)
866 		return -1;
867 
868 	*vnpp = NULL;
869 
870 	if (*seqalign_ptr == NULL)
871 		return 0;
872 
873 	if (options == NULL)
874 	{
875 		options = VSOptionsNew();
876 		if (options == NULL)
877 		        return -1;
878 		delete_options = TRUE;
879 	}
880 
881 	not_failed = VSMakeSeqLoc(seqalign_ptr, vnpp, length, options);
882         if (not_failed)
883 	        not_failed = VSCombineSeqLoc(vnpp, length, options);
884 
885         if (not_failed)
886 	{
887 	        retval = *vnpp ? INT2_MAX : 0;
888 	        var = *vnpp;
889 	        while (var)
890 	        {
891 		        retval = MIN(retval, (Int2) var->choice);
892 		        var = var->next;
893 	        }
894 	}
895 	else retval = -1;
896 
897 	if (delete_options)
898 		options = VSOptionsFree(options);
899 
900 	return retval;
901 }
902 
903 /*
904 Performs VecScreen for stand-alone application.
905 
906 Note: if 'options' is NULL, default values will be used.  This is STRONGLY recommended.
907 */
908 
909 Int2 LIBCALL
VSScreenSequence(BioseqPtr bsp,VSOptionsPtr options,CharPtr database,SeqAlignPtr PNTR seqalign_ptr,ValNodePtr PNTR vnpp,ValNodePtr * other_returns,ValNodePtr * error_returns)910 VSScreenSequence(BioseqPtr bsp, VSOptionsPtr options, CharPtr database, SeqAlignPtr PNTR seqalign_ptr, ValNodePtr PNTR vnpp, ValNodePtr *other_returns, ValNodePtr *error_returns)
911 
912 {
913 	BLAST_OptionsBlkPtr blast_options;
914 	Boolean delete_options = FALSE;
915 	Int2 retval=0;
916 	SeqAlignPtr seqalign;
917 
918 	if (bsp == NULL)
919 		return -1;
920 
921 	if (seqalign_ptr)
922 		*seqalign_ptr = NULL;
923 
924 	blast_options = VSBlastOptionNew();
925 	if (blast_options == NULL)
926 		return -1;
927 
928 	if (options == NULL)
929 	{
930 		options = VSOptionsNew();
931 		if (options == NULL)
932 		        return -1;
933 		delete_options = TRUE;
934 	}
935 
936 	if (database == NULL)
937 		seqalign = BioseqBlastEngine(bsp, "blastn", "UniVec_Core", blast_options, other_returns, error_returns, NULL);
938 	else
939 		seqalign = BioseqBlastEngine(bsp, "blastn", database, blast_options, other_returns, error_returns, NULL);
940 	if (seqalign)
941 	{
942 		retval = VSMakeCombinedSeqLoc(&seqalign, vnpp, bsp->length, options);
943 
944 		if (seqalign_ptr)
945 			*seqalign_ptr = seqalign;
946 	}
947 
948 	if (delete_options)
949 		options = VSOptionsFree(options);
950 
951 	blast_options = BLASTOptionDelete(blast_options);
952 
953 	return retval;
954 }
955 
956 /*
957 Performs VecScreen for vector filtering.
958 
959 Note: if 'options' is NULL, default values will be used.  This is STRONGLY recommended.
960 */
961 
962 Int2 LIBCALL
VSScreenSequenceByLoc(SeqLocPtr slp,VSOptionsPtr options,CharPtr database,SeqAlignPtr PNTR seqalign_ptr,ValNodePtr PNTR vnpp,ValNodePtr * other_returns,ValNodePtr * error_returns)963 VSScreenSequenceByLoc(SeqLocPtr slp, VSOptionsPtr options, CharPtr database, SeqAlignPtr PNTR seqalign_ptr, ValNodePtr PNTR vnpp, ValNodePtr *other_returns, ValNodePtr *error_returns)
964 
965 {
966 	BioseqPtr bsp;
967 	Int2 retval=0;
968 
969 	if (slp == NULL)
970 		return -1;
971 
972 
973 	bsp = BioseqLockById(SeqLocId(slp));
974 	if (bsp == NULL)
975 		return -1;
976 
977 	retval = VSScreenSequence(bsp, options, database, seqalign_ptr, vnpp, other_returns, error_returns);
978 
979 	BioseqUnlock(bsp);
980 
981 	return retval;
982 }
983 
984 /*
985 	Prints bar overview and list of matching segments by category
986 */
987 
988 Boolean LIBCALL
VSPrintOverviewFromSeqLocs(ValNodePtr vnp,Int4 query_length,FILE * outfp)989 VSPrintOverviewFromSeqLocs (ValNodePtr vnp, Int4 query_length, FILE *outfp)
990 
991 {
992 	CharPtr buffer50=NULL, buffer25=NULL, buffer20=NULL, buffer15=NULL;
993 	SeqLocPtr seqloc1=NULL, seqloc2=NULL, seqloc3=NULL, seqloc4=NULL, tmp;
994 	ValNodePtr var=NULL;
995 
996         if (outfp == NULL || query_length < 1)
997 		return FALSE;
998 
999 	var = vnp;
1000 	while (var)
1001 	{
1002 		if (var->choice == 1)
1003 		{
1004 			seqloc1 = var->data.ptrvalue;
1005 		}
1006 		else if (var->choice == 2)
1007 		{
1008 			seqloc2 = var->data.ptrvalue;
1009 		}
1010 		else if (var->choice == 3)
1011 		{
1012 			seqloc3 = var->data.ptrvalue;
1013 		}
1014 		else if (var->choice == 4)
1015 		{
1016 			seqloc4 = var->data.ptrvalue;
1017 		}
1018 		var = var->next;
1019 	}
1020 
1021 	if (!seqloc1 && !seqloc2 && !seqloc3 && !seqloc4)
1022 		return TRUE;
1023 
1024 	if (seqloc1)
1025 	{
1026 		Print62Notation(seqloc1, query_length, &buffer50);
1027 	}
1028 
1029 	if (seqloc2)
1030 	{
1031 		Print62Notation(seqloc2, query_length, &buffer25);
1032 	}
1033 
1034 	if (seqloc3)
1035 	{
1036 		Print62Notation(seqloc3, query_length, &buffer20);
1037 	}
1038 
1039 	if (seqloc4)
1040 	{
1041 		Print62Notation(seqloc4, query_length, &buffer15);
1042 	}
1043 
1044 
1045 	fprintf(outfp, "<B>Distribution of Vector Matches on the Query Sequence</B>\n\n");
1046 	fprintf(outfp, "<IMG hspace=1 SRC=http://www.ncbi.nlm.nih.gov/sutils/tunf.cgi?450x12-1(1)150(%ld)300(%ld)450(%ld)600(%ld)\n", (long) query_length/4, (long) query_length/2, (long) 3*query_length/4, (long) query_length);
1047 	fprintf(outfp, ">\n");
1048 	fprintf(outfp, "<IMG hspace=1 SRC=http://www.ncbi.nlm.nih.gov/gorf/gun2.cgi?0M(009g)");
1049 	if (buffer20)
1050 		fprintf(outfp, "00FF00(%s)", buffer20);
1051 	if (buffer25)
1052 		fprintf(outfp, "FF00FF(%s)", buffer25);
1053 	if (buffer50)
1054 		fprintf(outfp, "FF0000(%s)", buffer50);
1055 	if (buffer15)
1056 		fprintf(outfp, "FFFF00(%s)", buffer15);
1057 
1058 	MemFree(buffer20);
1059 	MemFree(buffer25);
1060 	MemFree(buffer50);
1061 	MemFree(buffer15);
1062 
1063 	fprintf(outfp, ">\n");
1064 	fprintf(outfp, "   <B>Match to Vector: </B>");
1065 	fprintf(outfp, "<IMG SRC=http://www.ncbi.nlm.nih.gov/gorf/gun2.cgi?0M(000M)FF0000(000M)> <B>Strong </B>  ");
1066 	fprintf(outfp, "<IMG SRC=http://www.ncbi.nlm.nih.gov/gorf/gun2.cgi?0M(000M)FF00FF(000M)> <B>Moderate </B>  ");
1067 	fprintf(outfp, "<IMG SRC=http://www.ncbi.nlm.nih.gov/gorf/gun2.cgi?0M(000M)00FF00(000M)> <B>Weak </B>\n");
1068 	fprintf(outfp, "   <B>Segment of suspect origin: </B>");
1069 	fprintf(outfp, "<IMG SRC=http://www.ncbi.nlm.nih.gov/gorf/gun2.cgi?0M(000M)FFFF00(000M)>\n\n");
1070 
1071 
1072 	fprintf(outfp, "<B>Segments matching vector: </B>\n");
1073 	if (seqloc1)
1074 	{
1075 		if (seqloc1->choice == SEQLOC_PACKED_INT)
1076 			tmp = seqloc1->data.ptrvalue;
1077 		fprintf(outfp, "<A HREF=\"http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html#Strong\" TARGET=\"VecScreenInfo\">Strong match</A>:  ");
1078 		while (tmp)
1079 		{
1080 			fprintf(outfp, " %ld-%ld", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1081 			if (tmp->next)
1082 				fprintf(outfp, ",");
1083 			else
1084 				fprintf(outfp, "\n");
1085 			tmp = tmp->next;
1086 		}
1087 	}
1088 
1089 	if (seqloc2)
1090 	{
1091 		if (seqloc2->choice == SEQLOC_PACKED_INT)
1092 			tmp = seqloc2->data.ptrvalue;
1093 		fprintf(outfp, "<A HREF=\"http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html#Moderate\" TARGET=\"VecScreenInfo\">Moderate match</A>:");
1094 		while (tmp)
1095 		{
1096 			fprintf(outfp, " %ld-%ld", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1097 			if (tmp->next)
1098 				fprintf(outfp, ",");
1099 			else
1100 				fprintf(outfp, "\n");
1101 			tmp = tmp->next;
1102 		}
1103 	}
1104 
1105 	if (seqloc3)
1106 	{
1107 		if (seqloc3->choice == SEQLOC_PACKED_INT)
1108 			tmp = seqloc3->data.ptrvalue;
1109 		fprintf(outfp, "<A HREF=\"http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html#Weak\" TARGET=\"VecScreenInfo\">Weak match</A>:    ");
1110 		while (tmp)
1111 		{
1112 			fprintf(outfp, " %ld-%ld", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1113 			if (tmp->next)
1114 				fprintf(outfp, ",");
1115 			else
1116 				fprintf(outfp, "\n");
1117 			tmp = tmp->next;
1118 		}
1119 	}
1120 
1121 	if (seqloc4)
1122 	{
1123 		if (seqloc4->choice == SEQLOC_PACKED_INT)
1124 			tmp = seqloc4->data.ptrvalue;
1125 		fprintf(outfp, "<A HREF=\"http://www.ncbi.nlm.nih.gov/VecScreen/VecScreen_docs.html#Suspect\" TARGET=\"VecScreenInfo\">Suspect origin</A>:");
1126 		while (tmp)
1127 		{
1128 			fprintf(outfp, " %ld-%ld", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1129 			if (tmp->next)
1130 				fprintf(outfp, ",");
1131 			else
1132 				fprintf(outfp, "\n");
1133 			tmp = tmp->next;
1134 		}
1135 	}
1136 
1137 	fprintf(outfp, "\n");
1138 
1139 	return TRUE;
1140 }
1141 
1142 
1143 
1144 /*
1145 	Prints list of matching segments
1146 */
1147 
1148 Boolean LIBCALL
VSPrintListFromSeqLocs(ValNodePtr vnp,FILE * outfp)1149 VSPrintListFromSeqLocs (ValNodePtr vnp, FILE *outfp)
1150 
1151 {
1152 	SeqLocPtr seqloc1=NULL, seqloc2=NULL, seqloc3=NULL, seqloc4=NULL, tmp;
1153 	ValNodePtr var=NULL;
1154 
1155         if (outfp == NULL)
1156 		return FALSE;
1157 
1158 	var = vnp;
1159 	while (var)
1160 	{
1161 		if (var->choice == 1)
1162 		{
1163 			seqloc1 = var->data.ptrvalue;
1164 		}
1165 		else if (var->choice == 2)
1166 		{
1167 			seqloc2 = var->data.ptrvalue;
1168 		}
1169 		else if (var->choice == 3)
1170 		{
1171 			seqloc3 = var->data.ptrvalue;
1172 		}
1173 		else if (var->choice == 4)
1174 		{
1175 			seqloc4 = var->data.ptrvalue;
1176 		}
1177 		var = var->next;
1178 	}
1179 
1180 	if (!seqloc1 && !seqloc2 && !seqloc3 && !seqloc4)
1181 		return TRUE;
1182 
1183 	if (seqloc1)
1184 	{
1185 		if (seqloc1->choice == SEQLOC_PACKED_INT)
1186 			tmp = seqloc1->data.ptrvalue;
1187 		fprintf(outfp, "Strong match\n");
1188 		while (tmp)
1189 		{
1190 			fprintf(outfp, "%ld\t%ld\n", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1191 			tmp = tmp->next;
1192 		}
1193 	}
1194 
1195 	if (seqloc2)
1196 	{
1197 		if (seqloc2->choice == SEQLOC_PACKED_INT)
1198 			tmp = seqloc2->data.ptrvalue;
1199 		fprintf(outfp, "Moderate match\n");
1200 		while (tmp)
1201 		{
1202 			fprintf(outfp, "%ld\t%ld\n", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1203 			tmp = tmp->next;
1204 		}
1205 	}
1206 
1207 	if (seqloc3)
1208 	{
1209 		if (seqloc3->choice == SEQLOC_PACKED_INT)
1210 			tmp = seqloc3->data.ptrvalue;
1211 		fprintf(outfp, "Weak match\n");
1212 		while (tmp)
1213 		{
1214 			fprintf(outfp, "%ld\t%ld\n", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1215 			tmp = tmp->next;
1216 		}
1217 	}
1218 
1219 	if (seqloc4)
1220 	{
1221 		if (seqloc4->choice == SEQLOC_PACKED_INT)
1222 			tmp = seqloc4->data.ptrvalue;
1223 		fprintf(outfp, "Suspect origin\n");
1224 		while (tmp)
1225 		{
1226 			fprintf(outfp, "%ld\t%ld\n", (long) (SeqLocStart(tmp)+1), (long) (SeqLocStop(tmp)+1));
1227 			tmp = tmp->next;
1228 		}
1229 	}
1230 
1231 	return TRUE;
1232 }
1233 
1234 
1235 #define BUFFER_LENGTH 255
1236 
1237 /*
1238 	Prints Id line for list results format
1239 */
1240 
1241 Boolean LIBCALL
VSPrintListIdLine(BioseqPtr query_bsp,CharPtr proginfo,CharPtr database,FILE * outfp)1242 VSPrintListIdLine (BioseqPtr query_bsp, CharPtr proginfo, CharPtr database, FILE *outfp)
1243 
1244 {
1245 
1246 	Char            idbuf [BUFFER_LENGTH];
1247 	CharPtr         dbdef, ptr, chptr;
1248 	Int4            length;
1249 	ReadDBFILEPtr   rdfp, rdfp_var;
1250 	Boolean         first_title;
1251 
1252 
1253 	if (outfp == NULL)
1254 		return FALSE;
1255 
1256 
1257 	/* prepare Query ID string for output */
1258 
1259 	ptr = NULL; chptr = NULL;
1260 
1261 	if (query_bsp)
1262 		ptr = BioseqGetTitle (query_bsp);
1263 
1264 	if (ptr) {
1265 		while (IS_WHITESP(*ptr))
1266 			ptr++;
1267 
1268 		if (*ptr == '"') {
1269 			ptr++;
1270 			chptr = StringChr (ptr, '"');
1271 		}
1272 		else {
1273 			chptr = StringPBrk (ptr, " \t");
1274 		}
1275 	}
1276 
1277 	if (ptr && *ptr != '\0') {
1278 		StringNCpy_0 (idbuf, ptr, (chptr ? MIN((chptr - ptr) + 1, BUFFER_LENGTH) : BUFFER_LENGTH));
1279 	}
1280 	else {
1281 		StringCpy (idbuf, "Id_unknown");
1282 	}
1283 
1284 
1285 	/* prepare database description(s) for output */
1286 
1287 	dbdef = NULL; rdfp = NULL;
1288 
1289 	if (database)
1290 		rdfp = readdb_new_ex(database, READDB_DB_IS_NUC, FALSE);
1291 
1292 	if (rdfp) {
1293 		rdfp_var = rdfp;
1294 		length = 0;
1295 		while (rdfp_var) {
1296 			length += StringLen(readdb_get_title(rdfp_var));
1297 			length += 3;
1298 			rdfp_var = rdfp_var->next;
1299 		}
1300 		dbdef = MemNew(length*sizeof(Char));
1301 	}
1302 
1303 	if (dbdef) {
1304 		ptr = dbdef;
1305 		rdfp_var = rdfp;
1306 
1307 		first_title = TRUE;
1308 		while (rdfp_var) {
1309 			chptr = readdb_get_title(rdfp_var);
1310 
1311 			if(chptr == NULL) {
1312 				rdfp_var = rdfp_var->next;
1313 				continue;
1314 			}
1315 
1316 			if(!first_title) {
1317 				*ptr = ';';
1318 				ptr++;
1319 				*ptr = ' ';
1320 				ptr++;
1321 			}
1322 			else
1323 				first_title = FALSE;
1324 
1325 			StringCpy(ptr, readdb_get_title(rdfp_var));
1326 
1327 			length = StringLen(ptr);
1328 			ptr += length;
1329 
1330 			rdfp_var = rdfp_var->next;
1331 		}
1332 
1333 		*ptr = '\0';
1334 
1335 		rdfp = readdb_destruct(rdfp);
1336 
1337 	}
1338 
1339 
1340 	fprintf(outfp, ">Vector %s Screen: %s Database: %s\n",
1341 		idbuf, proginfo ? proginfo : "unknown", dbdef ? dbdef : "unknown");
1342 
1343 	dbdef = MemFree(dbdef);
1344 
1345 	return TRUE;
1346 
1347 }
1348 
1349 
1350