1 /*   sequin2.c
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *            National Center for Biotechnology Information (NCBI)
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government do not place any restriction on its use or reproduction.
13 *  We would, however, appreciate having the NCBI and the author cited in
14 *  any work or product based on this material
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 * ===========================================================================
25 *
26 * File Name:  sequin2.c
27 *
28 * Author:  Jonathan Kans
29 *
30 * Version Creation Date:   1/22/95
31 *
32 * $Revision: 6.759 $
33 *
34 * File Description:
35 *
36 * Modifications:
37 * --------------------------------------------------------------------------
38 * Date     Name        Description of modification
39 * -------  ----------  -----------------------------------------------------
40 *
41 *
42 * ==========================================================================
43 */
44 
45 #include "sequin.h"
46 #include <document.h>
47 #include <sequtil.h>
48 #include <biosrc.h>
49 #include <cdrgn.h>
50 #include <seqsub.h>
51 #include <tofasta.h>
52 #include <gather.h>
53 #include <subutil.h>
54 #include <suggslp.h>
55 #include <toasn3.h>
56 #include <toporg.h>
57 #include <salfiles.h>
58 #include <salsap.h>
59 #include <salign.h>
60 #include <edutil.h>
61 #include <vsm.h>
62 //#include <accentr.h>
63 //#include <accutils.h>
64 #include <pmfapi.h>
65 #include <explore.h>
66 #include <aliparse.h>
67 #include <algo/blast/api/twoseq_api.h>
68 #ifdef WIN_MOTIF
69 #include <netscape.h>
70 #endif
71 #include <actutils.h>
72 #include <salpanel.h>
73 #include <findrepl.h>
74 #include <macrodlg.h>
75 #include <macroapi.h>
76 
77 extern EnumFieldAssoc  biosource_genome_simple_alist [];
78 extern EnumFieldAssoc  biosource_origin_alist [];
79 
/* Display-name/value pairs for the nucleotide molecule-type choices.
 * NOTE(review): the two genomic entries use the literal values 253 and 254
 * rather than a named constant; presumably these are private codes that are
 * translated elsewhere -- confirm against the code that reads this list.
 */
static ENUM_ALIST(biomol_nucX_alist)
  {"Genomic DNA",            253},
  {"Genomic RNA",            254},
  {"Precursor RNA",            2},
  {"mRNA [cDNA]",              3},
  {"Ribosomal RNA",            4},
  {"Transfer RNA",             5},
  {"Other-Genetic",            9},
  {"cRNA",                    11},
  {"Transcribed RNA",         13},
  {"Transfer-messenger RNA", MOLECULE_TYPE_TMRNA },
  {"ncRNA",                  MOLECULE_TYPE_NCRNA},
END_ENUM_ALIST
93 
/* Reduced molecule-type list offering only the two genomic choices; the
 * values match the corresponding entries of biomol_nucX_alist.
 */
static ENUM_ALIST(biomol_nucGen_alist)
  {"Genomic DNA",            253},
  {"Genomic RNA",            254},
END_ENUM_ALIST
98 
/* Display-name/value pairs for the nucleotide topology choices. */
static ENUM_ALIST(topology_nuc_alist)
{"Linear",          TOPOLOGY_LINEAR},
{"Circular",        TOPOLOGY_CIRCULAR},
END_ENUM_ALIST
103 
/* Display-name/value pairs mapping "DNA"/"RNA" to the Seq_mol_* constants. */
static ENUM_ALIST(molecule_alist)
{"DNA",             Seq_mol_dna },
{"RNA",             Seq_mol_rna },
END_ENUM_ALIST
108 
/* NOTE(review): presumably the buffer length reserved for an integer printed
 * as text -- confirm at the points of use. */
#define PRINTED_INT_MAX_LEN 15

/* NOTE(review): these look like the two severity levels used when checking
 * FASTA input (hard requirement vs. warning) -- confirm at the points of use. */
#define CREATE_FASTA_REQUIRED 0
#define CREATE_FASTA_WARNING  1
113 
114 /* These functions are for creating, copying, and freeing lists
115  * of titles and IDs.
116  */
117 static IDAndTitleEditPtr IDAndTitleEditNew (void)
118 {
119   IDAndTitleEditPtr iatep;
120 
121   iatep = (IDAndTitleEditPtr) MemNew (sizeof (IDAndTitleEditData));
122   if (iatep != NULL)
123   {
124     iatep->id_list = NULL;
125     iatep->title_list = NULL;
126     iatep->is_seg = NULL;
127     iatep->num_sequences = 0;
128     iatep->nuc_only = FALSE;
129   }
130   return iatep;
131 }
132 
/* Discards whatever iatep currently holds and sizes it for
 * new_num_sequences blank entries (NULL id/title, length 0, is_seg FALSE).
 *
 * iatep             - structure to reset; nothing happens when NULL.
 * new_num_sequences - number of entries wanted; values <= 0 leave the
 *                     structure empty.
 *
 * If any of the four lists cannot be allocated, the structure is left empty
 * (num_sequences == 0, all lists NULL) so that callers never index a
 * missing list.  nuc_only is not touched.
 */
static void IDAndTitleEditInit (IDAndTitleEditPtr iatep, Int4 new_num_sequences)
{
  Int4 seq_num;

  if (iatep == NULL)
  {
    return;
  }

  /* free old lists, if any; each list is checked because it may be absent */
  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->id_list != NULL)
    {
      iatep->id_list [seq_num] = MemFree (iatep->id_list [seq_num]);
    }
    if (iatep->title_list != NULL)
    {
      iatep->title_list [seq_num] = MemFree (iatep->title_list [seq_num]);
    }
  }
  iatep->id_list = MemFree (iatep->id_list);
  iatep->title_list = MemFree (iatep->title_list);
  iatep->length_list = MemFree (iatep->length_list);
  iatep->is_seg = MemFree (iatep->is_seg);
  iatep->num_sequences = 0;

  if (new_num_sequences <= 0)
  {
    return;
  }

  /* now create blanks for new_num_sequences entries */
  iatep->id_list = (CharPtr PNTR) MemNew (new_num_sequences * sizeof (CharPtr));
  iatep->title_list = (CharPtr PNTR) MemNew (new_num_sequences * sizeof (CharPtr));
  iatep->length_list = (Int4Ptr) MemNew (new_num_sequences * sizeof (Int4));
  iatep->is_seg = (BoolPtr) MemNew (new_num_sequences * sizeof (Boolean));

  if (iatep->id_list == NULL || iatep->title_list == NULL
      || iatep->length_list == NULL || iatep->is_seg == NULL)
  {
    /* partial allocation: roll back so the structure stays consistent */
    iatep->id_list = MemFree (iatep->id_list);
    iatep->title_list = MemFree (iatep->title_list);
    iatep->length_list = MemFree (iatep->length_list);
    iatep->is_seg = MemFree (iatep->is_seg);
    return;
  }

  /* only publish the new count once every list exists */
  iatep->num_sequences = new_num_sequences;
  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    iatep->id_list [seq_num] = NULL;
    iatep->title_list [seq_num] = NULL;
    iatep->length_list [seq_num] = 0;
    iatep->is_seg [seq_num] = FALSE;
  }
}
169 
/* Makes a deep copy of an IDAndTitleEdit structure: IDs and titles are
 * duplicated with StringSave, lengths and segment flags are copied by value,
 * and the nuc_only setting is carried over.
 *
 * Returns the copy, or NULL when iatep_orig is NULL or memory for the copy
 * could not be obtained.
 */
static IDAndTitleEditPtr IDAndTitleEditCopy (IDAndTitleEditPtr iatep_orig)
{
  IDAndTitleEditPtr iatep_copy;
  Int4              seq_num;

  if (iatep_orig == NULL)
  {
    return NULL;
  }

  iatep_copy = IDAndTitleEditNew ();
  if (iatep_copy == NULL)
  {
    return NULL;
  }

  /* nuc_only decides how ApplyIDAndTitleEditToSeqEntryList counts and walks
   * the sequences, so a copy without it would no longer match its list */
  iatep_copy->nuc_only = iatep_orig->nuc_only;

  IDAndTitleEditInit (iatep_copy, iatep_orig->num_sequences);
  if (iatep_copy->num_sequences > 0
      && (iatep_copy->id_list == NULL || iatep_copy->title_list == NULL
          || iatep_copy->length_list == NULL || iatep_copy->is_seg == NULL))
  {
    /* list allocation failed: no strings have been copied yet, so releasing
     * the lists and the structure itself is a complete cleanup */
    iatep_copy->id_list = MemFree (iatep_copy->id_list);
    iatep_copy->title_list = MemFree (iatep_copy->title_list);
    iatep_copy->length_list = MemFree (iatep_copy->length_list);
    iatep_copy->is_seg = MemFree (iatep_copy->is_seg);
    iatep_copy = MemFree (iatep_copy);
    return NULL;
  }

  for (seq_num = 0; seq_num < iatep_copy->num_sequences; seq_num++)
  {
    iatep_copy->id_list [seq_num] = StringSave (iatep_orig->id_list [seq_num]);
    iatep_copy->title_list [seq_num] = StringSave (iatep_orig->title_list [seq_num]);
    iatep_copy->length_list [seq_num] = iatep_orig->length_list [seq_num];
    if (iatep_orig->is_seg != NULL)
    {
      iatep_copy->is_seg [seq_num] = iatep_orig->is_seg [seq_num];
    }
  }

  return iatep_copy;
}
200 
IDAndTitleEditFree(IDAndTitleEditPtr iatep)201 NLM_EXTERN IDAndTitleEditPtr IDAndTitleEditFree (IDAndTitleEditPtr iatep)
202 {
203   Int4 i;
204 
205   if (iatep != NULL)
206   {
207     for (i = 0; i < iatep->num_sequences; i++)
208     {
209       iatep->id_list [i] = MemFree (iatep->id_list [i]);
210       iatep->title_list [i] = MemFree (iatep->title_list [i]);
211     }
212     iatep->id_list = MemFree (iatep->id_list);
213     iatep->title_list = MemFree (iatep->title_list);
214     iatep->length_list = MemFree (iatep->length_list);
215     iatep->is_seg = MemFree (iatep->is_seg);
216     iatep = MemFree (iatep);
217   }
218   return iatep;
219 }
220 
221 /* These functions are for applying lists of titles and IDs
222  * to a SeqEntry list.
223  */
CountSequencesAndSegments(SeqEntryPtr list,Boolean nuc_only)224 NLM_EXTERN Int4 CountSequencesAndSegments (SeqEntryPtr list, Boolean nuc_only)
225 {
226   Int4         num_seqs = 0;
227   BioseqSetPtr bssp;
228   BioseqPtr    bsp;
229 
230   while (list != NULL)
231   {
232     if (list->data.ptrvalue != NULL)
233     {
234       if (IS_Bioseq (list))
235       {
236         bsp = (BioseqPtr) list->data.ptrvalue;
237         if (!nuc_only || ISA_na (bsp->mol)) {
238           num_seqs ++;
239         }
240       }
241       else if (IS_Bioseq_set (list))
242       {
243         bssp = (BioseqSetPtr) list->data.ptrvalue;
244         num_seqs += CountSequencesAndSegments (bssp->seq_set, nuc_only);
245       }
246     }
247     list = list->next;
248   }
249   return num_seqs;
250 }
251 
/* Returns the Bioseq at zero-based position nth among the Bioseqs reachable
 * from seq_list, visiting entries in list order and descending into
 * Bioseq-sets.
 *
 * seq_list - first entry of the list to search.
 * nth      - zero-based position wanted.
 * is_seg   - optional out-flag (may be NULL).  It is set to TRUE when the
 *            search enters a set of class BioseqseqSet_class_parts and set
 *            back to FALSE if the sequence is not found while processing
 *            that set entry.  It is never written otherwise, so the caller
 *            should initialise it.
 * nuc_only - when TRUE, only Bioseqs whose mol satisfies ISA_na are counted.
 *
 * Returns NULL when there are not enough sequences.
 */
NLM_EXTERN BioseqPtr FindNthSequenceInSet (SeqEntryPtr seq_list, Int4 nth, BoolPtr is_seg, Boolean nuc_only)
{
  Int4         pos = 0;
  BioseqPtr    bsp = NULL;
  BioseqSetPtr bssp;
  SeqEntryPtr  sep;

  while (seq_list != NULL && bsp == NULL)
  {
    if (seq_list->data.ptrvalue != NULL)
    {
      if (IS_Bioseq (seq_list) && seq_list->data.ptrvalue != NULL
          && (!nuc_only || ISA_na(((BioseqPtr)seq_list->data.ptrvalue)->mol)))
      {
        /* pos counts the qualifying sequences already passed at this level */
        if (nth == pos)
        {
          bsp = seq_list->data.ptrvalue;
        }
        else
        {
          pos ++;
        }
      }
      else if (IS_Bioseq_set (seq_list))
      {
        bssp = (BioseqSetPtr) seq_list->data.ptrvalue;
        if (bssp->_class == BioseqseqSet_class_parts && is_seg != NULL)
        {
          *is_seg = TRUE;
        }
        sep = bssp->seq_set;
        while (sep != NULL && bsp == NULL)
        {
          /* nth - pos is the position still to be reached inside this set.
           * NOTE(review): the recursive call itself walks sep and all of its
           * following siblings, so after a miss the remaining iterations
           * re-scan a suffix of the same list; they appear to serve only to
           * advance pos by the number of sequences in the set. */
          bsp = FindNthSequenceInSet (sep, nth - pos, is_seg, nuc_only);
          if (bsp == NULL)
          {
            if (IS_Bioseq_set (sep))
            {
              /* unlike the outer loop, sep->data.ptrvalue is not checked for
               * NULL before it is dereferenced here */
              bssp = (BioseqSetPtr) sep->data.ptrvalue;
              pos += CountSequencesAndSegments (bssp->seq_set, nuc_only);
            }
            else if (IS_Bioseq (sep) && (!nuc_only || ISA_na (((BioseqPtr)(sep->data.ptrvalue))->mol)))
            {
              pos ++;
            }
          }
          sep = sep->next;
        }
        /* not found inside this set: withdraw the segment flag */
        if (bsp == NULL && is_seg != NULL)
        {
          *is_seg = FALSE;
        }
      }
    }
    seq_list = seq_list->next;
  }
  return bsp;
}
310 
311 
/* Records the ID label, title and length of one Bioseq in slot pos of iatep.
 *
 * The ID chosen is the one SeqIdFindBest selects for SEQID_GENBANK; a local
 * ID is labelled with PRINTID_REPORT, any other with PRINTID_FASTA_SHORT.
 * The title is copied only when the Bioseq has a title descriptor with text.
 * bsp and iatep must be non-NULL and pos must be a valid slot.
 */
static void AddOneSequenceToIatep (BioseqPtr bsp, IDAndTitleEditPtr iatep, Int4 pos)
{
  SeqIdPtr   best_id;
  SeqDescPtr desc;

  best_id = SeqIdFindBest (bsp->id, SEQID_GENBANK);
  if (best_id != NULL) {
    iatep->id_list [pos] = SeqIdWholeLabel (best_id,
                                            (best_id->choice == SEQID_LOCAL)
                                              ? PRINTID_REPORT
                                              : PRINTID_FASTA_SHORT);
  }

  /* locate the first title descriptor, if there is one */
  for (desc = bsp->descr; desc != NULL; desc = desc->next) {
    if (desc->choice == Seq_descr_title) {
      break;
    }
  }
  if (desc != NULL && !StringHasNoText (desc->data.ptrvalue)) {
    iatep->title_list [pos] = StringSave (desc->data.ptrvalue);
  }

  iatep->length_list [pos] = bsp->length;
}
337 
338 
/* Walks a SeqEntry list, descending into Bioseq-sets, and stores every
 * qualifying Bioseq in iatep via AddOneSequenceToIatep.
 *
 * seq_list - first entry to visit.
 * nth      - in/out: slot to fill next; incremented after each stored
 *            sequence so the running position survives the recursion.
 * nuc_only - when TRUE, Bioseqs whose mol fails ISA_na are skipped.
 * iatep    - destination; must have room for every qualifying sequence.
 */
static void AddSequencesInSetToIatep (SeqEntryPtr seq_list, Int4Ptr nth, Boolean nuc_only, IDAndTitleEditPtr iatep)
{
  SeqEntryPtr  entry;
  BioseqSetPtr set;
  BioseqPtr    seq;
  Boolean      wanted_bioseq;

  for (entry = seq_list; entry != NULL; entry = entry->next) {
    if (entry->data.ptrvalue == NULL) {
      continue;
    }

    seq = NULL;
    wanted_bioseq = FALSE;
    if (IS_Bioseq (entry)) {
      seq = (BioseqPtr) entry->data.ptrvalue;
      wanted_bioseq = (Boolean) (!nuc_only || ISA_na (seq->mol));
    }

    if (wanted_bioseq) {
      AddOneSequenceToIatep (seq, iatep, *nth);
      (*nth)++;
    } else if (IS_Bioseq_set (entry)) {
      set = (BioseqSetPtr) entry->data.ptrvalue;
      AddSequencesInSetToIatep (set->seq_set, nth, nuc_only, iatep);
    }
  }
}
364 
365 
SeqEntryListToIDAndTitleEditEx(SeqEntryPtr list,Boolean nuc_only)366 NLM_EXTERN IDAndTitleEditPtr SeqEntryListToIDAndTitleEditEx (SeqEntryPtr list, Boolean nuc_only)
367 {
368   IDAndTitleEditPtr iatep;
369   Int4              num_sequences, i = 0;
370 
371   num_sequences = CountSequencesAndSegments (list, nuc_only);
372   if (num_sequences == 0)
373   {
374     return NULL;
375   }
376 
377   iatep = IDAndTitleEditNew ();
378   if (iatep == NULL)
379   {
380     return NULL;
381   }
382 
383   iatep->nuc_only = nuc_only;
384   IDAndTitleEditInit (iatep, num_sequences);
385   AddSequencesInSetToIatep (list, &i, nuc_only, iatep);
386 
387   return iatep;
388 }
389 
390 
/* Convenience wrapper: calls SeqEntryListToIDAndTitleEditEx with nuc_only
 * FALSE, so every Bioseq in list is included regardless of molecule type.
 */
static IDAndTitleEditPtr SeqEntryListToIDAndTitleEdit (SeqEntryPtr list)
{
  return SeqEntryListToIDAndTitleEditEx (list, FALSE);
}
395 
/* Replaces the first Seq-id and the title of a Bioseq.
 *
 * bsp     - sequence to modify.  When NULL nothing is done and neither
 *           new_sip nor title is touched.
 * new_sip - replacement ID.  If it compares equal (SIC_YES) to the current
 *           first ID the Bioseq is left alone and new_sip is released.
 *           Otherwise it takes the place of the first ID, inheriting that
 *           ID's successors, and the Bioseq index is refreshed.  When NULL,
 *           the current first ID is freed and bsp->id is cleared.
 * title   - replacement title text; NULL is stored as an empty string.
 *
 * For a non-NULL bsp this function takes ownership of both new_sip and
 * title: each is either stored in the Bioseq or freed.
 */
static void ReplaceIDAndTitleForBioseq (BioseqPtr bsp, SeqIdPtr new_sip, CharPtr title)
{
  SeqDescrPtr sdp;
  SeqEntryPtr sep;

  if (bsp == NULL)
  {
    return;
  }

  /* replace ID */

  if (new_sip != NULL)
  {
    if (SeqIdComp (new_sip, bsp->id) == SIC_YES)
    {
      /* no change to the Bioseq, but the replacement was handed over to us
       * and nobody else will free it; the identity check protects against
       * a caller that passed the Bioseq's own ID */
      if (new_sip != bsp->id)
      {
        new_sip = SeqIdSetFree (new_sip);
      }
    }
    else
    {
      if (bsp->id != NULL)
      {
        /* splice new_sip in front of the old ID's successors, then free
         * only the old first ID */
        new_sip->next = bsp->id->next;
        bsp->id->next = NULL;
        bsp->id = SeqIdFree (bsp->id);
      }
      bsp->id = new_sip;
      SeqMgrReplaceInBioseqIndex(bsp);
    }
  }
  else
  {
    bsp->id = SeqIdFree (bsp->id);
  }

  /* replace title */
  if (title == NULL)
  {
    title = StringSave ("");
  }
  sdp = bsp->descr;
  while (sdp != NULL && sdp->choice != Seq_descr_title)
  {
    sdp = sdp->next;
  }
  if (sdp == NULL)
  {
    sep = SeqMgrGetSeqEntryForData (bsp);
    sdp = CreateNewDescriptor (sep, Seq_descr_title);
    if (sdp == NULL)
    {
      /* no descriptor could be created, so there is nowhere to store the
       * title; release it instead of dereferencing NULL */
      title = MemFree (title);
      return;
    }
    sdp->data.ptrvalue = title;
  }
  else
  {
    sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
    sdp->data.ptrvalue = title;
  }
}
453 
/* Rebuilds the location list (seq_ext) of every segmented Bioseq found in
 * list so that it refers to the Bioseqs currently in the matching parts set.
 *
 * For each Bioseq-set of class BioseqseqSet_class_segset the function looks
 * for a Bioseq (the segmented sequence) and a nested set of class
 * BioseqseqSet_class_parts, frees the old location chain and builds a new
 * one from the Bioseqs in the parts set.  Sets of any other class are
 * searched recursively.
 */
static void ResetSegSetIDLists (SeqEntryPtr list)
{
  BioseqSetPtr bssp, parts;
  BioseqPtr    seg_bsp;
  SeqEntryPtr  sep;
  SeqLocPtr    loc, next_loc, last_loc;

  while (list != NULL)
  {
    if (IS_Bioseq_set (list) && (bssp = (BioseqSetPtr) list->data.ptrvalue) != NULL)
    {
      if (bssp->_class == BioseqseqSet_class_segset)
      {
        sep = bssp->seq_set;
        seg_bsp = NULL;
        parts = NULL;
        /* scan until both a Bioseq and a parts set have been seen; a later
         * Bioseq replaces an earlier one while the scan is still running */
        while (sep != NULL && (seg_bsp == NULL || parts == NULL))
        {
          if (IS_Bioseq (sep))
          {
            seg_bsp = sep->data.ptrvalue;
          }
          else if (IS_Bioseq_set (sep))
          {
            parts = sep->data.ptrvalue;
            if (parts != NULL && parts->_class != BioseqseqSet_class_parts)
            {
              parts = NULL;
            }
          }
          sep = sep->next;
        }
        if (seg_bsp != NULL)
        {
          /* remove old location */
          /* each node is unlinked before it is freed so that freeing one
           * node cannot affect the rest of the chain */
          loc = (SeqLocPtr) seg_bsp->seq_ext;
          while (loc != NULL)
          {
            next_loc = loc->next;
            loc->next = NULL;
            loc = SeqLocFree (loc);
            loc = next_loc;
          }
          seg_bsp->seq_ext = NULL;
          /* put in new locations */
          /* when no parts set was found the scan above ran to the end of
           * the list, so sep is NULL here, the loop below does nothing and
           * seq_ext stays empty */
          if (parts != NULL) {
            sep = parts->seq_set;
          }
          last_loc = NULL;
          while (sep != NULL)
          {
            if (IS_Bioseq (sep) && sep->data.ptrvalue != NULL)
            {
              /* NOTE(review): SeqLocWholeNew presumably builds a location
               * covering the whole Bioseq -- confirm */
              loc = SeqLocWholeNew (sep->data.ptrvalue);
              if (loc != NULL)
              {
                /* append to the new chain, starting it if necessary */
                if (last_loc == NULL)
                {
                  seg_bsp->seq_ext = loc;
                }
                else
                {
                  last_loc->next = loc;
                }
                last_loc = loc;
              }
            }
            sep = sep->next;
          }
        }
      }
      else
      {
        ResetSegSetIDLists (bssp->seq_set);
      }
    }
    list = list->next;
  }
}
533 
534 
/* Applies the ID and title stored in slot pos of iatep to one Bioseq.
 *
 * An ID text containing '|' is first handed to SeqIdParse; if that yields
 * nothing, or the text has no '|', MakeSeqID builds the ID instead.  The
 * title is passed on as a fresh copy.
 */
static void ApplyIatepToOneSequence (BioseqPtr bsp, IDAndTitleEditPtr iatep, Int4 pos)
{
  CharPtr  id_text;
  SeqIdPtr replacement = NULL;

  id_text = iatep->id_list [pos];
  if (StringChr (id_text, '|') != NULL) {
    replacement = SeqIdParse (id_text);
  }
  if (replacement == NULL) {
    replacement = MakeSeqID (id_text);
  }

  ReplaceIDAndTitleForBioseq (bsp, replacement, StringSave (iatep->title_list [pos]));
}
549 
550 
/* Walks a SeqEntry list, descending into Bioseq-sets, and applies the
 * matching iatep slot to every qualifying Bioseq.
 *
 * seq_list - first entry to visit.
 * nth      - in/out: slot to apply next; incremented after each sequence so
 *            the running position survives the recursion.
 * nuc_only - when TRUE, Bioseqs whose mol fails ISA_na are skipped.
 * iatep    - source of the IDs and titles.
 */
static void ApplyIatepToSequencesInSet (SeqEntryPtr seq_list, Int4Ptr nth, Boolean nuc_only, IDAndTitleEditPtr iatep)
{
  SeqEntryPtr  entry;
  BioseqSetPtr set;
  Boolean      wanted_bioseq;

  for (entry = seq_list; entry != NULL; entry = entry->next) {
    if (entry->data.ptrvalue == NULL) {
      continue;
    }

    wanted_bioseq = (Boolean) (IS_Bioseq (entry)
                               && (!nuc_only
                                   || ISA_na (((BioseqPtr) entry->data.ptrvalue)->mol)));

    if (wanted_bioseq) {
      ApplyIatepToOneSequence (entry->data.ptrvalue, iatep, *nth);
      (*nth)++;
    } else if (IS_Bioseq_set (entry)) {
      set = (BioseqSetPtr) entry->data.ptrvalue;
      ApplyIatepToSequencesInSet (set->seq_set, nth, nuc_only, iatep);
    }
  }
}
574 
575 
ApplyIDAndTitleEditToSeqEntryList(SeqEntryPtr list,IDAndTitleEditPtr iatep)576 NLM_EXTERN Boolean ApplyIDAndTitleEditToSeqEntryList (SeqEntryPtr list, IDAndTitleEditPtr iatep)
577 {
578   Int4      i = 0;
579 
580   if (list == NULL || iatep == NULL)
581   {
582     return FALSE;
583   }
584 
585   if (CountSequencesAndSegments (list, iatep->nuc_only) != iatep->num_sequences)
586   {
587     return FALSE;
588   }
589 
590   ApplyIatepToSequencesInSet (list, &i, iatep->nuc_only, iatep);
591   ResetSegSetIDLists (list);
592   return TRUE;
593 }
594 
595 
596 static ValNodePtr BuildModifierTypeList (ValNodePtr type_list, CharPtr new_title, Boolean allow_prot);
597 
RemoveSourceModifiersFromIdAndTitleEdit(IDAndTitleEditPtr iatep)598 NLM_EXTERN void RemoveSourceModifiersFromIdAndTitleEdit (IDAndTitleEditPtr iatep)
599 {
600   ValNodePtr  found_modifiers = NULL, vnp;
601   Int4        seq_num;
602 
603   if (iatep != NULL)
604   {
605     /* get list of modifiers from titles */
606     for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
607     {
608       found_modifiers = BuildModifierTypeList (found_modifiers,
609                                                iatep->title_list [seq_num],
610                                                FALSE);
611       for (vnp = found_modifiers; vnp != NULL; vnp = vnp->next)
612       {
613         if (StringICmp (vnp->data.ptrvalue, "genetic_code") == 0
614             || StringICmp (vnp->data.ptrvalue, "organism") == 0
615             || StringICmp (vnp->data.ptrvalue, "location") == 0
616             || StringICmp (vnp->data.ptrvalue, "gencode_comment") == 0
617             || StringICmp (vnp->data.ptrvalue, "moltype") == 0
618             || StringICmp (vnp->data.ptrvalue, "topology") == 0)
619         {
620           continue;
621         }
622         RemoveValueFromDefline (vnp->data.ptrvalue, iatep->title_list [seq_num]);
623       }
624       found_modifiers = ValNodeFreeData (found_modifiers);
625     }
626   }
627 }
628 
629 
630 /* this section of code is used to read and parse the taxlist.txt
631  * and lineages.txt files */
/* List of known organisms.  Each node's data.ptrvalue is an OrgInfoPtr;
 * the list is filled by LoadOrganismList and is only loaded while it is
 * still NULL.
 */
static ValNodePtr orglist = NULL;

/* One organism record, filled from a line of taxlist.txt (all fields but
 * lineage) and optionally from lineages.txt (lineage).
 */
typedef struct orginfo
{
  CharPtr taxname;   /* taxonomic name */
  CharPtr common;    /* common name */
  Int4    ngcode;    /* nuclear genetic code */
  Int4    mgcode;    /* mitochondrial genetic code */
  CharPtr div;       /* division */
  Int4    taxnum;    /* taxonomy number; key used to attach the lineage */
  CharPtr lineage;   /* lineage text; stays unset when lineages.txt has no entry */
} OrgInfoData, PNTR OrgInfoPtr;
644 
/* Opens a data file for reading.
 *
 * The file is looked for first in the directory part of the path reported
 * by ProgramPath and then, if that fails, in the directory given by the
 * DATA setting in the NCBI section of the NCBI configuration.
 *
 * Returns the open stream, or NULL when filename has no text, the program
 * path contains no directory delimiter, or the file cannot be opened in
 * either place.  The caller closes the stream.
 */
static FILE *OpenSequinDataFile (CharPtr filename)
{
  Char     path [PATH_MAX];
  CharPtr  last_delim;
  FILE     *fp;

  if (StringHasNoText (filename)) {
    return NULL;
  }

  /* first choice: alongside the executable */
  ProgramPath (path, sizeof (path));
  last_delim = StringRChr (path, DIRDELIMCHR);
  if (last_delim == NULL) {
    return NULL;
  }
  *last_delim = '\0';
  FileBuildPath (path, NULL, filename);
  fp = FileOpen (path, "r");
  if (fp != NULL) {
    return fp;
  }

  /* second choice: the configured data directory */
  if (!GetAppParam ("NCBI", "NCBI", "DATA", "", path, sizeof (path))) {
    return NULL;
  }
  FileBuildPath (path, NULL, filename);
  return FileOpen (path, "r");
}
674 
FindByTaxNum(Int4 taxnum)675 static OrgInfoPtr FindByTaxNum (Int4 taxnum)
676 {
677   ValNodePtr vnp;
678   OrgInfoPtr oip;
679 
680   for (vnp = orglist; vnp != NULL; vnp = vnp->next)
681   {
682     oip = (OrgInfoPtr) vnp->data.ptrvalue;
683     if (oip != NULL && oip->taxnum == taxnum)
684     {
685       return oip;
686     }
687   }
688   return NULL;
689 }
690 
FindByTaxName(CharPtr taxname)691 static OrgInfoPtr FindByTaxName (CharPtr taxname)
692 {
693   ValNodePtr vnp;
694   OrgInfoPtr oip;
695 
696   if (StringHasNoText (taxname))
697   {
698     return NULL;
699   }
700 
701   for (vnp = orglist; vnp != NULL; vnp = vnp->next)
702   {
703     oip = (OrgInfoPtr) vnp->data.ptrvalue;
704     if (oip != NULL && StringICmp (oip->taxname, taxname) == 0)
705     {
706       return oip;
707     }
708   }
709   return NULL;
710 }
711 
AddLineagesToOrganismList(void)712 static void AddLineagesToOrganismList (void)
713 {
714   ReadBufferData    rbd;
715   CharPtr           line;
716   CharPtr           ptr;
717   FILE              *f;
718   OrgInfoPtr        oip;
719   Int4              taxnum;
720 
721   /* can only add lineages to existing list */
722   if (orglist == NULL) return;
723 
724   /* now read in lineages */
725   f = OpenSequinDataFile ("lineages.txt");
726 
727   if (f != NULL)
728   {
729     rbd.fp = f;
730     rbd.current_data = NULL;
731     line = AbstractReadFunction (&rbd);
732     line = AbstractReadFunction (&rbd);
733     while (line != NULL)
734     {
735       ptr = StringChr (line, '\t');
736       if (ptr != NULL)
737       {
738         *ptr = '\0';
739         if (StrToLong (line, &taxnum))
740         {
741           oip = FindByTaxNum (taxnum);
742           if (oip != NULL)
743           {
744             oip->lineage = StringSave (ptr + 1);
745           }
746         }
747       }
748     	line = AbstractReadFunction (&rbd);
749     }
750     FileClose (f);
751   }
752 }
753 
/* Extracts the next tab-delimited token from a string.
 *
 * pstart - in/out cursor.  On entry *pstart points at the token; on return
 *          it points just past the tab that ended the token, or is NULL if
 *          the token was the last one.  The tab is overwritten with a
 *          terminator, so the underlying buffer is modified.
 *
 * Returns a newly allocated copy of the token (the caller frees it), or
 * NULL when pstart or *pstart is NULL.
 */
static CharPtr GetNextToken (CharPtr PNTR pstart)
{
  CharPtr token;
  CharPtr tab;

  if (pstart == NULL || *pstart == NULL) {
    return NULL;
  }

  token = *pstart;
  tab = StringChr (token, '\t');
  if (tab == NULL) {
    *pstart = NULL;
  } else {
    *tab = 0;
    *pstart = tab + 1;
  }
  return StringSave (token);
}
780 
LoadOrganismList(void)781 static void LoadOrganismList (void)
782 {
783   ReadBufferData    rbd;
784   CharPtr           line;
785   CharPtr           p_start, numval;
786   FILE              *f;
787   OrgInfoPtr        oip;
788 
789   if (orglist != NULL) return;
790 
791   f = OpenSequinDataFile ("taxlist.txt");
792 
793   if (f != NULL) {
794     rbd.fp = f;
795     rbd.current_data = NULL;
796     line = AbstractReadFunction (&rbd);
797     line = AbstractReadFunction (&rbd);
798     while (line != NULL)
799     {
800       oip = (OrgInfoPtr) MemNew (sizeof (OrgInfoData));
801       if (oip != NULL)
802       {
803         p_start = line;
804         /* read in tax name */
805         oip->taxname = GetNextToken (&p_start);
806 
807         /* read in common name */
808         oip->common = GetNextToken (&p_start);
809 
810         /* read in nuclear genetic code */
811         numval = GetNextToken (&p_start);
812         if (numval != NULL)
813         {
814           StrToLong (numval, &(oip->ngcode));
815           numval = MemFree (numval);
816         }
817         /* read in mitochondrial genetic code */
818         numval = GetNextToken (&p_start);
819         if (numval != NULL)
820         {
821           StrToLong (numval, &(oip->mgcode));
822           numval = MemFree (numval);
823         }
824 
825         /* read in div */
826         oip->div = GetNextToken (&p_start);
827 
828         /* read in taxnum */
829         numval = GetNextToken (&p_start);
830         if (numval != NULL)
831         {
832           StrToLong (numval, &(oip->taxnum));
833           numval = MemFree (numval);
834         }
835 
836         ValNodeAddPointer (&orglist, 0, oip);
837       }
838       line = MemFree (line);
839     	line = AbstractReadFunction (&rbd);
840     }
841     FileClose (f);
842   }
843   AddLineagesToOrganismList ();
844 }
845 
846 /* This section of code is used for determining genetic codes based on
847  * FASTA-defline values.
848  */
/* Result codes of UseGeneticCodeForLocation: which genetic code applies to
 * a given location value. */
#define USE_NUCLEAR_GENETIC_CODE       1
#define USE_MITOCHONDRIAL_GENETIC_CODE 2
#define USE_OTHER_GENETIC_CODE         3
852 
UseGeneticCodeForLocation(CharPtr location)853 static Int4 UseGeneticCodeForLocation (CharPtr location)
854 {
855   if (StringHasNoText (location))
856   {
857     return USE_NUCLEAR_GENETIC_CODE;
858   }
859   else if (StringICmp (location, "Mitochondrion") == 0
860            || StringICmp (location, "Kinetoplast") == 0
861            || StringICmp (location, "Hydrogenosome") == 0)
862   {
863     return USE_MITOCHONDRIAL_GENETIC_CODE;
864   }
865   else if (StringICmp (location, "Chloroplast") == 0
866            || StringICmp (location, "Chromoplast") == 0
867            || StringICmp (location, "plastid") == 0
868            || StringICmp (location, "cyanelle") == 0
869            || StringICmp (location, "apicoplast") == 0
870            || StringICmp (location, "leucoplast") == 0
871            || StringICmp (location, "proplastid") == 0)
872   {
873     return USE_OTHER_GENETIC_CODE;
874   }
875   else
876   {
877     return USE_NUCLEAR_GENETIC_CODE;
878   }
879 }
880 
881 
GetGeneticCodeForTaxNameAndLocation(CharPtr taxname,CharPtr location)882 static Int4 GetGeneticCodeForTaxNameAndLocation (CharPtr taxname, CharPtr location)
883 {
884   ValNodePtr vnp;
885   OrgInfoPtr oip;
886   Int4       use_code;
887 
888   use_code = UseGeneticCodeForLocation (location);
889   if (use_code == USE_OTHER_GENETIC_CODE)
890   {
891     return 11;
892   }
893   else if (StringHasNoText (taxname))
894   {
895     return -1;
896   }
897 
898   for (vnp = orglist; vnp != NULL; vnp = vnp->next)
899   {
900     if (vnp->data.ptrvalue == NULL)
901     {
902       continue;
903     }
904     oip = (OrgInfoPtr) vnp->data.ptrvalue;
905     if (StringICmp (oip->taxname, taxname) == 0)
906     {
907       if (use_code == USE_NUCLEAR_GENETIC_CODE)
908       {
909         return oip->ngcode;
910       }
911       else
912       {
913         return oip->mgcode;
914       }
915     }
916   }
917 
918   return -1;
919 }
920 
GeneticCodeStringFromIntAndList(Int4 num,ValNodePtr list)921 static CharPtr GeneticCodeStringFromIntAndList (Int4 num, ValNodePtr list)
922 {
923   while (list != NULL)
924   {
925     if (list->choice == num)
926     {
927       return list->data.ptrvalue;
928     }
929     list = list->next;
930   }
931   return NULL;
932 }
933 
934 
935 /* these functions deal with commonly asked questions about package types -
936  * which ones are sets, which ones are single sequences, which ones have
937  * which default molecule types.
938  */
PackageTypeIsSet(Int2 seqPackage)939 static Boolean PackageTypeIsSet (Int2 seqPackage)
940 {
941   if (seqPackage == SEQ_PKG_POPULATION
942       || seqPackage == SEQ_PKG_PHYLOGENETIC
943       || seqPackage == SEQ_PKG_MUTATION
944       || seqPackage == SEQ_PKG_ENVIRONMENT
945       || seqPackage == SEQ_PKG_GENBANK
946       || seqPackage == SEQ_PKG_TSA)
947   {
948     return TRUE;
949   }
950   else
951   {
952     return FALSE;
953   }
954 
955 }
956 
PackageTypeIsSingle(Int2 seqPackage)957 static Boolean PackageTypeIsSingle (Int2 seqPackage)
958 {
959   if (seqPackage == SEQ_PKG_SINGLE
960       || seqPackage == SEQ_PKG_GAPPED)
961   {
962     return TRUE;
963   }
964   else
965   {
966     return FALSE;
967   }
968 }
969 
970 /* These functions are used to find titles in SeqEntries */
/* Callback that records a title the first time it is called.
 *
 * mydata must point to a CharPtr; while that CharPtr is still NULL it
 * receives the result of SeqEntryGetTitle for sep, so later calls leave an
 * already stored title alone.  index and indent are unused.
 */
static void FindFirstTitle (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  CharPtr PNTR  title_slot = (CharPtr PNTR) mydata;

  if (title_slot != NULL && *title_slot == NULL) {
    *title_slot = SeqEntryGetTitle (sep);
  }
}
981 
/* Callback that records the first SeqEntry carrying a title descriptor.
 *
 * mydata must point to a SeqEntryPtr; while that pointer is still NULL it is
 * set to sep if SeqEntryGetSeqDescr finds a Seq_descr_title on sep.  index
 * and indent are unused.
 */
static void FindFirstSeqEntryTitle (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)

{
  SeqEntryPtr PNTR  entry_slot = (SeqEntryPtr PNTR) mydata;

  if (entry_slot == NULL || *entry_slot != NULL) {
    return;
  }
  if (SeqEntryGetSeqDescr (sep, Seq_descr_title, NULL) != NULL) {
    *entry_slot = sep;
  }
}
994 
995 /* These functions are used to change the values of modifiers in definition lines */
996 
/* Builds the lower-cased search prefix "[name=" for a modifier name.
 *
 * str  - output buffer; the caller must make it large enough for the name
 *        plus the two extra characters and the terminator.
 * name - modifier name to embed.
 */
extern void MakeSearchStringFromAlist (CharPtr str, CharPtr name)

{
  CharPtr  cursor;

  StringCpy (str, "[");
  StringCat (str, name);
  StringCat (str, "=");

  /* lower-case the whole result in place */
  for (cursor = str; *cursor != '\0'; cursor++) {
    *cursor = TO_LOWER (*cursor);
  }
}
1014 
1015 /* This section of code is used for parsing well-formatted definition lines.
1016  */
/* One alternative spelling of a modifier name and the name it stands for. */
typedef struct modifieralias
{
  CharPtr alias;     /* spelling accepted in a definition line */
  CharPtr modifier;  /* name returned by GetCanonicalName for that spelling */
} ModifierAlias, PNTR ModifierAliasPtr;

/* Alias table consulted by GetCanonicalName after its other lookups (protein
 * modifier names, OrgMod and SubSource qualifiers) have failed to match.
 */
static ModifierAlias alias_list [] =
{
  { "org", "organism" },
  { "organism name", "organism" },
  { "mol-type", "moltype" },
  { "mol_type", "moltype" },
  { "note", "note-orgmod" },
  { "comment", "note-orgmod" },
  { "common-name", "common name"},
  { "subsource", "note-subsrc" },
  { "technique", "tech" },
  { "prot", "protein" },
  { "prot_desc", "protein_desc" } ,
  { "special-notes", "note-subsrc" }
};

/* number of entries in alias_list */
static Int4 num_aliases = sizeof (alias_list) / sizeof (ModifierAlias);
1040 
/* Modifier names that GetCanonicalName checks first; a match is returned in
 * the spelling given here.  Because this table is consulted before
 * alias_list, "note" and "comment" resolve to themselves rather than to the
 * alias_list target "note-orgmod".
 */
static CharPtr protein_modifier_names [] =
{
  "gene",
  "gene_syn",
  "protein",
  "protein_desc",
  "note",
  "comment",
  "orf",
  "function",
  "EC_number"
};

/* number of entries in protein_modifier_names */
static Int4 num_protein_modifier_names = sizeof (protein_modifier_names) / sizeof (CharPtr);
1055 
/* Maps a modifier name as written in a definition line to its canonical
 * spelling.
 *
 * Lookups are tried in this order and the first hit wins:
 *   1. protein_modifier_names;
 *   2. OrgMod qualifiers (EquivalentOrgMod);
 *   3. SubSource qualifiers (EquivalentSubSourceEx with TRUE when the name
 *      contains "primer", EquivalentSubSource otherwise);
 *   4. alias_list.
 * A name matching none of them is returned as given; a name without text
 * yields an empty string.
 *
 * The result is always a newly allocated string that the caller frees.
 */
static CharPtr GetCanonicalName (CharPtr mod_name)
{
  Int4    idx;
  Uint1   qual;

  if (StringHasNoText (mod_name)) {
    return StringSave ("");
  }

  idx = 0;
  while (idx < num_protein_modifier_names) {
    if (StringsAreEquivalent (mod_name, protein_modifier_names [idx])) {
      return StringSave (protein_modifier_names [idx]);
    }
    idx++;
  }

  qual = EquivalentOrgMod (mod_name);
  if (qual != 0) {
    return StringSave (GetOrgModQualName (qual));
  }

  if (StringISearch (mod_name, "primer") != NULL) {
    qual = EquivalentSubSourceEx (mod_name, TRUE);
  } else {
    qual = EquivalentSubSource (mod_name);
  }
  if (qual != 0) {
    return StringSave (GetSubsourceQualName (qual));
  }

  idx = 0;
  while (idx < num_aliases) {
    if (StringsAreEquivalent (alias_list [idx].alias, mod_name)) {
      return StringSave (alias_list [idx].modifier);
    }
    idx++;
  }

  return StringSave (mod_name);
}
1095 
/* Category of a bracketed modifier, as assigned by GetModifierType from
 * the modifier's canonical name.
 */
typedef enum {
  eModifierType_SourceQual = 0,     /* fallback when no other name matches */
  eModifierType_Organism,           /* "organism" or "org" */
  eModifierType_Location,           /* "location" */
  eModifierType_Lineage,            /* "lineage" */
  eModifierType_GeneticCode,        /* "genetic_code" */
  eModifierType_GeneticCodeComment, /* "gencode_comment" */
  eModifierType_NucGeneticCode,     /* "gcode" */
  eModifierType_MitoGeneticCode,    /* "mgcode" */
  eModifierType_MolType,            /* "moltype" */
  eModifierType_Molecule,           /* "molecule" */
  eModifierType_Origin,             /* "origin" */
  eModifierType_Topology,           /* "topology" */
  eModifierType_CommonName,         /* "common name" */
  eModifierType_Technique,          /* "tech" */
  eModifierType_Protein             /* any entry of protein_modifier_names */
} EModifierType;
1113 
1114 typedef struct modifierinfo
1115 {
1116   CharPtr       name;
1117   Uint1         subtype;
1118   CharPtr       value;
1119   EModifierType modtype;
1120 } ModifierInfoData, PNTR ModifierInfoPtr;
1121 
ModifierInfoNew(void)1122 static ModifierInfoPtr ModifierInfoNew (void)
1123 {
1124   ModifierInfoPtr mip;
1125   mip = (ModifierInfoPtr) MemNew (sizeof (ModifierInfoData));
1126   if (mip == NULL) return NULL;
1127   mip->name = NULL;
1128   mip->value = NULL;
1129   mip->modtype = eModifierType_SourceQual;
1130   return mip;
1131 }
1132 
/* Releases a ModifierInfoData together with its name and value strings.
 * A NULL argument is accepted.  The return value is the result of the
 * final MemFree, so callers can write "mip = ModifierInfoFree (mip);".
 */
static ModifierInfoPtr ModifierInfoFree (ModifierInfoPtr mip)
{
  if (mip != NULL)
  {
    mip->name = MemFree (mip->name);
    mip->value = MemFree (mip->value);
    mip = MemFree (mip);
  }
  return mip;
}
1141 
/* Frees a ValNode list whose data.ptrvalue fields hold ModifierInfoPtr
 * items, releasing both the items and the nodes.  Always returns NULL.
 */
static ValNodePtr ModifierInfoListFree (ValNodePtr list)
{
  ValNodePtr next_node;

  while (list != NULL)
  {
    next_node = list->next;
    /* detach the node so that ValNodeFree releases only this one */
    list->next = NULL;
    list->data.ptrvalue = ModifierInfoFree (list->data.ptrvalue);
    ValNodeFree (list);
    list = next_node;
  }
  return NULL;
}
1151 
GetModifierType(CharPtr mod_name)1152 static EModifierType GetModifierType (CharPtr mod_name)
1153 {
1154   Int4 i;
1155   CharPtr canonical_name;
1156   EModifierType returntype;
1157 
1158   canonical_name = GetCanonicalName (mod_name);
1159 
1160   if (StringHasNoText (canonical_name))
1161   {
1162     returntype = eModifierType_SourceQual;
1163   }
1164   else if (StringICmp (canonical_name, "organism") == 0
1165            || StringICmp (canonical_name, "org") == 0)
1166   {
1167 	returntype = eModifierType_Organism;
1168   }
1169   else if (StringICmp (canonical_name, "location") == 0)
1170   {
1171     returntype = eModifierType_Location;
1172   }
1173   else if (StringICmp (canonical_name, "lineage") == 0)
1174   {
1175     returntype = eModifierType_Lineage;
1176   }
1177   else if (StringICmp (canonical_name, "gcode") == 0)
1178   {
1179     returntype = eModifierType_NucGeneticCode;
1180   }
1181   else if (StringICmp (canonical_name, "mgcode") == 0)
1182   {
1183     returntype = eModifierType_MitoGeneticCode;
1184   }
1185   else if (StringICmp (canonical_name, "genetic_code") == 0)
1186   {
1187     returntype = eModifierType_GeneticCode;
1188   }
1189   else if (StringICmp (canonical_name, "gencode_comment") == 0)
1190   {
1191     returntype = eModifierType_GeneticCodeComment;
1192   }
1193   else if (StringICmp (canonical_name, "moltype") == 0)
1194   {
1195     returntype = eModifierType_MolType;
1196   }
1197   else if (StringICmp (canonical_name, "molecule") == 0)
1198   {
1199     returntype = eModifierType_Molecule;
1200   }
1201   else if (StringICmp (canonical_name, "origin") == 0)
1202   {
1203     returntype = eModifierType_Origin;
1204   }
1205   else if (StringICmp (canonical_name, "topology") == 0)
1206   {
1207     returntype = eModifierType_Topology;
1208   }
1209   else if (StringICmp (canonical_name, "common name") == 0)
1210   {
1211     returntype = eModifierType_CommonName;
1212   }
1213   else if (StringICmp (canonical_name, "tech") == 0)
1214   {
1215     returntype = eModifierType_Technique;
1216   }
1217   else
1218   {
1219     for (i = 0; i < num_protein_modifier_names; i++)
1220     {
1221       if (StringICmp (canonical_name, protein_modifier_names[i]) == 0)
1222       {
1223         returntype = eModifierType_Protein;
1224         canonical_name = MemFree (canonical_name);
1225         return returntype;
1226       }
1227     }
1228     returntype = eModifierType_SourceQual;
1229   }
1230 
1231   canonical_name = MemFree (canonical_name);
1232   return returntype;
1233 }
1234 
AllowMultipleValues(CharPtr mod_name)1235 static Boolean AllowMultipleValues (CharPtr mod_name)
1236 {
1237   EModifierType mod_type;
1238   Boolean       rval = FALSE;
1239 
1240   mod_type = GetModifierType (mod_name);
1241   switch (mod_type)
1242   {
1243     case eModifierType_SourceQual:
1244       if (! IsNonTextModifier (mod_name))
1245       {
1246         rval = TRUE;
1247       }
1248       break;
1249     case eModifierType_CommonName:
1250       rval = TRUE;
1251       break;
1252     case eModifierType_Organism:
1253       rval = TRUE;
1254       break;
1255     default:
1256       rval = FALSE;
1257       break;
1258   }
1259   return rval;
1260 }
1261 
/* Result codes returned by DetectBadBracketing. */
typedef enum
{
  BRACKET_ERR_NO_ERR = 0,            /* bracketing is well formed */
  BRACKET_ERR_MISMATCHED_BRACKETS,   /* a '[', ']' or '=' out of sequence */
  BRACKET_ERR_MISSING_EQUALS,        /* text after '[' but no '=' follows */
  BRACKET_ERR_MULT_EQUALS,           /* a second '=' before the closing ']' */
  BRACKET_ERR_NO_MOD_NAME,           /* only blanks between '[' and '=' */
  BRACKET_ERR_MISMATCHED_QUOTES      /* an opening quote is never closed */
} bracketing_err_num;
1271 
ExpectToken(CharPtr cp)1272 static Char ExpectToken (CharPtr cp)
1273 {
1274   CharPtr valstart;
1275 
1276   if (cp == NULL)
1277   {
1278     return 0;
1279   }
1280   else if (*cp == '[')
1281   {
1282     valstart = cp + 1 + StringSpn (cp + 1, " \t");
1283     if (StringLen (valstart) > 3
1284         && (StringNICmp (valstart, "dna", 3) == 0
1285             || StringNICmp (valstart, "rna", 3) == 0
1286             || StringNICmp (valstart, "orf", 3) == 0)
1287         && *(valstart + 3 + StringSpn (valstart + 3, " \t")) == ']')
1288     {
1289       return ']';
1290     }
1291     else
1292     {
1293       return '=';
1294     }
1295   }
1296   else if (*cp == '=')
1297   {
1298     return ']';
1299   }
1300   else if (*cp == ']')
1301   {
1302     return '[';
1303   }
1304   else
1305   {
1306     return 0;
1307   }
1308 }
1309 
1310 /* When we are looking for double-quotation marks to use for delimiting
1311  * sections of a title that should not be parsed or values that may contain
1312  * brackets, equals signs, or other reserved characters, skip over
1313  * quotation marks that are preceded by the escape character (backslash).
1314  * This allows quotation marks to be included in a quoted string.
1315  */
NextUnescapedQuote(CharPtr str)1316 static CharPtr NextUnescapedQuote (CharPtr str)
1317 {
1318   CharPtr cp;
1319 
1320   if (StringHasNoText (str))
1321   {
1322     return NULL;
1323   }
1324   cp = StringChr (str, '"');
1325   if (cp != NULL && cp != str)
1326   {
1327     while (cp != NULL && *(cp - 1) == '\\')
1328     {
1329       cp = StringChr (cp + 1, '"');
1330     }
1331   }
1332   return cp;
1333 }
1334 
1335 /* This function steps backward from str_end until it has located
1336  * an unescaped double-quotation mark or it has reached the
1337  * start of the string (str_start).
1338  */
FindPreviousUnescapedQuote(CharPtr str_start,CharPtr str_end)1339 static CharPtr FindPreviousUnescapedQuote (CharPtr str_start, CharPtr str_end)
1340 {
1341   CharPtr cp;
1342   if (str_start == NULL || str_end == NULL || str_end < str_start)
1343   {
1344     return NULL;
1345   }
1346 
1347   cp = str_end;
1348   while (cp > str_start && (*cp != '"' || *(cp - 1) == '\\'))
1349   {
1350     cp--;
1351   }
1352   if (*cp != '"')
1353   {
1354     cp = NULL;
1355   }
1356   return cp;
1357 }
1358 
1359 
1360 /* This function finds the next bracketing token ([, =, or ]) in
1361  * the string that is not enclosed by unescaped quotation marks.
1362  */
NextBracketToken(CharPtr str)1363 static CharPtr NextBracketToken (CharPtr str)
1364 {
1365   CharPtr next_quote;
1366   CharPtr cp;
1367 
1368   if (StringHasNoText (str))
1369   {
1370     return NULL;
1371   }
1372 
1373   cp = str;
1374   while (*cp != 0)
1375   {
1376     switch (*cp)
1377     {
1378       case '"':
1379         if (cp == str || (*(cp - 1) != '\\'))
1380         {
1381           next_quote = NextUnescapedQuote (cp + 1);
1382           if (next_quote == NULL)
1383           {
1384             return cp;
1385           }
1386           else
1387           {
1388             cp = next_quote + 1;;
1389           }
1390         }
1391         else
1392         {
1393           cp++;
1394         }
1395         break;
1396       case '[':
1397       case ']':
1398       case '=':
1399         return cp;
1400       default:
1401         cp++;
1402     }
1403   }
1404 
1405   return NULL;
1406 }
1407 
DetectBadBracketing(CharPtr str)1408 static Int4 DetectBadBracketing (CharPtr str)
1409 {
1410   CharPtr cp;
1411   Char    expected_token;
1412   CharPtr last_token = NULL, namestart;
1413 
1414   if (StringHasNoText (str))
1415   {
1416     return BRACKET_ERR_NO_ERR;
1417   }
1418 
1419   expected_token = '[';
1420   cp = NextBracketToken (str);
1421   while (cp != NULL)
1422   {
1423     switch (*cp)
1424     {
1425       case '"':
1426         return BRACKET_ERR_MISMATCHED_QUOTES;
1427         break;
1428       case '[':
1429       case ']':
1430       case '=':
1431         if (expected_token == *cp)
1432         {
1433           if (expected_token == '=' && last_token != NULL)
1434           {
1435             namestart = last_token + 1 + StringSpn (last_token + 1, " \t");
1436             if (namestart == cp)
1437             {
1438               return BRACKET_ERR_NO_MOD_NAME;
1439             }
1440           }
1441           expected_token = ExpectToken (cp);
1442           last_token = cp;
1443         }
1444         else if (expected_token == '=')
1445         {
1446           if (cp - last_token - 1 == StringSpn (last_token + 1, " \t"))
1447           {
1448             return BRACKET_ERR_MISMATCHED_BRACKETS;
1449           }
1450           else
1451           {
1452             return BRACKET_ERR_MISSING_EQUALS;
1453           }
1454         }
1455         else if (*cp == '=')
1456         {
1457           if (expected_token == ']')
1458           {
1459             return BRACKET_ERR_MULT_EQUALS;
1460           }
1461           else
1462           {
1463             return BRACKET_ERR_MISMATCHED_BRACKETS;
1464           }
1465         }
1466         else
1467         {
1468           return BRACKET_ERR_MISMATCHED_BRACKETS;
1469         }
1470         break;
1471     }
1472     cp = NextBracketToken (cp + 1);
1473   }
1474 
1475   if (cp == NULL && expected_token != '[')
1476   {
1477     return BRACKET_ERR_MISMATCHED_BRACKETS;
1478   }
1479 
1480   return BRACKET_ERR_NO_ERR;
1481 }
1482 
1483 static ModifierInfoPtr
ParseOneBracketedModifier(CharPtr str,CharPtr PNTR bracket_start,CharPtr PNTR bracket_stop)1484 ParseOneBracketedModifier
1485 (CharPtr      str,
1486  CharPtr PNTR bracket_start,
1487  CharPtr PNTR bracket_stop)
1488 {
1489   CharPtr         start, stop, eq_loc;
1490   ModifierInfoPtr mip;
1491   Int4            value_len, name_len;
1492   CharPtr         canonical_name;
1493 
1494   start = NextBracketToken (str);
1495   while (start != NULL && *start != '[')
1496   {
1497     start = NextBracketToken (start + 1);
1498   }
1499   if (start == NULL) return NULL;
1500   eq_loc = NextBracketToken (start + 1);
1501   if (eq_loc == NULL) return NULL;
1502   if (*eq_loc == ']')
1503   {
1504     stop = eq_loc;
1505   }
1506   else if (*eq_loc == '=')
1507   {
1508     stop = NextBracketToken (eq_loc + 1);
1509   }
1510   else
1511   {
1512     return NULL;
1513   }
1514 
1515   if (stop == NULL || *stop != ']') return NULL;
1516 
1517   mip = ModifierInfoNew();
1518   if (mip == NULL) return NULL;
1519 
1520   /* copy in modifier name */
1521   name_len = eq_loc - start + 1;
1522   mip->name = (CharPtr) MemNew (name_len * sizeof (Char));
1523   if (mip->name == NULL)
1524   {
1525     mip = ModifierInfoFree (mip);
1526     return NULL;
1527   }
1528   StringNCpy (mip->name, start + 1, name_len - 2);
1529   mip->name [name_len - 1] = 0;
1530   TrimSpacesAroundString (mip->name);
1531   canonical_name = GetCanonicalName (mip->name);
1532   mip->name = MemFree (mip->name);
1533   mip->name = canonical_name;
1534   if (StringICmp (mip->name, "note") == 0)
1535   {
1536     mip->name = MemFree (mip->name);
1537     mip->name = StringSave ("Note-SubSrc");
1538   }
1539 
1540   /* [orf], [rna], and [dna] don't have values */
1541   if (stop > eq_loc)
1542   {
1543     value_len = stop - eq_loc + 1;
1544     mip->value = (CharPtr) MemNew (value_len * sizeof (Char));
1545     if (mip->value == NULL)
1546     {
1547       mip = ModifierInfoFree (mip);
1548       return NULL;
1549     }
1550 
1551     StringNCpy (mip->value, eq_loc + 1, value_len - 2);
1552     mip->value [value_len - 1] = 0;
1553     TrimSpacesAroundString (mip->value);
1554   }
1555 
1556   mip->modtype = GetModifierType (mip->name);
1557   if (mip->modtype == eModifierType_SourceQual)
1558   {
1559     mip->subtype = FindTypeForModNameText (mip->name);
1560   }
1561   else
1562   {
1563     mip->subtype = 0;
1564   }
1565 
1566   if (bracket_start != NULL)
1567   {
1568     *bracket_start = start;
1569   }
1570 
1571   if (bracket_stop != NULL)
1572   {
1573     *bracket_stop = stop;
1574   }
1575 
1576   return mip;
1577 }
1578 
ParseAllBracketedModifiers(CharPtr str)1579 static ValNodePtr ParseAllBracketedModifiers (CharPtr str)
1580 {
1581   CharPtr         stop, cp;
1582   ValNodePtr      list = NULL;
1583   ModifierInfoPtr mip;
1584 
1585   cp = str;
1586   mip = ParseOneBracketedModifier (cp, NULL, &stop);
1587   while (mip != NULL && stop != NULL)
1588   {
1589     ValNodeAddPointer (&list, 0, mip);
1590     cp = stop + 1;
1591     mip = ParseOneBracketedModifier (cp, NULL, &stop);
1592   }
1593   return list;
1594 }
1595 
1596 
GetPresentModifierNames(CharPtr str)1597 NLM_EXTERN CharPtr GetPresentModifierNames (CharPtr str)
1598 {
1599   ValNodePtr      list = NULL, vnp;
1600   Int4            len = 1;
1601   CharPtr         text = NULL;
1602   ModifierInfoPtr mip;
1603 
1604   list = ParseAllBracketedModifiers(str);
1605   if (list == NULL) {
1606     return StringSave ("");
1607   }
1608   for (vnp = list; vnp != NULL; vnp = vnp->next) {
1609     mip = (ModifierInfoPtr)vnp->data.ptrvalue;
1610     len += StringLen (mip->name) + 2;
1611   }
1612 
1613   text = (CharPtr) MemNew (sizeof (Char) * len);
1614   mip = (ModifierInfoPtr)list->data.ptrvalue;
1615   StringCpy (text, mip->name);
1616   for (vnp = list->next; vnp != NULL; vnp = vnp->next) {
1617     mip = (ModifierInfoPtr)vnp->data.ptrvalue;
1618     StringCat (text, ", ");
1619     StringCat (text, mip->name);
1620   }
1621 
1622   list = ModifierInfoListFree(list);
1623 
1624   return text;
1625 }
1626 
1627 
IsValueInEnumAssoc(CharPtr value,EnumFieldAssocPtr eap)1628 static Boolean IsValueInEnumAssoc (CharPtr value, EnumFieldAssocPtr eap)
1629 {
1630   while (eap != NULL && eap->name != NULL)
1631   {
1632     if (StringICmp (eap->name, value) == 0)
1633     {
1634       return TRUE;
1635     }
1636     eap++;
1637   }
1638   return FALSE;
1639 }
1640 
GeneticCodeFromStringAndList(CharPtr str,ValNodePtr list)1641 static Int4 GeneticCodeFromStringAndList (CharPtr str, ValNodePtr list)
1642 {
1643   while (list != NULL)
1644   {
1645     if (StringICmp (str, list->data.ptrvalue) == 0)
1646     {
1647       return list->choice;
1648     }
1649     list = list->next;
1650   }
1651   return 0;
1652 }
1653 
GeneticCodeFromString(CharPtr str)1654 static Int4 GeneticCodeFromString (CharPtr str)
1655 {
1656   ValNodePtr gencodelist;
1657   Int4       gcode = 0;
1658 
1659   if (StringHasNoText (str))
1660   {
1661     gcode = 0;
1662   }
1663   else if (isdigit (str[0]))
1664   {
1665     gcode = atoi (str);
1666   }
1667   else
1668   {
1669     gencodelist = GetGeneticCodeValNodeList ();
1670     gcode = GeneticCodeFromStringAndList (str, gencodelist);
1671     gencodelist = ValNodeFreeData (gencodelist);
1672   }
1673   return gcode;
1674 }
1675 
MolTypeFromString(CharPtr str)1676 static Int4 MolTypeFromString (CharPtr str)
1677 {
1678   EnumFieldAssocPtr  eap;
1679 
1680   if (StringICmp (str, "dna") == 0)
1681   {
1682     return 253;
1683   }
1684   else if (StringICmp (str, "rna") == 0)
1685   {
1686     return 254;
1687   }
1688   else if (StringICmp (str, "genomic") == 0)
1689   {
1690     return 253;
1691   }
1692   for (eap = biomol_nucGen_alist; eap != NULL && eap->name != NULL; eap++)
1693   {
1694     if (StringsAreEquivalent (eap->name, str))
1695     {
1696       return eap->value;
1697     }
1698   }
1699   for (eap = biomol_nucX_alist; eap != NULL && eap->name != NULL; eap++)
1700   {
1701     if (StringsAreEquivalent (eap->name, str))
1702     {
1703       return eap->value;
1704     }
1705     else if (eap->name [0] == 'm'
1706              && StringICmp (eap->name, "mRNA [cDNA]") == 0
1707              && StringICmp (str, "mRNA") == 0)
1708     {
1709       return eap->value;
1710     }
1711   }
1712   return 0;
1713 }
1714 
1715 
1716 /* This function looks at a parsed modifier structure to determine whether the
1717  * value is acceptable for this modifier type.
1718  */
ModifierHasInvalidValue(ModifierInfoPtr mip)1719 static Boolean ModifierHasInvalidValue (ModifierInfoPtr mip)
1720 {
1721   Boolean rval = FALSE;
1722 
1723   if (mip != NULL
1724       && ((mip->modtype == eModifierType_Location
1725   	          && !IsValueInEnumAssoc (mip->value, biosource_genome_simple_alist))
1726   	    || (mip->modtype == eModifierType_Origin
1727   	          && !IsValueInEnumAssoc (mip->value, biosource_origin_alist))
1728   	    || (mip->modtype == eModifierType_Topology
1729   	          && !IsValueInEnumAssoc (mip->value, topology_nuc_alist))
1730   	    || (mip->modtype == eModifierType_Molecule
1731   	          && !IsValueInEnumAssoc (mip->value, molecule_alist))
1732   	    || ((mip->modtype == eModifierType_GeneticCode
1733   	              || mip->modtype == eModifierType_NucGeneticCode
1734   	              || mip->modtype == eModifierType_MitoGeneticCode)
1735   	             && GeneticCodeFromString (mip->value) == 0)
1736   	    || (mip->modtype == eModifierType_MolType
1737   	             && MolTypeFromString (mip->value) == 0)
1738   	    || (mip->modtype == eModifierType_SourceQual
1739   	             && IsNonTextModifier (mip->name)
1740   	             && !StringHasNoText (mip->value)
1741                  && StringICmp (mip->value, "TRUE") != 0
1742                  && StringICmp (mip->value, "FALSE") != 0)))
1743   {
1744     rval = TRUE;
1745   }
1746 
1747   return rval;
1748 }
1749 
1750 /* This section contains functions for finding, changing, and removing
1751  * bracketed value pairs in definition lines.
1752  * These functions include:
1753  *
1754  * FindValuePairInDefLine - returns pointer to position in title where
1755  *                          the first bracketed pair with the specified
1756  *                          modifier name (or one of its aliases) occurs.
1757  *                          Useful for non-text modifiers, which do not
1758  *                          have values.
1759  *
1760  * FindValueFromPairInDefline - returns value from the first bracketed
1761  *                              pair in the title with the specified
1762  *                              modifier name (or one of its aliases).
1763  *
1764  * RemoveValueFromDefline - removes the first bracketed pair in the title
1765  *                          with the specified modifier name (or one of its aliases)
1766  *
1767  * ReplaceValueInThisValuePair - replaces the value in the specified value pair.
1768  *                               if new value is empty, pair is removed.
1769  *
1770  * ReplaceValueInOneDefLine - finds the first bracketed pair in the title
1771  *                            with the specified modifier name (or one of its aliases).
1772  *                            If a pair is found, the value in that pair is replaced
1773  *                            with the new value; otherwise a new pair is added to
1774  *                            the title.
1775  *
 * ReplaceOneModifierValue - finds all bracketed pairs in a title with the specified
 *                           modifier name or one of its aliases and the specified value
 *                           and replaces that value with the new value (or removes the
 *                           pair, if the new value is empty).
1780  *
1781  * RemoveAllDuplicatePairsFromOneTitle - removes all bracketed pairs that are duplicates
1782  *                                       in name and value of another pair already in
1783  *                                       the title.
1784  *
1785  * RemoveMeaninglessEmptyPairsFromOneTitle - removes bracketed pairs without values
1786  *                                           that are not non-text modifiers
1787  *
1788  * StripAllInstancesOfModNameFromTitle - removes all mentions of specified modifier
1789  *                                       name from title
1790  *
1791  */
1792 
/* Locates the first bracketed pair in def_line whose canonical modifier
 * name matches that of mod_name (compared ignoring case).
 *
 * Returns a pointer to the pair's '[' and, if valstop is not NULL, stores
 * the position of its ']' there.  Returns NULL, leaving *valstop
 * untouched, when there is no such pair or an argument is NULL.
 */
static CharPtr FindValuePairInDefLine (CharPtr mod_name, CharPtr def_line, CharPtr PNTR valstop)
{
  CharPtr         cp, start, stop;
  CharPtr         found = NULL;
  ModifierInfoPtr mip;
  CharPtr         wanted_name;

  if (mod_name == NULL || def_line == NULL)
  {
    return NULL;
  }

  cp = NextBracketToken (def_line);
  if (cp == NULL)
  {
    return NULL;
  }

  wanted_name = GetCanonicalName (mod_name);

  /* step through the pairs until the name matches or parsing stops */
  mip = ParseOneBracketedModifier (cp, &start, &stop);
  while (mip != NULL && start != NULL && stop != NULL
         && StringICmp (mip->name, wanted_name) != 0)
  {
    cp = NextBracketToken (stop + 1);
    mip = ModifierInfoFree (mip);
    mip = ParseOneBracketedModifier (cp, &start, &stop);
  }

  if (mip != NULL && StringICmp (mip->name, wanted_name) == 0)
  {
    if (valstop != NULL)
    {
      *valstop = stop;
    }
    found = start;
  }

  mip = ModifierInfoFree (mip);
  wanted_name = MemFree (wanted_name);
  return found;
}
1838 
/* Locates the val_num-th (counting from 0) bracketed pair named val_name
 * in title.
 *
 * Returns a pointer to that pair's '[' or NULL when there are not enough
 * such pairs.  If p_val_end is not NULL it receives the ']' position of
 * the last pair that was found during the search (NULL if none was).
 */
static CharPtr FindNthValuePairInDefLine (CharPtr title, CharPtr val_name, Int4 val_num, CharPtr PNTR p_val_end)
{
  CharPtr pair_start;
  CharPtr pair_end = NULL;
  Int4    seen;

  if (StringHasNoText (val_name))
  {
    return NULL;
  }

  pair_start = FindValuePairInDefLine (val_name, title, &pair_end);
  for (seen = 0;
       pair_start != NULL && pair_end != NULL && seen != val_num;
       seen++)
  {
    /* resume the search right after the pair just found */
    pair_start = FindValuePairInDefLine (val_name, pair_end + 1, &pair_end);
  }

  if (p_val_end != NULL)
  {
    *p_val_end = pair_end;
  }
  return pair_start;
}
1862 
FindValueFromPairInDefline(CharPtr mod_name,CharPtr def_line)1863 NLM_EXTERN CharPtr FindValueFromPairInDefline (CharPtr mod_name, CharPtr def_line)
1864 {
1865   CharPtr bracket_start, eq_loc, bracket_end;
1866   CharPtr new_val = NULL;
1867   Int4 new_val_len;
1868 
1869   bracket_start = FindValuePairInDefLine (mod_name, def_line, &bracket_end);
1870   if (bracket_start == NULL || bracket_end == NULL)
1871   {
1872     return NULL;
1873   }
1874 
1875   eq_loc = NextBracketToken (bracket_start + 1);
1876   if (eq_loc == NULL || *eq_loc != '=')
1877   {
1878     return NULL;
1879   }
1880 
1881   new_val_len = bracket_end - eq_loc;
1882   new_val = (CharPtr) MemNew (new_val_len * sizeof (Char));
1883   if (new_val != NULL)
1884   {
1885     StringNCpy (new_val, eq_loc + 1, new_val_len - 1);
1886     new_val [new_val_len - 1] = 0;
1887   }
1888   TrimSpacesAroundString (new_val);
1889   return new_val;
1890 }
1891 
/* Like FindValueFromPairInDefline, but only accepts the first pair named
 * mod_name if it starts at or before cp.  A NULL cp imposes no limit.
 * Returns a newly allocated value string, or NULL.
 */
static CharPtr FindValueFromPairInDeflineBeforeCharPtr (CharPtr mod_name, CharPtr def_line, CharPtr cp)
{
  CharPtr pair_start, pair_end;

  pair_start = FindValuePairInDefLine (mod_name, def_line, &pair_end);
  if (pair_start != NULL && (cp == NULL || pair_start <= cp))
  {
    return FindValueFromPairInDefline (mod_name, pair_start);
  }
  return NULL;
}
1906 
/* Deletes, in place, the text from pair_start up to (not including)
 * pair_end, together with any white space that begins at pair_end.
 *
 * pair_start - first character to remove.
 * pair_end   - first character after the removed text; to drop a whole
 *              bracketed pair this must point just past its ']'.
 * defline    - the string containing both pointers; only checked for NULL.
 *
 * Nothing is done when an argument is NULL or pair_end <= pair_start.
 */
static void RemoveValuePairFromDefline (CharPtr pair_start, CharPtr pair_end, CharPtr defline)
{
  CharPtr src, dst;

  if (pair_start == NULL || pair_end == NULL || defline == NULL
      || pair_end <= pair_start)
  {
    return;
  }

  dst = pair_start;
  src = pair_end;
  /* the cast keeps a negative char value from reaching isspace, which is
   * only defined for unsigned char values and EOF */
  while (isspace ((unsigned char) *src))
  {
    src++;
  }

  /* slide the remainder of the string down over the removed text */
  while (*src != 0)
  {
    *dst = *src;
    dst++;
    src++;
  }
  *dst = 0;
}
1932 
RemoveValueFromDefline(CharPtr mod_name,CharPtr def_line)1933 NLM_EXTERN void RemoveValueFromDefline (CharPtr mod_name, CharPtr def_line)
1934 {
1935   CharPtr bracket_start, bracket_end;
1936 
1937   bracket_start = FindValuePairInDefLine (mod_name, def_line, &bracket_end);
1938   if (bracket_start == NULL || bracket_end == NULL)
1939   {
1940     return;
1941   }
1942 
1943   RemoveValuePairFromDefline (bracket_start, bracket_end + 1, def_line);
1944 }
1945 
/* Returns a newly allocated copy of orig_value that can be stored as a
 * bracketed modifier value, i.e. with brackets protected by quotation
 * marks and with balanced quotation marks.
 *
 *   - no brackets and no unescaped quotes: plain copy;
 *   - brackets but no unescaped quotes: the whole value is wrapped in
 *     quotation marks;
 *   - otherwise the value is scanned: quoted sections are skipped, each
 *     bracket outside of quotes is wrapped in its own pair of quotation
 *     marks, and an unclosed quotation mark is closed at the end of the
 *     value (after a space if the value ends in a backslash, so that the
 *     added quotation mark is not escaped).
 *
 * Returns NULL when orig_value is NULL or an allocation fails.  The caller
 * frees the result.
 */
static CharPtr AddQuotesToValueWithBrackets (CharPtr orig_value)
{
  CharPtr first_bracket, first_quote;
  CharPtr cp, new_value = NULL, tmp_value;
  Char    bracket_buf [2];
  Int4    offset;

  if (orig_value == NULL)
  {
    return NULL;
  }
  else if (StringHasNoText (orig_value))
  {
    return StringSave (orig_value);
  }

  new_value = StringSave (orig_value);
  if (new_value == NULL)
  {
    return NULL;
  }

  first_bracket = StringChr (new_value, '[');
  if (first_bracket == NULL)
  {
    first_bracket = StringChr (new_value, ']');
  }

  first_quote = NextUnescapedQuote (new_value);

  if (first_bracket == NULL && first_quote == NULL)
  {
    return new_value;
  }
  else if (first_bracket != NULL && first_quote == NULL)
  {
    /* room for the value, two quotation marks and the terminator */
    tmp_value = (CharPtr) MemNew ((StringLen (new_value) + 3) * sizeof (Char));
    if (tmp_value == NULL)
    {
      new_value = MemFree (new_value);
      return NULL;
    }
    StringCat (tmp_value, "\"");
    StringCat (tmp_value, new_value);
    StringCat (tmp_value, "\"");
    new_value = MemFree (new_value);
    new_value = tmp_value;
    return new_value;
  }

  /* Scan the working copy, not orig_value: the offsets computed below are
   * relative to new_value, and new_value is what gets rebuilt each time a
   * bracket is wrapped.
   */
  cp = new_value;

  bracket_buf [0] = 0;
  bracket_buf [1] = 0;

  while (*cp != 0)
  {
    if (*cp == '"' && (cp == new_value || *(cp - 1) != '\\'))
    {
      /* opening quote: jump to its closing partner */
      cp = NextUnescapedQuote (cp + 1);
      if (cp == NULL)
      {
        /* unbalanced quote: close it at the end of the value */
        tmp_value = (CharPtr) MemNew ((StringLen (new_value) + 3) * sizeof (Char));
        if (tmp_value == NULL)
        {
          new_value = MemFree (new_value);
          return NULL;
        }
        StringCpy (tmp_value, new_value);
        if (new_value [StringLen (new_value) - 1] == '\\')
        {
          StringCat (tmp_value, " ");
        }
        StringCat (tmp_value, "\"");
        /* the working copy has been superseded by tmp_value */
        new_value = MemFree (new_value);
        return tmp_value;
      }
      else
      {
        cp++;
      }
    }
    else if (*cp == '[' || *cp == ']')
    {
      /* bracket outside of quotes: rebuild the value with "x" around it */
      tmp_value = (CharPtr) MemNew ((StringLen (new_value) + 3) * sizeof (Char));
      if (tmp_value == NULL)
      {
        new_value = MemFree (new_value);
        return new_value;
      }
      offset = (Int4) (cp - new_value);
      StringNCpy (tmp_value, new_value, offset);
      StringCat (tmp_value, "\"");
      bracket_buf [0] = *cp;
      StringCat (tmp_value, bracket_buf);
      StringCat (tmp_value, "\"");
      StringCat (tmp_value, cp + 1);
      new_value = MemFree (new_value);
      new_value = tmp_value;
      /* continue after the three characters just written: "x" */
      cp = new_value + offset + 3;
    }
    else
    {
      cp++;
    }
  }

  return new_value;
}
2050 
2051 static CharPtr
ReplaceValueInThisValuePair(CharPtr orig_defline,CharPtr value_loc,CharPtr value_name,CharPtr end_loc,CharPtr new_value)2052 ReplaceValueInThisValuePair
2053 (CharPtr orig_defline,
2054  CharPtr value_loc,
2055  CharPtr value_name,
2056  CharPtr end_loc,
2057  CharPtr new_value)
2058 {
2059   CharPtr new_title;
2060   Int4    new_title_len = 0;
2061   Boolean is_nontext;
2062   CharPtr tmp_name;
2063   CharPtr fixed_value;
2064 
2065   if (StringHasNoText (orig_defline) || value_loc == NULL || end_loc == NULL
2066       || *value_loc != '[' || *end_loc != ']')
2067   {
2068     return orig_defline;
2069   }
2070 
2071   fixed_value = AddQuotesToValueWithBrackets (new_value);
2072 
2073   if (StringHasNoText (fixed_value))
2074   {
2075     RemoveValuePairFromDefline (value_loc, end_loc, orig_defline);
2076   }
2077   else
2078   {
2079     /* keep part before pair and after pair, insert new value in position */
2080     new_title_len = StringLen (orig_defline)
2081                                + StringLen (value_name)
2082                                + StringLen (fixed_value)
2083                                + 5;
2084     new_title = MemNew (new_title_len * sizeof (Char));
2085     if (new_title != NULL)
2086     {
2087       if (value_loc > orig_defline)
2088       {
2089         StringNCpy (new_title, orig_defline, value_loc - orig_defline);
2090       }
2091       StringCat (new_title, "[");
2092       tmp_name = StringSave (value_name);
2093       tmp_name [0] = TO_LOWER (tmp_name [0]);
2094       StringCat (new_title, tmp_name);
2095       is_nontext = IsNonTextModifier (tmp_name);
2096       tmp_name = MemFree (tmp_name);
2097       StringCat (new_title, "=");
2098       if (!is_nontext)
2099       {
2100         StringCat (new_title, fixed_value);
2101       }
2102       StringCat (new_title, "]");
2103       if (end_loc != NULL && *end_loc != 0)
2104       {
2105         if (*end_loc == ']')
2106         {
2107           StringCat (new_title, end_loc + 1);
2108         }
2109         else
2110         {
2111           StringCat (new_title, end_loc);
2112         }
2113       }
2114       orig_defline = MemFree (orig_defline);
2115       orig_defline = new_title;
2116     }
2117   }
2118   TrimSpacesAroundString (orig_defline);
2119 
2120   fixed_value = MemFree (fixed_value);
2121 
2122   return orig_defline;
2123 }
2124 
/* Returns a newly allocated string consisting of old_string with
 * new_string inserted at character position offset.
 *
 * If either string is NULL a copy of the other one is returned.  An offset
 * below 0 is treated as 0 and an offset beyond the end of old_string as
 * the end, so the copy never reads or writes outside either buffer.
 * old_string is not modified or freed.  Returns NULL if allocation fails.
 */
static CharPtr InsertStringAtOffset (CharPtr old_string, CharPtr new_string, Int4 offset)
{
  Int4    old_len;
  Int4    new_len;
  CharPtr new_str = NULL;

  if (old_string == NULL)
  {
    new_str = StringSave (new_string);
  }
  else if (new_string == NULL)
  {
    new_str =  StringSave (old_string);
  }
  else
  {
    old_len = (Int4) StringLen (old_string);
    /* keep the split point inside old_string */
    if (offset < 0)
    {
      offset = 0;
    }
    else if (offset > old_len)
    {
      offset = old_len;
    }

    new_len = old_len + (Int4) StringLen (new_string) + 1;
    new_str = (CharPtr) MemNew (new_len * sizeof (Char));
    if (new_str != NULL)
    {
      StringNCpy (new_str, old_string, offset);
      new_str [offset] = 0;
      StringCat (new_str, new_string);
      if (offset < old_len)
      {
        StringCat (new_str, old_string + offset);
      }
    }
  }
  return new_str;
}
2154 
2155 static CharPtr
InsertValuePairAtOffset(CharPtr orig_defline,CharPtr value_name,CharPtr value_str,Int4 offset)2156 InsertValuePairAtOffset
2157 (CharPtr orig_defline,
2158  CharPtr value_name,
2159  CharPtr value_str,
2160  Int4    offset)
2161 {
2162   CharPtr pair_string, fixed_value;
2163 
2164   if (StringHasNoText (value_name) || offset < 0)
2165   {
2166     return orig_defline;
2167   }
2168 
2169   fixed_value = AddQuotesToValueWithBrackets (value_str);
2170 
2171   pair_string = (CharPtr) MemNew ((StringLen (value_name) + StringLen (fixed_value) + 6) * sizeof (Char));
2172   if (pair_string != NULL)
2173   {
2174     if (IsNonTextModifier (value_name))
2175     {
2176       sprintf (pair_string, "[%s=]", value_name);
2177     }
2178     else
2179     {
2180       sprintf (pair_string, "[%s=%s]", value_name, fixed_value);
2181     }
2182     orig_defline = InsertStringAtOffset (orig_defline, pair_string, offset);
2183     pair_string = MemFree (pair_string);
2184   }
2185   fixed_value = MemFree (fixed_value);
2186   return orig_defline;
2187 }
2188 
2189 
2190 static CharPtr
ReplaceValueInOneDefLineForOrganism(CharPtr orig_defline,CharPtr value_name,CharPtr new_value,CharPtr organism)2191 ReplaceValueInOneDefLineForOrganism
2192 (CharPtr orig_defline,
2193  CharPtr value_name,
2194  CharPtr new_value,
2195  CharPtr organism)
2196 {
2197   CharPtr value_loc = NULL, end_loc = NULL;
2198   CharPtr fixed_value;
2199   CharPtr next_org_loc = NULL, org_stop = NULL, first_org_stop = NULL;
2200   CharPtr first_organism;
2201 
2202   if (StringHasNoText (value_name))
2203   {
2204     return orig_defline;
2205   }
2206 
2207   /* if we want to add a value to a specific organism, we need to make sure
2208    * that we insert or replace a value after that organism name but before
2209    * the next organism name.
2210    */
2211 
2212   if (organism != NULL)
2213   {
2214     if (organism < orig_defline || organism - orig_defline > (Int4) StringLen (orig_defline))
2215     {
2216       organism = NULL;
2217     }
2218   }
2219 
2220   if (organism != NULL)
2221   {
2222     if (organism != FindValuePairInDefLine ("organism", organism, &org_stop))
2223     {
2224       return orig_defline;
2225     }
2226   }
2227 
2228   first_organism = FindValuePairInDefLine ("organism", orig_defline, &first_org_stop);
2229 
2230 
2231   if (organism == NULL)
2232   {
2233     organism = first_organism;
2234     org_stop = first_org_stop;
2235   }
2236 
2237   if (org_stop != NULL)
2238   {
2239     next_org_loc = FindValuePairInDefLine ("organism", org_stop + 1, NULL);
2240   }
2241 
2242   fixed_value = AddQuotesToValueWithBrackets (new_value);
2243 
2244   /* if this is the first organism, or if we have no organism, start looking for
2245    * a value to replace at the beginning of the line.
2246    */
2247   if (organism == NULL || organism == first_organism)
2248   {
2249     value_loc = FindValuePairInDefLine (value_name, orig_defline, &end_loc);
2250   }
2251   else
2252   {
2253     value_loc = FindValuePairInDefLine (value_name, organism, &end_loc);
2254   }
2255 
2256   if (next_org_loc != NULL && value_loc > next_org_loc)
2257   {
2258     value_loc = NULL;
2259   }
2260 
2261   if (StringHasNoText (fixed_value))
2262   {
2263     if (value_loc == NULL)
2264     {
2265       /* old line had no value, no new value provided, no change */
2266     }
2267     else
2268     {
2269       RemoveValuePairFromDefline (value_loc, end_loc, orig_defline);
2270     }
2271   }
2272   else
2273   {
2274     if (value_loc == NULL)
2275     {
2276       /* add new value just before next organism */
2277       if (next_org_loc == NULL)
2278       {
2279         orig_defline = InsertValuePairAtOffset (orig_defline, value_name, new_value,
2280                                                 StringLen (orig_defline));
2281       }
2282       else
2283       {
2284         orig_defline = InsertValuePairAtOffset (orig_defline, value_name, new_value,
2285                                                 next_org_loc - orig_defline);
2286       }
2287     }
2288     else
2289     {
2290       /* replace this value */
2291       orig_defline = ReplaceValueInThisValuePair (orig_defline, value_loc, value_name,
2292                                                   end_loc, new_value);
2293     }
2294   }
2295   TrimSpacesAroundString (orig_defline);
2296 
2297   fixed_value = MemFree (fixed_value);
2298 
2299   return orig_defline;
2300 }
2301 
2302 NLM_EXTERN CharPtr
ReplaceValueInOneDefLine(CharPtr orig_defline,CharPtr value_name,CharPtr new_value)2303 ReplaceValueInOneDefLine
2304 (CharPtr orig_defline,
2305  CharPtr value_name,
2306  CharPtr new_value)
2307 {
2308   CharPtr value_loc = NULL, end_loc = NULL;
2309 
2310   if (StringHasNoText (value_name))
2311   {
2312     return orig_defline;
2313   }
2314 
2315   value_loc = FindValuePairInDefLine (value_name, orig_defline, &end_loc);
2316 
2317   if (value_loc == NULL)
2318   {
2319     if (StringHasNoText (new_value))
2320     {
2321       /* old line had no value, no new value provided, no change */
2322       return orig_defline;
2323     }
2324     else
2325     {
2326       /* make sure value is added for first organism */
2327       orig_defline = ReplaceValueInOneDefLineForOrganism (orig_defline, value_name,
2328                                                           new_value, NULL);
2329     }
2330   }
2331   else
2332   {
2333     orig_defline = ReplaceValueInThisValuePair (orig_defline, value_loc, value_name, end_loc, new_value);
2334   }
2335 
2336   return orig_defline;
2337 }
2338 
2339 static CharPtr
ReplaceOneModifierValue(CharPtr title,CharPtr orig_name,CharPtr orig_value,CharPtr repl_value,Boolean is_nontext,Boolean copy_to_note)2340 ReplaceOneModifierValue
2341 (CharPtr title,
2342  CharPtr orig_name,
2343  CharPtr orig_value,
2344  CharPtr repl_value,
2345  Boolean is_nontext,
2346  Boolean copy_to_note)
2347 {
2348   CharPtr bracket_loc, eq_loc, end_bracket_loc, new_title;
2349   Int4    new_title_len;
2350   CharPtr orig_note, new_note;
2351   Boolean any_replaced = FALSE;
2352 
2353   if (StringHasNoText (title)
2354       || StringHasNoText (orig_name))
2355   {
2356     return title;
2357   }
2358 
2359   bracket_loc = FindValuePairInDefLine (orig_name, title, &end_bracket_loc);
2360   while (bracket_loc != NULL && end_bracket_loc != NULL)
2361   {
2362     eq_loc = NextBracketToken (bracket_loc + 1);
2363     if (eq_loc == NULL || *eq_loc != '=')
2364     {
2365       return title;
2366     }
2367     if ((StringNCmp (orig_value, eq_loc + 1, StringLen (orig_value)) == 0
2368         && StringLen (orig_value) == end_bracket_loc - eq_loc - 1)
2369         || (StringHasNoText (orig_value)
2370             && StringSpn (eq_loc + 1, " \t") == end_bracket_loc - eq_loc - 1))
2371     {
2372       new_title_len = StringLen (title) + StringLen (repl_value) - StringLen (orig_value) + 1;
2373       new_title = (CharPtr) MemNew (new_title_len * sizeof (Char));
2374       if (new_title == NULL)
2375       {
2376         return title;
2377       }
2378       if (is_nontext)
2379       {
2380         if (StringHasNoText (repl_value))
2381         {
2382           StringNCpy (new_title, title, bracket_loc - title);
2383           StringCat (new_title, end_bracket_loc + 1 + StringSpn (end_bracket_loc, " "));
2384         }
2385         else
2386         {
2387           StringNCpy (new_title, title, eq_loc - title + 1);
2388           StringCat (new_title, end_bracket_loc);
2389         }
2390       }
2391       else if (StringHasNoText (repl_value))
2392       {
2393         /* remove pair completely */
2394         StringNCpy (new_title, title, bracket_loc - title);
2395         StringCat (new_title, end_bracket_loc + 1);
2396       }
2397       else
2398       {
2399         StringNCpy (new_title, title, eq_loc - title + 1);
2400         StringCat (new_title, repl_value);
2401         StringCat (new_title, end_bracket_loc);
2402       }
2403 
2404       title = MemFree (title);
2405       title = new_title;
2406       any_replaced = TRUE;
2407       bracket_loc = FindValuePairInDefLine (orig_name, title, &end_bracket_loc);
2408     }
2409     else
2410     {
2411       bracket_loc = FindValuePairInDefLine (orig_name, end_bracket_loc, &end_bracket_loc);
2412     }
2413   }
2414 
2415   if (any_replaced && copy_to_note && !StringHasNoText (repl_value) && !StringHasNoText (orig_value))
2416   {
2417     orig_note = FindValueFromPairInDefline ("note", title);
2418     if (StringHasNoText (orig_note))
2419     {
2420       new_note = (CharPtr) MemNew ((StringLen (orig_name)
2421                                     + StringLen (orig_value) + 8) * sizeof (Char));
2422       if (new_note != NULL)
2423       {
2424         sprintf (new_note, "%s was %s", orig_name, orig_value);
2425       }
2426     }
2427     else
2428     {
2429       new_note = (CharPtr) MemNew ((StringLen (orig_note)
2430                                     + StringLen (orig_name)
2431                                     + StringLen (orig_value) + 8) * sizeof (Char));
2432       if (new_note != NULL)
2433       {
2434         sprintf (new_note, "%s; %s was %s", orig_note, orig_name, orig_value);
2435       }
2436     }
2437 
2438     if (new_note != NULL)
2439     {
2440       title = ReplaceValueInOneDefLine (title, "note", new_note);
2441     }
2442 
2443     orig_note = MemFree (orig_note);
2444     new_note = MemFree (new_note);
2445   }
2446 
2447   return title;
2448 }
2449 
2450 static Boolean IsUnrecognizedModifierName (ModifierInfoPtr mip, Boolean is_nuc);
2451 
/* RemoveRecognizedModifiersFromTitle
 * For every entry of modifier_info_list that is a recognized modifier name
 * (per IsUnrecognizedModifierName), calls RemoveValueFromDefline on title
 * with that name.  When is_nuc is set, protein modifiers are left alone.
 * title is edited in place.
 */
static void RemoveRecognizedModifiersFromTitle (CharPtr title, ValNodePtr modifier_info_list, Boolean is_nuc)
{
  ValNodePtr      vnp = modifier_info_list;
  ModifierInfoPtr mip;

  while (vnp != NULL) {
    mip = (ModifierInfoPtr) vnp->data.ptrvalue;
    vnp = vnp->next;

    if (mip == NULL) {
      continue;
    }
    if (IsUnrecognizedModifierName (mip, is_nuc)) {
      continue;
    }
    if (is_nuc && mip->modtype == eModifierType_Protein) {
      /* protein modifiers stay on nucleotide titles */
      continue;
    }
    RemoveValueFromDefline (mip->name, title);
  }
}
2465 
/* StripAllInstancesOfModNameFromTitle
 * Keeps calling RemoveValueFromDefline for mod_name on title for as long as
 * FindValueFromPairInDefline still returns a value for that name.  title is
 * edited in place; each value string returned by the lookup is freed here.
 */
static void StripAllInstancesOfModNameFromTitle (CharPtr mod_name, CharPtr title)
{
  CharPtr found;

  for (found = FindValueFromPairInDefline (mod_name, title);
       found != NULL;
       found = FindValueFromPairInDefline (mod_name, title))
  {
    RemoveValueFromDefline (mod_name, title);
    found = MemFree (found);
  }
}
2478 
/* RemoveAllDuplicatePairsFromOneTitle
 * Walks the bracketed modifiers of title from left to right.  For each one,
 * the text that follows its closing bracket is run through
 * ReplaceOneModifierValue with a NULL replacement, which drops later pairs
 * carrying the same name and value; the title is then rebuilt from the
 * untouched front part plus the cleaned remainder.
 *
 * title is consumed: the old buffer is freed on every pass and the final
 * buffer is returned.  If a buffer cannot be allocated the pass is abandoned
 * and the title as it stood at that point is returned, rather than freeing
 * it and continuing with a NULL pointer.
 */
static CharPtr RemoveAllDuplicatePairsFromOneTitle (CharPtr title)
{
  CharPtr         start_bracket, end_bracket, tmp_title, new_title;
  ModifierInfoPtr mip;
  Int4            offset;

  mip = ParseOneBracketedModifier (title, &start_bracket, &end_bracket);
  while (mip != NULL && start_bracket != NULL && end_bracket != NULL)
  {
    /* offset of the first character after this modifier's ']' */
    offset = end_bracket - title + 1;
    tmp_title = StringSave (title + offset);
    tmp_title = ReplaceOneModifierValue (tmp_title, mip->name, mip->value, NULL,
                                     IsNonTextModifier (mip->name), FALSE);
    new_title = (CharPtr) MemNew ((StringLen (tmp_title) + offset + 1)* sizeof (Char));
    if (new_title == NULL)
    {
      /* keep what we have; title + offset on a NULL title would be invalid */
      tmp_title = MemFree (tmp_title);
      break;
    }
    StringNCpy (new_title, title, offset);
    StringCat (new_title, tmp_title);
    tmp_title = MemFree (tmp_title);
    title = MemFree (title);
    title = new_title;
    mip = ModifierInfoFree (mip);
    /* move on to the next modifier after the one just processed */
    mip = ParseOneBracketedModifier (title + offset, &start_bracket, &end_bracket);
  }
  mip = ModifierInfoFree (mip);
  return title;
}
2507 
/* ShiftString
 * Deletes the first shift_size characters of str in place by moving the
 * remainder (including the terminator) to the front.
 *   - str NULL or shift_size <= 0      : nothing is done
 *   - shift_size > length of str       : str becomes the empty string
 * A negative shift previously read from before the start of the buffer.
 */
static void ShiftString (CharPtr str, Int4 shift_size)
{
  CharPtr src, dst;

  if (str == NULL || shift_size <= 0)
  {
    return;
  }

  if (shift_size > (Int4) StringLen (str))
  {
    *str = 0;
    return;
  }

  /* source is always ahead of destination, so a forward copy is safe */
  src = str + shift_size;
  dst = str;
  while (*src != 0)
  {
    *dst = *src;
    dst++;
    src++;
  }
  *dst = 0;
}
2534 
/* RemoveMeaninglessEmptyPairsFromOneTitle
 * Scans the bracketed modifiers of title and deletes, in place, every pair
 * whose value has no text unless IsNonTextModifier accepts its name (for
 * those an empty value is kept).
 */
static void RemoveMeaninglessEmptyPairsFromOneTitle (CharPtr title)
{
  CharPtr         start_bracket, end_bracket;
  CharPtr         resume;
  ModifierInfoPtr mip;

  mip = ParseOneBracketedModifier (title, &start_bracket, &end_bracket);
  while (mip != NULL && start_bracket != NULL && end_bracket != NULL)
  {
    if (StringHasNoText (mip->value) && ! IsNonTextModifier (mip->name))
    {
      /* close the gap; the following text now starts where the pair was */
      ShiftString (start_bracket, end_bracket - start_bracket + 1);
      resume = start_bracket;
    }
    else
    {
      resume = end_bracket + 1;
    }
    mip = ModifierInfoFree (mip);
    mip = ParseOneBracketedModifier (resume, &start_bracket, &end_bracket);
  }
  mip = ModifierInfoFree (mip);
}
2557 
/* ApplyOneModToSeqEntry
 * Applies [mod_name=mod_value] to the title descriptor of a sequence by
 * passing the title through ReplaceValueInOneDefLine.
 *
 * sep may be a Bioseq, or a Bioseq-set, in which case the entry returned by
 * FindNucSeqEntry is used.  Nothing happens if no Bioseq can be reached or
 * mod_name has no text.  If the Bioseq has no title descriptor yet, one is
 * created and appended to the end of its descriptor chain.
 */
static void ApplyOneModToSeqEntry (SeqEntryPtr sep, CharPtr mod_name, CharPtr mod_value)
{
  BioseqPtr    bsp = NULL;
  SeqDescrPtr  sdp = NULL;
  SeqDescrPtr  last = NULL;

  if (sep == NULL || StringHasNoText (mod_name))
  {
    return;
  }

  if (IS_Bioseq (sep))
  {
    bsp = (BioseqPtr) sep->data.ptrvalue;
  }
  else if (IS_Bioseq_set (sep))
  {
    sep = FindNucSeqEntry (sep);
    if (sep != NULL && IS_Bioseq (sep))
    {
      bsp = (BioseqPtr) sep->data.ptrvalue;
    }
  }

  if (bsp == NULL)
  {
    return;
  }

  /* look for an existing title, remembering the tail of the chain */
  for (sdp = bsp->descr; sdp != NULL && sdp->choice != Seq_descr_title; sdp = sdp->next)
  {
    last = sdp;
  }

  if (sdp == NULL)
  {
    sdp = SeqDescrNew (NULL);
    if (sdp == NULL)
    {
      return;
    }
    sdp->choice = Seq_descr_title;
    /* Link the new descriptor in.  Previously this only happened when the
     * chain was empty, so on a Bioseq with other descriptors the new title
     * was never attached and the modifier was lost.
     */
    if (last == NULL)
    {
      bsp->descr = sdp;
    }
    else
    {
      last->next = sdp;
    }
  }

  sdp->data.ptrvalue = ReplaceValueInOneDefLine (sdp->data.ptrvalue,
                                                 mod_name, mod_value);
}
2607 
2608 
/* FinishModifierInfo
 * Normalizes a ModifierInfo whose name and value have just been filled in:
 *   - the name is trimmed and replaced by what GetCanonicalName returns;
 *     a canonical name of "note" (any case) becomes "Note-SubSrc"
 *   - modtype is set from GetModifierType; subtype is looked up with
 *     FindTypeForModNameText for source qualifiers and is 0 otherwise
 *   - the value is trimmed and replaced by the result of
 *     AddQuotesToValueWithBrackets
 * A NULL mip is ignored.
 */
static void FinishModifierInfo (ModifierInfoPtr mip)
{
  CharPtr canonical_name, fixed_value;

  if (mip == NULL) {
    return;
  }

  /* name */
  TrimSpacesAroundString (mip->name);
  canonical_name = GetCanonicalName (mip->name);
  mip->name = MemFree (mip->name);
  if (StringICmp (canonical_name, "note") == 0)
  {
    canonical_name = MemFree (canonical_name);
    canonical_name = StringSave ("Note-SubSrc");
  }
  mip->name = canonical_name;

  /* classification */
  mip->modtype = GetModifierType (mip->name);
  mip->subtype = 0;
  if (mip->modtype == eModifierType_SourceQual)
  {
    mip->subtype = FindTypeForModNameText (mip->name);
  }

  /* value */
  TrimSpacesAroundString (mip->value);
  fixed_value = AddQuotesToValueWithBrackets (mip->value);
  mip->value = MemFree (mip->value);
  mip->value = fixed_value;
}
2642 
MakeModifierInfoFromNameAndValue(CharPtr value_name,CharPtr value_string)2643 static ModifierInfoPtr MakeModifierInfoFromNameAndValue (CharPtr value_name, CharPtr value_string)
2644 {
2645   ModifierInfoPtr mip;
2646 
2647   mip = ModifierInfoNew ();
2648   mip->name = StringSave (value_name);
2649   mip->value = StringSave (value_string);
2650   FinishModifierInfo (mip);
2651   return mip;
2652 }
2653 
2654 
2655 static const CharPtr s_AllowedUnmapped[] = {
2656   "note-subsrc",
2657   "note-orgmod",
2658   "passage history",
2659   NULL};
2660 
s_IsAllowedUnmapped(CharPtr mod_name)2661 static Boolean s_IsAllowedUnmapped (CharPtr mod_name)
2662 {
2663   Int4 i;
2664   Boolean rval = FALSE;
2665 
2666   for (i = 0; s_AllowedUnmapped[i] != NULL && !rval; i++) {
2667     rval = StringsAreEquivalent(mod_name, s_AllowedUnmapped[i]);
2668   }
2669   return rval;
2670 }
2671 
2672 
/* This section is used to import tables of modifiers. */
/* ApplyImportModToTitle
 * Applies one imported table cell (modifier value_name with text
 * value_string) to a defline and returns the defline to use from now on
 * (title may be reallocated by the helpers called here).
 *
 * Nothing is changed when value_name has no text, when value_string has no
 * text and erase_where_blank is FALSE, or when the modifier built from the
 * name is a source qualifier with subtype 255 that s_IsAllowedUnmapped
 * rejects (presumably 255 marks an unmapped qualifier name - confirm).
 *
 * Otherwise:
 *   - blank value with erase_where_blank : the value is removed from title
 *   - parse_multiple and a value of the form "(a;b;...)" : the pieces are
 *     applied one by one to successive existing pairs of that name; an empty
 *     piece removes the pair, and a piece with no matching pair is inserted
 *   - "organism" : ReplaceValueInOneDefLine
 *   - anything else : ReplaceValueInOneDefLineForOrganism with no organism
 *
 * While parsing a multi-value string, value_string is truncated in place at
 * each separator and restored before moving on.
 */
static CharPtr
ApplyImportModToTitle
(CharPtr title,
 CharPtr value_name,
 CharPtr value_string,
 Boolean erase_where_blank,
 Boolean parse_multiple)
{
  ModifierInfoPtr mip;
  CharPtr next_semi, val_start, title_loc, title_end;
  CharPtr insert_point;
  Int4    insert_offset, title_val_num;
  Char    val_save_ch;

  if (StringHasNoText (value_name))
  {
    return title;
  }

  if (!erase_where_blank && StringHasNoText (value_string))
  {
    return title;
  }

  /* mip is only used to classify the modifier name; it is freed on exit */
  mip = MakeModifierInfoFromNameAndValue (value_name, value_string);

  if (mip == NULL
      || (mip->modtype == eModifierType_SourceQual
        	&& mip->subtype == 255
  	      && !s_IsAllowedUnmapped(mip->name)))
  {
    mip = ModifierInfoFree (mip);
    return title;
  }

  if (erase_where_blank && StringHasNoText (value_string))
  {
    RemoveValueFromDefline (value_name, title);
  }
  else if (parse_multiple
           && value_string [0] == '('
           && value_string [StringLen (value_string) - 1] == ')'
           && (next_semi = StringChr (value_string, ';')) != NULL)
  {
    /* skip the opening parenthesis; next_semi marks the end of each piece */
    val_start = value_string + 1;
    title_val_num = 0;
    while (next_semi != NULL)
    {
      /* temporarily truncate at end of value */
      val_save_ch = *next_semi;
      *next_semi = 0;

      /* title may have been reallocated on the previous pass, so the pair
       * position is looked up afresh each time
       */
      title_loc = FindNthValuePairInDefLine (title, value_name, title_val_num, &title_end);
      if (StringHasNoText (val_start))
      {
        if (title_loc != NULL)
        {
          RemoveValuePairFromDefline (title_loc, title_end, title);
        }
        else
        {
          /* if text is empty and there is no value pair, nothing to do */
        }
        /* note - we do not increment title_val_num here because either we've
         * removed a value or there are no values left.
         */
      }
      else
      {
        if (title_loc == NULL)
        {
          /* need to insert a new value - if organism name, put at end of title,
           * otherwise insert before second organism name if any
           */
          if (StringICmp (value_name, "organism") == 0)
          {
            insert_offset = StringLen (title);
          }
          else
          {
            insert_point = FindNthValuePairInDefLine (title, "organism", 1, NULL);
            if (insert_point == NULL)
            {
              insert_offset = StringLen (title);
            }
            else
            {
              insert_offset = insert_point - title;
            }
          }
          title = InsertValuePairAtOffset (title, value_name, val_start, insert_offset);
        }
        else
        {
          /* replace values in order */
          title = ReplaceValueInThisValuePair (title, title_loc, value_name,
                                               title_end, val_start);
        }

        title_val_num++;
      }

      /* replace character */
      *next_semi = val_save_ch;
      /* advance to next value in list */
      val_start = next_semi + 1;
      if (*next_semi == ';')
      {
        next_semi = StringChr (next_semi + 1, ';');
        if (next_semi == NULL)
        {
          /* last piece: it ends at the closing parenthesis */
          next_semi = value_string + StringLen (value_string) - 1;
        }
      }
      else
      {
        /* the piece just handled ended at the closing parenthesis */
        next_semi = NULL;
      }
    }
  }
  else if (StringCmp (value_name, "organism") == 0)
  {
    title = ReplaceValueInOneDefLine (title, value_name, value_string);
  }
  else
  {
    title = ReplaceValueInOneDefLineForOrganism (title, value_name, value_string, NULL);
  }

  mip = ModifierInfoFree (mip);
  return title;
}
2806 
ReadRowListFromFile(void)2807 NLM_EXTERN ValNodePtr ReadRowListFromFile (void)
2808 {
2809   Char          path [PATH_MAX];
2810   ValNodePtr    header_line = NULL;
2811   FILE           *fp;
2812 
2813   path [0] = '\0';
2814   if (! GetInputFileName (path, sizeof (path), NULL, "TEXT")) return NULL;
2815   fp = FileOpen (path, "r");
2816   if (fp == NULL) {
2817     Message (MSG_ERROR, "Unable to open %s", path);
2818   } else {
2819     header_line = ReadTabTableFromFile (fp);
2820     FileClose (fp);
2821   }
2822   return header_line;
2823 }
2824 
2825 /* This function will find the sequence number in the IDAndTitleEdit
2826  * to use for each row and put that value in the sequence_numbers array.
2827  */
2828 NLM_EXTERN Boolean
ValidateModifierTableSequenceIDs(ValNodePtr header_line,IDAndTitleEditPtr iatep,Int4Ptr sequence_numbers,Int4Ptr num_rows)2829 ValidateModifierTableSequenceIDs
2830 (ValNodePtr        header_line,
2831  IDAndTitleEditPtr iatep,
2832  Int4Ptr           sequence_numbers,
2833  Int4Ptr           num_rows)
2834 {
2835   ValNodePtr   not_found = NULL;
2836   ValNodePtr   found_more_than_once = NULL;
2837   CharPtr      too_many_msg = NULL, not_found_msg = NULL;
2838   Boolean      rval = TRUE;
2839   Int4         msg_len = 0;
2840   CharPtr      too_many_fmt = " found more than once\n";
2841   CharPtr      not_found_fmt = " not found\n";
2842   CharPtr      err_msg = NULL;
2843   ValNodePtr   row_vnp, col_vnp, prev_row, next_row;
2844   Int4         i, j, seq_num, other_instances;
2845   Boolean      found;
2846   Int4         row_number;
2847   Int4         deleted_rows;
2848 
2849   if (header_line == NULL || header_line->next == NULL || iatep == NULL
2850       || sequence_numbers == NULL || num_rows == NULL || *num_rows < ValNodeLen (header_line->next))
2851   {
2852     return FALSE;
2853   }
2854 
2855   for (row_vnp = header_line->next, row_number = 0;
2856        row_vnp != NULL && row_number < *num_rows;
2857        row_vnp = row_vnp->next, row_number++)
2858   {
2859     col_vnp = row_vnp->data.ptrvalue;
2860     if (col_vnp == NULL || col_vnp->data.ptrvalue == NULL)
2861     {
2862       continue;
2863     }
2864 
2865     /* find correct sequence number */
2866     seq_num = -1;
2867     for (i = 0, found = FALSE; i < iatep->num_sequences && !found; i++)
2868     {
2869       if (StringCmp (iatep->id_list [i], col_vnp->data.ptrvalue) == 0)
2870       {
2871         seq_num = i;
2872         found = TRUE;
2873       }
2874     }
2875     sequence_numbers[row_number] = seq_num;
2876 
2877     if (!found)
2878     {
2879       ValNodeAddPointer (&not_found, 0, StringSave (col_vnp->data.ptrvalue));
2880     }
2881     else
2882     {
2883       /* count the number of times this seq_num has already appeared in the list.*/
2884       other_instances = 0;
2885       for (j = 0; j < row_number; j++)
2886       {
2887         if (sequence_numbers[j] == seq_num)
2888         {
2889           other_instances++;
2890         }
2891       }
2892       /* if the value was found exactly once, add this to the list of duplicates.
2893        * if the value was found more than once, it will already have been reported.
2894        */
2895       if (other_instances == 1)
2896       {
2897         ValNodeAddPointer (&found_more_than_once, 0, StringSave (col_vnp->data.ptrvalue));
2898       }
2899     }
2900   }
2901 
2902   if (found_more_than_once != NULL || not_found != NULL)
2903   {
2904     if (found_more_than_once != NULL)
2905     {
2906       too_many_msg = CreateListMessage ("Sequence ID", NULL, found_more_than_once);
2907       rval = FALSE;
2908       msg_len += StringLen (too_many_msg) + StringLen (too_many_fmt) + 5;
2909     }
2910     if (not_found != NULL)
2911     {
2912       not_found_msg = CreateListMessage ("Sequence ID", NULL, not_found);
2913       msg_len += StringLen (not_found_msg) + StringLen (not_found_fmt) + 5;
2914     }
2915 
2916     err_msg = (CharPtr) MemNew ((msg_len + 1) * sizeof (Char));
2917     if (err_msg != NULL)
2918     {
2919       if (too_many_msg != NULL)
2920       {
2921         StringCat (err_msg, too_many_msg);
2922         if (found_more_than_once->next != NULL)
2923         {
2924           StringCat (err_msg, " were");
2925         }
2926         else
2927         {
2928           StringCat (err_msg, " was");
2929         }
2930         StringCat (err_msg, too_many_fmt);
2931       }
2932       if (not_found_msg != NULL)
2933       {
2934         StringCat (err_msg, not_found_msg);
2935         if (not_found->next != NULL)
2936         {
2937           StringCat (err_msg, " were");
2938         }
2939         else
2940         {
2941           StringCat (err_msg, " was");
2942         }
2943         StringCat (err_msg, not_found_fmt);
2944       }
2945       if (rval)
2946       {
2947         if (ANS_NO == Message (MSG_YN, "%sContinue anyway?", err_msg))
2948         {
2949           rval = FALSE;
2950         }
2951       }
2952       else
2953       {
2954         Message (MSG_ERROR, "%sPlease correct your file.", err_msg);
2955       }
2956     }
2957     too_many_msg = MemFree (too_many_msg);
2958     not_found_msg = MemFree (not_found_msg);
2959     err_msg = MemFree (err_msg);
2960   }
2961 
2962   /* remove rows for sequence IDs that are not found */
2963   for (row_vnp = header_line->next, row_number = 0, prev_row = header_line;
2964        row_vnp != NULL && row_number < *num_rows;
2965        row_vnp = next_row, row_number++)
2966   {
2967     next_row = row_vnp->next;
2968     if (sequence_numbers[row_number] < 0) {
2969       prev_row->next = next_row;
2970       row_vnp->next = NULL;
2971       row_vnp = FreeTableDisplayRowList (row_vnp);
2972     } else {
2973       prev_row = row_vnp;
2974     }
2975   }
2976 
2977   /* now remove sequence_numbers entries */
2978   deleted_rows = 0;
2979   row_number = 0;
2980   while (row_number < *num_rows)
2981   {
2982     if (sequence_numbers[row_number] < 0) {
2983       for (i = row_number + 1; i < *num_rows; i++) {
2984         sequence_numbers[i - 1] = sequence_numbers[i];
2985       }
2986       (*num_rows)--;
2987     } else {
2988       row_number++;
2989     }
2990   }
2991 
2992   return rval;
2993 }
2994 
2995 
IsSequenceIdColumnHeader(CharPtr str)2996 NLM_EXTERN Boolean IsSequenceIdColumnHeader (CharPtr str)
2997 {
2998   if (StringICmp (str, "local_id") != 0
2999       && StringICmp (str, "local id") != 0
3000       && StringICmp (str, "local-id") != 0
3001       && StringICmp (str, "seq_id") != 0
3002       && StringICmp (str, "seq id") != 0
3003       && StringICmp (str, "seq-id") != 0
3004       && StringICmp (str, "seqid") != 0
3005       && StringICmp (str, "sequence_id") != 0
3006       && StringICmp (str, "sequence id") != 0
3007       && StringICmp (str, "sequence-id") != 0)
3008   {
3009     return FALSE;
3010   }
3011   else
3012   {
3013     return TRUE;
3014   }
3015 
3016 }
3017 
3018 
3019 /* This checks the column names and puts the modifier type in the choice for each column */
ValidateImportModifierColumnNames(ValNodePtr header_line,ValNodePtr preferred_list)3020 static Boolean ValidateImportModifierColumnNames (ValNodePtr header_line, ValNodePtr preferred_list)
3021 {
3022   ValNodePtr      header_vnp;
3023   Boolean         rval = TRUE;
3024   ModifierInfoPtr mip;
3025   CharPtr         orig_name;
3026   Int4            col_num;
3027 
3028   if (header_line == NULL)
3029   {
3030     return FALSE;
3031   }
3032 
3033   header_vnp = header_line->data.ptrvalue;
3034   if (header_vnp == NULL || header_vnp->next == NULL)
3035   {
3036     return FALSE;
3037   }
3038 
3039   /* check ID column */
3040   if (!IsSequenceIdColumnHeader(header_vnp->data.ptrvalue))
3041   {
3042     Message (MSG_ERROR, "Table file is missing header line!  Make sure first column header is seq_id");
3043     return FALSE;
3044   }
3045   header_vnp = header_vnp->next;
3046   col_num = 1;
3047   while (header_vnp != NULL && rval)
3048   {
3049     mip = MakeModifierInfoFromNameAndValue (header_vnp->data.ptrvalue, NULL);
3050     if (mip == NULL
3051       || (mip->modtype == eModifierType_SourceQual
3052         	&& mip->subtype == 255
3053           && !s_IsAllowedUnmapped(mip->name)))
3054     {
3055       orig_name = (CharPtr) header_vnp->data.ptrvalue;
3056       rval = ReplaceImportModifierName (&orig_name, col_num, preferred_list);
3057       header_vnp->data.ptrvalue = orig_name;
3058     }
3059     else
3060     {
3061       header_vnp->data.ptrvalue = MemFree (header_vnp->data.ptrvalue);
3062       header_vnp->data.ptrvalue = StringSave (mip->name);
3063       header_vnp->choice = mip->modtype;
3064     }
3065     mip = ModifierInfoFree (mip);
3066     header_vnp = header_vnp->next;
3067     col_num++;
3068   }
3069   return rval;
3070 }
3071 
StringAlreadyInList(ValNodePtr list,CharPtr str)3072 static Boolean StringAlreadyInList (ValNodePtr list, CharPtr str)
3073 {
3074   while (list != NULL)
3075   {
3076     if (StringICmp (list->data.ptrvalue, str) == 0)
3077     {
3078       return TRUE;
3079     }
3080     list = list->next;
3081   }
3082   return FALSE;
3083 }
3084 
ValidateTableValues(ValNodePtr header_line)3085 static Boolean ValidateTableValues (ValNodePtr header_line)
3086 {
3087   ValNodePtr      header_vnp, row_vnp, col_vnp;
3088   Boolean         rval = TRUE;
3089   ModifierInfoPtr mip;
3090   Int4            col_num;
3091   ValNodePtr      bad_value_columns = NULL;
3092   ValNodePtr      bad_nontext_columns = NULL;
3093   CharPtr         err_msg;
3094 
3095   if (header_line == NULL || header_line->next == NULL
3096       || header_line->data.ptrvalue == NULL)
3097   {
3098     return FALSE;
3099   }
3100 
3101   for (row_vnp = header_line->next; row_vnp != NULL; row_vnp = row_vnp->next)
3102   {
3103     /* skip rows with bad sequence IDs */
3104     if (row_vnp->data.ptrvalue == NULL)
3105     {
3106       continue;
3107     }
3108 
3109     header_vnp = header_line->data.ptrvalue;
3110     col_vnp = row_vnp->data.ptrvalue;
3111     /* skip ID column */
3112     header_vnp = header_vnp->next;
3113     col_vnp = col_vnp->next;
3114     for (col_num = 1;
3115          header_vnp != NULL && col_vnp != NULL;
3116          header_vnp = header_vnp->next, col_vnp = col_vnp->next, col_num++)
3117     {
3118       mip = MakeModifierInfoFromNameAndValue (header_vnp->data.ptrvalue,
3119                                               col_vnp->data.ptrvalue);
3120       if (mip->modtype == eModifierType_SourceQual
3121   	             && IsNonTextModifier (mip->name))
3122       {
3123         if (StringICmp (mip->value, "TRUE") != 0
3124             && StringICmp (mip->value, "FALSE") != 0)
3125         {
3126           if (!StringAlreadyInList (bad_nontext_columns, header_vnp->data.ptrvalue))
3127           {
3128             ValNodeAddPointer (&bad_nontext_columns, col_num, StringSave (header_vnp->data.ptrvalue));
3129           }
3130         }
3131       }
3132       else if (ModifierHasInvalidValue (mip))
3133       {
3134         if (!StringAlreadyInList (bad_value_columns, header_vnp->data.ptrvalue))
3135         {
3136           ValNodeAddPointer (&bad_value_columns, col_num, StringSave (header_vnp->data.ptrvalue));
3137         }
3138       }
3139       mip = ModifierInfoFree (mip);
3140     }
3141   }
3142 
3143   if (bad_value_columns != NULL)
3144   {
3145     err_msg = CreateListMessage ("Your file contains invalid values for column",
3146                                  ". Please edit your file to list valid values.",
3147                                  bad_value_columns);
3148     Message (MSG_ERROR, err_msg);
3149     rval = FALSE;
3150   }
3151   if (bad_nontext_columns != NULL && rval)
3152   {
3153     err_msg = CreateListMessage ("Your file contains values other than TRUE or FALSE for column",
3154                                  ". These modifiers do not allow other text.  Click OK to "
3155                                  "discard this text and mark the values as TRUE.  If you "
3156                                  "wish to preserve this text under another modifier, click "
3157                                  "Cancel and change the column header in your file.",
3158                                  bad_nontext_columns);
3159     if (ANS_CANCEL == Message (MSG_OKC, err_msg))
3160     {
3161       rval = FALSE;
3162     }
3163   }
3164 
3165   bad_value_columns = ValNodeFreeData (bad_value_columns);
3166   bad_nontext_columns = ValNodeFreeData (bad_nontext_columns);
3167   return rval;
3168 }
3169 
3170 static Boolean
CheckModifiersForOverwrite(ValNodePtr header_line,IDAndTitleEditPtr iatep,Int4Ptr sequence_numbers,Int4 num_rows,BoolPtr erase_where_blank,BoolPtr parse_multiple)3171 CheckModifiersForOverwrite
3172 (ValNodePtr        header_line,
3173  IDAndTitleEditPtr iatep,
3174  Int4Ptr           sequence_numbers,
3175  Int4              num_rows,
3176  BoolPtr           erase_where_blank,
3177  BoolPtr           parse_multiple)
3178 {
3179   ValNodePtr row_vnp, header_vnp, col_vnp;
3180   CharPtr    title_val, data_val;
3181   ValNodePtr blank_column_list = NULL;
3182   ValNodePtr replace_column_list = NULL;
3183   ValNodePtr parse_multi_list = NULL;
3184   Int4       col_num, row_num;
3185   Boolean    rval = TRUE;
3186   CharPtr    err_msg;
3187   MsgAnswer  ans;
3188 
3189   if (header_line == NULL || header_line->next == NULL || iatep == NULL
3190       || sequence_numbers == NULL || num_rows < ValNodeLen (header_line->next)
3191       || erase_where_blank == NULL || parse_multiple == NULL)
3192   {
3193     return FALSE;
3194   }
3195 
3196   *erase_where_blank = FALSE;
3197   *parse_multiple = FALSE;
3198 
3199   for (row_vnp = header_line->next, row_num = 0;
3200        row_vnp != NULL && row_num < num_rows;
3201        row_vnp = row_vnp->next, row_num++)
3202   {
3203     if (row_vnp->data.ptrvalue == NULL)
3204     {
3205       continue;
3206     }
3207     header_vnp = header_line->data.ptrvalue;
3208     col_vnp = row_vnp->data.ptrvalue;
3209 
3210     /* skip ID column */
3211     header_vnp = header_vnp->next;
3212     col_vnp = col_vnp->next;
3213 
3214     col_num = 1;
3215     while (header_vnp != NULL && col_vnp != NULL)
3216     {
3217       /* if column name is blank, skip */
3218       if (header_vnp->data.ptrvalue != NULL)
3219       {
3220         title_val = FindValueFromPairInDefline (header_vnp->data.ptrvalue,
3221                                                 iatep->title_list [sequence_numbers[row_num]]);
3222         data_val = col_vnp->data.ptrvalue;
3223         if (!StringHasNoText (title_val))
3224         {
3225           if (StringHasNoText (data_val))
3226           {
3227             /* add to list of possible erasures */
3228             if (!StringAlreadyInList (blank_column_list, header_vnp->data.ptrvalue))
3229             {
3230               ValNodeAddPointer (&blank_column_list, col_num, StringSave (header_vnp->data.ptrvalue));
3231             }
3232           }
3233           else if (StringCmp (data_val, title_val) != 0)
3234           {
3235             /* add to list of possible replacements */
3236             if (!StringAlreadyInList (replace_column_list, header_vnp->data.ptrvalue))
3237             {
3238               ValNodeAddPointer (&replace_column_list, col_num, StringSave (header_vnp->data.ptrvalue));
3239             }
3240           }
3241         }
3242         title_val = MemFree (title_val);
3243         /* check for multival parsing */
3244         if (data_val != NULL
3245             && data_val [0] == '(' && data_val [StringLen (data_val) - 1] == ')'
3246             && StringChr (data_val, ';') != NULL
3247             && !StringAlreadyInList (parse_multi_list, header_vnp->data.ptrvalue))
3248         {
3249           ValNodeAddPointer (&parse_multi_list, col_num, StringSave (header_vnp->data.ptrvalue));
3250         }
3251       }
3252       header_vnp = header_vnp->next;
3253       col_vnp = col_vnp->next;
3254       col_num++;
3255     }
3256   }
3257 
3258   if (replace_column_list != NULL)
3259   {
3260     err_msg = CreateListMessage ("Record already contains values for column",
3261                                  " also found in the import table.\n"
3262                                  "Do you wish to overwrite these values?",
3263                                  replace_column_list);
3264     if (ANS_NO == Message (MSG_YN, err_msg))
3265     {
3266       rval = FALSE;
3267     }
3268     err_msg = MemFree (err_msg);
3269   }
3270 
3271   if (blank_column_list != NULL && rval)
3272   {
3273     err_msg = CreateListMessage ("Your import table contains blanks in column",
3274                                  " where data already exists in the sequences.\n"
3275                                  "Do you wish to erase these values in the sequences?\n"
3276                                  "If you say no, the old values will remain.",
3277                                  blank_column_list);
3278     ans = Message (MSG_YNC, err_msg);
3279     err_msg = MemFree (err_msg);
3280     if (ans == ANS_CANCEL)
3281     {
3282       rval = FALSE;
3283     }
3284     else if (ans == ANS_YES)
3285     {
3286       *erase_where_blank = TRUE;
3287     }
3288   }
3289 
3290 #if 0
3291   /* ability to parse multiple entry format removed (for now) */
3292   if (parse_multi_list != NULL && rval)
3293   {
3294     err_msg = CreateListMessage ("Your import table contains values in column",
3295                                  " where the values are in form '(value1;value2)'.\n"
3296                                  "Do you wish to parse these values into multiple modifiers?\n"
3297                                  "If you say no, the values will be applied to a single modifier.",
3298                                  parse_multi_list);
3299     ans = Message (MSG_YNC, err_msg);
3300     err_msg = MemFree (err_msg);
3301     if (ans == ANS_CANCEL)
3302     {
3303       rval = FALSE;
3304     }
3305     else if (ans == ANS_YES)
3306     {
3307       *parse_multiple = TRUE;
3308     }
3309   }
3310 #endif
3311 
3312   blank_column_list = ValNodeFree (blank_column_list);
3313   replace_column_list = ValNodeFreeData (replace_column_list);
3314   parse_multi_list = ValNodeFreeData (parse_multi_list);
3315 
3316   return rval;
3317 }
3318 
ImportModifiersToIDAndTitleEditEx(IDAndTitleEditPtr iatep,ValNodePtr preferred_list)3319 NLM_EXTERN Boolean ImportModifiersToIDAndTitleEditEx (IDAndTitleEditPtr iatep, ValNodePtr preferred_list)
3320 {
3321   ValNodePtr   header_line, row_vnp, col_vnp, header_vnp, special_list;
3322   Boolean      erase_where_blank = FALSE, parse_multi = FALSE;
3323   Int4Ptr      sequence_numbers;
3324   Int4         num_rows, row_number;
3325 
3326   if (iatep == NULL)
3327   {
3328     return FALSE;
3329   }
3330 
3331   SendHelpScrollMessage (helpForm, "Organism Page", "Import Source Modifiers");
3332 
3333   header_line = ReadRowListFromFile ();
3334   if (header_line == NULL || header_line->next == NULL)
3335   {
3336     header_line = FreeTableDisplayRowList (header_line);
3337     return FALSE;
3338   }
3339 
3340   header_vnp = header_line->data.ptrvalue;
3341   if (header_vnp == NULL || header_vnp->next == NULL)
3342   {
3343     header_line = FreeTableDisplayRowList (header_line);
3344     return FALSE;
3345   }
3346 
3347   /* remove special characters */
3348   special_list = ScanTabTableForSpecialCharacters (header_line);
3349   if (special_list != NULL
3350       && !FixSpecialCharactersForStringsInList (special_list,
3351                                                 "The table contains special characters\nand cannot be used until they are replaced.",
3352                                                 FALSE)) {
3353     special_list = FreeContextList (special_list);
3354     header_line = FreeTableDisplayRowList (header_line);
3355     return FALSE;
3356   }
3357   special_list = FreeContextList (special_list);
3358 
3359   num_rows = ValNodeLen (header_line->next);
3360   sequence_numbers = (Int4Ptr) MemNew (num_rows * sizeof (Int4));
3361 
3362   if (!ValidateModifierTableSequenceIDs (header_line, iatep, sequence_numbers, &num_rows))
3363   {
3364     header_line = FreeTableDisplayRowList (header_line);
3365     sequence_numbers = MemFree (sequence_numbers);
3366     return FALSE;
3367   }
3368 
3369   AdjustInfluenzaSourceTable (header_line);
3370 
3371   /* first, validate all column names and values */
3372   if (!ValidateImportModifierColumnNames (header_line, preferred_list))
3373   {
3374     header_line = FreeTableDisplayRowList (header_line);
3375     sequence_numbers = MemFree (sequence_numbers);
3376     return FALSE;
3377   }
3378 
3379   if (!ValidateTableValues (header_line))
3380   {
3381     header_line = FreeTableDisplayRowList (header_line);
3382     sequence_numbers = MemFree (sequence_numbers);
3383     return FALSE;
3384   }
3385 
3386   if (!CheckModifiersForOverwrite (header_line, iatep,
3387                                    sequence_numbers, num_rows,
3388                                    &erase_where_blank, &parse_multi))
3389   {
3390     header_line = FreeTableDisplayRowList (header_line);
3391     sequence_numbers = MemFree (sequence_numbers);
3392     return FALSE;
3393   }
3394 
3395   /* now apply */
3396   for (row_vnp = header_line->next, row_number = 0;
3397        row_vnp != NULL && row_number < num_rows;
3398        row_vnp = row_vnp->next, row_number++)
3399   {
3400     if (row_vnp->data.ptrvalue == NULL)
3401     {
3402       continue;
3403     }
3404     header_vnp = header_line->data.ptrvalue;
3405     col_vnp = row_vnp->data.ptrvalue;
3406 
3407     /* skip the ID column */
3408     header_vnp = header_vnp->next;
3409     col_vnp = col_vnp->next;
3410 
3411     for (;
3412          header_vnp != NULL && col_vnp != NULL;
3413          header_vnp = header_vnp->next, col_vnp = col_vnp->next)
3414     {
3415       iatep->title_list [sequence_numbers [row_number]] = ApplyImportModToTitle (iatep->title_list [sequence_numbers[row_number]],
3416                                                                    header_vnp->data.ptrvalue,
3417                                                                    col_vnp->data.ptrvalue,
3418                                                                    erase_where_blank,
3419                                                                    parse_multi);
3420     }
3421   }
3422   sequence_numbers = MemFree (sequence_numbers);
3423   return TRUE;
3424 }
3425 
3426 
ImportModifiersToIDAndTitleEdit(IDAndTitleEditPtr iatep)3427 NLM_EXTERN Boolean ImportModifiersToIDAndTitleEdit (IDAndTitleEditPtr iatep)
3428 {
3429   return ImportModifiersToIDAndTitleEditEx (iatep, NULL);
3430 }
3431 
3432 
/* Per-page state for a FASTA import page.
 *
 * Only the fields used by the functions visible near this definition are
 * described with certainty; notes on the remaining fields are marked as
 * assumptions to be confirmed against their users elsewhere in the file.
 */
typedef struct fastapage {
  DIALOG_MESSAGE_BLOCK
  Char         path [PATH_MAX];    /* presumably the path of the imported file - TODO confirm */
  SeqEntryPtr  list;               /* chain of imported entries linked through ->next;
                                      freed entry by entry in ResetFastaPage */
  ValNodePtr   errmsgs;            /* message strings walked in step with list by
                                      FormatFastaDoc; freed with ValNodeFreeData */
  DoC          doc;                /* document that FormatFastaDoc appends its report to */
  GrouP        instructions;
  GrouP        have_seq_instr_grp;
  GrouP        singleIdGrp;
  TexT         singleSeqID;
  Boolean      is_na;              /* selects nucleotide vs protein wording and modifiers
                                      in FormatFastaDoc */
  Boolean      is_mrna;            /* selects transcript wording and modifiers in
                                      FormatFastaDoc */
  Boolean      is_delta;
  Boolean      parseSeqId;
  Boolean      single;             /* when set, FormatFastaDoc reports an error if more
                                      than one entry is in list */
  Int2Ptr      seqPackagePtr;
  ButtoN       import_btn;
  ButtoN       clear_btn;
} FastaPage, PNTR FastaPagePtr;
3452 
/* Paragraph and column format records handed to every AppendText call made
 * by FormatFastaDoc.  The meaning of the individual initializer values is
 * defined by the ParData and ColData types, which are declared elsewhere.
 */
static ParData faParFmt = {FALSE, FALSE, FALSE, FALSE, FALSE, 0, 0};
static ColData faColFmt = {0, 0, 80, 0, NULL, 'l', TRUE, FALSE, FALSE, FALSE, TRUE};
3455 
/* Discards everything a FASTA page has imported so far.
 *
 * Each entry in fpp->list is unlinked from its successor and released with
 * SeqEntryFree, the list head is cleared, and the error-message list is
 * freed together with its strings.  A NULL fpp is ignored.
 */
static void ResetFastaPage (FastaPagePtr fpp)

{
  SeqEntryPtr  cur;
  SeqEntryPtr  following;

  if (fpp == NULL) return;

  for (cur = fpp->list; cur != NULL; cur = following) {
    /* detach before freeing so only this one entry is released */
    following = cur->next;
    cur->next = NULL;
    SeqEntryFree (cur);
  }
  fpp->list = NULL;
  fpp->errmsgs = ValNodeFreeData (fpp->errmsgs);
}
3474 
/* Looks up the value of modifier mod_name in the first title found in sep.
 *
 * The title is located with SeqEntryExplore/FindFirstTitle and the value is
 * extracted with FindValueFromPairInDefline, whose result is returned as is.
 * Returns NULL when sep is NULL, mod_name has no text, or no title with text
 * is found.
 */
static CharPtr GetModValueFromSeqEntry (SeqEntryPtr sep, CharPtr mod_name)
{
  CharPtr first_title = NULL;

  if (sep != NULL && !StringHasNoText (mod_name))
  {
    SeqEntryExplore (sep, (Pointer) (&first_title), FindFirstTitle);
    if (!StringHasNoText (first_title))
    {
      return FindValueFromPairInDefline (mod_name, first_title);
    }
  }

  return NULL;
}
3495 
/* Appends the line "<name>: <tmp>\n" to the report text in str.
 *
 * The pieces are concatenated with StringCat; no length check is made, so
 * the caller must provide a buffer large enough for the result.
 */
static void AddReportLine (CharPtr str, CharPtr name, CharPtr tmp)

{
  CharPtr pieces [4];
  Int2    i;

  pieces [0] = name;
  pieces [1] = ": ";
  pieces [2] = tmp;
  pieces [3] = "\n";

  for (i = 0; i < 4; i++) {
    StringCat (str, pieces [i]);
  }
}
3504 
/* Forward declarations for helpers defined later in this file; they are
 * needed by LookupAndAddReportLine and ReportModifiers below.
 */
static CharPtr GetDisplayValue (CharPtr mod_name, CharPtr title, BoolPtr multi_found);
static CharPtr GetDisplayValueFromModifierInfoList (CharPtr mod_name, ValNodePtr modifier_info_list, BoolPtr multi_found);
3507 
/* Adds a report line for modifier mod_name, taking its value from an
 * already parsed modifier list.
 *
 * str                 report text being built; appended to in place
 * report_name         label used for the line
 * modifier_info_list  list searched by GetDisplayValueFromModifierInfoList
 * mod_name            name of the modifier to look up
 * not_found_msg       text appended when there is no value to show; may be
 *                     NULL or blank to append nothing
 *
 * A value of "FALSE" for a modifier that IsNonTextModifier accepts is
 * handled exactly like a missing value.
 */
static void ReportModifiers (CharPtr str, CharPtr report_name,
                             ValNodePtr modifier_info_list, CharPtr mod_name, CharPtr not_found_msg)
{
  Boolean multi_found = TRUE;
  CharPtr display;

  display = GetDisplayValueFromModifierInfoList (mod_name, modifier_info_list, &multi_found);

  /* a non-text modifier that is explicitly FALSE has nothing to report */
  if (IsNonTextModifier (mod_name) && StringICmp (display, "FALSE") == 0)
  {
    display = MemFree (display);
  }

  if (StringHasNoText (display))
  {
    if (!StringHasNoText (not_found_msg))
    {
      StringCat (str, not_found_msg);
    }
  }
  else
  {
    AddReportLine (str, report_name, display);
  }

  MemFree (display);
}
3527 
/* Adds a report line for modifier mod_name, taking its value directly from
 * a title string.
 *
 * str            report text being built; appended to in place
 * report_name    label used for the line
 * title          title text searched by GetDisplayValue
 * mod_name       name of the modifier to look up
 * not_found_msg  text appended when there is no value to show; may be NULL
 *                or blank to append nothing
 *
 * A value of "FALSE" for a modifier that IsNonTextModifier accepts is
 * handled exactly like a missing value.
 */
static void LookupAndAddReportLine (CharPtr str, CharPtr report_name,
                                    CharPtr title, CharPtr mod_name, CharPtr not_found_msg)
{
  Boolean multi_found = TRUE;
  CharPtr display;

  display = GetDisplayValue (mod_name, title, &multi_found);

  /* a non-text modifier that is explicitly FALSE has nothing to report */
  if (IsNonTextModifier (mod_name) && StringICmp (display, "FALSE") == 0)
  {
    display = MemFree (display);
  }

  if (StringHasNoText (display))
  {
    if (!StringHasNoText (not_found_msg))
    {
      StringCat (str, not_found_msg);
    }
  }
  else
  {
    AddReportLine (str, report_name, display);
  }

  MemFree (display);
}
3547 
/* Appends a "Location" report line to str for the "location" value found in
 * title.  Nothing is appended when the value is missing, blank, or equal to
 * "genomic" (compared case-insensitively).
 */
static void LookupAndAddLocationReportLine (CharPtr str, CharPtr title)
{
  CharPtr location;
  Boolean worth_reporting;

  location = FindValueFromPairInDefline ("location", title);

  worth_reporting = (Boolean) (!StringHasNoText (location)
                               && StringICmp (location, "genomic") != 0);
  if (worth_reporting) {
    AddReportLine (str, "Location", location);
  }

  MemFree (location);
}
3558 
3559 static CharPtr singlewarn = "\
3560 ERROR - You may not enter multiple segments for a single sequence submission.\
3561 You should either clear the nucleotide and import a single FASTA record, or\
3562 return to the Sequence Format form and choose the proper submission type.\n\n";
3563 
/* Size, in characters, of each of the two scratch buffers FormatFastaDoc
 * allocates for building report text and sequence-ID strings.
 */
#define FastaFormatBufLen 2000
3565 
CountSegSetSegments(SeqEntryPtr sep)3566 static Int4 CountSegSetSegments (SeqEntryPtr sep)
3567 {
3568   BioseqSetPtr bssp;
3569 
3570   if (sep == NULL || sep->data.ptrvalue == NULL || ! IS_Bioseq_set (sep))
3571   {
3572     return 0;
3573   }
3574 
3575   bssp = (BioseqSetPtr) sep->data.ptrvalue;
3576   if (bssp->_class != BioseqseqSet_class_segset)
3577   {
3578     return 0;
3579   }
3580   sep = bssp->seq_set;
3581 
3582   while (sep != NULL)
3583   {
3584     if (IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL)
3585     {
3586       bssp = (BioseqSetPtr) sep->data.ptrvalue;
3587       if (bssp->_class == BioseqseqSet_class_parts)
3588       {
3589         return ValNodeLen (bssp->seq_set);
3590       }
3591     }
3592     sep = sep->next;
3593   }
3594   return 0;
3595 }
3596 
/* Appends a textual report about the entries in fpp->list to fpp->doc.
 *
 * The report consists of:
 *   - the singlewarn error when the page is marked single but holds more
 *     than one entry;
 *   - a summary line with the entry count and the total length;
 *   - for every entry, its sequence ID, length, the modifiers recognized in
 *     its first title (which ones depends on is_na / is_mrna), what remains
 *     of the title after those modifiers are removed, and the matching
 *     message from fpp->errmsgs, which is walked in step with the list.
 *
 * The document is refreshed afterwards, and three beeps are sounded when
 * any error was reported.  A NULL fpp is ignored.
 *
 * NOTE(review): the report for one entry is assembled in a fixed buffer of
 * FastaFormatBufLen characters using sprintf/StringCat without length
 * checks; unusually long IDs or many modifiers could exceed it.
 */
static void FormatFastaDoc (FastaPagePtr fpp)

{
  Nlm_QualNameAssocPtr ap;
  BioseqPtr          bsp;
  Boolean            hasErrors;
  CharPtr            label;
  Int4               len;
  CharPtr            measure;
  SeqEntryPtr        nsep = NULL;
  Int2               num;
  CharPtr            plural;
  CharPtr            ptr;
  SeqIdPtr           sip;
  SeqEntryPtr        sep;
  CharPtr            str;
  CharPtr            title;
  CharPtr            ttl;
  CharPtr            tmp;
  ValNodePtr         vnp;
  Int4               num_seg;
  CharPtr            valstr;
  ValNodePtr         modifier_info_list = NULL;

  if (fpp == NULL) return;

  str = MemNew (sizeof (char) * FastaFormatBufLen);
  tmp = MemNew (sizeof (char) * FastaFormatBufLen);
  if (str == NULL || tmp == NULL) {
    /* release whichever of the two buffers was obtained */
    MemFree (str);
    MemFree (tmp);
    return;
  }

  /* first pass: count the entries and total their lengths */
  num = 0;
  len = 0;
  hasErrors = FALSE;
  for (sep = fpp->list; sep != NULL; sep = sep->next) {
    num++;
    if (IS_Bioseq (sep)) {
      bsp = (BioseqPtr) sep->data.ptrvalue;
      if (bsp != NULL) {
        len += bsp->length;
      }
    } else if (IS_Bioseq_set (sep)) {
      nsep = FindNucSeqEntry (sep);
      if (nsep != NULL && IS_Bioseq (nsep)) {
        bsp = (BioseqPtr) nsep->data.ptrvalue;
        if (bsp != NULL) {
          len += bsp->length;
        }
      }
    }
  }
  if (num > 1) {
    plural = "s";
  } else {
    plural = "";
  }
  if (fpp->single && num > 1) {
    AppendText (fpp->doc, singlewarn, &faParFmt, &faColFmt, programFont);
    hasErrors = TRUE;
  }
  if (fpp->is_mrna) {
    label = "Message";
    measure = "nucleotides";
  } else if (fpp->is_na) {
    label = "Sequence";
    measure = "bases";
  } else {
    label = "Sequence";
    measure = "amino acids";
  }
  if (fpp->is_mrna) {
    sprintf (str, "%ld transcript sequence%s, total length %ld %s\n",
             (long) num, plural, (long) len, measure);
  } else if (fpp->is_na) {
    sprintf (str, "%ld nucleotide sequence%s, total length %ld %s\n",
             (long) num, plural, (long) len, measure);
  } else {
    sprintf (str, "%ld protein sequence%s, total length %ld %s\n",
             (long) num, plural, (long) len, measure);
  }
  AppendText (fpp->doc, str, &faParFmt, &faColFmt, programFont);

  /* second pass: one report paragraph per entry */
  vnp = fpp->errmsgs;
  num = 0;
  for (sep = fpp->list; sep != NULL; sep = sep->next) {
    num++;
    len = 0;
    num_seg = CountSegSetSegments (sep);
    sip = NULL;
    tmp [0] = '\0';
    /* start every entry without a title source so that nothing is carried
     * over from the previous entry
     */
    nsep = NULL;
    if (IS_Bioseq (sep)) {
      bsp = (BioseqPtr) sep->data.ptrvalue;
      if (bsp != NULL) {
        len = bsp->length;
        sip = SeqIdFindWorst (bsp->id);
        SeqIdWrite (sip, tmp, PRINTID_REPORT, FastaFormatBufLen);
      }
      nsep = sep;
    } else if (IS_Bioseq_set (sep)) {
      nsep = FindNucSeqEntry (sep);
      if (nsep != NULL && IS_Bioseq (nsep)) {
        bsp = (BioseqPtr) nsep->data.ptrvalue;
        if (bsp != NULL) {
          len = bsp->length;
          sip = SeqIdFindWorst (bsp->id);
          SeqIdWrite (sip, tmp, PRINTID_REPORT, FastaFormatBufLen);
        }
      }
    }

    /* if segmented set, show number of segments */
    if (num_seg > 0)
    {
      sprintf (str, "\nSegset %d Sequence ID: %s\nLength: %ld %s (%d segments)\n",
               (int) num, tmp, (long) len, measure, (int) num_seg);
    }
    else
    {
      sprintf (str, "\n%s %d Sequence ID: %s\nLength: %ld %s\n", label,
               (int) num, tmp, (long) len, measure);
    }

    /* work on a private copy of the first title */
    ttl = NULL;
    SeqEntryExplore (nsep, (Pointer) (&ttl), FindFirstTitle);
    title = StringSaveNoNull (ttl);
    modifier_info_list = ParseAllBracketedModifiers (title);

    /* protein page */
    if (title != NULL && (! fpp->is_na)) {

      ReportModifiers (str, "Gene", modifier_info_list, "gene", "No gene name detected\n");
      ReportModifiers (str, "Protein", modifier_info_list, "protein", "No protein name detected\n");
      ReportModifiers (str, "Gene Syn", modifier_info_list, "gene_syn", NULL);
      ReportModifiers (str, "Protein Desc", modifier_info_list, "protein_desc", NULL);

      ptr = StringISearch (title, "[orf]");
      if (ptr != NULL) {
        StringCat (str, "ORF indicated\n");
      }
      ReportModifiers (str, "Protein Comment", modifier_info_list, "comment", NULL);
    }

    /* nucleotide page */
    if (title != NULL && fpp->is_na && (! fpp->is_mrna)) {
      ReportModifiers (str, "Organism", modifier_info_list, "organism", NULL);
      ReportModifiers (str, "Lineage", modifier_info_list, "lineage", NULL);
      for (ap = current_orgmod_subtype_alist; ap->name != NULL; ap++) {
        ReportModifiers (str, ap->name, modifier_info_list, ap->name, NULL);
      }
      for (ap = current_subsource_subtype_alist; ap->name != NULL; ap++) {
        ReportModifiers (str, ap->name, modifier_info_list, ap->name, NULL);
      }
      LookupAndAddReportLine (str, "Note", title, "note", NULL);
      LookupAndAddReportLine (str, "Note", title, "subsource", NULL);
      LookupAndAddReportLine (str, "Molecule", title, "molecule", NULL);
      LookupAndAddReportLine (str, "MolType", title, "moltype", NULL);
      LookupAndAddLocationReportLine (str, title);
      LookupAndAddReportLine (str, "Genetic Code", title, "genetic_code", NULL);
    }

    /* transcript page */
    if (title != NULL && fpp->is_na && fpp->is_mrna) {
      LookupAndAddReportLine (str, "Gene", title, "gene", "No gene name detected\n");
      valstr = FindValueFromPairInDefline ("mrna", title);
      if (!StringHasNoText (valstr)) {
        AddReportLine (str, "mRNA", valstr);
        valstr = MemFree (valstr);
      } else {
        /* no mRNA name: fall back to a cDNA name */
        valstr = MemFree (valstr);
        valstr = FindValueFromPairInDefline ("cdna", title);
        if (!StringHasNoText (valstr)) {
          AddReportLine (str, "cDNA", valstr);
        } else {
          StringCat (str, "No mRNA name detected\n");
        }
        valstr = MemFree (valstr);
      }
      LookupAndAddReportLine (str, "Comment", title, "comment", NULL);
    }
    MemFree (title);

    /* take a fresh copy of the title and strip the recognized modifiers
     * from it to show what free text is left
     */
    ttl = NULL;
    SeqEntryExplore (nsep, (Pointer) (&ttl), FindFirstTitle);
    title = StringSaveNoNull (ttl);
    if (title != NULL) {
      RemoveRecognizedModifiersFromTitle (title, modifier_info_list, fpp->is_na);
      if (fpp->is_mrna) {
        StripAllInstancesOfModNameFromTitle ("gene", title);
        StripAllInstancesOfModNameFromTitle ("mrna", title);
        StripAllInstancesOfModNameFromTitle ("cdna", title);
        StripAllInstancesOfModNameFromTitle ("comment", title);
      }
      TrimSpacesAroundString (title);
      if (! StringHasNoText (title)) {
        StringCat (str, "Title: ");
        StringNCat (str, title, 128);
        StringCat (str, "\n");
      } else {
        StringCat (str, "No title detected\n");
      }
    }
    MemFree (title);
    ModifierInfoListFree (modifier_info_list);

    /* attach the message recorded for this entry, if any */
    if (vnp != NULL && vnp->data.ptrvalue != NULL) {
      hasErrors = TRUE;
      StringCat (str, (CharPtr) vnp->data.ptrvalue);
      StringCat (str, "\n");
    }
    AppendText (fpp->doc, str, &faParFmt, &faColFmt, programFont);
    if (vnp != NULL) {
      vnp = vnp->next;
    }
  }
  MemFree (str);
  MemFree (tmp);
  UpdateDocument (fpp->doc, 0, 0);
  if (hasErrors) {
    Beep ();
    Beep ();
    Beep ();
  }
}
3810 
ImportOneGappedSequence(FILE * fp)3811 extern SeqEntryPtr ImportOneGappedSequence (FILE *fp)
3812 {
3813   BioseqPtr      bsp;
3814   Pointer        dataptr;
3815   Uint2          datatype;
3816   SeqEntryPtr    topsep;
3817   SeqSubmitPtr   ssp;
3818   ErrSev         oldsev;
3819 
3820   if (fp == NULL) return NULL;
3821 
3822   oldsev = ErrSetMessageLevel (SEV_MAX);
3823   bsp = ReadDeltaFasta (fp, NULL);
3824   ErrSetMessageLevel (oldsev);
3825   if (bsp == NULL)
3826   {
3827     topsep = NULL;
3828     dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE,
3829 		  		    TRUE, FALSE);
3830     if (dataptr != NULL)
3831     {
3832       /* Get a pointer to the new SeqEntry */
3833       if (datatype == OBJ_SEQENTRY)
3834       {
3835         topsep = (SeqEntryPtr) dataptr;
3836       }
3837       else if (datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET)
3838       {
3839         topsep = SeqMgrGetSeqEntryForData (dataptr);
3840       }
3841       else if (datatype == OBJ_SEQSUB)
3842       {
3843         ssp = (SeqSubmitPtr) dataptr;
3844         if (ssp != NULL && ssp->datatype == 1)
3845         {
3846           topsep = (SeqEntryPtr) ssp->data;
3847         }
3848       }
3849     }
3850   }
3851   else
3852   {
3853     topsep = SeqMgrGetSeqEntryForData (bsp);
3854   }
3855 
3856   return topsep;
3857 }
3858 
/* Combines a chain of SeqEntries into one entry.
 *
 * The first entry is detached from the chain and every following entry is
 * added to it, one at a time, with AddSeqEntryToSeqEntry.  If the first
 * entry was a Bioseq with a title descriptor and the result is a Bioseq-set,
 * that title is copied onto the entry returned by FindNucSeqEntry whenever
 * that entry has no title text of its own; the original descriptor is left
 * in place.
 *
 * Returns the first entry of the list, or NULL when list is NULL.
 */
static SeqEntryPtr SegsetFromSeqEntryList (SeqEntryPtr list)
{
  SeqEntryPtr  head, pending, current, nuc_sep;
  BioseqPtr    bsp;
  SeqDescrPtr  first_title = NULL, master_title;

  if (list == NULL)
  {
    return NULL;
  }

  head = list;
  pending = head->next;
  head->next = NULL;

  /* grab title on first sequence to put on segmented bioseq */
  if (IS_Bioseq (head) && head->data.ptrvalue != NULL)
  {
    bsp = (BioseqPtr) head->data.ptrvalue;
    for (first_title = bsp->descr;
         first_title != NULL && first_title->choice != Seq_descr_title;
         first_title = first_title->next)
    {
      /* advance to the title descriptor, if any */
    }
  }

  /* fold the remaining entries into the first one, one at a time */
  while (pending != NULL)
  {
    current = pending;
    pending = current->next;
    current->next = NULL;
    AddSeqEntryToSeqEntry (head, current, TRUE);
  }

  if (first_title == NULL || ! IS_Bioseq_set (head))
  {
    return head;
  }

  nuc_sep = FindNucSeqEntry (head);
  if (nuc_sep == NULL || ! IS_Bioseq (nuc_sep) || nuc_sep->data.ptrvalue == NULL)
  {
    return head;
  }

  bsp = nuc_sep->data.ptrvalue;
  for (master_title = bsp->descr;
       master_title != NULL && master_title->choice != Seq_descr_title;
       master_title = master_title->next)
  {
    /* advance to the title descriptor, if any */
  }
  if (master_title == NULL)
  {
    master_title = CreateNewDescriptor (nuc_sep, Seq_descr_title);
  }
  if (master_title != NULL && StringHasNoText (master_title->data.ptrvalue))
  {
    /* make a copy, rather than removing the segment title */
    master_title->data.ptrvalue = MemFree (master_title->data.ptrvalue);
    master_title->data.ptrvalue = StringSave (first_title->data.ptrvalue);
  }

  return head;
}
3919 
3920 
3921 static SeqEntryPtr
ReadOneSegSet(FILE * fp,Boolean parse_id,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped)3922 ReadOneSegSet
3923 (FILE            *fp,
3924  Boolean         parse_id,
3925  ValNodePtr PNTR err_msg_list,
3926  BoolPtr         chars_stripped)
3927 {
3928   SeqEntryPtr nextsep;
3929   CharPtr     errormsg = NULL;
3930   Char        lastchar;
3931   SeqEntryPtr seg_list = NULL, seg_list_last = NULL;
3932   BioseqPtr   bsp;
3933 
3934   if (fp == NULL)
3935   {
3936     return NULL;
3937   }
3938 
3939   /* note - we pass in FALSE for parse_id in SequinFastaToSeqEntryEx
3940    * because we do not want to use Sequin's auto-generated sequence IDs.
3941    * We then parse the sequence ID from the title ourselves using
3942    * ReplaceFakeIDWithIDFromTitle if parse_id is TRUE, or leave the ID
3943    * as blank to force the user to select a real ID later.
3944    */
3945   nextsep = SequinFastaToSeqEntryExEx (fp, TRUE, &errormsg, FALSE, &lastchar, chars_stripped);
3946   while (nextsep != NULL ||
3947          (lastchar != (Char) EOF && lastchar != NULLB && lastchar != (Char) 255
3948           && lastchar != ']'))
3949   {
3950     if (nextsep != NULL)
3951     {
3952       /* replace fake ID with ID from title */
3953       if (IS_Bioseq (nextsep) && nextsep->data.ptrvalue != NULL)
3954       {
3955         bsp = (BioseqPtr) nextsep->data.ptrvalue;
3956         if (parse_id)
3957         {
3958           ReplaceFakeIDWithIDFromTitle ((BioseqPtr) nextsep->data.ptrvalue);
3959         }
3960         else
3961         {
3962           bsp->id = SeqIdFree (bsp->id);
3963         }
3964       }
3965       SeqEntryPack (nextsep);
3966       if (seg_list_last == NULL)
3967       {
3968         seg_list = nextsep;
3969       }
3970       else
3971       {
3972         seg_list_last->next = nextsep;
3973       }
3974       seg_list_last = nextsep;
3975 
3976       ValNodeAddPointer (err_msg_list, 0, errormsg);
3977       errormsg = NULL;
3978     }
3979     nextsep = SequinFastaToSeqEntryExEx (fp, TRUE, &errormsg, FALSE, &lastchar, chars_stripped);
3980   }
3981   nextsep = SegsetFromSeqEntryList (seg_list);
3982   return nextsep;
3983 }
3984 
/* Ensures each entry of iatep that is not flagged in is_seg carries a
 * "moltype" modifier in its title.  A title whose moltype value has no text,
 * or whose value is "dna" (compared without regard to case), has the value
 * set to "Genomic DNA"; any other value is left as it is.
 * Does nothing when iatep is NULL.
 */
static void AddDefaultMoleculeTypeToIDAndTitleEdit (IDAndTitleEditPtr iatep)
{
  Int4    idx;
  CharPtr current;
  Boolean needs_default;

  if (iatep == NULL)
  {
    return;
  }

  for (idx = 0; idx < iatep->num_sequences; idx++)
  {
    /* only entries that are not flagged in is_seg are processed */
    if (iatep->is_seg == NULL || ! iatep->is_seg [idx])
    {
      current = FindValueFromPairInDefline ("moltype", iatep->title_list [idx]);

      needs_default = FALSE;
      if (StringHasNoText (current) || StringICmp (current, "dna") == 0)
      {
        needs_default = TRUE;
      }

      if (needs_default)
      {
        iatep->title_list [idx] = ReplaceValueInOneDefLine (iatep->title_list [idx],
                                                            "moltype",
                                                            "Genomic DNA");
      }
      /* the looked-up value is a separate string that this function releases */
      MemFree (current);
    }
  }
}
4012 
/* Ensures each entry of iatep that is not flagged in is_seg carries a
 * "location" modifier.  When the existing location value has no text, the
 * value "genomic" is written via ReplaceValueInOneDefLineForOrganism, keyed
 * to the first "organism" pair found in the title.
 *
 * The lookup of the existing location is given the start of a second
 * "organism" pair (when there is one) as its limit pointer -- presumably so
 * that only modifiers belonging to the first organism are considered.
 * Does nothing when iatep is NULL.
 */
static void AddDefaultLocationToIDAndTitleEdit (IDAndTitleEditPtr iatep)
{
  Int4    idx;
  CharPtr existing;
  CharPtr org_start, org_end, second_org;

  if (iatep == NULL)
  {
    return;
  }

  for (idx = 0; idx < iatep->num_sequences; idx++)
  {
    if (iatep->is_seg != NULL && iatep->is_seg [idx])
    {
      continue;
    }

    /* locate the first organism pair, then any organism pair after it */
    org_start = FindValuePairInDefLine ("organism", iatep->title_list [idx], &org_end);
    second_org = (org_start == NULL)
                 ? NULL
                 : FindValuePairInDefLine ("organism", org_end + 1, NULL);

    existing = FindValueFromPairInDeflineBeforeCharPtr ("location",
                                                        iatep->title_list [idx],
                                                        second_org);
    if (StringHasNoText (existing))
    {
      iatep->title_list [idx] = ReplaceValueInOneDefLineForOrganism (iatep->title_list [idx],
                                                                     "location",
                                                                     "genomic",
                                                                     org_start);
    }
    MemFree (existing);
  }
}
4051 
/* Ensures each entry of iatep that is not flagged in is_seg carries a
 * "topology" modifier: a title whose topology value has no text gets the
 * value "Linear".  Existing non-blank values are left untouched.
 * Does nothing when iatep is NULL.
 */
static void AddDefaultTopologyToIDAndTitleEdit (IDAndTitleEditPtr iatep)
{
  Int4    idx;
  CharPtr current;

  if (iatep == NULL)
  {
    return;
  }

  for (idx = 0; idx < iatep->num_sequences; idx++)
  {
    /* only entries that are not flagged in is_seg are processed */
    if (iatep->is_seg == NULL || ! iatep->is_seg [idx])
    {
      current = FindValueFromPairInDefline ("topology", iatep->title_list [idx]);
      if (StringHasNoText (current))
      {
        iatep->title_list [idx] = ReplaceValueInOneDefLine (iatep->title_list [idx],
                                                            "topology",
                                                            "Linear");
      }
      MemFree (current);
    }
  }
}
4079 
/* Fills in a "genetic_code" modifier for each entry of iatep that is not
 * flagged in is_seg.
 *
 * The organism value and the location value read from the title (the
 * location falls back to "genomic" when it has no text) are handed to
 * GetGeneticCodeForTaxNameAndLocation:
 *   - a non-negative result is always written to the title;
 *   - a negative result leaves an existing genetic_code value alone and
 *     writes the name for code 1 only when no value is present.
 *
 * Lookups of "location" and "genetic_code" are given the start of a second
 * "organism" pair (when there is one) as their limit pointer.
 * Does nothing when iatep is NULL.
 */
static void AddDefaultGeneticCodesToIDAndTitleEdit (IDAndTitleEditPtr iatep)
{
  CharPtr     taxname, location, gcode_name;
  Int4        gcode, gcode_to_write;
  ValNodePtr  gencodelist;
  Int4        seq_num;
  CharPtr     first_organism, next_org_loc = NULL, org_stop;
  Boolean     write_gcode;

  if (iatep == NULL)
  {
    return;
  }

  gencodelist = GetGeneticCodeValNodeList ();
  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
    {
      continue;
    }

    first_organism = FindValuePairInDefLine ("organism", iatep->title_list [seq_num], &org_stop);
    next_org_loc = NULL;
    if (first_organism != NULL)
    {
      next_org_loc = FindValuePairInDefLine ("organism", org_stop + 1, NULL);
    }

    taxname = FindValueFromPairInDefline ("organism", first_organism);
    location = FindValueFromPairInDeflineBeforeCharPtr ("location",
                                                        iatep->title_list [seq_num],
                                                        next_org_loc);
    if (StringHasNoText (location))
    {
      /* release a blank (but allocated) value before replacing it, the same
       * way the genetic_code value is released below
       */
      location = MemFree (location);
      location = StringSave ("genomic");
    }

    gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
    taxname = MemFree (taxname);
    location = MemFree (location);

    write_gcode = TRUE;
    gcode_to_write = gcode;
    if (gcode < 0)
    {
      /* no code could be determined: keep whatever the title already says,
       * and fall back to code 1 only when it says nothing
       */
      gcode_name = FindValueFromPairInDeflineBeforeCharPtr ("genetic_code",
                                                            iatep->title_list [seq_num],
                                                            next_org_loc);
      write_gcode = StringHasNoText (gcode_name);
      gcode_name = MemFree (gcode_name);
      gcode_to_write = 1;
    }

    if (write_gcode)
    {
      /* NOTE(review): the name string is not freed here, matching the
       * previous behavior; presumably it points into gencodelist -- confirm.
       */
      gcode_name = GeneticCodeStringFromIntAndList (gcode_to_write, gencodelist);
      iatep->title_list [seq_num] = ReplaceValueInOneDefLineForOrganism (iatep->title_list [seq_num],
                                                                         "genetic_code",
                                                                         gcode_name,
                                                                         first_organism);
    }
  }
  ValNodeFreeData (gencodelist);
}
4153 
/* Applies the default molecule type, location, topology and genetic code
 * modifiers (in that order) to the titles of seq_list.  The titles are
 * edited through a temporary IDAndTitleEdit built from the list, which is
 * applied back to the list and then freed.
 */
static void AddDefaultModifierValues (SeqEntryPtr seq_list)
{
  IDAndTitleEditPtr edit = SeqEntryListToIDAndTitleEdit (seq_list);

  AddDefaultMoleculeTypeToIDAndTitleEdit (edit);
  AddDefaultLocationToIDAndTitleEdit (edit);
  AddDefaultTopologyToIDAndTitleEdit (edit);
  AddDefaultGeneticCodesToIDAndTitleEdit (edit);

  ApplyIDAndTitleEditToSeqEntryList (seq_list, edit);
  IDAndTitleEditFree (edit);
}
4166 
4167 
/* Walks the SeqEntry chain starting at sep and passes every title
 * descriptor to SpecialCharFindWithContext, which records its findings in
 * special_list.  For a set entry (choice 2) the members of the set are
 * processed first (recursively), then the set's own descriptors; for a
 * sequence entry (choice 1) only its own descriptors are examined.
 */
static void CleanTitles (SeqEntryPtr sep, ValNodePtr PNTR special_list)
{
  SeqEntryPtr  entry;
  SeqDescrPtr  descr;
  BioseqPtr    bsp;
  BioseqSetPtr bssp;

  for (entry = sep; entry != NULL; entry = entry->next)
  {
    descr = NULL;
    switch (entry->choice)
    {
      case 1:
        bsp = entry->data.ptrvalue;
        if (bsp != NULL)
        {
          descr = bsp->descr;
        }
        break;
      case 2:
        bssp = entry->data.ptrvalue;
        if (bssp != NULL)
        {
          CleanTitles (bssp->seq_set, special_list);
          descr = bssp->descr;
        }
        break;
      default:
        break;
    }

    for (; descr != NULL; descr = descr->next)
    {
      if (descr->choice == Seq_descr_title)
      {
        SpecialCharFindWithContext ((CharPtr PNTR) &(descr->data.ptrvalue), special_list, NULL, NULL);
      }
    }
  }
}
4205 
4206 
4207 extern SeqEntryPtr
ImportSequencesFromFileExEx(FILE * fp,SeqEntryPtr sep_list,Boolean is_na,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped,Boolean allow_char_stripping,Nlm_ImportSeqCallbackProc callback,Pointer callback_data)4208 ImportSequencesFromFileExEx
4209 (FILE           *fp,
4210  SeqEntryPtr     sep_list,
4211  Boolean         is_na,
4212  Boolean         parse_id,
4213  CharPtr         supplied_id_txt,
4214  ValNodePtr PNTR err_msg_list,
4215  BoolPtr         chars_stripped,
4216  Boolean         allow_char_stripping,
4217  Nlm_ImportSeqCallbackProc callback,
4218  Pointer         callback_data)
4219 {
4220   SeqEntryPtr new_sep_list, last, oldscope;
4221   ValNodePtr    special_list = NULL;
4222 
4223   if (chars_stripped != NULL)
4224   {
4225     *chars_stripped = FALSE;
4226   }
4227 
4228   oldscope = SeqEntrySetScope (NULL);
4229 
4230   if (is_na)
4231   {
4232     new_sep_list = ImportNucleotideFASTASequencesFromFileEx (fp, parse_id, supplied_id_txt,
4233                                                    err_msg_list, chars_stripped, allow_char_stripping,
4234                                                    callback, callback_data);
4235   }
4236   else
4237   {
4238     new_sep_list = ImportProteinFASTASequences (fp, parse_id, supplied_id_txt, err_msg_list, chars_stripped);
4239   }
4240 
4241   CleanTitles (new_sep_list, &special_list);
4242   if (!FixSpecialCharactersForStringsInList (special_list, "Definition lines contain special characters.\nThe sequences cannot be imported unless the characters are replaced.", FALSE))
4243   {
4244     new_sep_list = SeqEntryFree (new_sep_list);
4245   }
4246   special_list = FreeContextList (special_list);
4247 
4248   last = sep_list;
4249   while (last != NULL && last->next != NULL)
4250   {
4251     last = last->next;
4252   }
4253   if (last == NULL)
4254   {
4255     sep_list = new_sep_list;
4256   }
4257   else
4258   {
4259     last->next = new_sep_list;
4260   }
4261 
4262   SeqEntrySetScope (oldscope);
4263 
4264   return sep_list;
4265 }
4266 extern SeqEntryPtr
ImportSequencesFromFileEx(FILE * fp,SeqEntryPtr sep_list,Boolean is_na,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped,Boolean allow_char_stripping)4267 ImportSequencesFromFileEx
4268 (FILE           *fp,
4269  SeqEntryPtr     sep_list,
4270  Boolean         is_na,
4271  Boolean         parse_id,
4272  CharPtr         supplied_id_txt,
4273  ValNodePtr PNTR err_msg_list,
4274  BoolPtr         chars_stripped,
4275  Boolean         allow_char_stripping)
4276 {
4277   return ImportSequencesFromFileExEx (fp, sep_list, is_na, parse_id, supplied_id_txt,
4278               err_msg_list, chars_stripped, allow_char_stripping, NULL, NULL);
4279 }
4280 
4281 
4282 extern SeqEntryPtr
ImportSequencesFromFile(FILE * fp,SeqEntryPtr sep_list,Boolean is_na,Boolean parse_id,CharPtr supplied_id_txt,ValNodePtr PNTR err_msg_list,BoolPtr chars_stripped)4283 ImportSequencesFromFile
4284 (FILE           *fp,
4285  SeqEntryPtr     sep_list,
4286  Boolean         is_na,
4287  Boolean         parse_id,
4288  CharPtr         supplied_id_txt,
4289  ValNodePtr PNTR err_msg_list,
4290  BoolPtr         chars_stripped)
4291 {
4292   return ImportSequencesFromFileEx (fp, sep_list, is_na, parse_id, supplied_id_txt, err_msg_list, chars_stripped, FALSE);
4293 }
4294 
4295 
4296 static Boolean CollectIDsAndTitles (SeqEntryPtr new_list, SeqEntryPtr current_list, Boolean is_nuc);
4297 
/* Unlinks and frees every entry of list that
 *   - holds no data pointer,
 *   - is a Bioseq whose length is zero, or
 *   - is a BioseqSet whose seq_set is empty after the same filtering has
 *     been applied to it recursively.
 * Entries of any other kind are kept.
 *
 * pnum_seqs and pnum_zero (either may be NULL) are incremented by the
 * number of entries examined and removed at this level; nested sets add
 * their own counts through the same pointers during the recursion.
 *
 * Returns the head of the filtered chain (NULL if nothing is left or list
 * was NULL; in the latter case the counters are not touched).
 */
static SeqEntryPtr RemoveZeroLengthSequences (SeqEntryPtr list, Int4Ptr pnum_seqs, Int4Ptr pnum_zero)
{
  SeqEntryPtr  kept_tail = NULL, cur, after;
  Int4         examined = 0, removed = 0;
  BioseqPtr    bsp;
  BioseqSetPtr bssp;
  Boolean      is_empty;

  if (list == NULL)
  {
    return NULL;
  }

  for (cur = list; cur != NULL; cur = after)
  {
    examined++;
    after = cur->next;

    /* decide whether this entry has to go */
    is_empty = FALSE;
    if (cur->data.ptrvalue == NULL)
    {
      is_empty = TRUE;
    }
    else if (IS_Bioseq (cur))
    {
      bsp = (BioseqPtr) cur->data.ptrvalue;
      if (bsp->length == 0)
      {
        is_empty = TRUE;
      }
    }
    else if (IS_Bioseq_set (cur))
    {
      bssp = (BioseqSetPtr) cur->data.ptrvalue;
      bssp->seq_set = RemoveZeroLengthSequences (bssp->seq_set, pnum_seqs, pnum_zero);
      if (bssp->seq_set == NULL)
      {
        is_empty = TRUE;
      }
    }

    if (! is_empty)
    {
      kept_tail = cur;
      continue;
    }

    /* unlink the entry from the chain, then free it on its own */
    removed++;
    if (kept_tail == NULL)
    {
      list = after;
    }
    else
    {
      kept_tail->next = after;
    }
    cur->next = NULL;
    SeqEntryFree (cur);
  }

  if (pnum_seqs != NULL)
  {
    *pnum_seqs += examined;
  }
  if (pnum_zero != NULL)
  {
    *pnum_zero += removed;
  }
  return list;
}
4393 
RejectZeroLengthSequences(SeqEntryPtr PNTR new_list)4394 static Boolean RejectZeroLengthSequences (SeqEntryPtr PNTR new_list)
4395 {
4396   SeqEntryPtr next_sep;
4397   Int4        num_zero = 0, num_seq = 0;
4398   Boolean     rval = TRUE;
4399   Boolean     delete_all = FALSE;
4400 
4401   if (new_list == NULL)
4402   {
4403     return FALSE;
4404   }
4405 
4406   *new_list = RemoveZeroLengthSequences (*new_list, &num_seq, &num_zero);
4407 
4408   if (num_zero > 0)
4409   {
4410     ResetSegSetIDLists (*new_list);
4411     if (num_zero == num_seq)
4412     {
4413       Message (MSG_ERROR, "The sequences in your file are empty - you cannot import them.");
4414       delete_all = TRUE;
4415       rval = FALSE;
4416     }
4417     else if (ANS_CANCEL == Message (MSG_OKC, "%d sequences in your file are empty and cannot be imported.  "
4418                                     "Would you like to import the remaining sequences?", num_zero))
4419     {
4420       delete_all = TRUE;
4421       rval = FALSE;
4422     }
4423     if (delete_all)
4424     {
4425 
4426       while ((*new_list) != NULL)
4427       {
4428         next_sep = (*new_list)->next;
4429         (*new_list)->next = NULL;
4430         SeqEntryFree (*new_list);
4431         *new_list = next_sep;
4432       }
4433     }
4434   }
4435   return rval;
4436 }
4437 
RejectExtraSequences(SeqEntryPtr new_list,FastaPagePtr fpp)4438 static Boolean RejectExtraSequences (SeqEntryPtr new_list, FastaPagePtr fpp)
4439 {
4440   SeqEntryPtr sep, next_sep;
4441 
4442   if (new_list == NULL || fpp == NULL)
4443   {
4444     return FALSE;
4445   }
4446   else if (!fpp->single || new_list->next == NULL)
4447   {
4448     return TRUE;
4449   }
4450 
4451   if (fpp->is_na
4452            && fpp->seqPackagePtr != NULL
4453            && *(fpp->seqPackagePtr) != SEQ_PKG_GENOMICCDNA)
4454   {
4455     if (Message (MSG_YN, "You are importing multiple sequences - did you intend to create a batch submission?") == ANS_YES)
4456     {
4457       *(fpp->seqPackagePtr) = SEQ_PKG_GENBANK;
4458       fpp->single = FALSE;
4459       SafeHide (fpp->singleIdGrp);
4460       return TRUE;
4461     }
4462   }
4463   if (Message (MSG_YN, "You cannot import multiple sequences - import the first one and ignore the rest?") == ANS_YES)
4464   {
4465     sep = new_list->next;
4466     new_list->next = NULL;
4467     while (sep != NULL)
4468     {
4469       next_sep = sep->next;
4470       sep->next = NULL;
4471       sep = SeqEntryFree (sep);
4472       sep = next_sep;
4473     }
4474     return TRUE;
4475   }
4476   else
4477   {
4478     return FALSE;
4479   }
4480 }
4481 
/* Button callback: shows the help text stored as the button's extra object
 * in an OK message box.  Does nothing when no text is attached.
 */
static void ShowImportHelp (ButtoN b)
{
  CharPtr help_msg;

  help_msg = (CharPtr) GetObjectExtra (b);
  if (help_msg == NULL)
  {
    return;
  }

  /* pass the text as an argument, not as the format string, so that a '%'
   * in the help text can never be interpreted as a conversion
   */
  Message (MSG_OK, "%s", help_msg);
}
4494 
OkToImport(CharPtr msg,CharPtr help_msg)4495 static Boolean OkToImport (CharPtr msg, CharPtr help_msg)
4496 {
4497   WindoW w;
4498   GrouP  h, c;
4499   PrompT p;
4500   ButtoN b;
4501   ModalAcceptCancelData acd;
4502 
4503   if (msg == NULL)
4504   {
4505     return TRUE;
4506   }
4507   acd.accepted = FALSE;
4508   acd.cancelled = FALSE;
4509 
4510   w = ModalWindow(-20, -13, -10, -10, NULL);
4511   h = HiddenGroup (w, -1, 0, NULL);
4512 
4513   p = StaticPrompt (h, msg, 0, 0, programFont, 'l');
4514   c = HiddenGroup (h, 3, 0, NULL);
4515   b = PushButton (c, "Yes", ModalAcceptButton);
4516   SetObjectExtra (b, &acd, NULL);
4517   b = PushButton (c, "No", ModalCancelButton);
4518   SetObjectExtra (b, &acd, NULL);
4519   if (help_msg != NULL)
4520   {
4521     b = PushButton (c, "Help", ShowImportHelp);
4522     SetObjectExtra (b, help_msg, NULL);
4523   }
4524   AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) c, NULL);
4525 
4526   Show(w);
4527   Select (w);
4528   while (!acd.accepted && ! acd.cancelled)
4529   {
4530     ProcessExternalEvent ();
4531     Update ();
4532   }
4533   ProcessAnEvent ();
4534   Remove (w);
4535   if (acd.accepted)
4536   {
4537     return TRUE;
4538   }
4539   else
4540   {
4541     return FALSE;
4542   }
4543 }
4544 
4545 static CharPtr segset_import_help_str = "Segmented sequence: a collection of non-overlapping, non-contiguous sequences that cover a specified genetic region. A standard example is a set of genomic DNA sequences that encode exons from a gene along with fragments of their flanking introns.";
4546 static CharPtr gapped_import_help_str = "Gapped sequence: a sequence with one or more gaps of known or unknown length.";
4547 
4548 
ImportedSequenceTypeOk(SeqEntryPtr list,Int2 seqPackage)4549 static Boolean ImportedSequenceTypeOk (SeqEntryPtr list, Int2 seqPackage)
4550 {
4551   BioseqPtr bsp;
4552   Boolean   rval = TRUE;
4553 
4554   if (list == NULL || seqPackage != SEQ_PKG_SINGLE)
4555   {
4556     return TRUE;
4557   }
4558   if (list->choice == 1)
4559   {
4560     bsp = (BioseqPtr) list->data.ptrvalue;
4561     if (bsp != NULL && bsp->repr == Seq_repr_delta)
4562     {
4563       SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL);
4564       rval = OkToImport ("You have imported a gapped sequence.  Did you mean to do that?",
4565                          gapped_import_help_str);
4566     }
4567   }
4568   else if (list->choice == 2)
4569   {
4570     SendHelpScrollMessage (helpForm, "Sequence Format Form", NULL);
4571     rval = OkToImport ("You have imported a segmented sequence.  Did you mean to do that?",
4572                        segset_import_help_str);
4573   }
4574   return rval;
4575 }
4576 
4577 
ImportFastaDialog(DialoG d,CharPtr filename)4578 static Boolean ImportFastaDialog (DialoG d, CharPtr filename)
4579 
4580 {
4581   CharPtr       extension;
4582   FILE          *f;
4583   FastaPagePtr  fpp;
4584   ValNodePtr    head;
4585   Char          path [PATH_MAX];
4586   RecT          r;
4587   SeqEntryPtr   sep, new_sep_list, new_sep, test_sep;
4588   Boolean       rval = FALSE;
4589   BioseqPtr     bsp;
4590   CharPtr       supplied_id_txt = NULL;
4591   Boolean       chars_stripped = FALSE;
4592 
4593   path [0] = '\0';
4594   StringNCpy_0 (path, filename, sizeof (path));
4595   fpp = (FastaPagePtr) GetObjectExtra (d);
4596   if (fpp != NULL) {
4597     if (fpp->list != NULL && fpp->single)
4598     {
4599       if (!fpp->is_na
4600           || fpp->seqPackagePtr == NULL
4601           || *fpp->seqPackagePtr == SEQ_PKG_GENOMICCDNA)
4602       {
4603         Message (MSG_ERROR, "Can't import additional sequences!");
4604         return FALSE;
4605       }
4606       else
4607       {
4608         if (Message (MSG_YN, "You are importing multiple sequences - did you intend to create a batch submission?") == ANS_NO)
4609         {
4610           Message (MSG_ERROR, "Can't import additional sequences!");
4611           return FALSE;
4612         }
4613         else
4614         {
4615           *(fpp->seqPackagePtr) = SEQ_PKG_GENBANK;
4616           fpp->single = FALSE;
4617           SafeHide (fpp->singleIdGrp);
4618         }
4619       }
4620     }
4621     extension = NULL;
4622     if (fpp->is_mrna) {
4623       extension = GetAppProperty ("FastaNucExtension");
4624     } else if (fpp->is_na) {
4625       extension = GetAppProperty ("FastaNucExtension");
4626     } else {
4627       extension = GetAppProperty ("FastaProtExtension");
4628     }
4629     if (path [0] != '\0' || GetInputFileName (path, sizeof (path), extension, "TEXT")) {
4630       WatchCursor ();
4631       StringCpy (fpp->path, path);
4632       ObjectRect (fpp->doc, &r);
4633       InsetRect (&r, 4, 4);
4634       faColFmt.pixWidth = r.right - r.left;
4635       /*
4636       ResetFastaPage (fpp);
4637       */
4638       Reset (fpp->doc);
4639       Update ();
4640       sep = fpp->list;
4641       head = fpp->errmsgs;
4642       f = FileOpen (fpp->path, "r");
4643       if (f == NULL)
4644       {
4645         Message (MSG_ERROR, "Unable to open %s", fpp->path);
4646         fpp->path[0] = 0;
4647       }
4648       else
4649       {
4650         if (fpp->singleSeqID != NULL)
4651         {
4652           supplied_id_txt = SaveStringFromText (fpp->singleSeqID);
4653         }
4654         new_sep_list = ImportSequencesFromFile (f, NULL, fpp->is_na,
4655                                                 fpp->parseSeqId,
4656                                                 supplied_id_txt,
4657                                                 &head, &chars_stripped);
4658         if (chars_stripped && new_sep_list != NULL)
4659         {
4660           if (ANS_CANCEL == Message (MSG_OKC, "Illegal characters will be stripped from your sequence data.  Do you want to continue?"))
4661           {
4662             new_sep_list = SeqEntryFree (new_sep_list);
4663             FileClose (f);
4664             fpp->path [0] = 0;
4665             ArrowCursor ();
4666             Update ();
4667             return FALSE;
4668           }
4669         }
4670         supplied_id_txt = MemFree (supplied_id_txt);
4671         FileClose (f);
4672 
4673         if (new_sep_list != NULL
4674             && new_sep_list->next == NULL
4675             && fpp->single
4676             && fpp->list == NULL
4677             && fpp->is_na
4678             && new_sep_list->choice == 1
4679             && new_sep_list->data.ptrvalue != NULL)
4680         {
4681           bsp = (BioseqPtr) new_sep_list->data.ptrvalue;
4682 
4683           /* assign a fake ID if there is only one sequence being imported,
4684            * the package type is single, and there are no other sequences
4685            * from previous imports.
4686            */
4687 
4688           if (bsp->id == NULL)
4689           {
4690             bsp->id = MakeSeqID ("nuc_1");
4691           }
4692         }
4693 
4694         if (new_sep_list == NULL)
4695         {
4696           Message (MSG_ERROR, "Unable to read sequences from %s", fpp->path);
4697           fpp->path [0] = 0;
4698         }
4699         else if (! RejectZeroLengthSequences (&new_sep_list))
4700         {
4701           fpp->path [0] = 0;
4702         }
4703         else if (! RejectExtraSequences (new_sep_list, fpp))
4704         {
4705           /* if unsuccessful, delete new list */
4706           new_sep = new_sep_list;
4707           while (new_sep != NULL)
4708           {
4709             test_sep = new_sep->next;
4710             SeqEntryFree (new_sep);
4711             new_sep = test_sep;
4712           }
4713           fpp->path [0] = 0;
4714         }
4715         else if (fpp->seqPackagePtr != NULL
4716                  && ! ImportedSequenceTypeOk (new_sep_list, *(fpp->seqPackagePtr)))
4717         {
4718           /* if unsuccessful, delete new list */
4719           new_sep = new_sep_list;
4720           while (new_sep != NULL)
4721           {
4722             test_sep = new_sep->next;
4723             SeqEntryFree (new_sep);
4724             new_sep = test_sep;
4725           }
4726           fpp->path [0] = 0;
4727         }
4728         else if (CollectIDsAndTitles (new_sep_list, fpp->list, (fpp->is_na && ! fpp->is_mrna)))
4729         {
4730           if (fpp->is_na)
4731           {
4732             /* add default molecule type, topology, location, and genetic codes */
4733             AddDefaultModifierValues (new_sep_list);
4734             TrimAmbiguousBases (&new_sep_list);
4735           }
4736           if (BadSeqIdLengths (new_sep_list))
4737           {
4738             new_sep = new_sep_list;
4739             while (new_sep != NULL)
4740             {
4741               test_sep = new_sep->next;
4742               SeqEntryFree (new_sep);
4743               new_sep = test_sep;
4744             }
4745             fpp->path [0] = 0;
4746           } else {
4747             /* if successful, link old and new lists */
4748             ValNodeLink (&(fpp->list), new_sep_list);
4749             rval = TRUE;
4750           }
4751         }
4752         else
4753         {
4754           /* if unsuccessful, delete new list */
4755           new_sep = new_sep_list;
4756           while (new_sep != NULL)
4757           {
4758             test_sep = new_sep->next;
4759             SeqEntryFree (new_sep);
4760             new_sep = test_sep;
4761           }
4762           fpp->path [0] = 0;
4763         }
4764       }
4765 
4766       if (fpp->list == NULL)
4767       {
4768         SafeHide (fpp->have_seq_instr_grp);
4769         Reset (fpp->doc);
4770         SafeShow (fpp->instructions);
4771         Update ();
4772         SetTitle (fpp->import_btn, "Import Nucleotide FASTA");
4773         Enable (fpp->import_btn);
4774         Disable (fpp->clear_btn);
4775       }
4776       else
4777       {
4778         SafeHide (fpp->instructions);
4779         Update ();
4780         if (! fpp->is_na || fpp->single
4781             || fpp->seqPackagePtr == NULL
4782             || *fpp->seqPackagePtr == SEQ_PKG_GENOMICCDNA)
4783         {
4784           Disable (fpp->import_btn);
4785         }
4786         else
4787         {
4788           Enable (fpp->import_btn);
4789           SetTitle (fpp->import_btn, "Import Additional Nucleotide FASTA");
4790         }
4791         Enable (fpp->clear_btn);
4792         FormatFastaDoc (fpp);
4793         SafeShow (fpp->have_seq_instr_grp);
4794       }
4795       ArrowCursor ();
4796       Update ();
4797       return rval;
4798     }
4799   }
4800   return FALSE;
4801 }
4802 
4803 #define EXPORT_PAGE_WIDTH 80
4804 
ExportSeqIdAndTitle(SeqIdPtr sip,CharPtr title,FILE * fp)4805 static void ExportSeqIdAndTitle (SeqIdPtr sip, CharPtr title, FILE *fp)
4806 {
4807   CharPtr id_str = NULL;
4808 
4809   if (fp == NULL)
4810   {
4811     return;
4812   }
4813 
4814   if (sip == NULL)
4815   {
4816     id_str = StringSave ("unknown_id");
4817   }
4818   else
4819   {
4820     id_str = SeqIdWholeLabel (sip, PRINTID_REPORT);
4821   }
4822 
4823   if (StringCSpn (id_str, " \t") == StringLen (id_str))
4824   {
4825     fprintf (fp, ">%s %s\n", id_str, title == NULL ? "" : title);
4826   }
4827   else
4828   {
4829     fprintf (fp, ">'%s' %s\n", id_str, title == NULL ? "" : title);
4830   }
4831   id_str = MemFree (id_str);
4832 }
4833 
/* Writes the residues from position from up to (not including) position to,
 * read through spp, to fp in lines of at most EXPORT_PAGE_WIDTH characters.
 *
 * Nothing is written when spp or fp is NULL, from is negative, or the
 * range is empty.  Output stops early if a read yields no characters.
 */
static void ExportSeqPort (Int4 from, Int4 to, SeqPortPtr spp, FILE *fp)
{
  Char line [EXPORT_PAGE_WIDTH + 1];
  Int4 pos, chunk_end, num_read;

  if (spp == NULL || fp == NULL || from < 0 || to <= from)
  {
    return;
  }

  for (pos = from; pos < to; pos += num_read)
  {
    chunk_end = MIN (pos + EXPORT_PAGE_WIDTH, to);
    num_read = ReadBufferFromSep (spp, line, pos, chunk_end, 0);
    if (num_read == 0)
    {
      break;
    }
    fprintf (fp, "%s\n", line);
  }
}
4855 
/* Writes one raw Bioseq to fp in FASTA form: a definition line followed by
 * the sequence (read as IUPAC nucleotide codes on the plus strand) and a
 * blank line.
 *
 * bsp           sequence to export; ignored unless it is a raw Bioseq
 * title_master  optional text placed in front of the sequence's own title
 *               (separated by one space); when only one of the two titles
 *               has text, that one is used alone
 * fp            output file; nothing happens when it is NULL
 */
static void ExportOneRawSequence (BioseqPtr bsp, CharPtr title_master, FILE *fp)
{
  SeqDescrPtr sdp;
  SeqPortPtr  spp;
  CharPtr     title = NULL;
  CharPtr     combined_title = NULL;
  Boolean     free_combined = FALSE;

  if (bsp == NULL || fp == NULL || bsp->repr != Seq_repr_raw)
  {
    return;
  }

  /* the sequence's own title is its first title descriptor, if any */
  sdp = bsp->descr;
  while (sdp != NULL && sdp->choice != Seq_descr_title)
  {
    sdp = sdp->next;
  }
  if (sdp != NULL)
  {
    title = sdp->data.ptrvalue;
  }

  if (StringHasNoText (title_master))
  {
    combined_title = title;
  }
  else if (StringHasNoText (title))
  {
    combined_title = title_master;
  }
  else
  {
    /* room for both titles, the separating space and the terminator */
    combined_title = (CharPtr) MemNew ((StringLen (title_master) + StringLen (title) + 2) * sizeof (Char));
    if (combined_title != NULL)
    {
      StringCpy (combined_title, title_master);
      StringCat (combined_title, " ");
      StringCat (combined_title, title);
      free_combined = TRUE;
    }
  }

  ExportSeqIdAndTitle (bsp->id, combined_title, fp);
  if (free_combined)
  {
    /* only the string built here is owned by this function */
    combined_title = MemFree (combined_title);
  }

  spp = SeqPortNew (bsp, 0, bsp->length-1, Seq_strand_plus, Seq_code_iupacna);

  ExportSeqPort (0, bsp->length, spp, fp);

  SeqPortFree (spp);
  fprintf (fp, "\n");
}
4915 
/* Writes a segmented Bioseq to fp as a bracketed group: a "[" line, each
 * segment exported with ExportOneRawSequence, then a "]" line and a blank
 * line.
 *
 * Segments are taken from the location chain in bsp->seq_ext and looked up
 * with BioseqFind.  The title of the segmented Bioseq itself is handed to
 * the export of the first location only; later segments get no master
 * title.  Nothing is written unless bsp is a segmented Bioseq and fp is
 * not NULL.
 */
static void ExportOneSegmentedBioseq (BioseqPtr bsp, FILE *fp)
{
  SeqLocPtr   seg_loc;
  BioseqPtr   seg_bsp;
  SeqDescrPtr descr;
  CharPtr     master_title = NULL;

  if (bsp == NULL || fp == NULL || bsp->repr != Seq_repr_seg)
  {
    return;
  }

  fprintf (fp, "[\n");

  /* find the first title descriptor of the segmented Bioseq */
  for (descr = bsp->descr; descr != NULL; descr = descr->next)
  {
    if (descr->choice == Seq_descr_title)
    {
      master_title = descr->data.ptrvalue;
      break;
    }
  }

  for (seg_loc = (SeqLocPtr) bsp->seq_ext; seg_loc != NULL; seg_loc = seg_loc->next)
  {
    seg_bsp = BioseqFind (SeqLocId (seg_loc));
    ExportOneRawSequence (seg_bsp, master_title, fp);
    /* the master title is offered once only */
    master_title = NULL;
  }
  fprintf (fp, "]\n\n");
}
4950 
ExportOneDeltaBioseq(BioseqPtr bsp,FILE * fp)4951 static Boolean ExportOneDeltaBioseq (BioseqPtr bsp, FILE *fp)
4952 {
4953   SeqDescrPtr sdp;
4954   CharPtr     title = NULL;
4955   DeltaSeqPtr dsp;
4956   SeqLitPtr   slip;
4957   SeqPortPtr  spp;
4958   Char        buffer [EXPORT_PAGE_WIDTH + 1];
4959   Int4        seq_offset;
4960 
4961   if (bsp == NULL || fp == NULL || bsp->repr != Seq_repr_delta
4962       || bsp->seq_ext_type != 4 || bsp->seq_ext == NULL)
4963   {
4964     return FALSE;
4965   }
4966 
4967   dsp = (DeltaSeqPtr) bsp->seq_ext;
4968   while (dsp != NULL)
4969   {
4970     if (dsp->data.ptrvalue == NULL || dsp->choice != 2)
4971     {
4972       Message (MSG_ERROR, "Can't export badly formed delta sequence!");
4973       return FALSE;
4974     }
4975     dsp = dsp->next;
4976   }
4977 
4978   sdp = bsp->descr;
4979   while (sdp != NULL && sdp->choice != Seq_descr_title)
4980   {
4981     sdp = sdp->next;
4982   }
4983   if (sdp != NULL)
4984   {
4985     title = sdp->data.ptrvalue;
4986   }
4987 
4988   ExportSeqIdAndTitle (bsp->id, title, fp);
4989 
4990   buffer [EXPORT_PAGE_WIDTH] = 0;
4991 
4992   spp = SeqPortNew (bsp, 0, bsp->length-1, Seq_strand_plus, Seq_code_iupacna);
4993 
4994   seq_offset = 0;
4995   dsp = (DeltaSeqPtr) bsp->seq_ext;
4996   while (dsp != NULL)
4997   {
4998 		slip = (SeqLitPtr) (dsp->data.ptrvalue);
4999     if (IsDeltaSeqGap(dsp))
5000     {
5001       if (IsDeltaSeqUnknownGap (dsp))
5002       {
5003         fprintf (fp, ">?unk100\n");
5004       }
5005       else
5006       {
5007         fprintf (fp, ">?%d\n", slip->length);
5008       }
5009     }
5010     else
5011     {
5012       ExportSeqPort (seq_offset, seq_offset + slip->length, spp, fp);
5013 		}
5014     seq_offset += slip->length;
5015     dsp = dsp->next;
5016   }
5017   fprintf (fp, "\n");
5018   return TRUE;
5019 }
5020 
ExportFASTASeqEntryList(SeqEntryPtr sep,FILE * fp)5021 NLM_EXTERN void ExportFASTASeqEntryList (SeqEntryPtr sep, FILE *fp)
5022 {
5023   BioseqPtr    bsp;
5024   BioseqSetPtr bssp;
5025 
5026   if (sep == NULL || sep->data.ptrvalue == NULL || fp == NULL)
5027   {
5028     return;
5029   }
5030 
5031   if (IS_Bioseq (sep))
5032   {
5033     bsp = (BioseqPtr) sep->data.ptrvalue;
5034     if (ISA_na (bsp->mol))
5035     {
5036       if (bsp->repr == Seq_repr_raw)
5037       {
5038         if (SeqMgrGetParentOfPart (bsp, NULL) == NULL)
5039         {
5040           ExportOneRawSequence (bsp, NULL, fp);
5041         }
5042       }
5043       else if (bsp->repr == Seq_repr_seg)
5044       {
5045         ExportOneSegmentedBioseq (bsp, fp);
5046       }
5047       else if (bsp->repr == Seq_repr_delta)
5048       {
5049         ExportOneDeltaBioseq (bsp, fp);
5050       }
5051     }
5052   }
5053   else if (IS_Bioseq_set (sep))
5054   {
5055     bssp = (BioseqSetPtr) sep->data.ptrvalue;
5056     /* we don't export the parts set because we export them
5057      * when we do the master segment
5058      */
5059     if (bssp->_class != BioseqseqSet_class_parts)
5060     {
5061       ExportFASTASeqEntryList (bssp->seq_set, fp);
5062     }
5063   }
5064   ExportFASTASeqEntryList (sep->next, fp);
5065 }
5066 
ExportNucleotideFASTADialog(DialoG d,CharPtr filename)5067 static Boolean ExportNucleotideFASTADialog (DialoG d, CharPtr filename)
5068 {
5069   CharPtr       extension;
5070   FILE          *f;
5071   FastaPagePtr  fpp;
5072   Char          path [PATH_MAX];
5073   Boolean       rval = FALSE;
5074 
5075   fpp = (FastaPagePtr) GetObjectExtra (d);
5076   if (fpp == NULL) {
5077     return FALSE;
5078   }
5079 
5080   path [0] = '\0';
5081   StringNCpy_0 (path, filename, sizeof (path));
5082 
5083   extension = NULL;
5084   if (fpp->is_mrna) {
5085     extension = GetAppProperty ("FastaNucExtension");
5086   } else if (fpp->is_na) {
5087     extension = GetAppProperty ("FastaNucExtension");
5088   } else {
5089     extension = GetAppProperty ("FastaProtExtension");
5090   }
5091   if (path [0] != '\0' || GetOutputFileName (path, sizeof (path), extension)) {
5092     f = FileOpen (path, "w");
5093     if (f == NULL)
5094     {
5095       Message (MSG_ERROR, "Unable to open %s", path);
5096     }
5097     else
5098     {
5099       WatchCursor ();
5100       ExportFASTASeqEntryList (fpp->list, f);
5101       FileClose (f);
5102 
5103       ArrowCursor ();
5104       Update ();
5105       rval = TRUE;
5106     }
5107   }
5108   return rval;
5109 }
5110 
/* Cleanup callback registered with SetObjectExtra for a FASTA page:
 * resets the page contents (when there is a page) and then frees the
 * page structure itself.  The g argument is not used.
 */
static void CleanupFastaDialog (GraphiC g, VoidPtr data)

{
  if (data != NULL) {
    ResetFastaPage ((FastaPagePtr) data);
  }
  MemFree (data);
}
5122 
/* Instruction text describing the FASTA definition line format expected
 * for nucleotide import ("Import Nucleotide FASTA" / "Add/Modify
 * Sequences").  NOTE(review): presumably passed as the `text` argument of
 * CreateFastaDialog - confirm against the callers.
 */
static CharPtr  fastaNucMsg = "\
\nClick on 'Import Nucleotide FASTA' to read a formatted FASTA file \
or 'Add/Modify Sequences' to create the file here.  The FASTA definition \
line must be in the following form:\n\n\
>SeqID [organism=scientific name]\n\n\
where the [ and ] brackets are actually in the text.\n\
Properly formatted modifiers and a title can also be included in the FASTA definition line.";


/* Instruction text for importing or creating genomic sequences
 * ("Import Genomic FASTA" / "Add/Modify Sequences").
 */
static CharPtr  fastaGenMsg = "\
\nPlease enter information about the genomic \
sequence in the spaces above.  Then click on either \
'Add/Modify Sequences' to create your sequences with the editor or \
'Import Genomic FASTA' to read a previously generated FASTA file that \
contains the sequence (which can be in segments).  The \
FASTA definition lines may be of the following form:\n\n\
>ID [organism=scientific name] [strain=name] [clone=name] title\n\n\
where the [ and ] brackets are actually in the text.";

/* Instruction text for importing transcript sequences
 * ("Import Transcript FASTA").
 */
static CharPtr  fastaMrnaMsg  = "\
\nPlease enter information about the transcript \
sequences in the spaces above.  Then click on \
'Import Transcript FASTA' to read a FASTA file that \
contains the sequence (which can be in segments).  The \
FASTA definition lines may be of the following form:\n\n\
>ID [gene=symbol] [mrna=name] title\n\n\
where the [ and ] brackets are actually in the text.";

/* Instruction text for importing protein sequences
 * ("Import Protein FASTA").
 */
static CharPtr  fastaProtMsg = "\
\nPlease enter information about the protein \
sequences in the spaces above.  Then click on \
'Import Protein FASTA' to read a FASTA file that \
contains the sequences.  The FASTA definition lines should \
be of the following form:\n\n\
>ID [gene=symbol] [protein=name] title\n\n\
where the [ and ] brackets are actually in the text.";
5159 
GetFastaSettingName(FastaPagePtr fpp)5160 static CharPtr GetFastaSettingName (FastaPagePtr fpp)
5161 {
5162   if (fpp == NULL)
5163   {
5164   	return NULL;
5165   }
5166   else if (fpp->is_mrna)
5167   {
5168     return "PARSEMRNASEQID";
5169   }
5170   else if (fpp->is_na)
5171   {
5172     return "PARSENUCSEQID";
5173   }
5174   else
5175   {
5176     return "PARSEPROTSEQID";
5177   }
5178 }
5179 
/* Check-box callback: records the new "parse sequence ID" state on the
 * FASTA page, saves it under the page's preference key, and shows the
 * single-ID entry group only when IDs are not parsed and the page is in
 * single-sequence mode (the group is hidden in every other case).
 */
static void ChangeIDParse (ButtoN b)
{
  FastaPagePtr  page;
  CharPtr       pref_key;

  page = (FastaPagePtr) GetObjectExtra (b);
  if (page == NULL) {
    return;
  }

  page->parseSeqId = GetStatus (b);
  pref_key = GetFastaSettingName (page);

  SetAppParam ("SEQUINCUSTOM", "PREFERENCES", pref_key,
               page->parseSeqId ? "TRUE" : "FALSE");

  if (! page->parseSeqId && page->single) {
    SafeShow (page->singleIdGrp);
  } else {
    SafeHide (page->singleIdGrp);
  }
}
5207 
CreateFastaDialog(GrouP h,CharPtr title,Boolean is_na,Boolean is_mrna,CharPtr text,Boolean single,Int2Ptr seqPackagePtr)5208 extern DialoG CreateFastaDialog (GrouP h, CharPtr title,
5209                                  Boolean is_na, Boolean is_mrna, CharPtr text,
5210                                  Boolean single, Int2Ptr seqPackagePtr)
5211 
5212 {
5213   FastaPagePtr  fpp;
5214   GrouP         g;
5215   GrouP         m;
5216   GrouP         p;
5217   GrouP         s;
5218   PrompT        pr;
5219   CharPtr       setting_name;
5220   ButtoN        prs = NULL;
5221   Char          str [32];
5222   Boolean       parseSeqId;
5223 #ifdef WIN_MAC
5224   Int2          wid = 25;
5225 #else
5226   Int2          wid = 33;
5227 #endif
5228 
5229   p = HiddenGroup (h, 1, 0, NULL);
5230   SetGroupSpacing (p, 10, 10);
5231 
5232   fpp = (FastaPagePtr) MemNew (sizeof (FastaPage));
5233   if (fpp != NULL) {
5234 
5235     SetObjectExtra (p, fpp, CleanupFastaDialog);
5236     fpp->dialog = (DialoG) p;
5237     fpp->todialog = NULL;
5238     fpp->fromdialog = NULL;
5239     fpp->importdialog = ImportFastaDialog;
5240     if (is_na)
5241     {
5242       fpp->exportdialog = ExportNucleotideFASTADialog;
5243     }
5244     else
5245     {
5246       fpp->exportdialog = NULL;
5247     }
5248 
5249     fpp->seqPackagePtr = seqPackagePtr;
5250     if (title != NULL && title [0] != '\0') {
5251       s = NormalGroup (p, 0, -2, title, systemFont, NULL);
5252     } else {
5253       s = HiddenGroup (p, 0, -2, NULL);
5254     }
5255     m = HiddenGroup (s, -1, 0, NULL);
5256 
5257     fpp->path [0] = '\0';
5258     fpp->is_na = is_na;
5259     fpp->is_mrna = is_mrna;
5260     fpp->single = single;
5261 
5262     setting_name = GetFastaSettingName (fpp);
5263 
5264     if (GetAppParam ("SEQUINCUSTOM", "SETTINGS", "ALLOWNOSEQID", NULL, str, sizeof (str))
5265         && StringICmp (str, "TRUE") == 0)
5266     {
5267       prs = CheckBox (m, "Fasta definition line starts with sequence ID", ChangeIDParse);
5268       SetObjectExtra (prs, fpp, NULL);
5269     }
5270     parseSeqId = FALSE;
5271     if (GetAppParam ("SEQUINCUSTOM", "PREFERENCES", setting_name, NULL, str, sizeof (str))) {
5272       if (StringICmp (str, "TRUE") == 0) {
5273         parseSeqId = TRUE;
5274       }
5275     }
5276     else
5277     {
5278       parseSeqId = TRUE;
5279     }
5280     SetStatus (prs, parseSeqId);
5281 
5282     fpp->parseSeqId = parseSeqId;
5283     if (fpp->single) {
5284       fpp->singleIdGrp = HiddenGroup (m, 2, 0, NULL);
5285       StaticPrompt (fpp->singleIdGrp, "Enter unique identifier for this sequence", 0, dialogTextHeight, programFont, 'l');
5286       fpp->singleSeqID = DialogText (fpp->singleIdGrp, "", 6, NULL);
5287       if (parseSeqId) {
5288         Hide (fpp->singleIdGrp);
5289       }
5290     }
5291 
5292     g = HiddenGroup (m, 0, 0, NULL);
5293     fpp->instructions = MultiLinePrompt (g, text, 27 * stdCharWidth, programFont);
5294     fpp->have_seq_instr_grp = HiddenGroup (g, -1, 0, NULL);
5295     SetGroupSpacing (fpp->have_seq_instr_grp, 10, 10);
5296     fpp->doc = DocumentPanel (fpp->have_seq_instr_grp, stdCharWidth * wid, stdLineHeight * 12);
5297     SetDocAutoAdjust (fpp->doc, FALSE);
5298     pr = StaticPrompt (fpp->have_seq_instr_grp, "Choose Clear from the Edit menu to clear these sequences", 0, dialogTextHeight, systemFont, 'c');
5299     AlignObjects (ALIGN_CENTER, (HANDLE) fpp->doc, (HANDLE) pr, NULL);
5300     Hide (fpp->have_seq_instr_grp);
5301     AlignObjects (ALIGN_CENTER, (HANDLE) fpp->instructions,
5302                   (HANDLE) fpp->have_seq_instr_grp, NULL);
5303 
5304     AlignObjects (ALIGN_CENTER, (HANDLE) g,
5305                                 (HANDLE) prs,
5306                                 (HANDLE) fpp->singleIdGrp,
5307                                 NULL);
5308   }
5309 
5310   return (DialoG) p;
5311 }
5312 
/* State attached to the alignment ("Phylip") import page created by
 * CreatePhylipDialog and released by CleanupPhylipDialog.
 */
typedef struct phylippage {
  DIALOG_MESSAGE_BLOCK
  /* format code given to CreatePhylipDialog */
  Uint1        format;
  /* path of the file chosen in the most recent import */
  Char         path [PATH_MAX];
  /* entry read from the alignment file; freed by ResetPhylipPage */
  SeqEntryPtr  sep;
  /* message list; FormatPhylipDoc consumes one node per reported sequence */
  ValNodePtr   errmsgs;
  /* document panel showing either the instructions or the import summary */
  DoC          doc;
  /* NOTE(review): not assigned in the code visible here */
  GrouP        instructions;
  /* file extension handed to GetInputFileName */
  Char         extension [10];
  /* value of the `type` argument given to CreatePhylipDialog */
  Int4         type;
  /* reader settings from GetDefaultSequenceInfo, passed to
   * SeqEntryFromAlignmentFile and freed with SequenceInfoFree
   */
  TSequenceInfoPtr aln_settings;

} PhylipPage, PNTR PhylipPagePtr;


/* size, in characters, of the scratch buffers allocated by FormatPhylipDoc */
#define PhylipFormatBufLen 1000
5329 
FormatPhylipDoc(PhylipPagePtr ppp)5330 static void FormatPhylipDoc (PhylipPagePtr ppp)
5331 
5332 {
5333   Nlm_QualNameAssocPtr ap;
5334   BioseqPtr          bsp;
5335   BioseqSetPtr       bssp;
5336   CharPtr            label;
5337   Int4               len;
5338   CharPtr            measure;
5339   SeqEntryPtr        nsep;
5340   Int2               num;
5341   CharPtr            plural;
5342   SeqIdPtr           sip;
5343   SeqEntryPtr        sep;
5344   CharPtr            str;
5345   CharPtr            title;
5346   CharPtr            ttl;
5347   CharPtr            tmp;
5348   CharPtr            valstr;
5349   ValNodePtr         vnp;
5350 
5351   if (ppp != NULL) {
5352     str = MemNew (sizeof (char) * PhylipFormatBufLen);
5353     tmp = MemNew (sizeof (char) * PhylipFormatBufLen);
5354     if (str == NULL || tmp == NULL) return;
5355     num = 0;
5356     len = 0;
5357     sep = ppp->sep;
5358     if (sep != NULL && IS_Bioseq_set (sep)) {
5359       bssp = (BioseqSetPtr) sep->data.ptrvalue;
5360       if (bssp != NULL && (bssp->_class == 7 ||
5361                            (IsPopPhyEtcSet (bssp->_class)))) {
5362         for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
5363           num++;
5364           if (IS_Bioseq (sep)) {
5365             bsp = (BioseqPtr) sep->data.ptrvalue;
5366             if (bsp != NULL) {
5367               len += bsp->length;
5368             }
5369           } else if (IS_Bioseq_set (sep)) {
5370             nsep = FindNucSeqEntry (sep);
5371             if (nsep != NULL && IS_Bioseq (nsep)) {
5372               bsp = (BioseqPtr) nsep->data.ptrvalue;
5373               if (bsp != NULL) {
5374                 len += bsp->length;
5375               }
5376             }
5377           }
5378         }
5379       }
5380     }
5381     if (num > 1) {
5382       plural = "s";
5383     } else {
5384       plural = "";
5385     }
5386     label = "Sequence";
5387     measure = "nucleotides";
5388     sprintf (str, "%d nucleotide sequence%s, total length %ld %s\n",
5389              (int) num, plural, (long) len, measure);
5390     AppendText (ppp->doc, str, &faParFmt, &faColFmt, programFont);
5391     vnp = ppp->errmsgs;
5392     num = 0;
5393     sep = ppp->sep;
5394     if (sep != NULL && IS_Bioseq_set (sep)) {
5395       bssp = (BioseqSetPtr) sep->data.ptrvalue;
5396       if (bssp != NULL && (bssp->_class == 7 ||
5397                            (IsPopPhyEtcSet (bssp->_class)))) {
5398         for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
5399           nsep = NULL;
5400           num++;
5401           len = 0;
5402           sip = NULL;
5403           tmp [0] = '\0';
5404           if (IS_Bioseq (sep)) {
5405             bsp = (BioseqPtr) sep->data.ptrvalue;
5406             if (bsp != NULL) {
5407               len = bsp->length;
5408               sip = SeqIdFindWorst (bsp->id);
5409               SeqIdWrite (sip, tmp, PRINTID_REPORT, FastaFormatBufLen);
5410             }
5411           } else if (IS_Bioseq_set (sep)) {
5412             nsep = FindNucSeqEntry (sep);
5413             if (nsep != NULL && IS_Bioseq (nsep)) {
5414               bsp = (BioseqPtr) nsep->data.ptrvalue;
5415               if (bsp != NULL) {
5416                 len = bsp->length;
5417                 sip = SeqIdFindWorst (bsp->id);
5418                 SeqIdWrite (sip, tmp, PRINTID_REPORT, FastaFormatBufLen);
5419               }
5420             }
5421           }
5422           sprintf (str, "\n%s %d\nLength: %ld %s\nSequence ID: %s\n", label,
5423                    (int) num, (long) len, measure, tmp);
5424           ttl = NULL;
5425           SeqEntryExplore (nsep, (Pointer) (&ttl), FindFirstTitle);
5426           title = StringSaveNoNull (ttl);
5427           if (title != NULL) {
5428             valstr = FindValueFromPairInDefline ("organism", title);
5429             if (!StringHasNoText (valstr)) {
5430               AddReportLine (str, "Organism", valstr);
5431             }
5432             valstr = MemFree (valstr);
5433             RemoveValueFromDefline ("organism", title);
5434 
5435             valstr = FindValueFromPairInDefline ("lineage", title);
5436             if (!StringHasNoText (valstr)) {
5437               AddReportLine (str, "Lineage", valstr);
5438             }
5439             valstr = MemFree (valstr);
5440             RemoveValueFromDefline ("lineage", title);
5441 
5442             for (ap = current_orgmod_subtype_alist; ap->name != NULL; ap++) {
5443               if (IsNonTextModifier (ap->name))
5444               {
5445                 if (FindValuePairInDefLine (ap->name, title, NULL) != NULL)
5446                 {
5447                   AddReportLine (str, ap->name, "TRUE");
5448                   RemoveValueFromDefline (ap->name, title);
5449                 }
5450               }
5451               else
5452               {
5453                 valstr = FindValueFromPairInDefline (ap->name, title);
5454                 if (!StringHasNoText (valstr)) {
5455                   AddReportLine (str, ap->name, title);
5456                 }
5457                 valstr = MemFree (valstr);
5458                 RemoveValueFromDefline (ap->name, title);
5459               }
5460             }
5461             for (ap = current_subsource_subtype_alist; ap->name != NULL; ap++) {
5462               if (IsNonTextModifier (ap->name))
5463               {
5464                 if (FindValuePairInDefLine (ap->name, title, NULL) != NULL)
5465                 {
5466                   AddReportLine (str, ap->name, "TRUE");
5467                   RemoveValueFromDefline (ap->name, title);
5468                 }
5469               }
5470               else
5471               {
5472                 valstr = FindValueFromPairInDefline (ap->name, title);
5473                 if (!StringHasNoText (valstr)) {
5474                   AddReportLine (str, ap->name, title);
5475                 }
5476                 valstr = MemFree (valstr);
5477                 RemoveValueFromDefline (ap->name, title);
5478               }
5479             }
5480 
5481             valstr = FindValueFromPairInDefline ("note-orgmod", title);
5482             if (!StringHasNoText (valstr)) {
5483               AddReportLine (str, "Note", valstr);
5484             }
5485             valstr = MemFree (valstr);
5486             RemoveValueFromDefline ("note-orgmod", title);
5487 
5488             valstr = FindValueFromPairInDefline ("note-subsrc", title);
5489             if (!StringHasNoText (valstr)) {
5490               AddReportLine (str, "Note", valstr);
5491             }
5492             valstr = MemFree (valstr);
5493             RemoveValueFromDefline ("note-subsrc", title);
5494 
5495             valstr = FindValueFromPairInDefline ("molecule", title);
5496             if (!StringHasNoText (valstr)) {
5497               AddReportLine (str, "Molecule", valstr);
5498             }
5499             valstr = MemFree (valstr);
5500             RemoveValueFromDefline ("molecule", title);
5501 
5502             valstr = FindValueFromPairInDefline ("moltype", title);
5503             if (!StringHasNoText (valstr)) {
5504               AddReportLine (str, "MolType", valstr);
5505             }
5506             valstr = MemFree (valstr);
5507             RemoveValueFromDefline ("moltype", title);
5508 
5509             valstr = FindValueFromPairInDefline ("location", title);
5510             if (!StringHasNoText (valstr)) {
5511               AddReportLine (str, "Location", valstr);
5512             }
5513             valstr = MemFree (valstr);
5514             RemoveValueFromDefline ("location", valstr);
5515 
5516             TrimSpacesAroundString (title);
5517             if (! StringHasNoText (title)) {
5518               StringCat (str, "Title: ");
5519               StringNCat (str, title, 128);
5520               StringCat (str, "\n");
5521             } else {
5522               StringCat (str, "No title detected\n");
5523             }
5524           }
5525           MemFree (title);
5526           if (vnp != NULL && vnp->data.ptrvalue != NULL) {
5527             StringCat (str, (CharPtr) vnp->data.ptrvalue);
5528             StringCat (str, "\n");
5529           }
5530           AppendText (ppp->doc, str, &faParFmt, &faColFmt, programFont);
5531           if (vnp != NULL) {
5532             vnp = vnp->next;
5533           }
5534         }
5535       }
5536     }
5537     MemFree (str);
5538     MemFree (tmp);
5539     UpdateDocument (ppp->doc, 0, 0);
5540   }
5541 }
5542 
/* Frees the imported entry and the message list held by the page and
 * stores the values returned by the free routines back into the page.
 * A NULL page is ignored.
 */
static void ResetPhylipPage (PhylipPagePtr ppp)

{
  if (ppp == NULL) {
    return;
  }
  ppp->sep = SeqEntryFree (ppp->sep);
  ppp->errmsgs = ValNodeFreeData (ppp->errmsgs);
}
5551 
/* Tail of the organism warning; CountTitlesWithoutOrganisms prints it
 * after a "<count> of <total> " prefix.
 */
static CharPtr noOrgInTitleWarning =
"sequences have organism information in titles. " \
"It is critical to annotate the data file with organism and source information. " \
"Please quit Sequin and read the Sequin Quick Guide section on preparing the data files before proceeding.";
5556 
/* Warns the user when only some of the sequences in the list carry an
 * organism value in their titles.  Entries flagged in is_seg are not
 * counted.  Nothing is shown when every counted sequence has an organism
 * or when none of them has one.
 */
static void CountTitlesWithoutOrganisms (SeqEntryPtr sep)
{
  IDAndTitleEditPtr iatep;
  Int4              seq_num;
  CharPtr           org_name;
  Int4              num_sequences = 0, num_with_orgs = 0;

  iatep = SeqEntryListToIDAndTitleEdit (sep);
  if (iatep == NULL)
  {
    return;
  }

  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
    {
      continue;
    }
    num_sequences ++;
    org_name = FindValueFromPairInDefline ("organism", iatep->title_list [seq_num]);
    if (!StringHasNoText (org_name))
    {
      num_with_orgs ++;
    }
    org_name = MemFree (org_name);
  }
  iatep = IDAndTitleEditFree (iatep);
  if (num_sequences != num_with_orgs && num_with_orgs != 0)
  {
    /* The message text reads "<n> of <m> sequences have organism
     * information", so n must be the number of sequences WITH an
     * organism (the number without one used to be printed).  Both
     * counts are cast so they match the %d specifiers.
     */
    Message (MSG_OK, "%d of %d %s", (int) num_with_orgs, (int) num_sequences, noOrgInTitleWarning);
  }

}
5591 
/* Instruction text that SetPhylipDocInstructions shows in the alignment
 * page's document panel.
 */
static CharPtr  phylipNucMsg = "\
\nClick 'Import Nucleotide Alignment' to load your \
nucleotide alignment file.\n\nClick on 'Custom Alignment Settings' \
if Sequin has trouble reading your alignment file.";
5596 
/* Replaces the contents of the page's document panel with the alignment
 * import instructions and refreshes the display.  Does nothing when the
 * page or its panel is missing.
 */
static void SetPhylipDocInstructions (PhylipPagePtr ppp)
{
  DoC  panel;

  if (ppp == NULL) {
    return;
  }
  panel = ppp->doc;
  if (panel == NULL) {
    return;
  }

  Reset (panel);
  AppendText (panel, phylipNucMsg, &faParFmt, &faColFmt, programFont);
  UpdateDocument (panel, 0, 0);
  Update ();
}
5605 
ImportPhylipDialog(DialoG d,CharPtr filename)5606 static Boolean ImportPhylipDialog (DialoG d, CharPtr filename)
5607 {
5608   Char           path [PATH_MAX];
5609   PhylipPagePtr  ppp;
5610   SeqEntryPtr    sep;
5611   RecT           r;
5612   FILE           *fp;
5613   ObjMgrDataPtr  omdptop;
5614   ObjMgrData     omdata;
5615   Uint2          parenttype;
5616   Pointer        parentptr;
5617   Char           errStr [PATH_MAX + 64];
5618   CharPtr        no_org_err_msg = NULL;
5619 
5620   if (d == NULL || filename == NULL) return FALSE;
5621 
5622   path [0] = '\0';
5623   StringNCpy_0 (path, filename, sizeof (path));
5624   ppp = (PhylipPagePtr) GetObjectExtra (d);
5625   if (ppp == NULL) {
5626     return FALSE;
5627   }
5628 
5629   if (path [0] != '\0' || GetInputFileName (path, sizeof (path), ppp->extension, "TEXT")) {
5630     WatchCursor ();
5631     StringCpy (ppp->path, path);
5632     ObjectRect (ppp->doc, &r);
5633     InsetRect (&r, 4, 4);
5634     faColFmt.pixWidth = r.right - r.left;
5635     Reset (ppp->doc);
5636     Update ();
5637     ppp->sep = SeqEntryFree (ppp->sep);
5638     fp = FileOpen (path, "r");
5639     if (fp != NULL) {
5640       ppp->sep = SeqEntryFromAlignmentFile (fp, ppp->aln_settings,
5641                                             Seq_mol_na, no_org_err_msg);
5642       TrimAmbiguousBases(&(ppp->sep));
5643 
5644       if (CollectIDsAndTitles (ppp->sep, NULL, TRUE))/* check for bracketing issues here */
5645       {
5646         if (BadSeqIdLengths(ppp->sep))
5647         {
5648           ppp->sep = SeqEntryFree (ppp->sep);
5649         } else {
5650           /* add default molecule type, topology, location, and genetic codes */
5651           AddDefaultModifierValues (ppp->sep);
5652         }
5653       }
5654       else
5655       {
5656         ppp->sep = SeqEntryFree (ppp->sep);
5657       }
5658 
5659       sep = ppp->sep;
5660       if (sep != NULL) {
5661         SaveSeqEntryObjMgrData (ppp->sep, &omdptop, &omdata);
5662         GetSeqEntryParent (ppp->sep, &parentptr, &parenttype);
5663         SeqMgrLinkSeqEntry (sep, parenttype, parentptr);
5664         RestoreSeqEntryObjMgrData (sep, omdptop, &omdata);
5665 
5666         FormatPhylipDoc (ppp);
5667         SafeShow (ppp->doc);
5668 
5669         CountTitlesWithoutOrganisms (sep);
5670       } else {
5671         SendHelpScrollMessage (helpForm, "Nucleotide Page", "Nucleotide Page for Aligned Data Formats");
5672         SetPhylipDocInstructions (ppp);
5673       }
5674     } else {
5675       SetPhylipDocInstructions (ppp);
5676     }
5677   } else {
5678 	sprintf (errStr, "ERROR: Unable to open file %s\n\n", path);
5679 	AppendText (ppp->doc, errStr, &faParFmt, &faColFmt, programFont);
5680 	AppendText (ppp->doc, strerror(errno), &faParFmt, &faColFmt, programFont);
5681 	SafeShow (ppp->doc);
5682     Update ();
5683   }
5684   ArrowCursor ();
5685   Update ();
5686   return TRUE;
5687 }
5688 
/* Cleanup callback registered with SetObjectExtra for the alignment
 * page: resets the page, frees its reader settings, and finally frees
 * the page structure.  The g argument is not used.
 */
static void CleanupPhylipDialog (GraphiC g, VoidPtr data)

{
  PhylipPagePtr  page = (PhylipPagePtr) data;

  if (page != NULL) {
    ResetPhylipPage (page);
    SequenceInfoFree (page->aln_settings);
    page->aln_settings = NULL;
  }
  MemFree (data);
}
5702 
5703 
CreatePhylipDialog(GrouP h,CharPtr title,CharPtr text,Int2 format,CharPtr extension,Int4 type)5704 static DialoG CreatePhylipDialog (GrouP h, CharPtr title, CharPtr text,
5705                                   Int2 format, CharPtr extension,
5706                                   Int4 type)
5707 
5708 {
5709   PhylipPagePtr  ppp;
5710   GrouP          g;
5711   GrouP          m;
5712   GrouP          p;
5713   GrouP          s;
5714   RecT          r;
5715 
5716   p = HiddenGroup (h, 1, 0, NULL);
5717   SetGroupSpacing (p, 10, 10);
5718 
5719   ppp = (PhylipPagePtr) MemNew (sizeof (PhylipPage));
5720   if (ppp != NULL) {
5721 
5722     SetObjectExtra (p, ppp, CleanupPhylipDialog);
5723     ppp->dialog = (DialoG) p;
5724     ppp->todialog = NULL;
5725     ppp->fromdialog = NULL;
5726     ppp->importdialog = ImportPhylipDialog;
5727     ppp->type = type;
5728 
5729     if (title != NULL && title [0] != '\0') {
5730       s = NormalGroup (p, 0, -2, title, systemFont, NULL);
5731     } else {
5732       s = HiddenGroup (p, 0, -2, NULL);
5733     }
5734     m = HiddenGroup (s, -1, 0, NULL);
5735 
5736     ppp->format = (Uint1) format;
5737     ppp->path [0] = '\0';
5738     StringNCpy_0 (ppp->extension, extension, sizeof (ppp->extension));
5739 
5740     g = HiddenGroup (m, 0, 0, NULL);
5741     ppp->doc = DocumentPanel (g, stdCharWidth * 27, stdLineHeight * 8);
5742     ObjectRect (ppp->doc, &r);
5743     InsetRect (&r, 4, 4);
5744     faColFmt.pixWidth = r.right - r.left;
5745 
5746     ppp->aln_settings = GetDefaultSequenceInfo();
5747 
5748     SetPhylipDocInstructions (ppp);
5749   }
5750 
5751   return (DialoG) p;
5752 }
5753 
/* Consecutive page numbers, one per named page.
 * NOTE(review): presumably indices of the pages of the sequences form;
 * the code that uses them is outside this part of the file - confirm.
 */
#define NUCLEOTIDE_PAGE        0
#define SEQUENCING_METHOD_PAGE 1
#define ORGANISM_PAGE          2
#define MRNA_PAGE              3
#define PROTEIN_PAGE           4
#define ANNOTATE_PAGE          5
5760 
5761 /*---------------------------------------------------------------------*/
5762 /*                                                                     */
5763 /* HasZeroLengthSequence () -- Checks to see if any of a submission's  */
5764 /*                             sequences are missing (ie -- zero       */
5765 /*                             length).                                */
5766 /*                                                                     */
5767 /*---------------------------------------------------------------------*/
5768 
HasZeroLengthSequence(ForM newForm)5769 extern Boolean HasZeroLengthSequence (ForM newForm)
5770 {
5771   SequencesFormPtr  sqfp;
5772   FastaPagePtr      fpp;
5773   SeqEntryPtr       sep;
5774   BioseqPtr         bsp;
5775 
5776   /* Get the list of Bioseqs to check */
5777 
5778   sqfp = (SequencesFormPtr) GetObjectExtra (newForm);
5779   if (NULL == sqfp)
5780     return TRUE;
5781 
5782   fpp = GetObjectExtra (sqfp->dnaseq);
5783   sep = fpp->list;
5784 
5785   /* Check the list */
5786 
5787   while (NULL != sep) {
5788     if (sep->choice == 1) {
5789       bsp = (BioseqPtr) sep->data.ptrvalue;
5790       if (bsp->length <= 0)
5791 	return TRUE;
5792     }
5793     sep = sep->next;
5794   }
5795 
5796   /* If we made it to here, then */
5797   /* there were none found.      */
5798 
5799   return FALSE;
5800 }
5801 
SequencesFormHasProteins(ForM f)5802 extern Boolean SequencesFormHasProteins (ForM f)
5803 
5804 {
5805   FastaPagePtr      fpp;
5806   SequencesFormPtr  sqfp;
5807 
5808   sqfp = (SequencesFormPtr) GetObjectExtra (f);
5809   if (sqfp != NULL) {
5810     if (PackageTypeIsSet (sqfp->seqPackage)) return TRUE;
5811     fpp = GetObjectExtra (sqfp->protseq);
5812     if (fpp != NULL) {
5813       if (fpp->path [0] != '\0') {
5814         return TRUE;
5815       }
5816     }
5817   }
5818   return FALSE;
5819 }
5820 
GetSequencesFormProteinList(ForM f)5821 extern SeqEntryPtr GetSequencesFormProteinList (ForM f)
5822 
5823 {
5824   FastaPagePtr      fpp;
5825   SequencesFormPtr  sqfp;
5826 
5827   sqfp = (SequencesFormPtr) GetObjectExtra (f);
5828   if (sqfp != NULL) {
5829     fpp = GetObjectExtra (sqfp->protseq);
5830     if (fpp != NULL) {
5831       return fpp->list;
5832     }
5833   }
5834   return NULL;
5835 }
5836 
GetSeqEntryFromSequencesForm(SequencesFormPtr sqfp)5837 static SeqEntryPtr GetSeqEntryFromSequencesForm (SequencesFormPtr sqfp)
5838 {
5839   SeqEntryPtr list = NULL;
5840   FastaPagePtr       fpp;
5841   PhylipPagePtr      ppp;
5842   SeqEntryPtr        sep;
5843   BioseqSetPtr       bssp;
5844 
5845   if (sqfp == NULL) return NULL;
5846 
5847   if (sqfp->seqFormat == SEQ_FMT_FASTA) {
5848     fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
5849     if (fpp != NULL)
5850     {
5851       list = fpp->list;
5852     }
5853   } else if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
5854     ppp = (PhylipPagePtr) GetObjectExtra (sqfp->dnaseq);
5855     if (ppp != NULL) {
5856       sep = ppp->sep;
5857       if (sep != NULL && IS_Bioseq_set (sep)) {
5858         bssp = (BioseqSetPtr) sep->data.ptrvalue;
5859         if (bssp != NULL) {
5860           list = bssp->seq_set;
5861         }
5862       }
5863     }
5864   }
5865   return list;
5866 }
5867 
GetSequencesFormNucleotideList(ForM f)5868 extern SeqEntryPtr GetSequencesFormNucleotideList (ForM f)
5869 {
5870   SequencesFormPtr  sqfp;
5871 
5872   sqfp = (SequencesFormPtr) GetObjectExtra (f);
5873   if (sqfp != NULL) {
5874     return GetSeqEntryFromSequencesForm (sqfp);
5875   }
5876   return NULL;
5877 }
5878 
SequencesFormHasTooManyNucleotides(ForM f)5879 extern Boolean SequencesFormHasTooManyNucleotides (ForM f)
5880 
5881 {
5882   FastaPagePtr      fpp;
5883   SequencesFormPtr  sqfp;
5884 
5885   sqfp = (SequencesFormPtr) GetObjectExtra (f);
5886   if (sqfp != NULL && PackageTypeIsSingle (sqfp->seqPackage))
5887   {
5888     fpp = GetObjectExtra (sqfp->dnaseq);
5889     if (fpp != NULL) {
5890       if (fpp->list != NULL && fpp->list->next != NULL) {
5891         return TRUE;
5892       }
5893     }
5894   }
5895   return FALSE;
5896 }
5897 
/* Prototype only; CreateTagListDialogEx is declared here so that it can
 * be called from this file.
 */
extern DialoG CreateTagListDialogEx (GrouP h, Uint2 rows, Uint2 cols,
                                     Int2 spacing, Uint2Ptr types,
                                     Uint2Ptr textWidths, EnumFieldAssocPtr PNTR alists,
                                     Boolean useBar, Boolean noExtend,
                                     ToDialogFunc tofunc, FromDialogFunc fromfunc);
5903 
5904 static ValNodePtr
BuildModifierTypeList(ValNodePtr type_list,CharPtr new_title,Boolean allow_prot)5905 BuildModifierTypeList
5906 (ValNodePtr type_list,
5907  CharPtr    new_title,
5908  Boolean    allow_prot)
5909 {
5910   ValNodePtr      modifier_info_list;
5911   ValNodePtr      info_vnp, type_vnp;
5912   ModifierInfoPtr mip;
5913 
5914   modifier_info_list = ParseAllBracketedModifiers (new_title);
5915   for (info_vnp = modifier_info_list; info_vnp != NULL; info_vnp = info_vnp->next)
5916   {
5917     mip = (ModifierInfoPtr)info_vnp->data.ptrvalue;
5918     if (mip == NULL
5919         || mip->modtype == eModifierType_Protein
5920         || mip->modtype == eModifierType_Organism)
5921     {
5922       continue;
5923     }
5924     if (mip->modtype == eModifierType_SourceQual)
5925     {
5926   	  for (type_vnp = type_list;
5927   	       type_vnp != NULL
5928   	         && (type_vnp->choice != mip->subtype
5929   	             || StringICmp (type_vnp->data.ptrvalue, mip->name) != 0);
5930   	       type_vnp = type_vnp->next)
5931   	  {
5932   	  }
5933     }
5934     else
5935     {
5936   	  for (type_vnp = type_list;
5937   	       type_vnp != NULL && StringICmp (type_vnp->data.ptrvalue, mip->name) != 0;
5938   	       type_vnp = type_vnp->next)
5939   	  {
5940   	  }
5941     }
5942   	if (type_vnp == NULL)
5943   	{
5944   	  type_vnp = ValNodeNew (type_list);
5945   	  if (type_list == NULL) type_list = type_vnp;
5946   	  if (type_vnp != NULL)
5947   	  {
5948   	  	type_vnp->choice = mip->subtype;
5949   	  	type_vnp->data.ptrvalue = StringSave (mip->name);
5950   	  }
5951   	}
5952   }
5953   ModifierInfoListFree (modifier_info_list);
5954   return type_list;
5955 }
5956 
5957 
/* Two-entry width table, both entries zero.
 * NOTE(review): presumably the textWidths argument for a two-column tag
 * list dialog -- confirm against its user.
 */
static Uint2 modedit_widths [] = {
  0, 0,
};

/* Name/value table offering the two choices "FALSE" (0) and "TRUE" (1). */
ENUM_ALIST(nontextmodedit_alist)
  {"FALSE",             0},
  {"TRUE",              1},
END_ENUM_ALIST
5966 
5967 extern void ConfirmSequencesFormParsing (ForM f, FormActnFunc putItAllTogether)
5968 
5969 {
5970   SequencesFormPtr  sqfp;
5971 
5972   sqfp = (SequencesFormPtr) GetObjectExtra (f);
5973   if (sqfp != NULL && putItAllTogether != NULL) {
5974     putItAllTogether (sqfp->form);
5975   }
5976 }
5977 
/* Look for label (case-insensitive) in title and, when the text that
 * follows it contains a closing ']', append a new SubSource of the given
 * subtype to biop whose name is the trimmed text between the label and
 * that ']'.
 *
 * title is not modified.  Nothing happens when any argument is NULL, when
 * the label is not found, or when no ']' follows it.
 */
extern void AddToSubSource (BioSourcePtr biop, CharPtr title, CharPtr label, Uint1 subtype)

{
  CharPtr       close_bracket;
  CharPtr       found;
  SubSourcePtr  last;
  SubSourcePtr  ssp;
  CharPtr       value;

  if (biop == NULL || title == NULL || label == NULL) return;

  found = StringISearch (title, label);
  if (found == NULL) return;

  /* Work on a private, exactly sized copy of the text after the label.
   * (The scratch buffer used to be StringLen (title) bytes, which left no
   * room for the terminator when the label was empty.)
   */
  value = StringSave (found + StringLen (label));
  if (value == NULL) return;

  close_bracket = StringChr (value, ']');
  if (close_bracket != NULL) {
    *close_bracket = '\0';
    TrimSpacesAroundString (value);
    ssp = SubSourceNew ();
    if (ssp != NULL) {
      ssp->subtype = subtype;
      ssp->name = StringSave (value);
      /* append at the end so existing order is preserved */
      if (biop->subtype == NULL) {
        biop->subtype = ssp;
      } else {
        last = biop->subtype;
        while (last->next != NULL) {
          last = last->next;
        }
        last->next = ssp;
      }
    }
  }
  MemFree (value);
}
6014 
/* Look for label (case-insensitive) in title and, when the text that
 * follows it contains a closing ']', append a new OrgMod of the given
 * subtype to biop->org->orgname whose subname is the trimmed text between
 * the label and that ']'.  The OrgRef and OrgName are created on demand.
 *
 * title is not modified.  Nothing happens when any argument is NULL, when
 * the label is not found, or when no ']' follows it.
 */
extern void AddToOrgMod (BioSourcePtr biop, CharPtr title, CharPtr label, Uint1 subtype)

{
  CharPtr     close_bracket;
  CharPtr     found;
  OrgModPtr   last;
  OrgModPtr   mod;
  OrgNamePtr  onp;
  OrgRefPtr   orp;
  CharPtr     value;

  if (biop == NULL || title == NULL || label == NULL) return;

  found = StringISearch (title, label);
  if (found == NULL) return;

  /* Work on a private, exactly sized copy of the text after the label.
   * (The scratch buffer used to be StringLen (title) bytes, which left no
   * room for the terminator when the label was empty.)
   */
  value = StringSave (found + StringLen (label));
  if (value == NULL) return;

  close_bracket = StringChr (value, ']');
  if (close_bracket != NULL) {
    *close_bracket = '\0';
    TrimSpacesAroundString (value);

    orp = biop->org;
    if (orp == NULL) {
      orp = OrgRefNew ();
      biop->org = orp;
    }
    onp = NULL;
    if (orp != NULL) {
      onp = orp->orgname;
      if (onp == NULL) {
        onp = OrgNameNew ();
        orp->orgname = onp;
      }
    }
    if (onp != NULL) {
      mod = OrgModNew ();
      if (mod != NULL) {
        mod->subtype = subtype;
        mod->subname = StringSave (value);
        /* append at the end so existing order is preserved */
        if (onp->mod == NULL) {
          onp->mod = mod;
        } else {
          last = onp->mod;
          while (last->next != NULL) {
            last = last->next;
          }
          last->next = mod;
        }
      }
    }
  }
  MemFree (value);
}
6067 
/* Size constant; its users are not in this section of the file. */
#define PROC_NUC_STR_SIZE 4096
6069 
TopologyFromString(CharPtr str)6070 static Int4 TopologyFromString (CharPtr str)
6071 {
6072   EnumFieldAssocPtr  eap;
6073 
6074   for (eap = topology_nuc_alist; eap != NULL && eap->name != NULL; eap++)
6075   {
6076     if (StringICmp (eap->name, str) == 0)
6077     {
6078       return eap->value;
6079     }
6080   }
6081   return 1;
6082 }
6083 
/* Make sure biop exists and has an OrgRef.
 *
 * A NULL biop is replaced by a newly allocated BioSource.  Returns the
 * BioSource on success.  If the OrgRef cannot be allocated the BioSource
 * is freed (even when it was supplied by the caller) and NULL is returned.
 */
static BioSourcePtr AddOrgRef (BioSourcePtr biop)
{
  if (biop == NULL)
  {
    biop = BioSourceNew ();
    if (biop == NULL) return NULL;
  }

  if (biop->org != NULL) return biop;

  biop->org = OrgRefNew ();
  if (biop->org != NULL) return biop;

  BioSourceFree (biop);
  return NULL;
}
6105 
/* Make sure biop exists and has both an OrgRef and an OrgName.
 *
 * Returns the BioSource on success.  On any allocation failure the
 * BioSource is freed and NULL is returned.
 */
static BioSourcePtr AddOrgName (BioSourcePtr biop)
{
  BioSourcePtr  src;

  src = AddOrgRef (biop);
  if (src != NULL && src->org != NULL)
  {
    if (src->org->orgname != NULL) return src;

    src->org->orgname = OrgNameNew ();
    if (src->org->orgname != NULL) return src;
  }

  /* failure: release whatever we hold */
  BioSourceFree (src);
  return NULL;
}
6125 
/* Store gcode in the BioSource's OrgName: in the gcode field when is_nuc
 * is TRUE, in the mgcode field otherwise.
 *
 * A negative gcode leaves biop untouched.  The OrgRef/OrgName are created
 * on demand through AddOrgName; if that fails, NULL is returned.
 */
static BioSourcePtr SetGeneticCodeForBioSource (BioSourcePtr biop, Int4 gcode, Boolean is_nuc)
{
  OrgRefPtr  org;
  OrgNamePtr name;

  if (gcode < 0) return biop;

  biop = AddOrgName (biop);
  if (biop == NULL) return NULL;

  if (biop->org == NULL)
  {
    biop->org = OrgRefNew ();
  }
  org = biop->org;
  if (org == NULL) return biop;

  if (org->orgname == NULL)
  {
    org->orgname = OrgNameNew ();
  }
  name = org->orgname;
  if (name == NULL) return biop;

  if (is_nuc)
  {
    name->gcode = gcode;
  }
  else
  {
    name->mgcode = gcode;
  }
  return biop;
}
6167 
6168 static BioSourcePtr
SetGeneticCodeFromTitle(BioSourcePtr biop,CharPtr title,CharPtr mod_name,Boolean is_nuc)6169 SetGeneticCodeFromTitle
6170 (BioSourcePtr biop,
6171  CharPtr      title,
6172  CharPtr      mod_name,
6173  Boolean      is_nuc)
6174 {
6175   CharPtr    gcode_str;
6176   Int4       gcode;
6177   CharPtr    next_org_loc;
6178 
6179   if (StringHasNoText (title))
6180   {
6181     return biop;
6182   }
6183 
6184   next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6185   gcode_str = FindValueFromPairInDeflineBeforeCharPtr (mod_name, title, next_org_loc);
6186   if (!StringHasNoText (gcode_str))
6187   {
6188     gcode = GeneticCodeFromString (gcode_str);
6189     biop = SetGeneticCodeForBioSource (biop, gcode, is_nuc);
6190   }
6191   if (gcode_str != NULL)
6192   {
6193     RemoveValueFromDefline (mod_name, title);
6194   }
6195   gcode_str = MemFree (gcode_str);
6196   return biop;
6197 }
6198 
6199 static BioSourcePtr
SetAllGeneticCodesFromTitle(BioSourcePtr biop,CharPtr title)6200 SetAllGeneticCodesFromTitle
6201 (BioSourcePtr biop,
6202  CharPtr      title)
6203 {
6204   Int4    code_to_use;
6205   CharPtr location;
6206   CharPtr next_org_loc;
6207 
6208   if (StringHasNoText (title))
6209   {
6210     return biop;
6211   }
6212 
6213   next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6214   location = FindValueFromPairInDeflineBeforeCharPtr ("location", title, next_org_loc);
6215   if (!StringHasNoText (location))
6216   {
6217     code_to_use = UseGeneticCodeForLocation (location);
6218     if (code_to_use == USE_OTHER_GENETIC_CODE)
6219     {
6220       biop = SetGeneticCodeForBioSource (biop, 11, TRUE);
6221       RemoveValueFromDefline ("genetic_code", title);
6222     }
6223     else if (code_to_use == USE_NUCLEAR_GENETIC_CODE)
6224     {
6225       biop = SetGeneticCodeFromTitle (biop, title, "genetic_code", TRUE);
6226     }
6227     else if (code_to_use == USE_MITOCHONDRIAL_GENETIC_CODE)
6228     {
6229       biop = SetGeneticCodeFromTitle (biop, title, "genetic_code", FALSE);
6230     }
6231   }
6232   location = MemFree (location);
6233 
6234   biop = SetGeneticCodeFromTitle (biop, title, "gcode", TRUE);
6235   biop = SetGeneticCodeFromTitle (biop, title, "mgcode", FALSE);
6236 
6237   return biop;
6238 }
6239 
6240 static void
SetMoleculeAndMolTypeFromTitle(BioseqPtr bsp,CharPtr title,Int2 seqPackage)6241 SetMoleculeAndMolTypeFromTitle
6242 (BioseqPtr   bsp,
6243  CharPtr     title,
6244  Int2        seqPackage)
6245 {
6246   SeqEntryPtr sep;
6247   ValNodePtr vnp;
6248   MolInfoPtr mip = NULL;
6249   Uint1      biomol;
6250   Int4       molecule;
6251   CharPtr    valstr;
6252   CharPtr    ptr;
6253   SeqLocPtr  slp;
6254   BioseqPtr  bsp_seg;
6255 
6256   if (bsp == NULL)
6257   {
6258     return;
6259   }
6260 
6261   sep = SeqMgrGetSeqEntryForData (bsp);
6262   if (sep == NULL)
6263   {
6264     return;
6265   }
6266 
6267   vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
6268   if (vnp == NULL)
6269   {
6270     biomol = 1;
6271     molecule = Seq_mol_dna;
6272   }
6273   else
6274   {
6275     mip = (MolInfoPtr) vnp->data.ptrvalue;
6276     biomol = mip->biomol;
6277     molecule = bsp->mol;
6278   }
6279 
6280   /* get moltype from defline */
6281   valstr = FindValueFromPairInDefline ("moltype", title);
6282   if (!StringHasNoText (valstr))
6283   {
6284     biomol = MolTypeFromString (valstr);
6285     if (biomol == 1)
6286     {
6287       molecule = Seq_mol_na;
6288     }
6289     else if (biomol >= 2 && biomol <= 7)
6290     {
6291       molecule = Seq_mol_rna;
6292     }
6293     else if (biomol == 9)
6294     {
6295       molecule = Seq_mol_dna;
6296     }
6297     else if (biomol == MOLECULE_TYPE_NCRNA)
6298     {
6299       molecule = Seq_mol_rna;
6300     }
6301     else if (biomol == 253)
6302     {
6303       molecule = Seq_mol_dna;
6304       biomol = 1;
6305     }
6306     else if (biomol == 254)
6307     {
6308       molecule = Seq_mol_rna;
6309       biomol = 1;
6310     }
6311     else if (biomol == 255)
6312     {
6313       molecule = Seq_mol_other;
6314     }
6315   }
6316   valstr = MemFree (valstr);
6317 
6318   RemoveValueFromDefline ("moltype", title);
6319 
6320   /* get molecule from defline */
6321   valstr = FindValueFromPairInDefline ("molecule", title);
6322   if (!StringHasNoText (valstr))
6323   {
6324     if (StringICmp (valstr, "dna") == 0) {
6325       molecule = Seq_mol_dna;
6326     } else if (StringICmp (valstr, "rna") == 0) {
6327       molecule = Seq_mol_rna;
6328     }
6329   }
6330   valstr = MemFree (valstr);
6331   RemoveValueFromDefline ("molecule", title);
6332 
6333   ptr = StringISearch (title, "[dna]");
6334   if (ptr != NULL)
6335   {
6336     molecule = Seq_mol_dna;
6337     ExciseString (title, "[dna", "]");
6338   }
6339 
6340   ptr = StringISearch (title, "[rna]");
6341   if (ptr != NULL)
6342   {
6343     molecule = Seq_mol_rna;
6344     ExciseString (title, "[rna", "]");
6345   }
6346 
6347   if (mip == NULL)
6348   {
6349     vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
6350     mip = MolInfoNew ();
6351     vnp->data.ptrvalue = mip;
6352   }
6353 
6354   mip->biomol = biomol;
6355   bsp->mol = molecule;
6356 
6357   valstr = FindValueFromPairInDefline ("tech", title);
6358   if (!StringHasNoText (valstr))
6359   {
6360     ReadTechFromString (valstr, mip);
6361   }
6362   valstr = MemFree (valstr);
6363   RemoveValueFromDefline ("tech", title);
6364 
6365   if (bsp->repr == Seq_repr_seg)
6366   {
6367     slp = (SeqLocPtr) bsp->seq_ext;
6368     while (slp != NULL)
6369     {
6370       bsp_seg = BioseqFind (SeqLocId (slp));
6371       sep = SeqMgrGetSeqEntryForData (bsp_seg);
6372       if (bsp_seg != NULL)
6373       {
6374         bsp_seg->mol = bsp->mol;
6375       }
6376       vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
6377       if (vnp == NULL)
6378       {
6379         vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
6380       }
6381       if (vnp != NULL)
6382       {
6383         vnp->data.ptrvalue = MolInfoFree (vnp->data.ptrvalue);
6384         vnp->data.ptrvalue = (MolInfoPtr) AsnIoMemCopy (mip, (AsnReadFunc) MolInfoAsnRead,
6385                                                             (AsnWriteFunc) MolInfoAsnWrite);
6386       }
6387       slp = slp->next;
6388     }
6389   }
6390 }
6391 
/* Record "Submitter genetic code: <comment>" in the "AdditionalComment"
 * field of the Bioseq's "Submission" user object.
 *
 * The user-object descriptor and the field are created when missing.  Any
 * text already in the field is kept and follows the new text directly
 * (no separator is inserted).  Nothing happens when bsp is NULL or
 * comment has no text.  Allocation failures leave existing data untouched.
 */
static void AddGeneticCodeComment (BioseqPtr bsp, CharPtr comment)
{
  SeqDescPtr         sdp;
  UserObjectPtr      uop = NULL;
  ObjectIdPtr        oip;
  UserFieldPtr       ufp, last_ufp = NULL;
  CharPtr            comment_fmt = "Submitter genetic code: %s";
  CharPtr            new_comment;
  Int4               new_comment_len;
  Boolean            has_old_text;

  if (bsp == NULL || StringHasNoText (comment))
  {
    return;
  }

  /* find an existing "Submission" user object among the descriptors */
  sdp = bsp->descr;
  while (sdp != NULL && uop == NULL)
  {
    if (sdp->choice == Seq_descr_user && sdp->data.ptrvalue != NULL)
    {
      uop = (UserObjectPtr) sdp->data.ptrvalue;
      oip = uop->type;
      if (oip == NULL || StringCmp (oip->str, "Submission") != 0)
      {
        uop = NULL;
      }
    }
    sdp = sdp->next;
  }

  if (uop == NULL)
  {
    uop = UserObjectNew ();
    if (uop == NULL)
    {
      return;
    }
    uop->type = ObjectIdNew ();
    if (uop->type != NULL)
    {
      uop->type->str = StringSave ("Submission");
    }
    ValNodeAddPointer (&bsp->descr, Seq_descr_user, uop);
  }

  /* find the "AdditionalComment" field, remembering the last field seen */
  ufp = uop->data;
  while (ufp != NULL
         && (ufp->label == NULL
           || StringCmp (ufp->label->str, "AdditionalComment") != 0))
  {
    last_ufp = ufp;
    ufp = ufp->next;
  }

  if (ufp == NULL)
  {
    ufp = UserFieldNew ();
    if (ufp == NULL)
    {
      return;
    }
    /* NOTE(review): the new field's `choice` is left at its UserFieldNew
     * default -- confirm whether it must be set to mark string data.
     */
    ufp->label = ObjectIdNew ();
    if (ufp->label != NULL)
    {
      ufp->label->str = StringSave ("AdditionalComment");
    }
    if (last_ufp == NULL)
    {
      uop->data = ufp;
    }
    else
    {
      last_ufp->next = ufp;
    }
  }

  has_old_text = (Boolean) !StringHasNoText (ufp->data.ptrvalue);

  /* The "%s" in the format is replaced by comment, so the two characters
   * it occupies more than cover the terminating NUL.
   */
  new_comment_len = StringLen (comment) + StringLen (comment_fmt);
  if (has_old_text)
  {
    new_comment_len += StringLen (ufp->data.ptrvalue);
  }
  new_comment = (CharPtr) MemNew (new_comment_len * sizeof (Char));
  if (new_comment == NULL)
  {
    return;
  }
  sprintf (new_comment, comment_fmt, comment);

  if (has_old_text)
  {
    StringCat (new_comment, ufp->data.ptrvalue);
  }

  ufp->data.ptrvalue = MemFree (ufp->data.ptrvalue);
  ufp->data.ptrvalue = new_comment;
}
6475 
/* Prepend a new OrgMod (subtype, subname) to biop's OrgName modifier
 * list, creating the BioSource/OrgRef/OrgName as needed via AddOrgName.
 *
 * Ownership of subname passes to this function: it is either stored in
 * the new OrgMod or freed.  A NULL subname is a no-op.  Returns the
 * BioSource, which is NULL if AddOrgName failed.
 */
static BioSourcePtr AddOrgModValue (BioSourcePtr biop, Uint1 subtype, CharPtr subname)
{
  OrgModPtr    new_mod = NULL;

  if (subname == NULL) return biop;

  biop = AddOrgName (biop);
  if (biop != NULL)
  {
    new_mod = OrgModNew ();
  }
  if (new_mod == NULL)
  {
    /* could not store the value: release it */
    MemFree (subname);
    return biop;
  }

  new_mod->subtype = subtype;
  new_mod->subname = subname;
  new_mod->next = biop->org->orgname->mod;
  biop->org->orgname->mod = new_mod;
  return biop;
}
6501 
/* Prepend a new SubSource (subtype, subname) to biop's subtype list,
 * allocating the BioSource when biop is NULL.
 *
 * Ownership of subname passes to this function: it is either stored in
 * the new SubSource or freed.  A NULL subname is a no-op.  Returns the
 * BioSource, which is NULL only if it had to be and could not be created.
 */
static BioSourcePtr AddSubSourceValue (BioSourcePtr biop, Uint1 subtype, CharPtr subname)
{
  SubSourcePtr new_ssp = NULL;

  if (subname == NULL) return biop;

  if (biop == NULL)
  {
    biop = BioSourceNew ();
  }
  if (biop != NULL)
  {
    new_ssp = SubSourceNew ();
  }
  if (new_ssp == NULL)
  {
    /* could not store the value: release it */
    MemFree (subname);
    return biop;
  }

  new_ssp->subtype = subtype;
  new_ssp->name = subname;
  new_ssp->next = biop->subtype;
  biop->subtype = new_ssp;
  return biop;
}
6530 
6531 extern BioSourcePtr
ExtractFromTitleToBioSourceOrgMod(CharPtr title,BioSourcePtr biop,CharPtr mod_name,Int4 subtype)6532 ExtractFromTitleToBioSourceOrgMod
6533 (CharPtr      title,
6534  BioSourcePtr biop,
6535  CharPtr      mod_name,
6536  Int4         subtype)
6537 {
6538   CharPtr valstr;
6539   CharPtr next_org_loc;
6540 
6541   next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6542   while ((valstr = FindValueFromPairInDeflineBeforeCharPtr (mod_name, title, next_org_loc)) != NULL)
6543   {
6544     biop = AddOrgModValue (biop, subtype, valstr);
6545     RemoveValueFromDefline (mod_name, title);
6546     next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6547   }
6548   return biop;
6549 }
6550 
6551 extern BioSourcePtr
ExtractFromTitleToBioSourceSubSource(CharPtr title,BioSourcePtr biop,CharPtr mod_name,Int4 subtype)6552 ExtractFromTitleToBioSourceSubSource
6553 (CharPtr      title,
6554  BioSourcePtr biop,
6555  CharPtr      mod_name,
6556  Int4         subtype)
6557 {
6558   CharPtr valstr;
6559   CharPtr next_org_loc;
6560 
6561   next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6562   while ((valstr = FindValueFromPairInDeflineBeforeCharPtr (mod_name, title, next_org_loc)) != NULL)
6563   {
6564     if (IsNonTextModifier (mod_name)) {
6565       if (StringICmp (valstr, "FALSE") == 0) {
6566         valstr = MemFree (valstr);
6567       } else if (StringICmp (valstr, "TRUE") == 0) {
6568         biop = AddSubSourceValue (biop, subtype, StringSave (""));
6569         valstr = MemFree (valstr);
6570       } else {
6571         biop = AddSubSourceValue (biop, subtype, valstr);
6572       }
6573     } else {
6574       biop = AddSubSourceValue (biop, subtype, valstr);
6575     }
6576     RemoveValueFromDefline (mod_name, title);
6577     next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6578   }
6579   return biop;
6580 }
6581 
6582 
/* Move the forward/reverse primer name and sequence modifiers that
 * precede the next "organism" pair out of title and into biop, each as an
 * additional source qualifier (ExistingTextOption_add_qual).
 *
 * A title without text is returned untouched; otherwise a BioSource is
 * allocated when biop is NULL.  Returns the BioSource.
 */
static BioSourcePtr ExtractFromTitleToBioSourcePrimers (CharPtr title, BioSourcePtr biop)
{
  CharPtr valstr;
  CharPtr next_org_loc;
  ValNode qual;
  CharPtr mod_name;
  Int4    qual_defs[] = { Source_qual_fwd_primer_name, Source_qual_fwd_primer_seq, Source_qual_rev_primer_name, Source_qual_rev_primer_seq};
  Int4    num_quals = sizeof (qual_defs) / sizeof (qual_defs[0]);
  Int4    i;

  if (StringHasNoText (title)) {
    return biop;
  }
  if (biop == NULL) {
    biop = BioSourceNew ();
  }

  for (i = 0; i < num_quals; i++) {
    MemSet (&qual, 0, sizeof (ValNode));
    qual.choice = SourceQualChoice_textqual;
    qual.data.intvalue = qual_defs[i];
    mod_name = GetSourceQualName (qual_defs[i]);
    for (;;) {
      /* Removing a value shifts the rest of the title, so the position of
       * the next organism must be looked up again before every search
       * (as the other ExtractFromTitle... functions do).
       */
      next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
      valstr = FindValueFromPairInDeflineBeforeCharPtr (mod_name, title, next_org_loc);
      if (valstr == NULL) {
        break;
      }
      SetSourceQualInBioSource (biop, &qual, NULL, valstr, ExistingTextOption_add_qual);
      /* valstr is our own allocation; assumes SetSourceQualInBioSource
       * stores a copy of the value rather than the pointer -- TODO confirm.
       */
      valstr = MemFree (valstr);
      RemoveValueFromDefline (mod_name, title);
    }
  }

  return biop;
}
6615 
6616 
6617 /* this function collects all of the common names prior to the next organism name
6618  * and assembles a semicolon-delimited list.
6619  */
6620 extern BioSourcePtr
ExtractFromTitleToBioSourceCommonName(CharPtr title,BioSourcePtr biop)6621 ExtractFromTitleToBioSourceCommonName
6622 (CharPtr      title,
6623  BioSourcePtr biop)
6624 {
6625   CharPtr valstr, new_val;
6626   Int4    new_len;
6627   CharPtr next_org_loc;
6628 
6629   next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6630   while ((valstr = FindValueFromPairInDeflineBeforeCharPtr ("common name", title, next_org_loc)) != NULL)
6631   {
6632     if (!StringHasNoText (valstr))
6633     {
6634       biop = AddOrgRef (biop);
6635       if (StringHasNoText (biop->org->common))
6636       {
6637         biop->org->common = MemFree (biop->org->common);
6638         biop->org->common = valstr;
6639         valstr = NULL;
6640       }
6641       else
6642       {
6643         new_len = StringLen (biop->org->common) + StringLen (valstr) + 3;
6644         new_val = (CharPtr) MemNew (new_len * sizeof (Char));
6645         if (new_val != NULL)
6646         {
6647           sprintf (new_val, "%s; %s", biop->org->common, valstr);
6648           biop->org->common = MemFree (biop->org->common);
6649           biop->org->common = new_val;
6650         }
6651       }
6652     }
6653     valstr = MemFree (valstr);
6654     RemoveValueFromDefline ("common name", title);
6655     next_org_loc = FindValuePairInDefLine ("organism", title, NULL);
6656   }
6657   return biop;
6658 }
6659 
6660 /* When the user specifies multiple organisms on the definition line, modifiers after the
6661  * second organism go with the second organism, after the third organism go with the third
6662  * organism, etc.
6663  */
ExtractFromDeflineToBioSource(CharPtr defline,BioSourcePtr biop)6664 extern BioSourcePtr ExtractFromDeflineToBioSource (CharPtr defline, BioSourcePtr biop)
6665 {
6666   CharPtr      taxname = NULL;
6667   OrgInfoPtr   oip = NULL;
6668   CharPtr      valstr;
6669   Nlm_EnumFieldAssocPtr ap;
6670   Nlm_QualNameAssocPtr  qp;
6671   CharPtr            next_org_loc;
6672 
6673   if (StringHasNoText (defline))
6674   {
6675     return NULL;
6676   }
6677 
6678   taxname = FindValueFromPairInDefline ("organism", defline);
6679   RemoveValueFromDefline ("organism", defline);
6680   if (StringHasNoText (taxname))
6681   {
6682     taxname = MemFree (taxname);
6683     return NULL;
6684   }
6685   else
6686   {
6687     biop = AddOrgRef (biop);
6688     if (biop == NULL)
6689     {
6690       return biop;
6691     }
6692     LoadOrganismList ();
6693     oip = FindByTaxName (taxname);
6694     SetTaxNameAndRemoveTaxRef (biop->org, taxname);
6695   }
6696 
6697   /* add division */
6698   if (oip != NULL && !StringHasNoText (oip->div))
6699   {
6700     biop = AddOrgName (biop);
6701     if (biop == NULL)
6702     {
6703       return biop;
6704     }
6705     biop->org->orgname->div = StringSave (oip->div);
6706   }
6707 
6708   /* add common name (s) - if there are multiple entries, separate with semicolon */
6709   biop = ExtractFromTitleToBioSourceCommonName (defline, biop);
6710   /* if common name was not supplied in defline, use common name from organism list */
6711   if (biop->org == NULL || StringHasNoText (biop->org->common))
6712   {
6713     if (oip != NULL && !StringHasNoText (oip->common))
6714     {
6715       biop = AddOrgRef (biop);
6716       if (biop == NULL)
6717       {
6718         return biop;
6719       }
6720       biop->org->common = StringSave (oip->common);
6721     }
6722   }
6723 
6724   /* add lineage */
6725   if (oip != NULL && !StringHasNoText (oip->lineage))
6726   {
6727     biop = AddOrgName (biop);
6728     if (biop == NULL)
6729     {
6730       return biop;
6731     }
6732     biop->org->orgname->lineage = StringSave (oip->lineage);
6733   }
6734 
6735   /* add origin */
6736   next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
6737   valstr = FindValueFromPairInDeflineBeforeCharPtr ("origin", defline, next_org_loc);
6738   if (!StringHasNoText (valstr))
6739   {
6740     for (ap = biosource_origin_alist; ap->name != NULL; ap++) {
6741       if (StringICmp (valstr, ap->name) == 0) {
6742         if (biop == NULL)
6743         {
6744           biop = BioSourceNew ();
6745         }
6746         if (biop == NULL)
6747         {
6748           return biop;
6749         }
6750         biop->origin = (Uint1) ap->value;
6751       }
6752     }
6753   }
6754   if (valstr != NULL)
6755   {
6756     RemoveValueFromDefline ("origin", defline);
6757     next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
6758   }
6759   valstr = MemFree (valstr);
6760 
6761   valstr = FindValueFromPairInDeflineBeforeCharPtr ("lineage", defline, next_org_loc);
6762   if (!StringHasNoText (valstr))
6763   {
6764     biop = AddOrgName (biop);
6765   }
6766   if (!StringHasNoText (valstr) && biop != NULL && biop->org != NULL && biop->org->orgname != NULL &&
6767       StringCmp (valstr, biop->org->orgname->lineage) != 0)
6768   {
6769     biop = AddOrgModValue (biop, ORGMOD_old_lineage, valstr);
6770     valstr = NULL;
6771   }
6772   if (valstr != NULL)
6773   {
6774     RemoveValueFromDefline ("lineage", defline);
6775   }
6776   valstr = MemFree (valstr);
6777 
6778   biop = SetAllGeneticCodesFromTitle (biop, defline);
6779   next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
6780 
6781   for (qp = current_orgmod_subtype_alist; qp->name != NULL; qp++) {
6782     biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, qp->name, qp->value);
6783   }
6784   for (qp = current_subsource_subtype_alist; qp->name != NULL; qp++) {
6785     biop = ExtractFromTitleToBioSourceSubSource (defline, biop, qp->name, qp->value);
6786   }
6787 
6788   /* parse primers */
6789   biop = ExtractFromTitleToBioSourcePrimers (defline, biop);
6790 
6791   /* parse notes */
6792   biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "note-orgmod", 255);
6793   biop = ExtractFromTitleToBioSourceSubSource (defline, biop, "note-subsrc", 255);
6794 
6795   biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "Note -- OrgMod", 255);
6796   biop = ExtractFromTitleToBioSourceSubSource (defline, biop, "Note -- SubSource", 255);
6797 
6798   biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "note", 255);
6799   biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "comment", 255);
6800   biop = ExtractFromTitleToBioSourceSubSource (defline, biop, "subsource", 255);
6801 
6802 
6803   next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
6804 
6805   /* set location */
6806   valstr = FindValueFromPairInDeflineBeforeCharPtr ("location", defline, next_org_loc);
6807   if (StringHasNoText (valstr))
6808   {
6809     if (biop == NULL)
6810     {
6811       biop = BioSourceNew ();
6812     }
6813     if (biop == NULL)
6814     {
6815       return biop;
6816     }
6817     biop->genome = 1;
6818   }
6819   else if (StringICmp (valstr, "Mitochondrial") == 0)
6820   {
6821     if (biop == NULL)
6822     {
6823       biop = BioSourceNew ();
6824     }
6825     if (biop == NULL)
6826     {
6827       return biop;
6828     }
6829     biop->genome = 5;
6830   }
6831   else
6832   {
6833     for (ap = biosource_genome_simple_alist; ap->name != NULL; ap++) {
6834       if (StringICmp (valstr, ap->name) == 0) {
6835         if (biop == NULL)
6836         {
6837           biop = BioSourceNew ();
6838         }
6839         if (biop == NULL)
6840         {
6841           return biop;
6842         }
6843         biop->genome = (Uint1) ap->value;
6844       }
6845     }
6846   }
6847   if (valstr != NULL)
6848   {
6849     RemoveValueFromDefline ("location", defline);
6850   }
6851   valstr = MemFree (valstr);
6852 
6853   TrimSpacesAroundString (defline);
6854 
6855   return biop;
6856 
6857 }
6858 
/* Forward declaration; the definition of ProcessOneNucleotideTitle begins
 * further down in this file.
 */
extern Boolean ProcessOneNucleotideTitle (Int2 seqPackage, 
                                          SeqEntryPtr nsep, SeqEntryPtr top);
6861 
6862 
/* Apply the source modifiers found in defline to an existing BioSource
 * and remove them from defline.
 *
 * Unlike ExtractFromDeflineToBioSource, an organism name is optional, no
 * default location is set when none is given, and primers and the
 * "Note -- ..." spellings are not parsed.  Nothing happens when defline
 * has no text or biop is NULL.
 *
 * NOTE(review): the helpers used here (AddOrgRef, AddOrgName and the
 * functions built on them) free the BioSource when an allocation fails.
 * This function then stops touching it, but the caller's pointer is left
 * dangling because the function returns void.
 */
static void ParseDeflineToBiop(CharPtr defline, BioSourcePtr biop)
{
  CharPtr      taxname = NULL;
  OrgInfoPtr   oip = NULL;
  CharPtr      valstr;
  EnumFieldAssocPtr  ap;
  Nlm_QualNameAssocPtr qp;
  CharPtr            next_org_loc;

  if (StringHasNoText (defline) || biop == NULL)
  {
    return;
  }

  taxname = FindValueFromPairInDefline ("organism", defline);
  RemoveValueFromDefline ("organism", defline);
  if (StringHasNoText (taxname))
  {
    taxname = MemFree (taxname);
  }
  else
  {
    biop = AddOrgRef (biop);
    if (biop == NULL)
    {
      taxname = MemFree (taxname);
      return;
    }
    /* release any previous name before taking over taxname */
    biop->org->taxname = MemFree (biop->org->taxname);
    biop->org->taxname = taxname;
    LoadOrganismList ();
    oip = FindByTaxName (taxname);
  }

  /* add division */
  if (oip != NULL && !StringHasNoText (oip->div))
  {
    biop = AddOrgName (biop);
    if (biop == NULL)
    {
      return;
    }
    biop->org->orgname->div = MemFree (biop->org->orgname->div);
    biop->org->orgname->div = StringSave (oip->div);
  }

  /* add common name (s) - if there are multiple entries, separate with semicolon */
  biop = ExtractFromTitleToBioSourceCommonName (defline, biop);
  if (biop == NULL)
  {
    return;
  }
  /* if common name was not supplied in defline, use common name from organism list */
  if (biop->org == NULL || StringHasNoText (biop->org->common))
  {
    if (oip != NULL && !StringHasNoText (oip->common))
    {
      biop = AddOrgRef (biop);
      if (biop == NULL)
      {
        return;
      }
      biop->org->common = MemFree (biop->org->common);
      biop->org->common = StringSave (oip->common);
    }
  }

  /* add lineage */
  if (oip != NULL && !StringHasNoText (oip->lineage))
  {
    biop = AddOrgName (biop);
    if (biop == NULL)
    {
      return;
    }
    biop->org->orgname->lineage = MemFree (biop->org->orgname->lineage);
    biop->org->orgname->lineage = StringSave (oip->lineage);
  }

  /* add origin (biop is known to be non-NULL here) */
  next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
  valstr = FindValueFromPairInDeflineBeforeCharPtr ("origin", defline, next_org_loc);
  if (!StringHasNoText (valstr))
  {
    for (ap = biosource_origin_alist; ap->name != NULL; ap++) {
      if (StringICmp (valstr, ap->name) == 0) {
        biop->origin = (Uint1) ap->value;
      }
    }
  }
  if (valstr != NULL)
  {
    RemoveValueFromDefline ("origin", defline);
    next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);
  }
  valstr = MemFree (valstr);

  /* A lineage given on the defline that differs from the one already on
   * the BioSource is kept as an old-lineage OrgMod.  The pair is removed
   * from the defline whenever it was present, including when its value was
   * handed over to the OrgMod.
   */
  valstr = FindValueFromPairInDeflineBeforeCharPtr ("lineage", defline, next_org_loc);
  if (valstr != NULL)
  {
    if (!StringHasNoText (valstr))
    {
      biop = AddOrgName (biop);
      if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL &&
          StringCmp (valstr, biop->org->orgname->lineage) != 0)
      {
        /* AddOrgModValue takes ownership of valstr */
        biop = AddOrgModValue (biop, ORGMOD_old_lineage, valstr);
        valstr = NULL;
      }
    }
    valstr = MemFree (valstr);
    RemoveValueFromDefline ("lineage", defline);
  }

  biop = SetAllGeneticCodesFromTitle (biop, defline);

  for (qp = current_orgmod_subtype_alist; qp->name != NULL; qp++) {
    biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, qp->name, qp->value);
  }
  for (qp = current_subsource_subtype_alist; qp->name != NULL; qp++) {
    biop = ExtractFromTitleToBioSourceSubSource (defline, biop, qp->name, qp->value);
  }

  biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "note-orgmod", 255);
  biop = ExtractFromTitleToBioSourceSubSource (defline, biop, "note-subsrc", 255);
  biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "note", 255);
  biop = ExtractFromTitleToBioSourceOrgMod (defline, biop, "comment", 255);
  biop = ExtractFromTitleToBioSourceSubSource (defline, biop, "subsource", 255);


  next_org_loc = FindValuePairInDefLine ("organism", defline, NULL);

  /* set location */
  valstr = FindValueFromPairInDeflineBeforeCharPtr ("location", defline, next_org_loc);
  if (StringHasNoText (valstr) || biop == NULL)
  {
    /* don't set defaults; nothing to store into if biop was lost above */
  }
  else if (StringICmp (valstr, "Mitochondrial") == 0)
  {
    biop->genome = 5;
  }
  else
  {
    /* no break: if several table entries match, the last one wins */
    for (ap = biosource_genome_simple_alist; ap->name != NULL; ap++) {
      if (StringICmp (valstr, ap->name) == 0) {
        biop->genome = (Uint1) ap->value;
      }
    }
  }
  if (valstr != NULL)
  {
    RemoveValueFromDefline ("location", defline);
  }
  valstr = MemFree (valstr);

  TrimSpacesAroundString (defline);
}
6997 
6998 
ProcessOneNucleotideTitle(Int2 seqPackage,SeqEntryPtr nsep,SeqEntryPtr top)6999 extern Boolean ProcessOneNucleotideTitle (Int2 seqPackage,
7000                                           SeqEntryPtr nsep, SeqEntryPtr top)
7001 
7002 {
7003   BioSourcePtr       biop = NULL;
7004   BioseqSetPtr       bssp;
7005   BioseqPtr          nbsp;
7006   Boolean            needbiop;
7007   SeqEntryPtr        sep;
7008   CharPtr            str;
7009   CharPtr            valstr;
7010   CharPtr            title;
7011   ValNodePtr         vnp;
7012   Int4               topology;
7013 #if 0
7014   SeqFeatPtr         sfp;
7015 #endif
7016 
7017   if (nsep == NULL || top == NULL) return FALSE;
7018   nbsp = (BioseqPtr) nsep->data.ptrvalue;
7019   if (nbsp == NULL) return FALSE;
7020   if (! ISA_na (nbsp->mol)) return FALSE;
7021   str = MemNew (PROC_NUC_STR_SIZE * sizeof (Char));
7022   if (str == NULL) return FALSE;
7023   sep = NULL;
7024 
7025   SeqEntryExplore (top, (Pointer) &sep, FindFirstSeqEntryTitle);
7026   sep = FindNucSeqEntry (sep);
7027   if (sep != NULL) {
7028     vnp = SeqEntryGetSeqDescr (sep, Seq_descr_title, NULL);
7029     if (vnp != NULL && vnp->data.ptrvalue != NULL) {
7030       title = (CharPtr) vnp->data.ptrvalue;
7031 
7032       SetMoleculeAndMolTypeFromTitle (nbsp, title, seqPackage);
7033 
7034       if (nbsp->topology == 0)
7035       {
7036         topology = TOPOLOGY_LINEAR;
7037       }
7038       else
7039       {
7040         topology = nbsp->topology;
7041       }
7042 
7043       /* get topology from defline */
7044       valstr = FindValueFromPairInDefline ("topology", title);
7045       if (valstr != NULL)
7046       {
7047         if (!StringHasNoText (valstr))
7048         {
7049           topology = TopologyFromString (valstr);
7050         }
7051         RemoveValueFromDefline ("topology", title);
7052         valstr = MemFree (valstr);
7053       }
7054       nbsp->topology = topology;
7055 
7056       /* add bankit comment for genetic code */
7057       valstr = FindValueFromPairInDefline ("gencode_comment", title);
7058       if (valstr != NULL)
7059       {
7060         AddGeneticCodeComment (nbsp, valstr);
7061         RemoveValueFromDefline ("gencode_comment", title);
7062         valstr = MemFree (valstr);
7063       }
7064 
7065       needbiop = FALSE;
7066 
7067       if (PackageTypeIsSet (seqPackage))
7068       {
7069         needbiop = TRUE;
7070         if (GetAppParam ("SEQUIN", "PREFERENCES", "BIOSRCONALL", NULL, str, PROC_NUC_STR_SIZE)) {
7071           if (StringICmp (str, "FALSE") == 0) {
7072             needbiop = FALSE;
7073           }
7074         }
7075       }
7076 
7077       vnp = SeqEntryGetSeqDescr (sep, Seq_descr_source, NULL);
7078       if (vnp == NULL)
7079       {
7080         biop = ExtractFromDeflineToBioSource (title, NULL);
7081         if (biop == NULL && needbiop)
7082         {
7083           biop = BioSourceNew ();
7084         }
7085 
7086         if (biop != NULL)
7087         {
7088           vnp = CreateNewDescriptor (top, Seq_descr_source);
7089           if (vnp != NULL) {
7090             vnp->data.ptrvalue = (Pointer) biop;
7091           }
7092         }
7093 #if 0
7094         biop = BioSourceFromDefline (title);
7095         while (biop != NULL)
7096         {
7097           sfp = CreateNewFeature (sep, NULL, SEQFEAT_BIOSRC, NULL);
7098           if (sfp != NULL)
7099           {
7100             sfp->data.value.ptrvalue = biop;
7101           }
7102           biop = BioSourceFromDefline (title);
7103         }
7104 #endif
7105       }
7106 
7107       if (StringHasNoText (title) || sep != top) {
7108         vnp = NULL;
7109         if (IS_Bioseq (sep)) {
7110           nbsp = (BioseqPtr) sep->data.ptrvalue;
7111           vnp = ValNodeExtract (&(nbsp->descr), Seq_descr_title);
7112         } else if (IS_Bioseq_set (sep)) {
7113           bssp = (BioseqSetPtr) sep->data.ptrvalue;
7114           vnp = ValNodeExtract (&(bssp->descr), Seq_descr_title);
7115         }
7116         if (vnp != NULL && StringHasNoText ((CharPtr) vnp->data.ptrvalue)) {
7117           vnp = ValNodeFreeData (vnp);
7118         }
7119         if (sep != top && vnp != NULL) {
7120           if (IS_Bioseq (top)) {
7121             nbsp = (BioseqPtr) top->data.ptrvalue;
7122             ValNodeLink (&(nbsp->descr), vnp);
7123           } else if (IS_Bioseq_set (top)) {
7124             bssp = (BioseqSetPtr) top->data.ptrvalue;
7125             ValNodeLink (&(bssp->descr), vnp);
7126           }
7127         }
7128       }
7129     }
7130   } else {
7131     needbiop = FALSE;
7132     if (PackageTypeIsSet (seqPackage)
7133         || seqPackage == SEQ_PKG_GENOMICCDNA)
7134     {
7135       needbiop = TRUE;
7136       if (GetAppParam ("SEQUIN", "PREFERENCES", "BIOSRCONALL", NULL, str, PROC_NUC_STR_SIZE)) {
7137         if (StringICmp (str, "FALSE") == 0) {
7138           needbiop = FALSE;
7139         }
7140       }
7141     }
7142   }
7143   MemFree (str);
7144 
7145   return TRUE;
7146 }
7147 
AutomaticNucleotideProcess(SequencesFormPtr sqfp,SeqEntryPtr nsep,SeqEntryPtr top)7148 static Boolean AutomaticNucleotideProcess (SequencesFormPtr sqfp, SeqEntryPtr nsep,
7149                                            SeqEntryPtr top)
7150 
7151 {
7152   BioseqSetPtr  bssp;
7153   Boolean       rsult;
7154   SeqEntryPtr   tmp;
7155 
7156   if (sqfp == NULL || nsep == NULL || top == NULL) return FALSE;
7157   if (IS_Bioseq_set (nsep)) {
7158     bssp = (BioseqSetPtr) nsep->data.ptrvalue;
7159     rsult = FALSE;
7160     if (bssp != NULL) {
7161       for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
7162         if (AutomaticNucleotideProcess (sqfp, tmp, top)) {
7163           rsult = TRUE;
7164         }
7165       }
7166     }
7167     return rsult;
7168   }
7169   return ProcessOneNucleotideTitle (sqfp->seqPackage,
7170                                     nsep, top);
7171 }
7172 
/* Node of the binary tree built by BuildTree to detect Bioseqs whose ID
   labels collide.  Nodes are ordered by a case-insensitive comparison of key. */
typedef struct idlist {
  BioseqPtr  bsp;               /* Bioseq recorded at this node */
  CharPtr    key;               /* ID label of bsp; owned by the node, released in FreeTree */
  struct idlist PNTR left;      /* subtree for labels that compare lower than key */
  struct idlist PNTR right;     /* subtree for labels that compare higher than key */
} IdList, PNTR IdListPtr;
7179 
/*
 * BuildTree
 *
 * Inserts bsp into the label tree rooted at *head, using x as the label to
 * compare against existing keys (case-insensitive).
 *
 * If a node with an equal key already exists, bsp is given a fresh ID from
 * MakeNewProteinSeqId, the Bioseq index is updated, and insertion is retried
 * from the same subtree with the new label.  If no fresh ID can be made the
 * Bioseq is left out of the tree.
 */
static void BuildTree (IdListPtr PNTR head, BioseqPtr bsp, CharPtr x)

{
  SeqIdPtr   freshId;
  CharPtr    freshLabel;
  IdListPtr  node;
  Int2       order;

  node = *head;

  /* empty slot: create the node here */
  if (node == NULL) {
    node = MemNew (sizeof (IdList));
    if (node == NULL) {
      return;
    }
    *head = node;
    node->bsp = bsp;
    /* the key is regenerated from the Bioseq's current ID rather than copied from x */
    node->key = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
    node->left = NULL;
    node->right = NULL;
    return;
  }

  order = StringICmp (node->key, x);
  if (order < 0) {
    BuildTree (&(node->right), bsp, x);
    return;
  }
  if (order > 0) {
    BuildTree (&(node->left), bsp, x);
    return;
  }

  /* same label as an existing node: replace the ID and try again */
  freshId = MakeNewProteinSeqId (NULL, NULL);
  if (freshId == NULL) {
    return;
  }
  bsp->id = SeqIdFree (bsp->id);
  bsp->id = freshId;
  SeqMgrReplaceInBioseqIndex (bsp);
  freshLabel = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
  BuildTree (head, bsp, freshLabel);
  MemFree (freshLabel);
}
7217 
/*
 * FreeTree
 *
 * Releases every node of the label tree rooted at *head, together with each
 * node's key string, and resets *head to NULL so the caller is not left
 * holding a pointer to freed memory.  The Bioseqs referenced by the nodes
 * are not touched.  A NULL head or an empty tree is a no-op.
 */
static void FreeTree (IdListPtr PNTR head)

{
  IdListPtr  idlist;

  if (head == NULL || *head == NULL) return;

  idlist = *head;
  FreeTree (&(idlist->left));
  FreeTree (&(idlist->right));
  MemFree (idlist->key);
  MemFree (idlist);
  *head = NULL;
}
7231 
/*
 * ResolveCollidingIDs
 *
 * Walks the chain of entries starting at list and feeds every Bioseq entry
 * into the label tree at head via BuildTree, which assigns a new ID to any
 * Bioseq whose label is already present.  Entries that are not Bioseqs, or
 * that carry no Bioseq, are skipped.  Does nothing when head is NULL.
 */
static void ResolveCollidingIDs (IdListPtr PNTR head, SeqEntryPtr list)

{
  SeqEntryPtr  entry;
  CharPtr      label;
  BioseqPtr    seq;

  if (head == NULL) {
    return;
  }

  for (entry = list; entry != NULL; entry = entry->next) {
    if (! IS_Bioseq (entry)) {
      continue;
    }
    seq = (BioseqPtr) entry->data.ptrvalue;
    if (seq == NULL) {
      continue;
    }
    label = SeqIdWholeLabel (SeqIdFindWorst (seq->id), PRINTID_REPORT);
    BuildTree (head, seq, label);
    MemFree (label);
  }
}
7251 
7252 
/*
 * PutMolInfoOnSeqEntry
 *
 * Makes sure every Bioseq entry reachable from sep has a molinfo descriptor
 * that carries a MolInfo object.  Bioseq-sets are descended into member by
 * member; an existing descriptor that already has a MolInfo is left alone.
 *
 *   sqfp - form pointer; only checked for NULL here
 *   sep  - entry (Bioseq or Bioseq-set) to process
 */
static void PutMolInfoOnSeqEntry (SequencesFormPtr sqfp, SeqEntryPtr sep)

{
  BioseqSetPtr bssp;
  ValNodePtr   vnp;

  if (sqfp == NULL || sep == NULL) return;

  if (IS_Bioseq_set (sep))
  {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    /* a set entry without a set object has no members to visit */
    if (bssp == NULL) return;
    for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
    {
      PutMolInfoOnSeqEntry (sqfp, sep);
    }
    return;
  }

  vnp = SeqEntryGetSeqDescr (sep, Seq_descr_molinfo, NULL);
  if (vnp == NULL)
  {
    vnp = CreateNewDescriptor (sep, Seq_descr_molinfo);
  }
  if (vnp != NULL && vnp->data.ptrvalue == NULL)
  {
    vnp->data.ptrvalue = MolInfoNew ();
  }
}
7287 
/*
 * PrefixOrgToDefline
 *
 * Prepends the organism name to the title of a Bioseq, unless the title
 * already begins with it (case-insensitive).
 *
 * For a Bioseq-set of class 7, or of a class accepted by IsPopPhyEtcSet,
 * each member is processed in turn; any other set is ignored.
 *
 * The name comes from the Bioseq's own org descriptor or, failing that, from
 * its source descriptor.  Only the part before the first "(" is used, it is
 * limited to the size of the local buffer, and the two HIV type 1 / type 2
 * names are abbreviated to "HIV-1" / "HIV-2".  Nothing is changed when there
 * is no organism, no title descriptor, an empty title, or an empty name.
 */
static void PrefixOrgToDefline (SeqEntryPtr sep)

{
  BioSourcePtr  biop;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  CharPtr       def;
  OrgRefPtr     orp;
  CharPtr       ptr;
  CharPtr       str;
  Char          taxname [64];
  ValNodePtr    ttl;
  ValNodePtr    vnp;

  if (sep == NULL) return;
  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    if (bssp != NULL && (bssp->_class == 7 ||
                         (IsPopPhyEtcSet (bssp->_class)))) {
      for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
        PrefixOrgToDefline (sep);
      }
      return;
    }
  }

  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL) return;

  /* collect the descriptors of interest; the last one of each kind wins */
  taxname [0] = '\0';
  orp = NULL;
  biop = NULL;
  ttl = NULL;
  for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next) {
    if (vnp->choice == Seq_descr_source) {
      biop = (BioSourcePtr) vnp->data.ptrvalue;
    } else if (vnp->choice == Seq_descr_org) {
      orp = (OrgRefPtr) vnp->data.ptrvalue;
    } else if (vnp->choice == Seq_descr_title) {
      ttl = vnp;
    }
  }
  if (orp == NULL && biop != NULL) {
    orp = biop->org;
  }
  if (orp == NULL) return;
  if (ttl == NULL) return;

  StringNCpy_0 (taxname, orp->taxname, sizeof (taxname));
  /* keep only the text in front of a parenthesised suffix */
  ptr = StringSearch (taxname, "(");
  if (ptr != NULL) {
    *ptr = '\0';
  }
  TrimSpacesAroundString (taxname);
  if ((StringICmp (taxname, "Human immunodeficiency virus type 1") == 0) ||
      (StringICmp (taxname, "Human immunodeficiency virus 1") == 0)) {
    StringCpy (taxname, "HIV-1");
  } else if ((StringICmp (taxname, "Human immunodeficiency virus type 2") == 0) ||
             (StringICmp (taxname, "Human immunodeficiency virus 2") == 0)) {
    StringCpy (taxname, "HIV-2");
  }

  /* without a usable name there is nothing to prepend; bail out rather
     than risk putting a stray blank in front of the title */
  if (StringHasNoText (taxname)) return;

  def = (CharPtr) ttl->data.ptrvalue;
  if (StringHasNoText (def)) return;

  /* already prefixed with the organism name */
  ptr = StringISearch (def, taxname);
  if (ptr != NULL && ptr == def) return;

  str = MemNew ((StringLen (taxname) + StringLen (def) + 4) * sizeof (Char));
  if (str == NULL) return;
  StringCpy (str, taxname);
  StringCat (str, " ");
  StringCat (str, def);
  ttl->data.ptrvalue = MemFree (ttl->data.ptrvalue);
  ttl->data.ptrvalue = str;
}
7364 
/* Explanatory text shown after the "only one component" line in the message
   raised by OnlyOneComponentWarning. */
static CharPtr onecomponent = "\
Multiple sequence components are expected in this submission.\n\
They should all be read in at the same time from the same file.";
7368 
/*
 * OnlyOneComponentWarning
 *
 * Shows a warning that the submission contains a single component although
 * its package type expects several.  Nothing is shown when sqfp is NULL,
 * for the genomic/cDNA package, or for any package that PackageTypeIsSingle
 * accepts.
 */
static void OnlyOneComponentWarning (SequencesFormPtr sqfp)

{
  CharPtr  setName;

  if (sqfp == NULL) {
    return;
  }
  if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
    return;
  }
  if (PackageTypeIsSingle (sqfp->seqPackage)) {
    return;
  }

  /* human-readable name of the package type; falls back to a generic label */
  setName = "unknown set";
  switch (sqfp->seqPackage) {
    case SEQ_PKG_POPULATION :
      setName = "population set";
      break;
    case SEQ_PKG_PHYLOGENETIC :
      setName = "phylogenetic set";
      break;
    case SEQ_PKG_MUTATION :
      setName = "mutation set";
      break;
    case SEQ_PKG_ENVIRONMENT :
      setName = "environmental samples";
      break;
    case SEQ_PKG_GENBANK :
      setName = "batch submission";
      break;
    case SEQ_PKG_TSA :
      setName = "Transcriptome Shotgun Assembly";
      break;
    default :
      break;
  }

  Message (MSG_OK, "WARNING - There is only one component in this %s.\n%s",
           setName, onecomponent);
}
7407 
7408 /*---------------------------------*/
7409 /* Parse the gene and gene-related */
7410 /* fields from the title.          */
7411 /*---------------------------------*/
7412 extern void
AddGeneFeatureFromTitle(SeqEntryPtr nucsep,CharPtr ttl,SeqLocPtr slp)7413 AddGeneFeatureFromTitle
7414 (SeqEntryPtr nucsep,
7415  CharPtr ttl,
7416  SeqLocPtr slp)
7417 {
7418   CharPtr    gene = NULL;
7419   CharPtr    gene_desc = NULL;
7420   CharPtr    allele = NULL;
7421   CharPtr    gene_syn = NULL;
7422   GeneRefPtr grp = NULL;
7423   SeqFeatPtr sfp;
7424   SeqIdPtr   sip;
7425   BioseqPtr  nbsp, bsp;
7426   SeqLocPtr  gslp;
7427   Boolean    hasNulls;
7428 
7429   if (nucsep == NULL || !IS_Bioseq (nucsep)
7430       || (nbsp = (BioseqPtr) nucsep->data.ptrvalue) == NULL
7431       || StringHasNoText (ttl) || slp == NULL)
7432   {
7433     return;
7434   }
7435 
7436   gene = FindValueFromPairInDefline ("gene", ttl);
7437   if (!StringHasNoText (gene))
7438   {
7439     gene_desc = StringChr (gene, ';');
7440     if (gene_desc != NULL) {
7441       *gene_desc = '\0';
7442       gene_desc++;
7443       allele = StringChr (gene_desc, ';');
7444       if (allele != NULL) {
7445         *allele = '\0';
7446         allele++;
7447       }
7448     }
7449     grp = CreateNewGeneRef (gene, allele, gene_desc, FALSE);
7450   }
7451   gene = MemFree (gene);
7452 
7453   /*-----------------------------------------*/
7454   /* Parse the gene_syn field from the title */
7455   /*-----------------------------------------*/
7456 
7457   gene_syn = FindValueFromPairInDefline ("gene_syn", ttl);
7458   if (!StringHasNoText (gene_syn))
7459   {
7460     if (grp == NULL) {
7461       grp = GeneRefNew ();
7462     }
7463     ValNodeCopyStr(&(grp->syn),0,gene_syn);
7464   }
7465   gene_syn = MemFree (gene_syn);
7466 
7467   /* Create the gene feature */
7468   if (grp != NULL) {
7469     if (ExtendGene (grp, nucsep, slp)) {
7470       grp = GeneRefFree (grp);
7471     } else {
7472       sfp = CreateNewFeature (nucsep, NULL, SEQFEAT_GENE, NULL);
7473       if (sfp != NULL) {
7474         sfp->data.value.ptrvalue = (Pointer) grp;
7475         sfp->location = SeqLocFree (sfp->location);
7476         sfp->location = AsnIoMemCopy ((Pointer) slp,
7477                                       (AsnReadFunc) SeqLocAsnRead,
7478                                       (AsnWriteFunc) SeqLocAsnWrite);
7479         sip = SeqLocId (sfp->location);
7480         if (sip != NULL) {
7481           bsp = BioseqFind (sip);
7482         } else {
7483           bsp = nbsp;
7484         }
7485         if (bsp != NULL) {
7486           gslp = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
7487           if (gslp != NULL) {
7488             sfp->location = SeqLocFree (sfp->location);
7489             sfp->location = gslp;
7490             if (bsp->repr == Seq_repr_seg) {
7491               gslp = SegLocToPartsEx (bsp, sfp->location, TRUE);
7492               sfp->location = SeqLocFree (sfp->location);
7493               sfp->location = gslp;
7494               hasNulls = LocationHasNullsBetween (sfp->location);
7495               sfp->partial = (sfp->partial || hasNulls);
7496             }
7497             FreeAllFuzz (gslp);
7498           }
7499         }
7500       }
7501     }
7502     RemoveValueFromDefline ("gene", ttl);
7503     RemoveValueFromDefline ("gene_syn", ttl);
7504   }
7505 }
7506 
AddProteinFeatureFromDefline(SeqEntryPtr psep,CharPtr title)7507 extern SeqFeatPtr AddProteinFeatureFromDefline (SeqEntryPtr psep, CharPtr title)
7508 {
7509   CharPtr    activity = NULL;
7510   CharPtr    ec = NULL;
7511   CharPtr    prot_name = NULL;
7512   CharPtr    prot_desc = NULL;
7513   CharPtr    other_prot_desc = NULL, tmp_desc;
7514   ProtRefPtr prp;
7515   SeqFeatPtr sfp = NULL;
7516 
7517   if (psep == NULL)
7518   {
7519     return NULL;
7520   }
7521 
7522 	/*-----------------------------------------*/
7523 	/* Parse the function field from the title */
7524 	/*-----------------------------------------*/
7525 
7526   activity = FindValueFromPairInDefline ("function", title);
7527 
7528 	/*------------------------------------------*/
7529 	/* Parse the EC_number field from the title */
7530 	/*------------------------------------------*/
7531 
7532   ec = FindValueFromPairInDefline ("EC_number", title);
7533 
7534 	/*---------------------------------*/
7535 	/* Parse the protein and prot_desc */
7536 	/* fields from the title.          */
7537 	/*---------------------------------*/
7538 
7539   prot_name = FindValueFromPairInDefline ("protein", title);
7540 
7541 	/*---------------------------------*/
7542 	/* If we found a protein value ... */
7543 	/*---------------------------------*/
7544   if (!StringHasNoText (prot_name))
7545   {
7546 	  /*----------------------------------------------*/
7547 	  /* ... search for a protein description, either */
7548 	  /*     in the prot field (seperated by a ';')   */
7549 	  /*     or in its own 'prot_desc' field.         */
7550 	  /*----------------------------------------------*/
7551 
7552     prot_desc = StringChr (prot_name, ';');
7553 	  if (prot_desc != NULL)
7554 	  {
7555 		  *prot_desc = '\0';
7556 		  prot_desc++;
7557 		  /* ignore this description if empty */
7558 		  if (StringHasNoText (prot_desc))
7559 		  {
7560 		    prot_desc = NULL;
7561 		  }
7562 		  else
7563 		  {
7564 		    prot_desc = StringSave (prot_desc);
7565 		  }
7566 	  }
7567   }
7568 	other_prot_desc = FindValueFromPairInDefline ("prot_desc", title);
7569 	if (StringHasNoText (other_prot_desc))
7570 	{
7571 	  other_prot_desc = MemFree (other_prot_desc);
7572 	}
7573 	else
7574 	{
7575    if (prot_desc == NULL)
7576 	  {
7577 	    prot_desc = other_prot_desc;
7578 	    other_prot_desc = NULL;
7579 	  }
7580 	  else
7581 	  {
7582       tmp_desc = (CharPtr) MemNew ((StringLen (prot_desc) + StringLen (other_prot_desc) + 3)
7583 	                               * sizeof (Char));
7584 	    if (tmp_desc != NULL)
7585 	    {
7586 	      StringCpy (tmp_desc, prot_desc);
7587 	      StringCat (tmp_desc, ";");
7588   	    StringCat (tmp_desc, other_prot_desc);
7589 	      prot_desc = MemFree (prot_desc);
7590 	      other_prot_desc = MemFree (other_prot_desc);
7591 	      prot_desc = tmp_desc;
7592 	    }
7593 	  }
7594 	}
7595 
7596 	/*--------------------------------*/
7597 	/* ... add the prot and prot_desc */
7598 	/*     to the Seq Features.       */
7599 	/*--------------------------------*/
7600 
7601 	prp = CreateNewProtRef (prot_name, prot_desc, ec, activity);
7602 	if (prp != NULL)
7603 	{
7604 		sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
7605 		if (sfp != NULL)
7606 		{
7607 		  sfp->data.value.ptrvalue = (Pointer) prp;
7608 		  RemoveValueFromDefline ("protein", title);
7609 		  RemoveValueFromDefline ("prot_desc", title);
7610 		  RemoveValueFromDefline ("function", title);
7611 		  RemoveValueFromDefline ("EC_number", title);
7612 		}
7613 	}
7614   return sfp;
7615 }
7616 
7617 extern void
AddCodingRegionFieldsFromProteinTitle(CdRegionPtr crp,CharPtr title,CharPtr PNTR pcomment)7618 AddCodingRegionFieldsFromProteinTitle
7619 (CdRegionPtr  crp,
7620  CharPtr      title,
7621  CharPtr PNTR pcomment)
7622 {
7623   CharPtr comment, comment_loc, total_comment = NULL, tmp_comment;
7624 
7625   if (crp == NULL || StringHasNoText (title))
7626   {
7627     return;
7628   }
7629 
7630 	/*---------------------*/
7631 	/* Parse the ORF field */
7632 	/*---------------------*/
7633   if (FindValuePairInDefLine ("orf", title, NULL) != NULL)
7634   {
7635     crp->orf = TRUE;
7636     RemoveValueFromDefline ("orf", title);
7637   }
7638 
7639   if (pcomment == NULL)
7640   {
7641     return;
7642   }
7643 
7644 	/*-------------------------------*/
7645 	/* Parse the comment/note fields */
7646 	/*-------------------------------*/
7647   comment_loc = FindValuePairInDefLine ("comment", title, NULL);
7648   while (comment_loc != NULL)
7649   {
7650     comment = FindValueFromPairInDefline ("comment", comment_loc);
7651     if (!StringHasNoText (comment))
7652     {
7653       if (total_comment == NULL)
7654       {
7655         total_comment = comment;
7656         comment = NULL;
7657       }
7658       else
7659       {
7660         tmp_comment = (CharPtr) MemNew ((StringLen (total_comment) + StringLen (comment) + 3) * sizeof (Char));
7661         if (tmp_comment != NULL)
7662         {
7663           StringCpy (tmp_comment, total_comment);
7664           StringCat (tmp_comment, ";");
7665           StringCat (tmp_comment, comment);
7666           total_comment = MemFree (total_comment);
7667           total_comment = tmp_comment;
7668         }
7669       }
7670     }
7671     comment = MemFree (comment);
7672     RemoveValueFromDefline ("comment", title);
7673     comment_loc = FindValuePairInDefLine ("comment", title, NULL);
7674   }
7675 
7676   *pcomment = total_comment;
7677 }
7678 
/*
 * AutomaticMrnaProcess
 *
 * Annotates the nucleotide Bioseq in nucsep with the mRNA Bioseq in
 * mrnasep:
 *   - the location of the mRNA on the nucleotide is obtained from
 *     AlignmRNA2genomic (converted to part locations when the located
 *     Bioseq is segmented);
 *   - gene modifiers in the mRNA title are applied through
 *     AddGeneFeatureFromTitle;
 *   - an RNA feature is created on nucsep at that location, named from the
 *     "mrna" (or else "cdna") modifier, with the mRNA Bioseq as product and
 *     the "comment" modifier as its comment;
 *   - the mRNA title is dropped if nothing is left in it;
 *   - a molinfo descriptor is added to mrnasep and the mRNA Bioseq's
 *     molecule is set to RNA.
 *
 *   partial5, partial3 - mark the feature location (and the molinfo
 *                        completeness) as partial at the respective end
 *
 * Does nothing unless both entries are Bioseq entries with Bioseqs and a
 * location could be computed.
 */
static void AutomaticMrnaProcess (SeqEntryPtr nucsep, SeqEntryPtr mrnasep, Boolean partial5, Boolean partial3)

{
  CharPtr     mrna = NULL;
  CharPtr     comment = NULL;
  BioseqPtr   bsp;
  MolInfoPtr  mip;
  BioseqPtr   mrnabsp;
  BioseqPtr   nucbsp;
  SeqLocPtr   oldslp;
  RnaRefPtr   rrp;
  SeqFeatPtr  sfp;
  SeqIdPtr    sip;
  SeqLocPtr   slp;
  CharPtr     ttl;
  ValNodePtr  vnp;

  if (nucsep == NULL || mrnasep == NULL) return;
  if (! IS_Bioseq (nucsep) || ! IS_Bioseq (mrnasep)) return;

  nucbsp = (BioseqPtr) nucsep->data.ptrvalue;
  mrnabsp = (BioseqPtr) mrnasep->data.ptrvalue;
  if (nucbsp == NULL || mrnabsp == NULL) return;

  slp = AlignmRNA2genomic (nucbsp, mrnabsp);
  if (slp == NULL) return;

  /* on a segmented Bioseq, express the location on its parts */
  sip = SeqLocId (slp);
  if (sip != NULL) {
    bsp = BioseqFind (sip);
    if (bsp != NULL) {
      if (bsp->repr == Seq_repr_seg) {
        oldslp = slp;
        slp = SegLocToParts (bsp, oldslp);
        FreeAllFuzz (slp);
        SeqLocFree (oldslp);
      }
    }
  }
  StripLocusFromSeqLoc (slp);

  ttl = NULL;
  vnp = ValNodeFindNext (mrnabsp->descr, NULL, Seq_descr_title);
  if (vnp != NULL) {
    ttl = (CharPtr) vnp->data.ptrvalue;
  }
  if (ttl != NULL) {
    AddGeneFeatureFromTitle (nucsep, ttl, slp);

    /* get mRNA name; fall back to the "cdna" modifier */
    mrna = FindValueFromPairInDefline ("mrna", ttl);
    RemoveValueFromDefline ("mrna", ttl);
    if (StringHasNoText (mrna))
    {
      mrna = MemFree (mrna);
      mrna = FindValueFromPairInDefline ("cdna", ttl);
      RemoveValueFromDefline ("cdna", ttl);
    }
  }

  rrp = RnaRefNew ();
  if (rrp != NULL) {
    rrp->type = 2;   /* presumably the mRNA type code - TODO confirm */
    if (! StringHasNoText (mrna)) {
      /* the RNA reference takes over the name string */
      rrp->ext.choice = 1;
      rrp->ext.value.ptrvalue = mrna;
      mrna = NULL;
    }
    sfp = CreateNewFeature (nucsep, NULL, SEQFEAT_RNA, NULL);
    if (sfp != NULL) {
      sfp->data.value.ptrvalue = (Pointer) rrp;
      sfp->location = SeqLocFree (sfp->location);
      sfp->location = AsnIoMemCopy ((Pointer) slp,
                                    (AsnReadFunc) SeqLocAsnRead,
                                    (AsnWriteFunc) SeqLocAsnWrite);
      SetSeqFeatProduct (sfp, mrnabsp);
      SetSeqLocPartial (sfp->location, partial5, partial3);
      sfp->partial = (sfp->partial || partial5 || partial3);
      if (ttl != NULL) {
        comment = FindValueFromPairInDefline ("comment", ttl);
        if (!StringHasNoText (comment)) {
          /* the feature takes over the comment string */
          sfp->comment = comment;
        }
        else
        {
          comment = MemFree (comment);
        }
        RemoveValueFromDefline ("comment", ttl);
      }
    }
  }
  mrna = MemFree (mrna);
  SeqLocFree (slp);

  /* a title with nothing left in it is unlinked and released, so the
     detached descriptor node does not leak */
  if (StringHasNoText (ttl)) {
    ValNodeFreeData (ValNodeExtract (&(mrnabsp->descr), Seq_descr_title));
  }

  mip = MolInfoNew ();
  if (mip != NULL) {
    mip->biomol = 3;   /* presumably the mRNA biomol code - TODO confirm */
    /* completeness code depends on which ends are partial */
    if (partial5 && partial3) {
      mip->completeness = 5;
    } else if (partial5) {
      mip->completeness = 3;
    } else if (partial3) {
      mip->completeness = 4;
    }
    vnp = CreateNewDescriptor (mrnasep, Seq_descr_molinfo);
    if (vnp != NULL) {
      vnp->data.ptrvalue = (Pointer) mip;
    }
  }
  mrnabsp->mol = Seq_mol_rna;
}
7788 
/*
 * LookForValueInBioseq
 *
 * Returns the value of the modifier valname found in the title of the
 * Bioseq held by sep, as produced by FindValueFromPairInDefline (callers in
 * this file release the result with MemFree).
 *
 * Returns NULL if sep is NULL or not a Bioseq entry, valname has no text,
 * the Bioseq is missing, its molecule type differs from mol, or it has no
 * title descriptor with a string.
 */
static CharPtr LookForValueInBioseq (SeqEntryPtr sep, Uint1 mol, CharPtr valname)
{
  BioseqPtr   bsp;
  CharPtr     title;
  ValNodePtr  vnp;

  if (sep == NULL || StringHasNoText (valname)) return NULL;
  if (! IS_Bioseq (sep)) return NULL;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL || bsp->mol != mol || bsp->descr == NULL) return NULL;
  vnp = ValNodeFindNext (bsp->descr, NULL, Seq_descr_title);
  if (vnp == NULL || vnp->data.ptrvalue == NULL) return NULL;
  title = (CharPtr) vnp->data.ptrvalue;
  return FindValueFromPairInDefline (valname, title);
}
7804 
/*
 * FindBioseqWithValue
 *
 * Searches sep (descending into Bioseq-sets) for a Bioseq of molecule type
 * mol whose title carries the modifier valname with a value equal to value
 * (case-insensitive).  The matching entry is stored in *rsult; if several
 * entries match, the last one visited wins.  *rsult is left untouched when
 * nothing matches.
 */
static void FindBioseqWithValue (SeqEntryPtr sep, Uint1 mol, CharPtr valname, CharPtr value, SeqEntryPtr PNTR rsult)
{
  BioseqSetPtr  bssp = NULL;
  CharPtr       match_value;

  if (sep == NULL || sep->data.ptrvalue == NULL || rsult == NULL) return;
  if (IS_Bioseq (sep)) {
    match_value = LookForValueInBioseq (sep, mol, valname);
    /* StringICmp returns 0 on equality; a Bioseq without the modifier
       (NULL result) must never count as a match */
    if (match_value != NULL && StringICmp (match_value, value) == 0)
    {
      *rsult = sep;
    }
    match_value = MemFree (match_value);
  } else if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
      FindBioseqWithValue (sep, mol, valname, value, rsult);
    }
  }
}
7827 
/*
 * RemoveValueFromBioseq
 *
 * Removes the modifier valname from the title of the Bioseq held by sep.
 * If the title has no text left afterwards, the title descriptor is
 * unlinked from the Bioseq and released.
 *
 * Does nothing if sep is NULL or not a Bioseq entry, or if the Bioseq has
 * no descriptors or no title descriptor.
 */
static void RemoveValueFromBioseq (SeqEntryPtr sep, CharPtr valname)
{
  BioseqPtr   bsp;
  ValNodePtr  vnp;

  if (sep == NULL) return;
  if (! IS_Bioseq (sep)) return;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL || bsp->descr == NULL) return;
  vnp = SeqEntryGetSeqDescr (sep, Seq_descr_title, NULL);
  if (vnp == NULL) return;
  RemoveValueFromDefline (valname, vnp->data.ptrvalue);
  if (StringHasNoText (vnp->data.ptrvalue)) {
    /* free the detached node instead of dropping it on the floor */
    ValNodeFreeData (ValNodeExtract (&(bsp->descr), Seq_descr_title));
  }
}
7844 
/*
 * FindRnaByRefOnRna
 *
 * Reads the "prot" modifier from the title of the protein Bioseq in psep
 * and looks under sep for an RNA Bioseq whose title carries the same "prot"
 * value (via FindBioseqWithValue).  The "prot" modifier is then removed
 * from the title of the entry that was found.
 *
 * Returns the entry found, or NULL if an argument is NULL, the protein has
 * no "prot" value, or no RNA was selected.
 */
static SeqEntryPtr FindRnaByRefOnRna (SeqEntryPtr sep, SeqEntryPtr psep)

{
  SeqEntryPtr  found = NULL;
  CharPtr      wanted;

  if (sep == NULL || psep == NULL) {
    return NULL;
  }

  wanted = LookForValueInBioseq (psep, Seq_mol_aa, "prot");
  if (StringHasNoText (wanted)) {
    MemFree (wanted);
    return NULL;
  }

  FindBioseqWithValue (sep, Seq_mol_rna, "prot", wanted, &found);
  RemoveValueFromBioseq (found, "prot");
  MemFree (wanted);
  return found;
}
7862 
/*
 * FindRnaByName
 *
 * Scans the feature tables attached to sep (and, for a Bioseq-set, to all
 * of its members recursively) for RNA features of type 2 that have a name
 * (ext.choice 1) equal to str, compared case-insensitively, and that have
 * a product.  For each such feature whose product Bioseq can be found, the
 * product's entry is stored in *msep; a later match overwrites an earlier
 * one.
 *
 * Does nothing if sep, its data, str or msep is NULL.
 */
static void FindRnaByName (SeqEntryPtr sep, CharPtr str, SeqEntryPtr PNTR msep)

{
  SeqAnnotPtr   annot;
  SeqFeatPtr    feat;
  SeqEntryPtr   member;
  BioseqPtr     product;
  RnaRefPtr     rna;
  BioseqSetPtr  set = NULL;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (str == NULL || msep == NULL) return;

  if (IS_Bioseq (sep)) {
    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
  } else if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    annot = set->annot;
  } else {
    return;
  }

  for (; annot != NULL; annot = annot->next) {
    /* only annotations of type 1 are walked as feature lists */
    if (annot->type != 1) continue;

    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
      if (feat->data.choice != SEQFEAT_RNA) continue;

      rna = (RnaRefPtr) feat->data.value.ptrvalue;
      if (rna == NULL || rna->type != 2 || rna->ext.choice != 1) continue;
      if (feat->product == NULL) continue;
      if (StringICmp (rna->ext.value.ptrvalue, str) != 0) continue;

      product = BioseqFind (SeqLocId (feat->product));
      if (product != NULL) {
        *msep = SeqMgrGetSeqEntryForData (product);
      }
    }
  }

  /* descend into the members of a set */
  if (set != NULL) {
    for (member = set->seq_set; member != NULL; member = member->next) {
      FindRnaByName (member, str, msep);
    }
  }
}
7907 
/* Reads the "mrna" value from the protein entry (psep).  When that value has
 * text, FindRnaByName searches sep for an RNA feature with that name, and
 * RemoveValueFromBioseq is then called on the result for "mrna".
 *
 * Returns the entry reported by FindRnaByName, or NULL when either argument
 * is NULL, the value is blank, or nothing matched.
 */
static SeqEntryPtr FindRnaByRefOnProtein (SeqEntryPtr sep, SeqEntryPtr psep)

{
  SeqEntryPtr  rna_sep = NULL;
  CharPtr      wanted;

  if (sep == NULL || psep == NULL) {
    return NULL;
  }

  wanted = LookForValueInBioseq (psep, Seq_mol_aa, "mrna");
  if (StringHasNoText (wanted)) {
    MemFree (wanted);
    return NULL;
  }

  FindRnaByName (sep, wanted, &rna_sep);
  RemoveValueFromBioseq (rna_sep, "mrna");
  MemFree (wanted);
  return rna_sep;
}
7925 
/* Recursively scans the annotations of sep (a Bioseq or a Bioseq-set and all
 * of its members) for RNA features with RnaRef type 2 and a product.
 * NOTE(review): type 2 is presumably mRNA - confirm against RnaRef.
 *
 * For each candidate, SeqLocAinB (slp, feature location) is evaluated.  A
 * non-negative result that is smaller than *mindiff, on a feature whose
 * product Bioseq can be found, becomes the new best: *mindiff is lowered to
 * it and *msep is set to the product's SeqEntry.  The caller seeds *mindiff.
 */
static void FindRnaByLocationOverlap (SeqEntryPtr sep, SeqLocPtr slp,
                                      Int4Ptr mindiff, SeqEntryPtr PNTR msep)

{
  BioseqPtr     product_bsp;
  BioseqSetPtr  set = NULL;
  SeqAnnotPtr   annot;
  SeqEntryPtr   child;
  SeqFeatPtr    feat;
  RnaRefPtr     rna;
  Int4          score;

  if (sep == NULL || sep->data.ptrvalue == NULL) return;
  if (slp == NULL || mindiff == NULL || msep == NULL) return;

  if (IS_Bioseq (sep)) {
    annot = ((BioseqPtr) sep->data.ptrvalue)->annot;
  } else if (IS_Bioseq_set (sep)) {
    set = (BioseqSetPtr) sep->data.ptrvalue;
    annot = set->annot;
  } else {
    return;
  }

  for (; annot != NULL; annot = annot->next) {
    /* only annotations of type 1 have their data walked as a feature list */
    if (annot->type != 1) continue;

    for (feat = (SeqFeatPtr) annot->data; feat != NULL; feat = feat->next) {
      if (feat->data.choice != SEQFEAT_RNA) continue;

      rna = (RnaRefPtr) feat->data.value.ptrvalue;
      if (rna == NULL || rna->type != 2 || feat->product == NULL) continue;

      score = SeqLocAinB (slp, feat->location);
      if (score < 0 || score >= *mindiff) continue;

      /* the best score is only updated when the product can be resolved */
      product_bsp = BioseqFind (SeqLocId (feat->product));
      if (product_bsp != NULL) {
        *mindiff = score;
        *msep = SeqMgrGetSeqEntryForData (product_bsp);
      }
    }
  }

  /* descend into the members of a set */
  if (set != NULL) {
    for (child = set->seq_set; child != NULL; child = child->next) {
      FindRnaByLocationOverlap (child, slp, mindiff, msep);
    }
  }
}
7976 
/* For a nuc-prot set, removes and frees the first source descriptor on the
 * set when CmpOrgById reports that its BioSource matches the one in the
 * source descriptor that SeqEntryGetSeqDescr returns for the nucleotide
 * entry.  Does nothing when sep is not a nuc-prot set, when the nucleotide
 * entry is missing or is not a Bioseq, or when either descriptor is absent.
 */
static void FuseNucProtBiosources (SeqEntryPtr sep)

{
  BioseqSetPtr  nps;
  SeqEntryPtr   nuc_sep;
  ValNodePtr    PNTR link;
  ValNodePtr    set_src, nuc_src;

  if (sep == NULL || ! IS_Bioseq_set (sep)) return;
  nps = (BioseqSetPtr) sep->data.ptrvalue;
  if (nps == NULL || nps->_class != BioseqseqSet_class_nuc_prot) return;

  nuc_sep = FindNucSeqEntry (sep);
  if (nuc_sep == NULL || ! IS_Bioseq (nuc_sep)) return;
  if (nuc_sep->data.ptrvalue == NULL) return;

  /* walk the set's descriptor chain by the link that points at each node,
     so that the matching node can be unlinked without a second pass */
  link = &(nps->descr);
  while (*link != NULL && (*link)->choice != Seq_descr_source) {
    link = &((*link)->next);
  }
  set_src = *link;
  if (set_src == NULL) return;

  nuc_src = SeqEntryGetSeqDescr (nuc_sep, Seq_descr_source, NULL);
  if (nuc_src == NULL) return;

  if (CmpOrgById ((BioSourcePtr) set_src->data.ptrvalue,
                  (BioSourcePtr) nuc_src->data.ptrvalue)) {
    *link = set_src->next;
    set_src->next = NULL;
    SeqDescrFree (set_src);
  }
}
8013 
/* Attaches a MolInfo descriptor to one protein entry and then hands the
 * protein to AutomaticProteinProcess, first adding it to a target entry
 * when assign_sep is given.
 *
 *   prot_sep   - protein entry to process; nothing is done when NULL
 *   sqfp       - sequences form; may be NULL.  When present it supplies the
 *                partial flags and selects tech 13; when absent the flags
 *                come from use_this (or are FALSE) and tech is left unset
 *   assign_sep - entry the protein should be added to; may be NULL
 *   use_this   - coding region location passed to AutomaticProteinProcess
 *   nucbsp     - nucleotide Bioseq used to predict a coding region when an
 *                mRNA has to be located by overlap
 *   code       - genetic code
 *   makeMRNA   - passed through to AutomaticProteinProcess
 *
 * For the SEQ_PKG_GENOMICCDNA package the target is chosen by looking for an
 * mRNA: by the "mrna" value on the protein, then by the "prot" value, then by
 * overlap with a predicted coding region.  Otherwise, or when no mRNA is
 * found, the target is derived from assign_sep.
 *
 * NOTE(review): biomol 8, tech 13 and completeness 3/4/5 are presumably
 * peptide, concept-trans-a and no-left/no-right/no-ends from the MolInfo
 * specification - confirm against objseq.h.
 */
static void AssignOneProtein
(SeqEntryPtr      prot_sep,
 SequencesFormPtr sqfp,
 SeqEntryPtr      assign_sep,
 SeqLocPtr        use_this,
 BioseqPtr        nucbsp,
 Int2             code,
 Boolean          makeMRNA)
{
  MolInfoPtr        mip;
  SeqEntryPtr       msep = NULL;
  BioseqPtr         protbsp;
  SeqLocPtr         slp;
  Int4              mindiff;
  Boolean           partialN = FALSE;
  Boolean           partialC = FALSE;
  ValNodePtr        vnp;

  if (prot_sep == NULL)
  {
    return;
  }

  mip = MolInfoNew ();
  if (mip != NULL) {
    mip->biomol = 8;
    if (sqfp != NULL) {
      mip->tech = 13;
    }
    if (sqfp == NULL) {
      if (use_this != NULL) {
        CheckSeqLocForPartial (use_this, &partialN, &partialC);
      }
    } else {
      partialN = GetStatus (sqfp->partialN);
      partialC = GetStatus (sqfp->partialC);
    }
    if (partialN && partialC) {
      mip->completeness = 5;
    } else if (partialN) {
      mip->completeness = 3;
    } else if (partialC) {
      mip->completeness = 4;
    }
    vnp = CreateNewDescriptor (prot_sep, Seq_descr_molinfo);
    if (vnp != NULL) {
      vnp->data.ptrvalue = (Pointer) mip;
    } else {
      /* nothing took ownership of the MolInfo, so release it here */
      mip = MolInfoFree (mip);
    }
  }
  if (assign_sep != NULL) {
    if (sqfp != NULL && sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
      ClearBatchSuggestNucleotide ();
      msep = FindRnaByRefOnProtein (assign_sep, prot_sep);
      if (msep == NULL) {
        msep = FindRnaByRefOnRna (assign_sep, prot_sep);
      }
      if (msep == NULL && nucbsp != NULL && IS_Bioseq (prot_sep)) {
        protbsp = (BioseqPtr) prot_sep->data.ptrvalue;
        if (protbsp != NULL) {
          /* last resort: pick the mRNA that best contains the predicted CDS */
          slp = PredictCodingRegion (nucbsp, protbsp, code);
          if (slp != NULL) {
            mindiff = INT4_MAX;
            FindRnaByLocationOverlap (assign_sep, slp, &mindiff, &msep);
          }
          SeqLocFree (slp);
        }
      }
    }
    if (msep != NULL) {
      msep = GetBestTopParentForDataEx (ObjMgrGetEntityIDForChoice (msep),
                                        (BioseqPtr) msep->data.ptrvalue, TRUE);
    }
    if (msep == NULL) {
      /* no mRNA target: fall back to the entry supplied by the caller */
      msep = assign_sep;
      if (IS_Bioseq (msep))
      {
        msep = GetBestTopParentForDataEx (ObjMgrGetEntityIDForChoice (msep),
                                          (BioseqPtr) msep->data.ptrvalue, TRUE);
      }
    }
    AddSeqEntryToSeqEntry (msep, prot_sep, TRUE);
    AutomaticProteinProcess (msep, prot_sep, code, makeMRNA, use_this);
  } else {
    AutomaticProteinProcess (assign_sep, prot_sep, code, makeMRNA, use_this);
  }
}
8105 
/* Searches sep (a Bioseq, or a Bioseq-set and all of its members) for the
 * first Bioseq that has a Seq-id whose PRINTID_REPORT label equals
 * transcript_id exactly (case-sensitive).
 *
 * Returns the SeqEntry of that Bioseq, or NULL when sep is NULL, carries no
 * data, is neither a Bioseq nor a Bioseq-set, or contains no matching id.
 */
static SeqEntryPtr FindSeqEntryWithTranscriptID (SeqEntryPtr sep, CharPtr transcript_id)
{
  SeqEntryPtr  found_sep = NULL;
  BioseqPtr    nbsp;
  SeqIdPtr     sip, sip_next;
  CharPtr      tmp;
  BioseqSetPtr bssp;

  if (sep == NULL || sep->data.ptrvalue == NULL)
  {
    return NULL;
  }

  if (IS_Bioseq (sep))
  {
    nbsp = (BioseqPtr) sep->data.ptrvalue;
    for (sip = nbsp->id; sip != NULL && found_sep == NULL; sip = sip_next)
    {
      /* detach the id from the chain while it is labelled so that the label
         covers this one id only, then restore the chain */
      sip_next = sip->next;
      sip->next = NULL;
      tmp = SeqIdWholeLabel (sip, PRINTID_REPORT);
      sip->next = sip_next;
      if (StringCmp (tmp, transcript_id) == 0)
      {
        found_sep = sep;
      }
      tmp = MemFree (tmp);
    }
  }
  else if (IS_Bioseq_set (sep))
  {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    for (sep = bssp->seq_set; sep != NULL && found_sep == NULL; sep = sep->next)
    {
      found_sep = FindSeqEntryWithTranscriptID (sep, transcript_id);
    }
  }
  return found_sep;
}
8140 
8141 /* This section of code is used for matching up proteins to coding region locations
8142  * on the nucleotide sequences.
8143  */
8144 
/* A NucProtAssoc list is used to hold the list of pairings between protein and nucleotide
 * sequences.  There is one list node per protein sequence, in protein order.  The position
 * field of a node is the position of the nucleotide sequence in the set plus one - a zero
 * indicates that there is no nucleotide for this protein.  The loc field holds the
 * location of the coding region on the nucleotide.
 */
8151 
8152 /* This function frees the AssociationList. */
FreeAssociationList(NucProtAssocPtr assoc_list)8153 extern NucProtAssocPtr FreeAssociationList (NucProtAssocPtr assoc_list)
8154 {
8155   if (assoc_list == NULL)
8156   {
8157     return NULL;
8158   }
8159   assoc_list->next = FreeAssociationList (assoc_list->next);
8160   assoc_list->loc = SeqLocFree (assoc_list->loc);
8161   assoc_list = MemFree (assoc_list);
8162   return assoc_list;
8163 }
8164 
/* Appends a new association node holding position and loc to the end of
 * *assoc_list, creating the list when it is empty.
 *
 *   assoc_list - address of the list head; NULL makes the call a no-op
 *   position   - value stored in the node's position field
 *   loc        - SeqLoc stored in the node's loc field (pointer is stored,
 *                not copied)
 *
 * Returns the list head, or NULL when assoc_list is NULL.  When the node
 * cannot be allocated the list is returned unchanged and loc is not stored.
 */
static NucProtAssocPtr NewAssociationList (NucProtAssocPtr PNTR assoc_list, Int4 position, SeqLocPtr loc)
{
  NucProtAssocPtr last = NULL;
  NucProtAssocPtr new_assoc;

  /* validate before allocating so that the early return cannot leak a node */
  if (assoc_list == NULL) {
    return NULL;
  }

  new_assoc = (NucProtAssocPtr) MemNew (sizeof (NucProtAssocData));
  if (new_assoc != NULL) {
    new_assoc->position = position;
    new_assoc->loc = loc;
    new_assoc->next = NULL;
    if (*assoc_list == NULL) {
      *assoc_list = new_assoc;
    } else {
      last = *assoc_list;
      while (last->next != NULL) {
        last = last->next;
      }
      last->next = new_assoc;
    }
  }
  return *assoc_list;
}
8189 
8190 /* This function copies the AssociationList */
CopyAssociationList(NucProtAssocPtr orig_assoc_list)8191 static NucProtAssocPtr CopyAssociationList (NucProtAssocPtr orig_assoc_list)
8192 {
8193   NucProtAssocPtr copy_assoc_list = NULL;
8194 
8195   if (orig_assoc_list == NULL)
8196   {
8197     return NULL;
8198   }
8199   copy_assoc_list = (NucProtAssocPtr) MemNew (sizeof (NucProtAssocData));
8200   if (copy_assoc_list != NULL)
8201   {
8202     copy_assoc_list->position = orig_assoc_list->position;
8203     copy_assoc_list->loc = SeqLocCopy (orig_assoc_list->loc);
8204     copy_assoc_list->next = CopyAssociationList (orig_assoc_list->next);
8205   }
8206 
8207   return copy_assoc_list;
8208 }
8209 
8210 
8211 /* This function determines whether all proteins have been assigned to
8212  * nucleotide sequences.
8213  */
AllLocationsProvided(NucProtAssocPtr vnp)8214 static Boolean AllLocationsProvided (NucProtAssocPtr vnp)
8215 {
8216   if (vnp == NULL)
8217   {
8218     return FALSE;
8219   }
8220   while (vnp != NULL)
8221   {
8222     if (vnp->position == 0)
8223     {
8224       return FALSE;
8225     }
8226     vnp = vnp->next;
8227   }
8228   return TRUE;
8229 }
8230 
8231 /* This function determines whether any proteins have been assigned to
8232  * nucleotide sequences.
8233  */
AnyLocationsProvided(NucProtAssocPtr vnp)8234 static Boolean AnyLocationsProvided (NucProtAssocPtr vnp)
8235 {
8236   if (vnp == NULL)
8237   {
8238     return FALSE;
8239   }
8240   while (vnp != NULL)
8241   {
8242     if (vnp->position != 0)
8243     {
8244       return TRUE;
8245     }
8246     vnp = vnp->next;
8247   }
8248   return FALSE;
8249 }
8250 
8251 /* Given a nucleotide-protein pair, this function calculates a coding region location
8252  * using Suggest Intervals.  If no location is found, a location that includes the
8253  * entire sequence is returned instead.
8254  */
DefaultPairInterval(BioseqPtr nbsp,BioseqPtr pbsp,Int2 code)8255 static SeqLocPtr DefaultPairInterval (BioseqPtr nbsp, BioseqPtr pbsp, Int2 code)
8256 {
8257   SeqLocPtr slp;
8258   ErrSev    oldsev;
8259   Char      prot_str[3];
8260   Boolean   partial5 = FALSE, partial3 = FALSE;
8261 
8262   if (nbsp == NULL || pbsp == NULL)
8263   {
8264     return NULL;
8265   }
8266 
8267   /* need to suppress errors */
8268   oldsev = ErrSetMessageLevel (SEV_MAX);
8269 
8270   /* try to get location using SuggestIntervals */
8271   SetBatchSuggestNucleotide (nbsp, code);
8272   slp = PredictCodingRegion (nbsp, pbsp, code);
8273   ClearBatchSuggestNucleotide ();
8274 
8275   ErrSetMessageLevel (oldsev);
8276 
8277   /* if no location, use entire sequence */
8278   if (slp == NULL)
8279   {
8280     return slp;
8281   }
8282 
8283   /* check for start and stop codons */
8284   SeqPortStreamInt (pbsp, 0, 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) (prot_str), NULL);
8285   if (prot_str[0] != 'M') {
8286     partial5 = TRUE;
8287   }
8288 
8289   if (SeqLocLen (slp) / 3 != pbsp->length + 1) {
8290     partial3 = TRUE;
8291   }
8292 
8293   SetSeqLocPartial (slp, partial5, partial3);
8294 
8295   return slp;
8296 }
8297 
8298 
FindFeaturesInIdenticalRegions(NucProtAssocPtr assoc_list)8299 static Boolean FindFeaturesInIdenticalRegions (NucProtAssocPtr assoc_list)
8300 {
8301   Char       path [PATH_MAX];
8302   FILE       *fp;
8303   NucProtAssocPtr   vnp;
8304   SeqFeatPtr sfp;
8305   SeqMgrFeatContext fcontext;
8306   Char        id_txt [128];
8307   Boolean     found_any = FALSE;
8308 
8309   if (assoc_list == NULL)
8310   {
8311     return FALSE;
8312   }
8313 
8314   TmpNam (path);
8315   fp = FileOpen (path, "wb");
8316 
8317   for (vnp = assoc_list; vnp != NULL; vnp = vnp->next) {
8318     if (vnp->loc != NULL) {
8319       sfp = SeqMgrGetOverlappingCDS (vnp->loc, &fcontext);
8320       if (sfp != NULL && SeqLocCompare (vnp->loc, sfp->location) == SLC_A_EQ_B) {
8321         if (fp == NULL) {
8322           return TRUE;
8323         } else {
8324           found_any = TRUE;
8325           SeqIdWrite (SeqLocId (vnp->loc), id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
8326           fprintf (fp, "%s\n", id_txt);
8327         }
8328       }
8329     }
8330   }
8331   FileClose (fp);
8332 
8333   if (found_any) {
8334     LaunchGeneralTextViewer (path, "Sequences with pre-existing Coding Regions");
8335   }
8336   FileRemove (path);
8337   return found_any;
8338 }
8339 
8340 
GetGeneticCodeFromBioseq(BioseqPtr bsp)8341 static Int2 GetGeneticCodeFromBioseq (BioseqPtr bsp)
8342 {
8343   Int2 code = 1;
8344   Int2 which_code, read_code;
8345   SeqDescrPtr sdp;
8346   SeqMgrDescContext context;
8347   CharPtr location, genetic_code;
8348   BioSourcePtr biop;
8349 
8350   if (bsp != NULL) {
8351     sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
8352     if (sdp != NULL) {
8353       biop = (BioSourcePtr) sdp->data.ptrvalue;
8354       if (biop->genome == GENOME_mitochondrion) {
8355         if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->mgcode != 0) {
8356           code = biop->org->orgname->mgcode;
8357         }
8358       } else if (biop->genome == GENOME_plastid) {
8359         if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->pgcode != 0) {
8360           code = biop->org->orgname->pgcode;
8361         } else {
8362           code = 11;
8363         }
8364       } else if (biop->org != NULL && biop->org->orgname != NULL && biop->org->orgname->gcode != 0) {
8365         code = biop->org->orgname->gcode;
8366       }
8367     } else {
8368       sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
8369       if (sdp != NULL) {
8370         location = FindValueFromPairInDeflineBeforeCharPtr ("location", sdp->data.ptrvalue, NULL);
8371         if (!StringHasNoText (location)) {
8372           which_code = UseGeneticCodeForLocation (location);
8373           if (which_code == USE_MITOCHONDRIAL_GENETIC_CODE) {
8374             genetic_code = FindValueFromPairInDeflineBeforeCharPtr ("mgcode", sdp->data.ptrvalue, NULL);
8375             if (genetic_code == NULL) {
8376               genetic_code = FindValueFromPairInDeflineBeforeCharPtr ("gcode", sdp->data.ptrvalue, NULL);
8377             }
8378             if (genetic_code == NULL) {
8379               genetic_code = FindValueFromPairInDeflineBeforeCharPtr ("genetic_code", sdp->data.ptrvalue, NULL);
8380             }
8381             if (genetic_code != NULL) {
8382               read_code = GeneticCodeFromString (genetic_code);
8383               genetic_code = MemFree (genetic_code);
8384               if (read_code > 0) {
8385                 code = read_code;
8386               }
8387             }
8388           } else if (which_code == USE_OTHER_GENETIC_CODE) {
8389             /* plastid */
8390             code = 11;
8391           } else {
8392             genetic_code = FindValueFromPairInDeflineBeforeCharPtr ("genetic_code", sdp->data.ptrvalue, NULL);
8393             read_code = GeneticCodeFromString (genetic_code);
8394             genetic_code = MemFree (genetic_code);
8395             if (read_code > 0) {
8396               code = read_code;
8397             }
8398           }
8399         }
8400         location = MemFree (location);
8401       }
8402     }
8403   }
8404   return code;
8405 }
8406 
8407 
8408 /* This function takes a ValNode list where each ValNode represents
8409  * a protein in prot_list (in order).  The choice for each ValNode
8410  * represents the position of the chosen nucleotide in the nuc_list
8411  * (position includes segments in segmented sets, which is why
8412  * FindNthSequenceInSet is used) plus one - zero indicates that there
8413  * is no nucleotide sequence for this protein.
8414  * The data.ptrvalue for the ValNode is to be populated with a
8415  * coding region SeqLoc, or NULL if there is no nucleotide for the protein.
8416  */
8417 static Boolean
PickCodingRegionLocationsForProteinNucleotidePairs(NucProtAssocPtr assoc_list,SeqEntryPtr nuc_list,SeqEntryPtr prot_list)8418 PickCodingRegionLocationsForProteinNucleotidePairs
8419 (NucProtAssocPtr  assoc_list,
8420  SeqEntryPtr nuc_list,
8421  SeqEntryPtr prot_list)
8422 {
8423   NucProtAssocPtr vnp_assoc;
8424   Int4       data_row;
8425   BioseqPtr  nbsp, pbsp;
8426   SeqLocPtr  slp;
8427   Char       path [PATH_MAX];
8428   FILE       *fp;
8429   Boolean    errors_found = FALSE;
8430   Char       n_idstr[128];
8431   Char       p_idstr[128];
8432   Int2       code;
8433 
8434   if (assoc_list == NULL || nuc_list == NULL || prot_list == NULL)
8435   {
8436     return FALSE;
8437   }
8438 
8439   TmpNam (path);
8440   fp = FileOpen (path, "wb");
8441 
8442   vnp_assoc = assoc_list;
8443   for (data_row = 0, vnp_assoc = assoc_list;
8444        vnp_assoc != NULL;
8445        data_row++, vnp_assoc = vnp_assoc->next)
8446   {
8447     if (vnp_assoc->position > 0)
8448     {
8449       nbsp = FindNthSequenceInSet (nuc_list, vnp_assoc->position - 1, NULL, TRUE);
8450       pbsp = FindNthSequenceInSet (prot_list, data_row, NULL, FALSE);
8451       if (nbsp == NULL || pbsp == NULL) {
8452         slp = NULL;
8453       } else if ((nbsp->length +1) / 3 < pbsp->length) {
8454         if (fp != NULL) {
8455           SeqIdWrite (SeqIdFindWorst (nbsp->id), n_idstr, PRINTID_REPORT,
8456                       sizeof (n_idstr));
8457           SeqIdWrite (SeqIdFindWorst (pbsp->id), p_idstr, PRINTID_REPORT,
8458                       sizeof (p_idstr));
8459           fprintf (fp, "%s is too short to encode %s\n", n_idstr, p_idstr);
8460         }
8461         vnp_assoc->position = 0;
8462         errors_found = TRUE;
8463         slp = NULL;
8464       } else {
8465         code = GetGeneticCodeFromBioseq (nbsp);
8466         slp = DefaultPairInterval (nbsp, pbsp, code);
8467         if (slp == NULL) {
8468           errors_found = TRUE;
8469           if (fp != NULL) {
8470             SeqIdWrite (SeqIdFindWorst (nbsp->id), n_idstr, PRINTID_REPORT,
8471                         sizeof (n_idstr));
8472             SeqIdWrite (SeqIdFindWorst (pbsp->id), p_idstr, PRINTID_REPORT,
8473                         sizeof (p_idstr));
8474             fprintf (fp, "Unable to determine coding region location on %s for %s\n", n_idstr, p_idstr);
8475           }
8476         }
8477       }
8478     }
8479     else
8480     {
8481       slp = NULL;
8482     }
8483     vnp_assoc->loc = SeqLocFree (vnp_assoc->loc);
8484     vnp_assoc->loc = slp;
8485   }
8486 
8487   FileClose (fp);
8488   if (errors_found) {
8489     LaunchGeneralTextViewer (path, "Nucleotide-Protein Mismatches");
8490   }
8491   FileRemove (path);
8492   return !errors_found;
8493 }
8494 
8495 
/* Looks up the genetic code for a Bioseq from the source descriptors on its
 * best top parent (as returned by GetBestTopParentForData).  Every source
 * descriptor whose BioSource has an org with an orgname is converted with
 * BioSourceToGeneticCode; when several qualify, the last one wins.
 *
 * Returns default_code when bsp is NULL, when no parent with data is found,
 * or when no source descriptor qualifies.
 */
static Int2 FindGeneticCodeForBioseq (BioseqPtr bsp, Int2 default_code)
{
  Int2         result = default_code;
  BioSourcePtr src;
  SeqEntryPtr  top;
  SeqDescrPtr  desc = NULL;

  if (bsp == NULL) return default_code;

  top = GetBestTopParentForData (ObjMgrGetEntityIDForPointer (bsp), bsp);
  if (top == NULL || top->data.ptrvalue == NULL) return default_code;

  /* choice 1 is handled as a Bioseq, choice 2 as a Bioseq-set */
  switch (top->choice)
  {
    case 1:
      desc = ((BioseqPtr) top->data.ptrvalue)->descr;
      break;
    case 2:
      desc = ((BioseqSetPtr) top->data.ptrvalue)->descr;
      break;
    default:
      break;
  }

  for (; desc != NULL; desc = desc->next)
  {
    if (desc->choice != Seq_descr_source) continue;
    src = (BioSourcePtr) desc->data.ptrvalue;
    if (src == NULL || src->org == NULL || src->org->orgname == NULL) continue;
    result = BioSourceToGeneticCode (src);
  }
  return result;
}
8531 
8532 
8533 /* This function takes a ValNode list of coding region SeqLocs,
8534  * the list of nucleotide sequences, and the list of protein sequences
8535  * and creates the nuc-prot sets.
8536  */
8537 static void
AssignProteinsToSelectedNucleotides(NucProtAssocPtr assoc_list,SeqEntryPtr nuc_list,SeqEntryPtr prot_list,SequencesFormPtr sqfp,Int2 code,Boolean makeMRNA)8538 AssignProteinsToSelectedNucleotides
8539 (NucProtAssocPtr  assoc_list,
8540  SeqEntryPtr      nuc_list,
8541  SeqEntryPtr      prot_list,
8542  SequencesFormPtr sqfp,
8543  Int2             code,
8544  Boolean          makeMRNA)
8545 {
8546   SeqEntryPtr prot_sep, nsep, prot_next;
8547   NucProtAssocPtr  vnp_assoc;
8548   BioseqPtr   nbsp;
8549   BioseqPtr PNTR bsp_array;
8550   Int4           prot_num;
8551   ValNodePtr     descr = NULL;
8552   Int2           genCode;
8553 
8554   if (assoc_list == NULL || nuc_list == NULL || prot_list == NULL)
8555   {
8556     return;
8557   }
8558 
8559   /* need to collect bioseqs before we start adding, otherwise the position in
8560    * the set changes */
8561 
8562   bsp_array = (BioseqPtr PNTR) MemNew (ValNodeLen (prot_list) * sizeof (BioseqPtr));
8563   if (bsp_array == NULL)
8564   {
8565     return;
8566   }
8567 
8568   for (prot_num = 0, vnp_assoc = assoc_list;
8569        vnp_assoc != NULL;
8570        prot_num++, vnp_assoc = vnp_assoc->next)
8571   {
8572     if (vnp_assoc->loc == NULL)
8573     {
8574       bsp_array [prot_num] = NULL;
8575     }
8576     else
8577     {
8578       bsp_array [prot_num] = FindNthSequenceInSet (nuc_list, vnp_assoc->position - 1, NULL, TRUE);
8579     }
8580   }
8581 
8582   for (prot_sep = prot_list, vnp_assoc = assoc_list, prot_num = 0;
8583        prot_sep != NULL && vnp_assoc != NULL;
8584        prot_sep = prot_next, vnp_assoc = vnp_assoc->next, prot_num++)
8585   {
8586     prot_next = prot_sep->next;
8587     prot_sep->next = NULL;
8588 
8589     if (vnp_assoc->loc == NULL)
8590     {
8591       /* discard protein */
8592       if (IS_Bioseq (prot_sep))
8593       {
8594         SeqMgrDeleteFromBioseqIndex (prot_sep->data.ptrvalue);
8595       }
8596       prot_sep = SeqEntryFree (prot_sep);
8597     }
8598     else
8599     {
8600       nbsp = bsp_array [prot_num];
8601       nsep = SeqMgrGetSeqEntryForData (nbsp);
8602       if (nbsp != NULL && nbsp->repr == Seq_repr_seg)
8603       {
8604         nsep = GetBestTopParentForData (ObjMgrGetEntityIDForPointer (nbsp), nbsp);
8605       }
8606       genCode = FindGeneticCodeForBioseq (nbsp, code);
8607       if (nsep != NULL && nsep->data.ptrvalue == nbsp) {
8608         descr = ExtractBioSourceAndPubs (nsep);
8609       }
8610       AssignOneProtein (prot_sep, sqfp, nsep, vnp_assoc->loc, nbsp,
8611                         genCode, makeMRNA);
8612       if (descr != NULL) {
8613         ReplaceBioSourceAndPubs (nsep, descr);
8614       }
8615       vnp_assoc->loc = NULL; /*SeqLoc was freed in AssignOneProtein */
8616     }
8617   }
8618 
8619   bsp_array = MemFree (bsp_array);
8620 }
8621 
8622 /* This function creates a new protein ID based on the nucleotide ID that will be
8623  * unique within the record - nucleotide and protein sequence IDs are checked
8624  * for matches.
8625  */
8626 static CharPtr
BuildProteinIDUniqueInIDAndTitleEdit(CharPtr nuc_id,IDAndTitleEditPtr iatep_nuc,IDAndTitleEditPtr iatep_prot)8627 BuildProteinIDUniqueInIDAndTitleEdit
8628 (CharPtr nuc_id,
8629  IDAndTitleEditPtr iatep_nuc,
8630  IDAndTitleEditPtr iatep_prot)
8631 {
8632   CharPtr new_id, cp;
8633   Int4    offset, seq_num;
8634   Boolean unique_found = FALSE;
8635 
8636   if (iatep_nuc == NULL || iatep_prot == NULL || StringHasNoText (nuc_id))
8637   {
8638     return NULL;
8639   }
8640 
8641   new_id = (CharPtr) MemNew ((StringLen (nuc_id) + 20) * sizeof (Char));
8642   if (new_id != NULL)
8643   {
8644     StringCpy (new_id, nuc_id);
8645     StringCat (new_id, "_");
8646     cp = new_id + StringLen (new_id);
8647     for (offset = 1; offset < INT4_MAX && ! unique_found; offset ++)
8648     {
8649       sprintf (cp, "%d", offset);
8650       unique_found = TRUE;
8651       for (seq_num = 0; seq_num < iatep_nuc->num_sequences && unique_found; seq_num++)
8652       {
8653         if (StringCmp (iatep_nuc->id_list [seq_num], new_id) == 0)
8654         {
8655           unique_found = FALSE;
8656         }
8657       }
8658       for (seq_num = 0; seq_num < iatep_prot->num_sequences && unique_found; seq_num++)
8659       {
8660         if (StringCmp (iatep_prot->id_list [seq_num], new_id) == 0)
8661         {
8662           unique_found = FALSE;
8663         }
8664       }
8665     }
8666   }
8667   if (unique_found)
8668   {
8669     return new_id;
8670   }
8671   else
8672   {
8673     new_id = MemFree (new_id);
8674     return StringSave ("too_many");
8675   }
8676 }
8677 
DoIdsMatch(CharPtr id1,CharPtr id2)8678 static Boolean DoIdsMatch (CharPtr id1, CharPtr id2)
8679 {
8680   CharPtr tmp1, cp1 = NULL;
8681   CharPtr tmp2, cp2 = NULL;
8682   Boolean match = FALSE;
8683 
8684   tmp1 = StringChr (id1, '|');
8685   if (tmp1 == NULL) {
8686     tmp1 = id1;
8687   } else if (tmp1 == id1 + 2) {
8688     tmp1++;
8689     cp1 = StringChr (tmp1, '|');
8690     if (cp1 != NULL) {
8691       *cp1 = 0;
8692     }
8693   }
8694 
8695   tmp2 = StringChr (id2, '|');
8696   if (tmp2 == NULL) {
8697     tmp2 = id2;
8698   } else if (tmp2 == id2 + 2) {
8699     tmp2++;
8700     cp2 = StringChr (tmp2, '|');
8701     if (cp2 != NULL) {
8702       *cp2 = 0;
8703     }
8704   }
8705 
8706   if (StringCmp (tmp1, tmp2) == 0)
8707   {
8708     match = TRUE;
8709   }
8710   if (cp1 != NULL) {
8711     *cp1 = '|';
8712   }
8713   if (cp2 != NULL) {
8714     *cp2 = '|';
8715   }
8716   return match;
8717 }
8718 
8719 
8720 /* if the user gave the protein sequences the same IDs as the nucleotide sequences,
8721  * we need to create new sequence IDs for the proteins so that they will be unique.
8722  * We should also make sure that sequence IDs that don't match nucleotide sequence
8723  * IDs are unique.
8724  */
ReplaceDuplicateProteinIDs(SeqEntryPtr nuc_list,SeqEntryPtr prot_list)8725 static void ReplaceDuplicateProteinIDs (SeqEntryPtr nuc_list, SeqEntryPtr prot_list)
8726 {
8727   Int4              nuc_seq_num, prot_seq_num, prot_seq_num_check;
8728   IDAndTitleEditPtr iatep_nuc, iatep_prot;
8729   Boolean           found_nuc_match;
8730   CharPtr           tmp_str, cp;
8731   BioseqPtr         prot_bsp, nuc_bsp;
8732 
8733   if (nuc_list == NULL || prot_list == NULL)
8734   {
8735     return;
8736   }
8737 
8738   iatep_nuc = SeqEntryListToIDAndTitleEditEx (nuc_list, TRUE);
8739   iatep_prot = SeqEntryListToIDAndTitleEdit (prot_list);
8740   if (iatep_nuc != NULL && iatep_prot != NULL)
8741   {
8742     for (prot_seq_num = 0; prot_seq_num < iatep_prot->num_sequences; prot_seq_num++)
8743     {
8744       /* This part replaces any protein sequence IDs that match a nucleotide ID with
8745        * the nucleotide ID plus an underscore plus a number that makes the ID
8746        * unique.
8747        */
8748       found_nuc_match = FALSE;
8749       prot_bsp = FindNthSequenceInSet (prot_list, prot_seq_num, &(iatep_prot->is_seg[prot_seq_num]), FALSE);
8750       if (prot_bsp == NULL) continue;
8751       for (nuc_seq_num = 0;
8752            nuc_seq_num < iatep_nuc->num_sequences && ! found_nuc_match;
8753            nuc_seq_num++)
8754       {
8755         nuc_bsp = FindNthSequenceInSet (nuc_list, nuc_seq_num, &(iatep_nuc->is_seg[prot_seq_num]), TRUE);
8756         if (nuc_bsp == NULL) continue;
8757 
8758         if (SeqIdIn (prot_bsp->id, nuc_bsp->id) || RelaxedSeqIdIn (prot_bsp->id, nuc_bsp->id)
8759             || DoIdsMatch (iatep_nuc->id_list [nuc_seq_num],
8760                            iatep_prot->id_list [prot_seq_num])) {
8761           tmp_str = iatep_nuc->id_list [nuc_seq_num];
8762           cp = StringChr (tmp_str, '|');
8763           if (cp == tmp_str + 2) {
8764             tmp_str += 3;
8765             cp = StringChr (tmp_str, '|');
8766           }
8767           if (cp != NULL) {
8768             *cp = 0;
8769           }
8770 
8771           iatep_prot->id_list [prot_seq_num] = MemFree (iatep_prot->id_list [prot_seq_num]);
8772           iatep_prot->id_list [prot_seq_num] = BuildProteinIDUniqueInIDAndTitleEdit (tmp_str,
8773                                                                                      iatep_nuc,
8774                                                                                      iatep_prot);
8775           if (cp != NULL) {
8776             *cp = '|';
8777           }
8778           found_nuc_match = TRUE;
8779         }
8780       }
8781       /* This part replaces a protein sequence ID that matches a previous protein
8782        * sequence ID with the original protein sequence ID plus an underscore plus
8783        * a number that makes the ID unique.
8784        */
8785       if (!found_nuc_match)
8786       {
8787         for (prot_seq_num_check = prot_seq_num + 1;
8788              prot_seq_num_check < iatep_prot->num_sequences;
8789              prot_seq_num_check ++)
8790         {
8791           if (StringCmp (iatep_prot->id_list [prot_seq_num],
8792                          iatep_prot->id_list [prot_seq_num_check]) == 0)
8793           {
8794             tmp_str = iatep_prot->id_list [prot_seq_num_check];
8795             cp = StringChr (tmp_str, '|');
8796             if (cp == tmp_str + 2) {
8797               tmp_str += 3;
8798               cp = StringChr (tmp_str, '|');
8799             }
8800             if (cp != NULL) {
8801               *cp = 0;
8802             }
8803             tmp_str = StringSave (tmp_str);
8804 
8805             iatep_prot->id_list [prot_seq_num_check] = MemFree (iatep_prot->id_list [prot_seq_num_check]);
8806             iatep_prot->id_list [prot_seq_num_check] = BuildProteinIDUniqueInIDAndTitleEdit (tmp_str,
8807                                                                                              iatep_nuc,
8808                                                                                              iatep_prot);
8809             tmp_str = MemFree (tmp_str);
8810           }
8811         }
8812       }
8813     }
8814   }
8815   ApplyIDAndTitleEditToSeqEntryList (prot_list, iatep_prot);
8816   iatep_prot = IDAndTitleEditFree (iatep_prot);
8817   iatep_nuc = IDAndTitleEditFree (iatep_nuc);
8818 }
8819 
/* Control type for each of the five columns of the tag list dialog.
 * As filled in by PopulateNucProtEdit, a row holds (zero-based):
 *   0 = row number, 1 = protein ID, 2 = nucleotide ID choice (popup),
 *   3 = gene locus, 4 = protein name.
 */
static Uint2 nucprotedit_types [] = {
  TAGLIST_PROMPT, TAGLIST_PROMPT, TAGLIST_POPUP, TAGLIST_TEXT, TAGLIST_TEXT
};

/* Width for each column; passed to CreateTagListDialogEx together with
 * nucprotedit_types, so the entries are in the same column order.
 */
static Uint2 nucprotedit_widths [] = {
  5, 20, 10, 15, 15
};

/* Zero-based column indices used when reading or replacing a value in a
 * tag list row.
 */
#define NUCPROTEDIT_NUCID_COLUMN 2
#define NUCPROTEDIT_GENE_COLUMN  3
#define NUCPROTEDIT_PROT_COLUMN  4
8831 
/* State shared between the "Map Proteins to Nucleotides" window and its
 * button callbacks.
 */
typedef struct nucprotedit
{
  SeqEntryPtr nuc_list;             /* nucleotide sequences offered as choices */
  SeqEntryPtr prot_list;            /* protein sequences, one tag list row each */
  DialoG      dlg;                  /* tag list dialog holding the rows */
  ButtoN      accept_btn;           /* the window's Accept button */
  NucProtAssocPtr  assoc_list;      /* current protein-to-nucleotide associations */
  TexT        all_gene_txt;         /* text applied to every gene locus value */
  TexT        all_prot_txt;         /* text applied to every protein name */
} NucProtEditData, PNTR NucProtEditPtr;
8842 
/* Worst-case space needed in one tag list row for everything except the
 * three strings: two decimal integers (up to 11 characters each, sign
 * included), four tabs, the newline and the terminating NUL - 28 bytes,
 * rounded up.
 */
#define NUCPROTEDIT_ROW_OVERHEAD 32

/* Rebuilds the rows of the tag list dialog in npep->dlg.
 *
 * One row is produced per protein in npep->prot_list.  The gene locus and
 * protein name are taken from the "gene" and "protein" values found in the
 * protein's title, and the nucleotide choice is taken from the matching
 * entry of npep->assoc_list (0 when the association list is shorter than
 * the protein list).  The scroll bar value is read before the rows are
 * replaced and set again afterwards when the bar remains in use.
 *
 * Nothing is done when npep is NULL, the dialog has no TagList data, or
 * either sequence list cannot be converted to an IDAndTitleEdit.
 */
static void PopulateNucProtEdit (NucProtEditPtr npep)
{
  IDAndTitleEditPtr     iatep_nuc, iatep_prot;
  ValNodePtr            row_list = NULL;
  NucProtAssocPtr       vnp_assoc;
  TagListPtr            tlp;
  CharPtr               data_string, gene_locus, prot_name;
  Int4                  data_len;
  Int4                  prot_num;
  Int4                  old_scroll_pos = 0;

  if (npep == NULL)
  {
    return;
  }

  tlp = (TagListPtr) GetObjectExtra (npep->dlg);
  if (tlp == NULL)
  {
    return;
  }

  /* need to get bar value and reset after populating */
  if (tlp->bar != NULL)
  {
    old_scroll_pos = GetBarValue (tlp->bar);
  }

  iatep_nuc = SeqEntryListToIDAndTitleEditEx (npep->nuc_list, TRUE);
  iatep_prot = SeqEntryListToIDAndTitleEdit (npep->prot_list);
  if (iatep_nuc != NULL && iatep_prot != NULL)
  {
    vnp_assoc = npep->assoc_list;
    for (prot_num = 0; prot_num < iatep_prot->num_sequences; prot_num++)
    {
      /* column 0 is the row number */
      /* column 1 is the protein ID */
      /* column 2 is the choice for nucleotide ID */
      /* column 3 is the gene locus tag */
      /* column 4 is the protein name */
      gene_locus = FindValueFromPairInDefline ("gene", iatep_prot->title_list [prot_num]);
      prot_name = FindValueFromPairInDefline ("protein", iatep_prot->title_list [prot_num]);

      /* the fixed overhead covers both numbers, the separators and the NUL */
      data_len = StringLen (iatep_prot->id_list [prot_num])
                  + NUCPROTEDIT_ROW_OVERHEAD
                  + StringLen (gene_locus)
                  + StringLen (prot_name);
      data_string = (CharPtr) MemNew (data_len * sizeof (Char));
      if (data_string != NULL)
      {
        /* cast to int so the arguments always match the %d conversions */
        sprintf (data_string, "%d\t%s\t%d\t%s\t%s\n",
                               (int) (prot_num + 1),
                               iatep_prot->id_list [prot_num],
                               vnp_assoc == NULL ? 0 : (int) vnp_assoc->position,
                               gene_locus == NULL ? "" : gene_locus,
                               prot_name == NULL ? "" : prot_name);
        ValNodeAddPointer (&row_list, 0, data_string);
      }
      gene_locus = MemFree (gene_locus);
      prot_name = MemFree (prot_name);
      if (vnp_assoc != NULL)
      {
        vnp_assoc = vnp_assoc->next;
      }
    }
    /* NOTE(review): VIB_MSG_RESET presumably discards the previous rows
     * before the new list is attached - confirm against the TagList code.
     */
    SendMessageToDialog (npep->dlg, VIB_MSG_RESET);
    tlp->vnp = row_list;

    if (iatep_prot->num_sequences > tlp->rows)
    {
      tlp->max = MAX ((Int2) 0, (Int2) (iatep_prot->num_sequences - tlp->rows));
      CorrectBarMax (tlp->bar, tlp->max);
      CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
      Enable (tlp->bar);
      SetBarValue (tlp->bar, old_scroll_pos);
    }
    else
    {
      Hide (tlp->bar);
    }
    SendMessageToDialog (npep->dlg, VIB_MSG_REDRAW);
  }

  iatep_nuc = IDAndTitleEditFree (iatep_nuc);
  iatep_prot = IDAndTitleEditFree (iatep_prot);
}
8929 
/* Forward declaration: the functions below call this before its definition.
 * Callers in this file release the returned string with MemFree.
 */
static CharPtr
GetTagListValueEx (TagListPtr tlp, Int4 seq_num, Int4 col_num);
8932 
/* Button callback: after the user confirms, writes the text from the
 * "all gene" field into the gene locus column of every tag list row and
 * redraws the dialog.  The button's object extra must be the
 * NucProtEditPtr for the window; nothing happens when it, or the dialog's
 * TagList data, is missing.
 */
static void ApplyGeneNameToAllSequences (ButtoN b)
{
  NucProtEditPtr editor;
  TagListPtr     tag_list;
  CharPtr        gene_text;
  CharPtr        updated_row;
  ValNodePtr     row;

  editor = (NucProtEditPtr) GetObjectExtra (b);
  if (editor == NULL)
  {
    return;
  }

  tag_list = (TagListPtr) GetObjectExtra (editor->dlg);
  if (tag_list == NULL)
  {
    return;
  }

  gene_text = SaveStringFromText (editor->all_gene_txt);
  if (Message (MSG_YN, "Are you sure you want to set all of the gene locus values to %s?",
               gene_text) == ANS_YES)
  {
    row = tag_list->vnp;
    while (row != NULL)
    {
      /* build the replacement row first, then release the old one */
      updated_row = ReplaceTagListColumn (row->data.ptrvalue, gene_text, NUCPROTEDIT_GENE_COLUMN);
      MemFree (row->data.ptrvalue);
      row->data.ptrvalue = updated_row;
      row = row->next;
    }
    SendMessageToDialog (editor->dlg, VIB_MSG_REDRAW);
  }
  MemFree (gene_text);
}
8968 
/* Button callback: after the user confirms, writes the text from the
 * "all protein" field into the protein name column of every tag list row
 * and redraws the dialog.  The button's object extra must be the
 * NucProtEditPtr for the window; nothing happens when it, or the dialog's
 * TagList data, is missing.
 */
static void ApplyProteinNameToAllSequences (ButtoN b)
{
  NucProtEditPtr editor;
  TagListPtr     tag_list;
  CharPtr        protein_text;
  CharPtr        updated_row;
  ValNodePtr     row;

  editor = (NucProtEditPtr) GetObjectExtra (b);
  if (editor == NULL)
  {
    return;
  }

  tag_list = (TagListPtr) GetObjectExtra (editor->dlg);
  if (tag_list == NULL)
  {
    return;
  }

  protein_text = SaveStringFromText (editor->all_prot_txt);
  if (Message (MSG_YN, "Are you sure you want to set all of the protein names to %s?",
               protein_text) == ANS_YES)
  {
    row = tag_list->vnp;
    while (row != NULL)
    {
      /* build the replacement row first, then release the old one */
      updated_row = ReplaceTagListColumn (row->data.ptrvalue, protein_text, NUCPROTEDIT_PROT_COLUMN);
      MemFree (row->data.ptrvalue);
      row->data.ptrvalue = updated_row;
      row = row->next;
    }
    SendMessageToDialog (editor->dlg, VIB_MSG_REDRAW);
  }
  MemFree (protein_text);
}
9004 
/* Copies the gene locus and protein name entered in the tag list dialog
 * into the titles of the proteins in npep->prot_list.
 *
 * For each row (up to the number of proteins) the "gene" and "protein"
 * values of the corresponding title are replaced with the text from the
 * gene and protein columns, and the edited titles are then applied back
 * to the protein list.  Nothing is done when npep is NULL, the dialog has
 * no TagList data, or the protein list cannot be converted.
 */
static void ApplyNucProtEditGeneAndProt (NucProtEditPtr npep)
{
  TagListPtr tlp;
  Int4       seq_num;
  CharPtr    gene, prot;
  IDAndTitleEditPtr iatep;
  ValNodePtr        vnp;

  if (npep == NULL)
  {
    return;
  }

  tlp = (TagListPtr) GetObjectExtra (npep->dlg);
  if (tlp == NULL)
  {
    return;
  }

  iatep = SeqEntryListToIDAndTitleEdit (npep->prot_list);
  if (iatep == NULL)
  {
    return;
  }

  for (seq_num = 0, vnp = tlp->vnp;
       seq_num < iatep->num_sequences && vnp != NULL;
       seq_num++, vnp = vnp->next)
  {
    gene = GetTagListValueEx (tlp, seq_num, NUCPROTEDIT_GENE_COLUMN);
    iatep->title_list [seq_num] = ReplaceValueInOneDefLine (iatep->title_list [seq_num],
                                                            "gene",
                                                            gene);
    gene = MemFree (gene);
    prot = GetTagListValueEx (tlp, seq_num, NUCPROTEDIT_PROT_COLUMN);
    iatep->title_list [seq_num] = ReplaceValueInOneDefLine (iatep->title_list [seq_num],
                                                            "protein",
                                                            prot);
    /* release the column value, as is done for the gene value above */
    prot = MemFree (prot);
  }
  ApplyIDAndTitleEditToSeqEntryList (npep->prot_list, iatep);
  iatep = IDAndTitleEditFree (iatep);
}
9047 
9048 /* This function collects the pairings of nucleotides and proteins from
9049  * the NucProtEdit dialog.
9050  * The ValNode list stored in npep->assoc_list has one ValNode for each
9051  * protein in npep->prot_list.  The choice for each ValNode is the position
9052  * of the nucleotide in npep->nuc_list plus one - zero indicates that there
9053  * is no nucleotide for this protein sequence.
9054  */
CollectSequenceAssociationsFromNucProtEdit(NucProtEditPtr npep)9055 static void CollectSequenceAssociationsFromNucProtEdit (NucProtEditPtr npep)
9056 {
9057   TagListPtr tlp;
9058   CharPtr    str;
9059   Int4       num_data_rows, assoc_num, data_row;
9060   NucProtAssocPtr assoc_list = NULL;
9061 
9062   if (npep == NULL)
9063   {
9064     return;
9065   }
9066   tlp = (TagListPtr) GetObjectExtra (npep->dlg);
9067   if (tlp == NULL)
9068   {
9069     return;
9070   }
9071 
9072   num_data_rows = ValNodeLen (tlp->vnp);
9073   for (data_row = 0; data_row < num_data_rows; data_row++)
9074   {
9075     str  = GetTagListValueEx (tlp, data_row, NUCPROTEDIT_NUCID_COLUMN);
9076     if (!StringHasNoText (str))
9077     {
9078       assoc_num = atoi (str);
9079     }
9080     else
9081     {
9082       assoc_num = 0;
9083     }
9084     str = MemFree (str);
9085 
9086     NewAssociationList (&assoc_list, assoc_num, NULL);
9087   }
9088 
9089   npep->assoc_list = FreeAssociationList (npep->assoc_list);
9090   npep->assoc_list = assoc_list;
9091 }
9092 
/* Returns the English ordinal suffix ("st", "nd", "rd" or "th") for i.
 * The suffix follows the last digit, except that numbers ending in 11, 12
 * or 13 take "th" (1st, 2nd, 3rd, 11th, 12th, 13th, 21st, 22nd, 101st,
 * 111th).  The returned string is a constant and must not be freed.
 */
static CharPtr GetNumSuffix (Int4 i)
{
  Int4 last_two;

  /* only the magnitude of the last two digits matters */
  last_two = i % 100;
  if (last_two < 0) {
    last_two = -last_two;
  }

  if (last_two >= 11 && last_two <= 13) {
    return "th";
  }

  switch (last_two % 10) {
    case 1:
      return "st";
    case 2:
      return "nd";
    case 3:
      return "rd";
    default:
      return "th";
  }
}
9105 
GetNucProtAssocMessage(NucProtAssocPtr assoc_list)9106 static CharPtr GetNucProtAssocMessage (NucProtAssocPtr assoc_list)
9107 {
9108   NucProtAssocPtr   vnp_assoc;
9109   Int4              num = 0, pos;
9110   CharPtr           msg = NULL;
9111   CharPtr           msg_fmt = "The %s protein sequence%s could not be associated with %snucleotide sequence%s.  You must scroll to %s and select the correct nucleotide sequence ID%s for %s.  Clicking Accept at this point will discard %s.";
9112   Int4              extra_space = 2 * StringLen ("these protein sequences") + StringLen ("these positions");
9113   Int4              msg_len = 0;
9114   ValNodePtr        desc_list = NULL, vnp;
9115   Char              desc[18];
9116   CharPtr           num_list = NULL;
9117   Int4              num_list_len = 0;
9118 
9119   if (assoc_list == NULL) {
9120     return StringSave ("You must select nucleotide sequences for your protein sequences.");
9121   } else {
9122     for (vnp_assoc = assoc_list, pos = 1; vnp_assoc != NULL; vnp_assoc = vnp_assoc->next, pos++) {
9123       if (vnp_assoc->position == 0) {
9124         sprintf (desc, "%d%s", pos, GetNumSuffix(pos));
9125         ValNodeAddPointer (&desc_list, 0, StringSave (desc));
9126         num_list_len += StringLen (desc) + 2;
9127         num++;
9128       }
9129     }
9130     if (num == 0) {
9131       return NULL;
9132     }
9133     if (num > 1) {
9134       num_list_len += 6;
9135     }
9136     num_list = (CharPtr) MemNew (sizeof (Char) * num_list_len);
9137     for (vnp = desc_list; vnp != NULL; vnp = vnp->next) {
9138       StringCat (num_list, vnp->data.ptrvalue);
9139       if (vnp->next != NULL) {
9140         if (num == 2) {
9141           StringCat (num_list, " and ");
9142         } else if (vnp->next->next == NULL) {
9143           StringCat (num_list, ", and ");
9144         } else {
9145           StringCat (num_list, ", ");
9146         }
9147       }
9148     }
9149     desc_list = ValNodeFreeData (desc_list);
9150 
9151     msg_len = StringLen (msg_fmt) + extra_space + num_list_len;
9152 
9153     msg = (CharPtr) MemNew (sizeof (Char) * (msg_len));
9154 
9155     if (num == 1) {
9156       sprintf (msg, msg_fmt, num_list, "",  "a ", "",  "this position",   "",  "this protein sequence",  "this protein sequence");
9157     } else {
9158       sprintf (msg, msg_fmt, num_list, "s", "",   "s", "these positions", "s", "these protein sequences", "these protein sequences");
9159     }
9160   }
9161   return msg;
9162 }
9163 
9164 /* This function produces a dialog that allows the user to edit the gene and protein names
9165  * and to select the locations for the coding regions for each protein sequence.
9166  */
9167 static NucProtAssocPtr
CollectNucleotideProteinAssociations(SeqEntryPtr nuc_list,SeqEntryPtr prot_list,NucProtAssocPtr default_assoc_list)9168 CollectNucleotideProteinAssociations
9169 (SeqEntryPtr nuc_list,
9170  SeqEntryPtr prot_list,
9171  NucProtAssocPtr default_assoc_list)
9172 {
9173   WindoW                w;
9174   GrouP                 h, title_grp, all_gene_grp, all_prot_grp, k, c, p_msg;
9175   PrompT                p_prot, p_nuc, p_locus, p_name;
9176   ButtoN                b;
9177   Int4                  num_prots;
9178   Int4                  rows_shown = 0;
9179   TagListPtr            tlp;
9180   ModalAcceptCancelData acd;
9181   NucProtEditData       nped;
9182   IDAndTitleEditPtr     iatep_nuc;
9183   EnumFieldAssocPtr     nuc_alist;
9184   EnumFieldAssocPtr     nucprotedit_alists [] = { NULL, NULL, NULL, NULL, NULL};
9185   Int4                  nuc_num;
9186   CharPtr               msg;
9187 
9188   if (nuc_list == NULL || prot_list == NULL)
9189   {
9190     return NULL;
9191   }
9192 
9193   nped.nuc_list = nuc_list;
9194   nped.prot_list = prot_list;
9195   nped.assoc_list = CopyAssociationList (default_assoc_list);
9196 
9197   num_prots = ValNodeLen (prot_list);
9198   rows_shown = MIN (num_prots, 5);
9199 
9200   /* set up ALIST for nucleotide list.
9201    * cannot free IDAndTitleEdit until done with ALIST.
9202    */
9203   iatep_nuc = SeqEntryListToIDAndTitleEditEx (nped.nuc_list, TRUE);
9204   nuc_alist = (EnumFieldAssocPtr) MemNew ((iatep_nuc->num_sequences + 2) * sizeof (EnumFieldAssoc));
9205   nuc_alist [0].name = "";
9206   nuc_alist [0].value = 0;
9207   for (nuc_num = 0; nuc_num < iatep_nuc->num_sequences; nuc_num++)
9208   {
9209     nuc_alist [nuc_num + 1].name = iatep_nuc->id_list [nuc_num];
9210     nuc_alist [nuc_num + 1].value = nuc_num + 1;
9211   }
9212   nuc_alist [nuc_num + 1].name = NULL;
9213   nucprotedit_alists [NUCPROTEDIT_NUCID_COLUMN] = nuc_alist;
9214 
9215   w = MovableModalWindow (-20, -13, -10, -10, "Map Proteins to Nucleotides", NULL);
9216 
9217   h = HiddenGroup(w, -1, 0, NULL);
9218   SetGroupSpacing (h, 10, 10);
9219 
9220   msg = GetNucProtAssocMessage (nped.assoc_list);
9221   p_msg = MultiLinePrompt (h, msg, 750, systemFont);
9222 
9223   k = HiddenGroup (h, 2, 0, NULL);
9224   /* text and button for setting all gene locus values */
9225   all_gene_grp = HiddenGroup (k, -1, 0, NULL);
9226   b = PushButton (all_gene_grp, "Set All Gene Locus Values to Value Below", ApplyGeneNameToAllSequences);
9227   SetObjectExtra (b, &nped, NULL);
9228   nped.all_gene_txt = DialogText (all_gene_grp, "", 15, NULL);
9229   AlignObjects (ALIGN_CENTER, (HANDLE) nped.all_gene_txt, (HANDLE) b, NULL);
9230 
9231   /* text and button for setting all protein names */
9232   all_prot_grp = HiddenGroup (k, -1, 0, NULL);
9233   b = PushButton (all_prot_grp, "Set All Protein Names to Value Below", ApplyProteinNameToAllSequences);
9234   SetObjectExtra (b, &nped, NULL);
9235   nped.all_prot_txt = DialogText (all_prot_grp, "", 15, NULL);
9236   AlignObjects (ALIGN_CENTER, (HANDLE) nped.all_prot_txt, (HANDLE) b, NULL);
9237 
9238   title_grp = HiddenGroup (h, 5, 0, NULL);
9239   SetGroupSpacing (title_grp, 10, 10);
9240 
9241   p_prot = StaticPrompt (title_grp, "Prot ID", 0, 0, programFont, 'l');
9242   p_nuc = StaticPrompt (title_grp, "Nuc ID", 0, 0, programFont, 'l');
9243   p_locus = StaticPrompt (title_grp, "Gene Locus", 0, 0, programFont, 'l');
9244   p_name = StaticPrompt (title_grp, "Protein Name", 0, 0, programFont, 'l');
9245 
9246   nped.dlg = CreateTagListDialogEx (h, rows_shown, 5, 2,
9247                                            nucprotedit_types, nucprotedit_widths,
9248                                            nucprotedit_alists, TRUE, TRUE,
9249                                            NULL, NULL);
9250 
9251 
9252   tlp = (TagListPtr) GetObjectExtra (nped.dlg);
9253   if (tlp == NULL) return NULL;
9254 
9255   if (num_prots > rows_shown)
9256   {
9257     tlp->max = MAX ((Int2) 0, (Int2) (num_prots - tlp->rows));
9258     CorrectBarMax (tlp->bar, tlp->max);
9259     CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
9260     Enable (tlp->bar);
9261   }
9262   else
9263   {
9264     Hide (tlp->bar);
9265   }
9266 
9267   c = HiddenGroup (h, 2, 0, NULL);
9268   nped.accept_btn = PushButton (c, "Accept", ModalAcceptButton);
9269   SetObjectExtra (nped.accept_btn, &acd, NULL);
9270   b = PushButton (c, "Cancel", ModalCancelButton);
9271   SetObjectExtra (b, &acd, NULL);
9272 
9273   AlignObjects (ALIGN_CENTER, (HANDLE) nped.dlg, (HANDLE) c, (HANDLE) NULL);
9274 
9275   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [1], (HANDLE) p_prot, NULL);
9276   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [NUCPROTEDIT_NUCID_COLUMN], (HANDLE) p_nuc, NULL);
9277   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [NUCPROTEDIT_GENE_COLUMN], (HANDLE) p_locus,
9278                                (HANDLE) all_gene_grp, NULL);
9279   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [NUCPROTEDIT_PROT_COLUMN], (HANDLE) p_name,
9280                                (HANDLE) all_prot_grp, NULL);
9281 
9282   PopulateNucProtEdit (&nped);
9283 
9284   Show (w);
9285   Select (w);
9286   acd.accepted = FALSE;
9287   acd.cancelled = FALSE;
9288   while (!acd.accepted && ! acd.cancelled)
9289   {
9290     while (!acd.accepted && ! acd.cancelled)
9291     {
9292       ProcessExternalEvent ();
9293       Update ();
9294     }
9295     ProcessAnEvent ();
9296     if (acd.accepted)
9297     {
9298       CollectSequenceAssociationsFromNucProtEdit (&nped);
9299       if (!AllLocationsProvided (nped.assoc_list))
9300       {
9301         if (ANS_NO == Message (MSG_YN, "You have not provided coding region locations for all of your proteins - these proteins will be discarded.  Are you sure you want to continue?"))
9302         {
9303           acd.accepted = FALSE;
9304         }
9305       }
9306       if (acd.accepted)
9307       {
9308         if (!PickCodingRegionLocationsForProteinNucleotidePairs (nped.assoc_list,
9309                                                                  nuc_list,
9310                                                                  prot_list))
9311         {
9312           if (ANS_NO == Message (MSG_YN, "Unable to determine coding region location for some proteins.  These proteins will be discarded.  Do you wish to continue?"))
9313           {
9314             acd.accepted = FALSE;
9315             PopulateNucProtEdit (&nped);
9316           }
9317         }
9318         if (acd.accepted && FindFeaturesInIdenticalRegions (nped.assoc_list))
9319         {
9320           if (ANS_CANCEL == Message (MSG_OKC, "Duplicate coding regions were found on one or more sequences - do you wish to continue?"))
9321           {
9322             acd.accepted = FALSE;
9323           }
9324         }
9325       }
9326     }
9327   }
9328 
9329   Remove (w);
9330 
9331   if (acd.accepted)
9332   {
9333     /* apply any gene protein data the user may have entered to the titles */
9334     ApplyNucProtEditGeneAndProt (&nped);
9335   }
9336   else
9337   {
9338     nped.assoc_list = FreeAssociationList (nped.assoc_list);
9339   }
9340 
9341   nuc_alist = MemFree (nuc_alist);
9342   iatep_nuc = IDAndTitleEditFree (iatep_nuc);
9343 
9344   Update ();
9345   return nped.assoc_list;
9346 }
9347 
9348 /* This function tries to build an association list by matching nucleotide sequence IDs
9349  * to protein sequence IDs.
9350  */
9351 static NucProtAssocPtr
BuildAssociationListByMatch(SeqEntryPtr nuc_list,SeqEntryPtr prot_list)9352 BuildAssociationListByMatch
9353 (SeqEntryPtr       nuc_list,
9354  SeqEntryPtr       prot_list)
9355 {
9356   IDAndTitleEditPtr iatep_nuc, iatep_prot;
9357   Int4       nuc_seq_num, prot_seq_num, found_num;
9358   NucProtAssocPtr assoc_list = NULL;
9359   BioseqPtr       nuc_bsp, prot_bsp;
9360 
9361   if (nuc_list == NULL || prot_list == NULL)
9362   {
9363     return NULL;
9364   }
9365 
9366   iatep_nuc = SeqEntryListToIDAndTitleEditEx (nuc_list, TRUE);
9367   iatep_prot = SeqEntryListToIDAndTitleEdit (prot_list);
9368   if (iatep_nuc == NULL || iatep_prot == NULL)
9369   {
9370     iatep_nuc = IDAndTitleEditFree (iatep_nuc);
9371     iatep_prot = IDAndTitleEditFree (iatep_prot);
9372     return NULL;
9373   }
9374 
9375   for (prot_seq_num = 0; prot_seq_num < iatep_prot->num_sequences; prot_seq_num++)
9376   {
9377     found_num = 0;
9378     if (iatep_nuc->num_sequences == 1)
9379     {
9380       found_num = 1;
9381     }
9382     prot_bsp = FindNthSequenceInSet (prot_list, prot_seq_num, &(iatep_prot->is_seg[prot_seq_num]), FALSE);
9383     if (prot_bsp == NULL) continue;
9384     for (nuc_seq_num = 0;
9385          nuc_seq_num < iatep_nuc->num_sequences && found_num == 0;
9386          nuc_seq_num++)
9387     {
9388       nuc_bsp = FindNthSequenceInSet (nuc_list, nuc_seq_num, &(iatep_nuc->is_seg [nuc_seq_num]), TRUE);
9389       if (SeqIdIn (prot_bsp->id, nuc_bsp->id) || RelaxedSeqIdIn (prot_bsp->id, nuc_bsp->id)
9390           || DoIdsMatch(iatep_nuc->id_list[nuc_seq_num], iatep_prot->id_list [prot_seq_num])) {
9391         found_num = nuc_seq_num + 1;
9392       }
9393     }
9394 
9395     NewAssociationList (&assoc_list, found_num, NULL);
9396   }
9397   iatep_nuc = IDAndTitleEditFree (iatep_nuc);
9398   iatep_prot = IDAndTitleEditFree (iatep_prot);
9399   return assoc_list;
9400 }
9401 
9402 /* This function builds an association list by matching nucleotide sequences
9403  * to protein sequences by position.
9404  */
9405 static NucProtAssocPtr
BuildAssociationListByPosition(SeqEntryPtr nuc_list,SeqEntryPtr prot_list)9406 BuildAssociationListByPosition
9407 (SeqEntryPtr       nuc_list,
9408  SeqEntryPtr       prot_list)
9409 {
9410   IDAndTitleEditPtr iatep_nuc, iatep_prot;
9411   Int4       nuc_seq_num, prot_seq_num;
9412   NucProtAssocPtr assoc_list = NULL;
9413   Int4       num_masters = 0, num_segs = 0;
9414 
9415   if (nuc_list == NULL || prot_list == NULL)
9416   {
9417     return NULL;
9418   }
9419 
9420   iatep_nuc = SeqEntryListToIDAndTitleEditEx (nuc_list, TRUE);
9421   iatep_prot = SeqEntryListToIDAndTitleEdit (prot_list);
9422   if (iatep_nuc == NULL || iatep_prot == NULL)
9423   {
9424     iatep_nuc = IDAndTitleEditFree (iatep_nuc);
9425     iatep_prot = IDAndTitleEditFree (iatep_prot);
9426     return NULL;
9427   }
9428 
9429   for (nuc_seq_num = 0; nuc_seq_num < iatep_nuc->num_sequences; nuc_seq_num++)
9430   {
9431     if (iatep_nuc->is_seg != NULL && iatep_nuc->is_seg [nuc_seq_num])
9432     {
9433       num_segs ++;
9434     }
9435     else
9436     {
9437       num_masters ++;
9438     }
9439   }
9440 
9441   if (num_segs == iatep_prot->num_sequences && iatep_nuc->is_seg != NULL)
9442   {
9443     /* assign proteins to segments */
9444     nuc_seq_num = 0;
9445     for (prot_seq_num = 0; prot_seq_num < iatep_prot->num_sequences; prot_seq_num++)
9446     {
9447       while (! iatep_nuc->is_seg [nuc_seq_num] && nuc_seq_num < iatep_nuc->num_sequences)
9448       {
9449         nuc_seq_num++;
9450       }
9451       if (nuc_seq_num < iatep_nuc->num_sequences)
9452       {
9453         NewAssociationList (&assoc_list, nuc_seq_num + 1, NULL);
9454         nuc_seq_num++;
9455       }
9456       else
9457       {
9458         NewAssociationList (&assoc_list, 0, NULL);
9459       }
9460     }
9461   }
9462   else if (num_masters == iatep_prot->num_sequences)
9463   {
9464     /* assign proteins to master sequences */
9465     nuc_seq_num = 0;
9466     for (prot_seq_num = 0; prot_seq_num < iatep_prot->num_sequences; prot_seq_num++)
9467     {
9468       if (iatep_nuc->is_seg != NULL)
9469       {
9470         while (iatep_nuc->is_seg [nuc_seq_num] && nuc_seq_num < iatep_nuc->num_sequences)
9471         {
9472           nuc_seq_num++;
9473         }
9474       }
9475       if (nuc_seq_num < iatep_nuc->num_sequences)
9476       {
9477         NewAssociationList (&assoc_list, nuc_seq_num + 1, NULL);
9478         nuc_seq_num ++;
9479       }
9480       else
9481       {
9482         NewAssociationList (&assoc_list, 0, NULL);
9483       }
9484     }
9485   }
9486   else if (num_masters == 1)
9487   {
9488     /* assign all proteins to one sequence */
9489     for (prot_seq_num = 0; prot_seq_num < iatep_prot->num_sequences; prot_seq_num++)
9490     {
9491       NewAssociationList (&assoc_list, 1, NULL);
9492     }
9493   }
9494   else
9495   {
9496     /* can't get a match.  Null will be returned. */
9497   }
9498 
9499   iatep_nuc = IDAndTitleEditFree (iatep_nuc);
9500   iatep_prot = IDAndTitleEditFree (iatep_prot);
9501   return assoc_list;
9502 }
9503 
9504 /* This function will attempt to make a default assignation of nucleotides to proteins.
9505  * If it is unable to produce a default mapping, it will prompt the user for the mapping.
9506  * It will then build the nuc-prot sets and discard any proteins for which no nucleotide
9507  * was assigned.
9508  */
9509 
9510 extern NucProtAssocPtr
AssignProteinsForSequenceSet(SeqEntryPtr nuc_list,SeqEntryPtr prot_list,Boolean always_review)9511 AssignProteinsForSequenceSet
9512 (SeqEntryPtr nuc_list,
9513  SeqEntryPtr prot_list,
9514  Boolean     always_review)
9515 {
9516   NucProtAssocPtr            assoc_list, tmp_list;
9517   Boolean          all_provided;
9518 
9519   if (nuc_list == NULL || prot_list == NULL)
9520   {
9521     return NULL;
9522   }
9523 
9524   assoc_list = BuildAssociationListByMatch (nuc_list, prot_list);
9525 
9526   if (! AnyLocationsProvided (assoc_list))
9527   {
9528     tmp_list = BuildAssociationListByPosition (nuc_list, prot_list);
9529     if (tmp_list != NULL)
9530     {
9531       assoc_list = FreeAssociationList (assoc_list);
9532       assoc_list = tmp_list;
9533     }
9534   }
9535 
9536   all_provided = AllLocationsProvided (assoc_list);
9537   if (!always_review && all_provided) {
9538     if (!PickCodingRegionLocationsForProteinNucleotidePairs (assoc_list,
9539                                                              nuc_list,
9540                                                              prot_list)) {
9541       always_review = TRUE;
9542     } else if (FindFeaturesInIdenticalRegions (assoc_list)) {
9543       always_review = TRUE;
9544     }
9545   }
9546 
9547   if (always_review || !AllLocationsProvided (assoc_list))
9548   {
9549     tmp_list = CollectNucleotideProteinAssociations (nuc_list, prot_list, assoc_list);
9550     assoc_list = FreeAssociationList (assoc_list);
9551     assoc_list = tmp_list;
9552   }
9553   return assoc_list;
9554 }
9555 
/* Replaces duplicate protein IDs, assigns the proteins to the nucleotides
 * recorded in sqfp->nuc_prot_assoc_list, and then frees that list.
 * Does nothing when any of the pointer arguments is NULL.
 */
static void BuildNucProtSets
(SeqEntryPtr      nuc_list,
 SeqEntryPtr      prot_list,
 SequencesFormPtr sqfp,
 Int2             code)
{
  if (nuc_list != NULL && prot_list != NULL && sqfp != NULL)
  {
    ReplaceDuplicateProteinIDs (nuc_list, prot_list);
    AssignProteinsToSelectedNucleotides (sqfp->nuc_prot_assoc_list,
                                         nuc_list,
                                         prot_list,
                                         sqfp, code,
                                         sqfp->makeMRNA);
    /* the associations have been consumed; drop them from the form */
    sqfp->nuc_prot_assoc_list = FreeAssociationList (sqfp->nuc_prot_assoc_list);
  }
}
9574 
/* Annotation type choices; the enumerators are numbered starting at 1.
 * NOTE(review): the trailing 'n' in the typedef name looks like a typo for
 * ESubmitAnnotType - left unchanged because other code may use this name.
 */
typedef enum {
  eSubmitAnnotType_CDS = 1,
  eSubmitAnnotType_rRNA,
  eSubmitAnnotType_Gene,
  eSubmitAnnotType_None
} ESubmitAnnotTypen;
9581 
9582 
SubmissionFeatureInfoNew(void)9583 static SubmissionFeatureInfoPtr SubmissionFeatureInfoNew(void)
9584 {
9585   SubmissionFeatureInfoPtr info;
9586 
9587   info = (SubmissionFeatureInfoPtr) MemNew (sizeof (SubmissionFeatureInfoData));
9588   info->strand = Seq_strand_unknown;
9589   return info;
9590 }
9591 
9592 
SubmissionFeatureInfoFree(SubmissionFeatureInfoPtr info)9593 NLM_EXTERN SubmissionFeatureInfoPtr SubmissionFeatureInfoFree (SubmissionFeatureInfoPtr info)
9594 {
9595   if (info != NULL) {
9596     info->gene_name = MemFree (info->gene_name);
9597     info->product = MemFree (info->product);
9598     info->prot_desc = MemFree (info->prot_desc);
9599     info->feat_comment = MemFree (info->feat_comment);
9600     info = MemFree (info);
9601   }
9602   return info;
9603 }
9604 
9605 
IsSubmissionFeatureInfoEmpty(SubmissionFeatureInfoPtr info)9606 static Boolean IsSubmissionFeatureInfoEmpty(SubmissionFeatureInfoPtr info)
9607 {
9608   Boolean rval = TRUE;
9609 
9610   if (info == NULL) {
9611     return TRUE;
9612   }
9613   switch (info->feature_type) {
9614     case FEATDEF_CDS:
9615       if (!StringHasNoText (info->product)
9616           || !StringHasNoText (info->prot_desc)
9617           || !StringHasNoText (info->feat_comment)) {
9618         rval = FALSE;
9619       }
9620       break;
9621     case FEATDEF_rRNA:
9622       if (!StringHasNoText (info->product)
9623           || !StringHasNoText (info->feat_comment)) {
9624         rval = FALSE;
9625       }
9626       break;
9627     case FEATDEF_GENE:
9628       if (!StringHasNoText (info->gene_name)) {
9629         rval = FALSE;
9630       }
9631       break;
9632   }
9633   return rval;
9634 }
9635 
9636 
ProtRefFromSubmissionFeatureInfo(SubmissionFeatureInfoPtr info)9637 static ProtRefPtr ProtRefFromSubmissionFeatureInfo (SubmissionFeatureInfoPtr info)
9638 {
9639   ProtRefPtr prp;
9640 
9641   if (info == NULL || info->feature_type != FEATDEF_CDS) {
9642     return NULL;
9643   }
9644   prp = ProtRefNew ();
9645   if (!StringHasNoText (info->product)) {
9646     ValNodeAddPointer (&(prp->name), 0, StringSave (info->product));
9647   }
9648   if (!StringHasNoText (info->prot_desc)) {
9649     prp->desc = StringSave (info->prot_desc);
9650   }
9651   return prp;
9652 }
9653 
9654 
GeneRefFromSubmissionFeatureInfo(SubmissionFeatureInfoPtr info)9655 static GeneRefPtr GeneRefFromSubmissionFeatureInfo (SubmissionFeatureInfoPtr info)
9656 {
9657   GeneRefPtr grp = NULL;
9658 
9659   if (info == NULL) {
9660     return NULL;
9661   }
9662   if (!StringHasNoText (info->gene_name)) {
9663     grp = GeneRefNew ();
9664     grp->locus = StringSave (info->gene_name);
9665   }
9666   return grp;
9667 }
9668 
9669 
RnaRefFromSubmissionFeatureInfo(SubmissionFeatureInfoPtr info)9670 static RnaRefPtr RnaRefFromSubmissionFeatureInfo (SubmissionFeatureInfoPtr info)
9671 {
9672   RnaRefPtr rrp;
9673 
9674   if (info == NULL) {
9675     return NULL;
9676   }
9677   rrp = RnaRefNew ();
9678   rrp->type = RNA_TYPE_rRNA;
9679   if (!StringHasNoText (info->product)) {
9680     rrp->ext.choice = 1;
9681     rrp->ext.value.ptrvalue = StringSave (info->product);
9682   }
9683   return rrp;
9684 }
9685 
9686 
/* Applies the strand and partialness recorded in info to the location of
 * sfp.  Only a minus strand is written to the location; any other strand
 * value leaves the location's strand as it is.  The feature's partial
 * flag is set when either end is partial.  Does nothing when sfp or info
 * is NULL.
 */
static void ApplySubmissionFeatureInfoToLocation (SeqFeatPtr sfp, SubmissionFeatureInfoPtr info)
{
  if (sfp != NULL && info != NULL) {
    if (info->strand == Seq_strand_minus) {
      SetSeqLocStrand (sfp->location, Seq_strand_minus);
    }

    SetSeqLocPartial (sfp->location, info->partial5, info->partial3);

    /* the feature is partial as soon as one end of its location is */
    sfp->partial = (info->partial5 || info->partial3);
  }
}
9703 
9704 
AddCDSToSeqEntry(SeqEntryPtr sep,SubmissionFeatureInfoPtr info,BoolPtr ambig)9705 NLM_EXTERN SeqFeatPtr AddCDSToSeqEntry (SeqEntryPtr sep, SubmissionFeatureInfoPtr info, BoolPtr ambig)
9706 {
9707   Int2               genCode;
9708   CdRegionPtr        crp;
9709   SeqFeatPtr         sfp;
9710   Uint2              parenttype;
9711   Pointer            parentptr;
9712   BioseqSetPtr       bssp;
9713   SeqEntryPtr        setsep = NULL, scope, old_scope;
9714 
9715   GetSeqEntryParent (sep, &parentptr, &parenttype);
9716   if (parenttype == OBJ_BIOSEQSET
9717       && (bssp = (BioseqSetPtr) parentptr) != NULL
9718       && bssp->_class == BioseqseqSet_class_nuc_prot) {
9719     setsep = SeqMgrGetSeqEntryForData (bssp);
9720   }
9721 
9722   /*Create a new CDS feature */
9723 
9724   genCode = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
9725   crp = CreateNewCdRgn (1, FALSE, genCode);
9726   if (NULL == crp)
9727   {
9728     return NULL;
9729   }
9730 
9731   sfp = CreateNewFeature (sep, setsep, SEQFEAT_CDREGION, NULL);
9732 
9733   if (NULL == sfp)
9734   {
9735     crp = CdRegionFree (crp);
9736     return NULL;
9737   }
9738 
9739   sfp->data.value.ptrvalue = (Pointer) crp;
9740 
9741   ApplySubmissionFeatureInfoToLocation (sfp, info);
9742 
9743   /* set scope before choosing frame, to make sure we find the correct BioSeq */
9744   old_scope = SeqEntrySetScope(sep);
9745 
9746   /* Choose frame for new CDS feature */
9747   if (!SetBestFrameByLocation (sfp)) {
9748     if (ambig != NULL) {
9749       *ambig = TRUE;
9750     }
9751   }
9752 
9753   SeqEntrySetScope(old_scope);
9754 
9755   return sfp;
9756 }
9757 
9758 
/*
 * CreateProteinSeqEntryForCDS
 *
 * Translates a coding region and wraps the result in a new protein Bioseq
 * and SeqEntry, with a MolInfo descriptor and, optionally, a protein
 * feature.
 *
 *   cds - coding region feature to translate; NULL yields NULL.
 *   prp - optional ProtRef; when non-NULL and the protein feature can be
 *         created, the pointer is stored in that feature.
 *         NOTE(review): when the function returns NULL, or the protein
 *         feature cannot be created, prp is neither stored nor freed here.
 *   sep - SeqEntry used as the scope while the protein SeqId is made.
 *
 * Returns the new protein SeqEntry, or NULL when the translation or one of
 * the allocations fails.  Every NULL return happens before prp is used.
 */
static SeqEntryPtr CreateProteinSeqEntryForCDS (SeqFeatPtr cds, ProtRefPtr prp, SeqEntryPtr sep)
{
  ByteStorePtr       bs;
  CharPtr            prot, ptr;
  Char               ch;
  Int4               i;
  BioseqPtr          bsp;
  SeqEntryPtr        old, psep;
  MolInfoPtr         mip;
  Boolean            partial5 = FALSE, partial3 = FALSE;
  ValNodePtr         vnp;
  SeqFeatPtr         prot_sfp;

  if (NULL == cds)
    return NULL;

  bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
  if (NULL == bs)
    return NULL;

  prot = BSMerge (bs, NULL);
  bs = BSFree (bs);
  if (NULL == prot)
    return NULL;

  /* upper-case the translation in place */
  ptr = prot;
  ch = *ptr;
  while (ch != '\0') {
    *ptr = TO_UPPER (ch);
    ptr++;
    ch = *ptr;
  }
  /* drop a single trailing '*' from the translation */
  i = StringLen (prot);
  if (i > 0 && prot [i - 1] == '*') {
    prot [i - 1] = '\0';
  }
  /* copy the cleaned-up string into a fresh byte store */
  bs = BSNew (1000);
  if (bs != NULL) {
    ptr = prot;
    BSWrite (bs, (VoidPtr) ptr, StringLen (ptr));
  }
  prot = MemFree (prot);

  /* Create the product protein Bioseq */

  bsp = BioseqNew ();
  if (NULL == bsp) {
    bs = BSFree (bs);
    return NULL;
  }

  /* Create a new SeqEntry for the Prot Bioseq */
  psep = SeqEntryNew ();
  if (NULL == psep) {
    /* release the Bioseq too; previously it was left allocated here */
    bsp = BioseqFree (bsp);
    bs = BSFree (bs);
    return NULL;
  }

  bsp->repr = Seq_repr_raw;
  bsp->mol = Seq_mol_aa;
  bsp->seq_data_type = Seq_code_ncbieaa;
  bsp->seq_data = (SeqDataPtr) bs;
  bsp->length = BSLen (bs);
  bs = NULL;   /* the byte store now belongs to the Bioseq */
  old = SeqEntrySetScope (sep);
  bsp->id = MakeNewProteinSeqId (cds->location, NULL);
  SeqMgrAddToBioseqIndex (bsp);
  SeqEntrySetScope (old);

  psep->choice = 1;
  psep->data.ptrvalue = (Pointer) bsp;
  SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);

  /* Add a descriptor to the protein Bioseq */

  mip = MolInfoNew ();
  if (mip != NULL) {
    mip->biomol = 8;
    mip->tech = 8;
    /* completeness mirrors which ends of the CDS location are partial */
    CheckSeqLocForPartial (cds->location, &partial5, &partial3);
    if (partial5 && partial3) {
      mip->completeness = 5;
    } else if (partial5) {
      mip->completeness = 3;
    } else if (partial3) {
      mip->completeness = 4;
    }
    vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
    if (vnp == NULL) {
      mip = MolInfoFree (mip);
    } else {
      vnp->data.ptrvalue = (Pointer) mip;
    }
  }

  /* partial5/partial3 stay FALSE when the MolInfo allocation failed */
  if (prp != NULL) {
    prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
    if (prot_sfp != NULL) {
      prot_sfp->data.value.ptrvalue = (Pointer) prp;
      SetSeqLocPartial (prot_sfp->location, partial5, partial3);
      prot_sfp->partial = (partial5 || partial3);
    }
  }
  return psep;
}
9861 
9862 
/*
 * AddProteinSequenceToNucSeq
 *
 * Packages a protein SeqEntry with its nucleotide and records the protein
 * as the product of the coding region.
 *
 *   sep  - nucleotide SeqEntry.
 *   psep - protein SeqEntry to add.
 *   cds  - coding region whose product is set to the protein.
 *
 * Nothing is done when any argument is NULL; psep in particular can be
 * NULL when the protein could not be created.
 *
 * When sep already lives in a nuc-prot set, the protein is added to that
 * set.  Otherwise it is added to sep itself, with the result of
 * ExtractBioSourceAndPubs handed back to ReplaceBioSourceAndPubs after the
 * add.
 */
static void AddProteinSequenceToNucSeq (SeqEntryPtr sep, SeqEntryPtr psep, SeqFeatPtr cds)
{
  SeqDescrPtr  descr;
  Uint2        parenttype = 0;
  Pointer      parentptr = NULL;
  BioseqSetPtr bssp;
  SeqEntryPtr  setsep = NULL;

  if (sep == NULL || psep == NULL || cds == NULL) {
    return;
  }

  GetSeqEntryParent (sep, &parentptr, &parenttype);
  if (parenttype == OBJ_BIOSEQSET
      && (bssp = (BioseqSetPtr) parentptr) != NULL
      && bssp->_class == BioseqseqSet_class_nuc_prot) {
    setsep = SeqMgrGetSeqEntryForData (bssp);
  }

  if (setsep == NULL) {
    descr = ExtractBioSourceAndPubs (sep);
    AddSeqEntryToSeqEntry (sep, psep, TRUE);
    ReplaceBioSourceAndPubs (sep, descr);
  } else {
    AddSeqEntryToSeqEntry (setsep, psep, TRUE);
  }

  SetSeqFeatProduct (cds, psep->data.ptrvalue);

  /* after the feature has been created, then adjust it for gaps */
  /* Note - this step may result in multiple coding regions being created. */
  AdjustCDSLocationsForUnknownGapsCallback (cds, NULL);
}
9892 
9893 
9894 typedef struct submissionfeatureinfoplus {
9895   SubmissionFeatureInfoPtr info;
9896   Int4 ambig_count;
9897 } SubmissionFeatureInfoPlusData, PNTR SubmissionFeatureInfoPlusPtr;
9898 
/*
 * AddFeaturesFromSubmissionDialogCallback
 *
 * Bioseq visitor that adds the feature described by the submission dialog
 * to one nucleotide sequence.  Protein Bioseqs are skipped.
 *
 *   bsp  - sequence being visited.
 *   data - SubmissionFeatureInfoPlusPtr carrying the feature description
 *          and the ambiguous-frame counter.
 *
 * For a CDS the coding region is created, translated, and the protein is
 * packaged with the nucleotide.  For rRNA and gene records a feature is
 * created on the Bioseq.  For non-gene records a separate gene feature is
 * added when a gene name was supplied, and the feature comment, if any, is
 * copied onto the main feature.
 *
 * Objects that could not be attached to a feature are freed here rather
 * than dereferenced through a NULL feature pointer.
 */
static void AddFeaturesFromSubmissionDialogCallback (BioseqPtr bsp, Pointer data)
{
  SubmissionFeatureInfoPlusPtr plus;
  SeqFeatPtr sfp = NULL, gene = NULL;
  Boolean    ambig = FALSE;
  ProtRefPtr prp;
  RnaRefPtr  rrp;
  GeneRefPtr grp;
  SeqEntryPtr psep, sep;

  if (bsp == NULL || ISA_aa (bsp->mol)
      || (plus = (SubmissionFeatureInfoPlusPtr) data) == NULL
      || plus->info == NULL) {
    return;
  }

  sep = SeqMgrGetSeqEntryForData (bsp);
  if (sep == NULL) {
    return;
  }

  switch (plus->info->feature_type) {
    case FEATDEF_CDS:
      sfp = AddCDSToSeqEntry (sep, plus->info, &ambig);
      if (ambig) {
        plus->ambig_count++;
      }
      if (sfp != NULL) {
        prp = ProtRefFromSubmissionFeatureInfo (plus->info);
        psep = CreateProteinSeqEntryForCDS (sfp, prp, sep);
        if (psep != NULL) {
          AddProteinSequenceToNucSeq (sep, psep, sfp);
        } else if (prp != NULL) {
          /* no protein was made, so the ProtRef was never attached */
          prp = ProtRefFree (prp);
        }
      }
      break;
    case FEATDEF_rRNA:
      rrp = RnaRefFromSubmissionFeatureInfo (plus->info);
      if (rrp != NULL) {
        sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_RNA, NULL);
        if (sfp != NULL) {
          sfp->data.value.ptrvalue = rrp;
          ApplySubmissionFeatureInfoToLocation (sfp, plus->info);
        } else {
          rrp = RnaRefFree (rrp);
        }
      }
      break;
    case FEATDEF_GENE:
      grp = GeneRefFromSubmissionFeatureInfo (plus->info);
      if (grp != NULL) {
        sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, NULL);
        if (sfp != NULL) {
          sfp->data.value.ptrvalue = grp;
          ApplySubmissionFeatureInfoToLocation (sfp, plus->info);
        } else {
          grp = GeneRefFree (grp);
        }
      }
      break;
    default:
      /* other feature types are not created from the dialog */
      break;
  }
  if (sfp != NULL) {
    if (plus->info->feature_type != FEATDEF_GENE) {
      /* CDS and rRNA features get a companion gene when one was named */
      grp = GeneRefFromSubmissionFeatureInfo (plus->info);
      if (grp != NULL) {
        gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, NULL);
        if (gene != NULL) {
          gene->data.value.ptrvalue = grp;
          ApplySubmissionFeatureInfoToLocation (gene, plus->info);
        } else {
          grp = GeneRefFree (grp);
        }
      }
    }
    if (!StringHasNoText (plus->info->feat_comment)) {
      sfp->comment = StringSave (plus->info->feat_comment);
    }
  }
}
9963 
9964 
/*
 * AddFeaturesFromSubmissionDialog
 *
 * Applies one feature description to every Bioseq found in sep, then
 * tells the user how many records had a possibly ambiguous reading frame.
 *
 *   sep  - SeqEntry to annotate; nothing happens when NULL.
 *   info - feature description; nothing happens when NULL.
 */
static void AddFeaturesFromSubmissionDialog (SeqEntryPtr sep, SubmissionFeatureInfoPtr info)
{
  SubmissionFeatureInfoPlusData plus;

  if (sep == NULL || info == NULL) {
    return;
  }
  MemSet (&plus, 0, sizeof (SubmissionFeatureInfoPlusData));
  plus.info = info;

  VisitBioseqsInSep (sep, &plus, AddFeaturesFromSubmissionDialogCallback);
  if (plus.ambig_count > 0) {
     /* cast so the argument matches %d whatever Int4 is on this platform */
     Message (MSG_OK, "Possible ambiguous frames detected in %d record%s",
              (int) plus.ambig_count, plus.ambig_count > 1 ? "s": "");
  }
}
9981 
9982 
9983 typedef struct submissionfeatureinfodialog
9984 {
9985   DIALOG_MESSAGE_BLOCK
9986 
9987   GrouP           feature_type;
9988   GrouP           details;
9989   GrouP           strand;
9990   ButtoN          partial5;
9991   ButtoN          partial3;
9992   TexT            geneName;
9993   PrompT          protOrRnaPpt;
9994   TexT            protOrRnaName;
9995   PrompT          protDescPpt;
9996   TexT            protDesc;
9997   TexT            featcomment;
9998 } SubmissionFeatureInfoDialogData, PNTR SubmissionFeatureInfoDialogPtr;
9999 
10000 
SubmissionFeatureInfoDialogToPointer(DialoG d)10001 static Pointer SubmissionFeatureInfoDialogToPointer (DialoG d)
10002 {
10003   SubmissionFeatureInfoDialogPtr dlg;
10004   SubmissionFeatureInfoPtr info;
10005   Int2 val;
10006 
10007   dlg = (SubmissionFeatureInfoDialogPtr) GetObjectExtra (d);
10008   if (dlg == NULL) {
10009     return NULL;
10010   }
10011 
10012   info = SubmissionFeatureInfoNew();
10013   val = GetValue (dlg->strand);
10014   if (val == 2) {
10015     info->strand = Seq_strand_minus;
10016   }
10017   info->partial5 = GetStatus (dlg->partial5);
10018   info->partial3 = GetStatus (dlg->partial3);
10019   val = GetValue (dlg->feature_type);
10020   switch (val) {
10021     case 1:
10022       info->feature_type = FEATDEF_CDS;
10023       info->product = SaveStringFromText (dlg->protOrRnaName);
10024       info->prot_desc = SaveStringFromText (dlg->protDesc);
10025       info->gene_name = SaveStringFromText (dlg->geneName);
10026       info->feat_comment = SaveStringFromText (dlg->featcomment);
10027       break;
10028     case 2:
10029       info->feature_type = FEATDEF_rRNA;
10030       info->product = SaveStringFromText (dlg->protOrRnaName);
10031       info->gene_name = SaveStringFromText (dlg->geneName);
10032       info->feat_comment = SaveStringFromText (dlg->featcomment);
10033       break;
10034     case 3:
10035       info->feature_type = FEATDEF_GENE;
10036       info->gene_name = SaveStringFromText (dlg->geneName);
10037       info->feat_comment = SaveStringFromText (dlg->featcomment);
10038       break;
10039     default:
10040       info = SubmissionFeatureInfoFree(info);
10041       break;
10042   }
10043   return info;
10044 }
10045 
10046 
/*
 * ChangeAnnotType
 *
 * Action procedure for the feature type radio group.  Shows or hides the
 * product and protein description controls to suit the selected type,
 * retitles the product prompt, moves the selection to the first relevant
 * text field, and redraws.  Any value other than Gene, rRNA or CDS hides
 * the whole details group.
 *
 *   g - object whose extra data is the dialog state; nothing happens when
 *       no data is attached.
 */
static void ChangeAnnotType (GrouP g)

{
  SubmissionFeatureInfoDialogPtr dlg;
  Int2                           choice;

  dlg = (SubmissionFeatureInfoDialogPtr) GetObjectExtra (g);
  if (dlg != NULL) {
    choice = GetValue (dlg->feature_type);

    if (choice == eSubmitAnnotType_Gene) {
      /* a gene needs neither a product name nor a protein description */
      SafeHide (dlg->protOrRnaPpt);
      SafeHide (dlg->protOrRnaName);
      SafeHide (dlg->protDescPpt);
      SafeHide (dlg->protDesc);
      SafeShow (dlg->details);
      Select (dlg->geneName);
    } else if (choice == eSubmitAnnotType_rRNA) {
      /* an rRNA has a product name but no protein description */
      SafeSetTitle (dlg->protOrRnaPpt, "rRNA Name");
      SafeShow (dlg->protOrRnaPpt);
      SafeShow (dlg->protOrRnaName);
      SafeHide (dlg->protDescPpt);
      SafeHide (dlg->protDesc);
      SafeShow (dlg->details);
      Select (dlg->protOrRnaName);
    } else if (choice == eSubmitAnnotType_CDS) {
      /* a coding region uses every field */
      SafeSetTitle (dlg->protOrRnaPpt, "Protein Name");
      SafeShow (dlg->protOrRnaPpt);
      SafeShow (dlg->protOrRnaName);
      SafeShow (dlg->protDescPpt);
      SafeShow (dlg->protDesc);
      SafeShow (dlg->details);
      Select (dlg->protOrRnaName);
    } else {
      SafeHide (dlg->details);
    }

    Update ();
  }
}
10089 
10090 
/*
 * ChooseFeatureTypeForSubmissionFeatureInfoDialog
 *
 * Selects a feature type in the dialog programmatically and then runs
 * ChangeAnnotType so the visible controls follow the new selection.
 *
 *   d            - the feature info dialog; ignored when it has no data.
 *   feature_type - value to set on the feature type radio group.
 */
static void ChooseFeatureTypeForSubmissionFeatureInfoDialog(DialoG d, Int2 feature_type)
{
  SubmissionFeatureInfoDialogPtr dlg = (SubmissionFeatureInfoDialogPtr) GetObjectExtra (d);

  if (dlg != NULL) {
    SetValue (dlg->feature_type, feature_type);
    ChangeAnnotType (dlg->feature_type);
  }
}
10100 
10101 
SubmissionFeatureInfoDialog(GrouP h)10102 static DialoG SubmissionFeatureInfoDialog (GrouP h)
10103 {
10104   SubmissionFeatureInfoDialogPtr dlg;
10105   GrouP                          p, x, y;
10106 
10107   dlg = (SubmissionFeatureInfoDialogPtr) MemNew (sizeof (SubmissionFeatureInfoDialogData));
10108   if (dlg == NULL)
10109   {
10110     return NULL;
10111   }
10112 
10113   p = HiddenGroup (h, -1, 0, NULL);
10114   SetObjectExtra (p, dlg, StdCleanupExtraProc);
10115 
10116   dlg->dialog = (DialoG) p;
10117   dlg->fromdialog = SubmissionFeatureInfoDialogToPointer;
10118 
10119   dlg->feature_type = HiddenGroup (p, 5, 0, ChangeAnnotType);
10120   SetObjectExtra (dlg->feature_type, dlg, NULL);
10121   RadioButton (dlg->feature_type, "CDS");
10122   RadioButton (dlg->feature_type, "rRNA");
10123   RadioButton (dlg->feature_type, "Gene");
10124   RadioButton (dlg->feature_type, "None");
10125   SetValue (dlg->feature_type, eSubmitAnnotType_CDS);
10126   dlg->details = HiddenGroup (p, -1, 0, NULL);
10127   SetGroupSpacing (dlg->details, 10, 10);
10128   x = HiddenGroup (dlg->details, 2, 0, NULL);
10129   dlg->partial5 = CheckBox (x, "Incomplete at 5' end", NULL);
10130   dlg->partial3 = CheckBox (x, "Incomplete at 3' end", NULL);
10131   dlg->strand = HiddenGroup (dlg->details, 2, 0, NULL);
10132   RadioButton (dlg->strand, "Plus strand");
10133   RadioButton (dlg->strand, "Minus strand");
10134   SetValue (dlg->strand, 1);
10135   y = HiddenGroup (dlg->details, 2, 0, NULL);
10136   dlg->protOrRnaPpt = StaticPrompt (y, "Protein Name", 0, dialogTextHeight, programFont, 'l');
10137   dlg->protOrRnaName = DialogText (y, "", 20, NULL);
10138   dlg->protDescPpt = StaticPrompt (y, "Protein Description", 0, dialogTextHeight, programFont, 'l');
10139   dlg->protDesc = DialogText (y, "", 20, NULL);
10140   StaticPrompt (y, "Gene Symbol", 0, dialogTextHeight, programFont, 'l');
10141   dlg->geneName = DialogText (y, "", 20, NULL);
10142   StaticPrompt (y, "Comment", 0, 3 * Nlm_stdLineHeight, programFont, 'l');
10143   dlg->featcomment = ScrollText (y, 20, 3, programFont, TRUE, NULL);
10144   AlignObjects (ALIGN_CENTER, (HANDLE) x, (HANDLE) dlg->strand, (HANDLE) y, NULL);
10145   AlignObjects (ALIGN_CENTER, (HANDLE) dlg->feature_type, (HANDLE) dlg->details, NULL);
10146 
10147   return (DialoG) p;
10148 }
10149 
10150 
/*
 * GetSubmissionFeatureInfoFeatCommentRect
 *
 * Fetches the rectangle of the dialog's comment text box via ObjectRect.
 *
 *   d    - the feature info dialog.
 *   rect - receives the rectangle; left untouched when it is NULL or the
 *          dialog has no data attached.
 */
static void GetSubmissionFeatureInfoFeatCommentRect (DialoG d, RectPtr rect)
{
  SubmissionFeatureInfoDialogPtr dlg;

  if (rect != NULL) {
    dlg = (SubmissionFeatureInfoDialogPtr) GetObjectExtra (d);
    if (dlg != NULL) {
      ObjectRect (dlg->featcomment, rect);
    }
  }
}
10163 
10164 
/*
 * ApplySubmissionFeatureInfo
 *
 * Reads the feature description from the dialog and adds the matching
 * features to sep.  When the dialog asks for a CDS but
 * IsSubmissionFeatureInfoEmpty reports the record as empty, the dialog is
 * switched to the Gene type and read again before anything is added.
 *
 *   sep - SeqEntry to annotate.
 *   d   - the feature info dialog; its selected type may be changed.
 */
static void ApplySubmissionFeatureInfo (SeqEntryPtr sep, DialoG d)
{
  SubmissionFeatureInfoPtr info;

  info = DialogToPointer (d);
  if (info == NULL) {
    return;
  }

  if (info->feature_type == FEATDEF_CDS && IsSubmissionFeatureInfoEmpty (info)) {
    /* an empty CDS request is retried as a gene request */
    ChooseFeatureTypeForSubmissionFeatureInfoDialog (d, eSubmitAnnotType_Gene);
    SubmissionFeatureInfoFree (info);
    info = DialogToPointer (d);
    if (info == NULL) {
      return;
    }
  }

  AddFeaturesFromSubmissionDialog (sep, info);
  SubmissionFeatureInfoFree (info);
}
10183 
10184 
SetTsaCallback(BioseqPtr bsp,Pointer data)10185 NLM_EXTERN void SetTsaCallback (BioseqPtr bsp, Pointer data)
10186 {
10187   SeqDescrPtr sdp, sdp_last = NULL;
10188   MolInfoPtr mip;
10189 
10190   if (bsp == NULL || ISA_aa (bsp->mol)) {
10191     return;
10192   }
10193 
10194   /* note - can't use indexed functions yet */
10195 
10196   sdp = bsp->descr;
10197   while (sdp != NULL && sdp->choice != Seq_descr_molinfo) {
10198     sdp = sdp->next;
10199   }
10200   if (sdp == NULL) {
10201     sdp = SeqDescrNew (NULL);
10202     if (sdp_last == NULL) {
10203       bsp->descr = sdp;
10204     } else {
10205       sdp_last->next = sdp;
10206     }
10207     sdp->choice = Seq_descr_molinfo;
10208   }
10209   mip = (MolInfoPtr) sdp->data.ptrvalue;
10210   if (mip == NULL) {
10211     mip = MolInfoNew ();
10212     sdp->data.ptrvalue = mip;
10213   }
10214   mip->tech = MI_TECH_tsa;
10215   mip->biomol = MOLECULE_TYPE_MRNA;
10216 }
10217 
10218 
/*
 * ApplySeqPkgToSet
 *
 * Sets the class of a BioseqSet from the package type chosen in the
 * sequences form.
 *
 *   bssp       - set to modify; nothing happens when NULL.
 *   seqPackage - one of the SEQ_PKG_* values.  GenBank, TSA and every
 *                value not listed below all map to class 7.
 *
 * NOTE(review): the numeric classes are kept as written; they presumably
 * correspond to the BioseqseqSet_class_* constants - confirm before
 * replacing them with names.
 */
static void ApplySeqPkgToSet (BioseqSetPtr bssp, Int2 seqPackage)
{
  if (bssp != NULL) {
    if (seqPackage == SEQ_PKG_GENOMICCDNA) {
      bssp->_class = BioseqseqSet_class_gen_prod_set;
    } else if (seqPackage == SEQ_PKG_POPULATION) {
      bssp->_class = 14;
    } else if (seqPackage == SEQ_PKG_PHYLOGENETIC) {
      bssp->_class = 15;
    } else if (seqPackage == SEQ_PKG_MUTATION) {
      bssp->_class = 13;
    } else if (seqPackage == SEQ_PKG_ENVIRONMENT) {
      bssp->_class = 16;
    } else {
      /* SEQ_PKG_GENBANK, SEQ_PKG_TSA and anything unrecognised */
      bssp->_class = 7;
    }
  }
}
10252 
10253 
FastaSequencesFormToSeqEntryPtr(ForM f)10254 static Pointer FastaSequencesFormToSeqEntryPtr (ForM f)
10255 
10256 {
10257   BioSourcePtr      biop = NULL;
10258   BioseqSetPtr      bssp;
10259   Int2              code;
10260   DatePtr           dp;
10261   FastaPagePtr      fpp;
10262   IdListPtr         head;
10263   SeqEntryPtr       list;
10264   SeqEntryPtr       next;
10265   SeqEntryPtr       nucsep;
10266   Boolean           partialmRNA5;
10267   Boolean           partialmRNA3;
10268   SeqEntryPtr       sep;
10269   SequencesFormPtr  sqfp;
10270   ValNodePtr        vnp;
10271 
10272   sep = NULL;
10273   sqfp = (SequencesFormPtr) GetObjectExtra (f);
10274   if (sqfp == NULL) {
10275     return (Pointer) sep;
10276   }
10277 
10278   WatchCursor ();
10279   Update ();
10280   head = NULL;
10281   fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
10282   if (fpp != NULL) {
10283     ResolveCollidingIDs (&head, fpp->list);
10284   }
10285   /* NOTE - we do not resolve colliding IDs for proteins here.
10286    * Duplicate protein IDs are resolved when they are assigned
10287    * to nucleotide sequences.
10288    */
10289   fpp = (FastaPagePtr) GetObjectExtra (sqfp->mrnaseq);
10290   if (fpp != NULL) {
10291     ResolveCollidingIDs (&head, fpp->list);
10292   }
10293   FreeTree (&head);
10294   code = 1;
10295   list = NULL;
10296   fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
10297   if (fpp != NULL) {
10298     list = fpp->list;
10299     fpp->list = NULL;
10300   }
10301   if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA
10302       || PackageTypeIsSet (sqfp->seqPackage))
10303   {
10304     bssp = BioseqSetNew ();
10305     if (bssp != NULL) {
10306       ApplySeqPkgToSet (bssp, sqfp->seqPackage);
10307       sep = SeqEntryNew ();
10308       if (sep != NULL) {
10309         sep->choice = 2;
10310         sep->data.ptrvalue = (Pointer) bssp;
10311       }
10312     }
10313   }
10314   if (list != NULL) {
10315     if (list->next == NULL) {
10316       OnlyOneComponentWarning (sqfp);
10317     }
10318     while (list != NULL) {
10319       next = list->next;
10320       list->next = NULL;
10321       if (sep != NULL) {
10322         AddSeqEntryToSeqEntry (sep, list, TRUE);
10323         AutomaticNucleotideProcess (sqfp, list, list);
10324       } else {
10325         sep = list;
10326         AutomaticNucleotideProcess (sqfp, list, list);
10327       }
10328       if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA
10329           || PackageTypeIsSet (sqfp->seqPackage))
10330       {
10331         PutMolInfoOnSeqEntry (sqfp, list);
10332       }
10333       list = next;
10334     }
10335   }
10336   if (sep != NULL) {
10337     sqfp->dnamolfrommolinfo = 0;
10338     dp = DateCurr ();
10339     if (dp != NULL) {
10340       vnp = CreateNewDescriptor (sep, Seq_descr_create_date);
10341       if (vnp != NULL) {
10342         vnp->data.ptrvalue = (Pointer) dp;
10343       }
10344     }
10345   }
10346   if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA &&
10347       sqfp->seqPackage != SEQ_PKG_GAPPED) {
10348     if (! TextHasNoText (sqfp->defline)) {
10349       ApplyAnnotationToAll (ADD_TITLE, sep, NULL, NULL,
10350                             NULL, NULL, NULL, NULL, NULL, sqfp->defline);
10351     }
10352     if (GetStatus (sqfp->orgPrefix)) {
10353       PrefixOrgToDefline (sep);
10354     }
10355   }
10356 
10357   if (sep != NULL && sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
10358     list = NULL;
10359     fpp = (FastaPagePtr) GetObjectExtra (sqfp->mrnaseq);
10360     if (fpp != NULL) {
10361       list = fpp->list;
10362       /* now we will keep instantiated mrna bioseqs */
10363       fpp->list = NULL;
10364     }
10365 
10366     if (list != NULL) {
10367       nucsep = FindNucSeqEntry (sep);
10368       if (nucsep != NULL) {
10369         partialmRNA5 = GetStatus (sqfp->partialmRNA5);
10370         partialmRNA3 = GetStatus (sqfp->partialmRNA3);
10371         while (list != NULL) {
10372           next = list->next;
10373           list->next = NULL;
10374           AddSeqEntryToSeqEntry (sep, list, TRUE);
10375           AutomaticMrnaProcess (nucsep, list, partialmRNA5, partialmRNA3);
10376           list = next;
10377         }
10378       }
10379     }
10380   }
10381 
10382   list = NULL;
10383   fpp = (FastaPagePtr) GetObjectExtra (sqfp->protseq);
10384   if (fpp != NULL) {
10385     list = fpp->list;
10386     fpp->list = NULL;
10387   }
10388   if (list != NULL) {
10389     BuildNucProtSets (sep, list, sqfp, code);
10390   }
10391   if (biop != NULL) {
10392     vnp = CreateNewDescriptor (sep, Seq_descr_source);
10393     if (vnp != NULL) {
10394       vnp->data.ptrvalue = (Pointer) biop;
10395     }
10396   }
10397   if (PackageTypeIsSet (sqfp->seqPackage)) {
10398     if (GetStatus (sqfp->makeAlign)) {
10399       AddSeqAlignForSeqEntry (sep, ObjMgrGetEntityIDForChoice (sep), FALSE, TRUE);
10400     }
10401   }
10402   if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA &&
10403       sqfp->seqPackage != SEQ_PKG_GAPPED) {
10404       ApplySubmissionFeatureInfo (sep, sqfp->feature_info);
10405   }
10406   if (sqfp->seqPackage == SEQ_PKG_TSA) {
10407     VisitBioseqsInSep (sep, NULL, SetTsaCallback);
10408   }
10409   ArrowCursor ();
10410   Update ();
10411 
10412   FuseNucProtBiosources (sep);
10413   return (Pointer) sep;
10414 }
10415 
LaunchSequinQuickGuide(void)10416 static void LaunchSequinQuickGuide (void)
10417 
10418 {
10419   Char       str [256];
10420 #ifdef WIN_MOTIF
10421   NS_Window  window = NULL;
10422 #endif
10423 
10424   sprintf (str,
10425            "http://www.ncbi.nlm.nih.gov/Sequin/QuickGuide/sequin.htm#before");
10426 #ifdef WIN_MAC
10427   Nlm_SendURLAppleEvent (str, "MOSS", NULL);
10428 #endif
10429 #ifdef WIN_MSWIN
10430   Nlm_MSWin_OpenDocument (str);
10431 #endif
10432 #ifdef WIN_MOTIF
10433   NS_OpenURL (&window, str, NULL, TRUE);
10434   NS_WindowFree (window);
10435 #endif
10436 }
10437 
10438 extern Boolean allowUnableToProcessMessage;
10439 
10440 static CharPtr noOrgInTitleAbort =
10441 "sequences have organism information in titles. " \
10442 "It is critical to annotate the data file with organism and source information. " \
10443 "Sequin will not continue processing this submission. " \
10444 "Please read the Sequin Quick Guide section on preparing the data files before proceeding. " \
10445 "Do you wish to launch your browser on the Sequin Quick Guide automatically?";
10446 
10447 static CharPtr pleaseReadLocalGuide =
10448 "Please read your local copy of the Sequin Quick Guide before annotating your data file.";
10449 
10450 static CharPtr noSrcInTitleAbort =
10451 "sequences have source information in titles. " \
10452 "It is critical to annotate the data file with organism and source information. " \
10453 "Sequin will continue processing this submission. " \
10454 "However, please consider reading the Sequin Quick Guide section on preparing the data files before proceeding.";
10455 
PhylipSequencesFormToSeqEntryPtr(ForM f)10456 static Pointer PhylipSequencesFormToSeqEntryPtr (ForM f)
10457 
10458 {
10459   MsgAnswer         ans;
10460   BioseqSetPtr      bssp;
10461   Int2              code;
10462   DatePtr           dp;
10463   PhylipPagePtr     ppp;
10464   SeqEntryPtr       sep;
10465   Int2              seqtitles;
10466   Int2              seqtotals;
10467   Char              str [256];
10468   SequencesFormPtr  sqfp;
10469   SeqEntryPtr       tmp;
10470   CharPtr           ttl;
10471   ValNodePtr        vnp;
10472   SeqEntryPtr       list;
10473   FastaPagePtr      fpp;
10474 
10475   sep = NULL;
10476   sqfp = (SequencesFormPtr) GetObjectExtra (f);
10477   if (sqfp != NULL) {
10478     code = 1;
10479     ppp = (PhylipPagePtr) GetObjectExtra (sqfp->dnaseq);
10480     if (ppp != NULL) {
10481       sep = ppp->sep;
10482       ppp->sep = NULL;
10483     }
10484     if (sep != NULL) {
10485 
10486       if (IS_Bioseq_set (sep)) {
10487         bssp = (BioseqSetPtr) sep->data.ptrvalue;
10488         if (bssp != NULL && (bssp->_class == 7 ||
10489                              (IsPopPhyEtcSet (bssp->_class)))) {
10490           seqtitles = 0;
10491           seqtotals = 0;
10492           for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
10493             /*
10494             ttl = SeqEntryGetTitle (tmp);
10495             */
10496             ttl = NULL;
10497             SeqEntryExplore (sep, (Pointer) (&ttl), FindFirstTitle);
10498             if (ttl != NULL) {
10499               if (sqfp->seqPackage == SEQ_PKG_PHYLOGENETIC) {
10500                 if (StringISearch (ttl, "[org=") != NULL ||
10501                     StringISearch (ttl, "[organism=") != NULL) {
10502                   seqtitles++;
10503                 }
10504               } else if (StringISearch (ttl, "[") != NULL) {
10505                 seqtitles++;
10506               }
10507             }
10508             seqtotals++;
10509           }
10510           if (seqtotals != seqtitles) {
10511             sprintf (str, "None");
10512             if (seqtitles > 0) {
10513               sprintf (str, "Only %d", (int) seqtitles);
10514             }
10515             ArrowCursor ();
10516             Update ();
10517             Beep ();
10518             if (! indexerVersion) {
10519               if (sqfp->seqPackage == SEQ_PKG_PHYLOGENETIC) {
10520                 ans = Message (MSG_YN, "%s of %d %s", str, (int) seqtotals, noOrgInTitleAbort);
10521                 if (ans == ANS_YES) {
10522                   LaunchSequinQuickGuide ();
10523                 } else {
10524                   Message (MSG_OK, "%s", pleaseReadLocalGuide);
10525                 }
10526                 allowUnableToProcessMessage = FALSE;
10527                 QuitProgram ();
10528                 return NULL; /* aborting */
10529               } else {
10530                 Message (MSG_OK, "%s of %d %s", str, (int) seqtotals, noSrcInTitleAbort);
10531               }
10532             } else {
10533               if (sqfp->seqPackage == SEQ_PKG_PHYLOGENETIC) {
10534                 Message (MSG_OK, "%s of %d %s (Regular version will abort here.)", str, (int) seqtotals, noOrgInTitleAbort);
10535               } else {
10536                 Message (MSG_OK, "%s of %d %s (Regular version will continue here.)", str, (int) seqtotals, noSrcInTitleAbort);
10537               }
10538             }
10539           }
10540         }
10541         if (bssp != NULL) {
10542           ApplySeqPkgToSet (bssp, sqfp->seqPackage);
10543           tmp = bssp->seq_set;
10544           if (tmp == NULL || tmp->next == NULL) {
10545             OnlyOneComponentWarning (sqfp);
10546           }
10547           for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
10548             AutomaticNucleotideProcess (sqfp, tmp, tmp);
10549             PutMolInfoOnSeqEntry (sqfp, tmp);
10550           }
10551         }
10552       } else {
10553         OnlyOneComponentWarning (sqfp);
10554         PutMolInfoOnSeqEntry (sqfp, sep);
10555       }
10556       dp = DateCurr ();
10557       if (dp != NULL) {
10558         vnp = CreateNewDescriptor (sep, Seq_descr_create_date);
10559         if (vnp != NULL) {
10560           vnp->data.ptrvalue = (Pointer) dp;
10561         }
10562       }
10563     }
10564     if (PackageTypeIsSet (sqfp->seqPackage)) {
10565       if (! TextHasNoText (sqfp->defline)) {
10566         ApplyAnnotationToAll (ADD_TITLE, sep, NULL, NULL,
10567                               NULL, NULL, NULL, NULL, NULL, sqfp->defline);
10568       }
10569       if (GetStatus (sqfp->orgPrefix)) {
10570         PrefixOrgToDefline (sep);
10571       }
10572     }
10573     ApplySubmissionFeatureInfo (sep, sqfp->feature_info);
10574   }
10575   FuseNucProtBiosources (sep);
10576 
10577   if (sqfp != NULL) {
10578     list = NULL;
10579     fpp = (FastaPagePtr) GetObjectExtra (sqfp->protseq);
10580     if (fpp != NULL) {
10581       list = fpp->list;
10582       fpp->list = NULL;
10583     }
10584     if (list != NULL) {
10585       BuildNucProtSets (sep, list, sqfp, code);
10586     }
10587   }
10588 
10589   return (Pointer) sep;
10590 }
10591 
10592 static void SeqEntryPtrToSourceTab (SequencesFormPtr sqfp);
10593 static SeqEntryPtr GetSeqEntryFromSequencesForm (SequencesFormPtr sqfp);
10594 static void ReplaceAllAliases (SeqEntryPtr sep);
10595 static void ReplaceMolNamesWithMolBracketsInDefinitionLines (SeqEntryPtr sep);
10596 static Boolean CheckSequencesForOrganisms (SequencesFormPtr sqfp);
10597 static void SequencesFormDeleteProc (Pointer formDataPtr);
10598 
NucleotideImportFinish(SequencesFormPtr sqfp)10599 static void NucleotideImportFinish (SequencesFormPtr sqfp)
10600 {
10601   FastaPagePtr  fpp = NULL;
10602   PhylipPagePtr ppp = NULL;
10603   SeqEntryPtr   sep = NULL;
10604   Boolean       cancelled = FALSE;
10605   BioseqSetPtr  bssp;
10606 
10607   if (sqfp == NULL) return;
10608 
10609   if (sqfp->seqFormat == SEQ_FMT_FASTA) {
10610     fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
10611     if (fpp != NULL)
10612     {
10613       sep = fpp->list;
10614     }
10615   } else if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
10616     ppp = (PhylipPagePtr) GetObjectExtra (sqfp->dnaseq);
10617     if (ppp != NULL) {
10618       sep = ppp->sep;
10619       if (sep != NULL && IS_Bioseq_set (sep) && sep->data.ptrvalue != NULL)
10620       {
10621         bssp = (BioseqSetPtr) sep->data.ptrvalue;
10622         sep = bssp->seq_set;
10623       }
10624     }
10625   }
10626 
10627   if (sep == NULL)
10628   {
10629     Disable (sqfp->molecule_btn);
10630     Disable (sqfp->topology_btn);
10631     Disable (sqfp->import_mod_btn);
10632     Disable (sqfp->source_assist_btn);
10633     Disable (sqfp->specify_orgs_btn);
10634     Disable (sqfp->specify_locs_btn);
10635     Disable (sqfp->specify_gcode_btn);
10636     Disable (sqfp->specify_mgcode_btn);
10637     Disable (sqfp->clear_mods_btn);
10638     Disable (sqfp->vecscreen_btn);
10639   }
10640   else
10641   {
10642     Enable (sqfp->molecule_btn);
10643     Enable (sqfp->topology_btn);
10644     Enable (sqfp->import_mod_btn);
10645     Enable (sqfp->source_assist_btn);
10646     Enable (sqfp->specify_orgs_btn);
10647     Enable (sqfp->specify_locs_btn);
10648     Enable (sqfp->specify_gcode_btn);
10649     Enable (sqfp->specify_mgcode_btn);
10650     Enable (sqfp->clear_mods_btn);
10651     Enable (sqfp->vecscreen_btn);
10652   }
10653 
10654   ReplaceAllAliases (sep);
10655   ReplaceMolNamesWithMolBracketsInDefinitionLines (sep);
10656 
10657   AddDefaultModifierValues (sep);
10658 
10659   if (fpp != NULL)
10660   {
10661     Reset (fpp->doc);
10662     FormatFastaDoc (fpp);
10663   }
10664 
10665   if (cancelled)
10666   {
10667     SequencesFormDeleteProc (sqfp);
10668   }
10669 
10670   SeqEntryPtrToSourceTab (sqfp);
10671 }
10672 
/*
 * GetFirstModValueFromSeqEntryTitles
 *
 * Walks the chain starting at sep and returns the first non-NULL result
 * of GetModValueFromSeqEntry for mod_name.  Returns NULL when the chain
 * is empty or no entry yields a value.
 */
static CharPtr GetFirstModValueFromSeqEntryTitles (SeqEntryPtr sep, CharPtr mod_name)
{
  SeqEntryPtr cur;
  CharPtr     found;

  for (cur = sep; cur != NULL; cur = cur->next)
  {
    found = GetModValueFromSeqEntry (cur, mod_name);
    if (found != NULL)
    {
      return found;
    }
  }
  return NULL;
}
10684 
GetSeqIdFromSeqEntryPtr(SeqEntryPtr sep)10685 static SeqIdPtr GetSeqIdFromSeqEntryPtr (SeqEntryPtr sep)
10686 {
10687   BioseqPtr    bsp;
10688   BioseqSetPtr bssp;
10689 
10690   if (sep == NULL || sep->data.ptrvalue == NULL) {
10691   	return NULL;
10692   }
10693   if (IS_Bioseq (sep)) {
10694   	bsp = (BioseqPtr) sep->data.ptrvalue;
10695     return bsp->id;
10696   } else if (IS_Bioseq_set(sep)) {
10697   	bssp = (BioseqSetPtr) sep->data.ptrvalue;
10698     return GetSeqIdFromSeqEntryPtr (bssp->seq_set);
10699   }
10700   return NULL;
10701 }
10702 
10703 static void
ReportMissingOrganismNames(ValNodePtr no_org_list)10704 ReportMissingOrganismNames
10705 (ValNodePtr no_org_list)
10706 {
10707   Char         path [PATH_MAX];
10708   FILE         *fp;
10709   ValNodePtr   vnp;
10710 
10711   if (no_org_list == NULL)
10712   {
10713     return;
10714   }
10715 
10716   TmpNam (path);
10717   fp = FileOpen (path, "wb");
10718   if (fp == NULL) return;
10719 
10720   if (no_org_list != NULL)
10721   {
10722     fprintf (fp, "The following sequences have no organism names.  You must supply one for each sequence listed.\n");
10723     for (vnp = no_org_list; vnp != NULL; vnp = vnp->next)
10724     {
10725       fprintf (fp, "%s\n", (CharPtr) vnp->data.ptrvalue);
10726     }
10727   }
10728 
10729   FileClose (fp);
10730   LaunchGeneralTextViewer (path, "Organism Errors");
10731   FileRemove (path);
10732 }
10733 
10734 
CheckSequencesForOrganisms(SequencesFormPtr sqfp)10735 static Boolean CheckSequencesForOrganisms (SequencesFormPtr sqfp)
10736 {
10737   SeqEntryPtr       sep_list;
10738   ValNodePtr        no_org_list = NULL;
10739   Boolean           rval = TRUE;
10740   IDAndTitleEditPtr iatep;
10741   Int4              seq_num;
10742   CharPtr           org_name_from_title;
10743 
10744   if (sqfp == NULL) return FALSE;
10745   sep_list = GetSeqEntryFromSequencesForm (sqfp);
10746   if (sep_list == NULL) return FALSE;
10747 
10748   iatep = SeqEntryListToIDAndTitleEdit (sep_list);
10749   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
10750   {
10751     if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
10752     {
10753       continue;
10754     }
10755     org_name_from_title = FindValueFromPairInDefline ("organism", iatep->title_list [seq_num]);
10756     if (StringHasNoText (org_name_from_title))
10757  	  {
10758  	    rval = FALSE;
10759  	    ValNodeAddPointer (&no_org_list, 0, StringSave (iatep->id_list [seq_num]));
10760  	  }
10761     org_name_from_title = MemFree (org_name_from_title);
10762   }
10763 
10764   ReportMissingOrganismNames (no_org_list);
10765   no_org_list = ValNodeFreeData (no_org_list);
10766   iatep = IDAndTitleEditFree (iatep);
10767   return rval;
10768 }
10769 
ExportSequencesForm(ForM f,CharPtr filename)10770 static Boolean ExportSequencesForm (ForM f, CharPtr filename)
10771 
10772 {
10773   SequencesFormPtr  sqfp;
10774   Boolean           rval = FALSE;
10775 
10776   sqfp = (SequencesFormPtr) GetObjectExtra (f);
10777   if (sqfp != NULL) {
10778     switch (sqfp->tagFromPage [sqfp->currentPage]) {
10779       case NUCLEOTIDE_PAGE :
10780         rval = ExportDialog (sqfp->dnaseq, "");
10781         break;
10782       case MRNA_PAGE :
10783         break;
10784       case PROTEIN_PAGE :
10785         break;
10786       case ORGANISM_PAGE:
10787         break;
10788       default :
10789         break;
10790     }
10791   }
10792   return rval;
10793 }
10794 
IsAnnotTabEmpty(SequencesFormPtr sqfp)10795 NLM_EXTERN Boolean IsAnnotTabEmpty (SequencesFormPtr sqfp)
10796 
10797 {
10798   SubmissionFeatureInfoPtr feature_info;
10799   Boolean rval;
10800 
10801   if (sqfp == NULL)
10802   {
10803     return TRUE;
10804   }
10805   feature_info = (SubmissionFeatureInfoPtr) DialogToPointer (sqfp->feature_info);
10806   rval = IsSubmissionFeatureInfoEmpty (feature_info);
10807   feature_info = SubmissionFeatureInfoFree (feature_info);
10808   return rval;
10809 }
10810 
10811 
GetSubmissionFeatureInfo(SequencesFormPtr sqfp)10812 NLM_EXTERN SubmissionFeatureInfoPtr GetSubmissionFeatureInfo (SequencesFormPtr sqfp)
10813 {
10814   SubmissionFeatureInfoPtr feature_info;
10815 
10816   if (sqfp == NULL)
10817   {
10818     return NULL;
10819   }
10820   feature_info = (SubmissionFeatureInfoPtr) DialogToPointer (sqfp->feature_info);
10821   return feature_info;
10822 }
10823 
10824 
10825 static void ChooseFeatureTypeForSubmissionFeatureInfoDialog(DialoG d, Int2 feature_type);
10826 
ImportSequencesForm(ForM f,CharPtr filename)10827 static Boolean ImportSequencesForm (ForM f, CharPtr filename)
10828 
10829 {
10830   SequencesFormPtr  sqfp;
10831   FastaPagePtr      fpp;
10832   Boolean           rval = FALSE;
10833   SeqEntryPtr       seq_list;
10834   IDAndTitleEditPtr iatep;
10835 
10836   sqfp = (SequencesFormPtr) GetObjectExtra (f);
10837   if (sqfp != NULL) {
10838     switch (sqfp->tagFromPage [sqfp->currentPage]) {
10839       case NUCLEOTIDE_PAGE :
10840         rval = ImportDialog (sqfp->dnaseq, "");
10841         if (rval)
10842         {
10843           NucleotideImportFinish (sqfp);
10844           seq_list = GetSeqEntryFromSequencesForm (sqfp);
10845           if (sqfp->seqFormat == SEQ_FMT_FASTA && (fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq)) != NULL
10846               && SuggestJumpingToWizard (seq_list)) {
10847             fpp->list = NULL;
10848             Remove (sqfp->form);
10849           }
10850         }
10851         break;
10852       case MRNA_PAGE :
10853         rval = ImportDialog (sqfp->mrnaseq, "");
10854         break;
10855       case PROTEIN_PAGE :
10856         rval = ImportDialog (sqfp->protseq, "");
10857         if (rval && IsAnnotTabEmpty (sqfp))
10858         {
10859           ChooseFeatureTypeForSubmissionFeatureInfoDialog (sqfp->feature_info, eSubmitAnnotType_None);
10860         }
10861         break;
10862       case ORGANISM_PAGE:
10863         seq_list = GetSeqEntryFromSequencesForm (sqfp);
10864         iatep = SeqEntryListToIDAndTitleEdit (seq_list);
10865         rval = ImportModifiersToIDAndTitleEdit (iatep);
10866         if (rval)
10867         {
10868           ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
10869           SeqEntryPtrToSourceTab (sqfp);
10870         }
10871         iatep = IDAndTitleEditFree (iatep);
10872         break;
10873       default :
10874         break;
10875     }
10876   }
10877   return rval;
10878 }
10879 
/*
 * ImportBtnProc
 *
 * Button callback: runs the form's import handler for the form attached
 * to the button.  The import result is ignored.
 */
static void ImportBtnProc (ButtoN b)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) GetObjectExtra (b);

  if (sqfp == NULL) return;

  ImportSequencesForm (sqfp->form, "");
}
10890 
/*
 * SetOrgNucProtImportExportItems
 *
 * Sets the titles and enabled state of the form's Import and Export
 * menu items to match the page currently displayed.  Export is enabled
 * only on the nucleotide page in FASTA format for packages other than
 * genomic/cDNA; Import is enabled on every page except the annotation
 * page.  Pages with an unrecognised tag leave the items untouched.
 */
static void SetOrgNucProtImportExportItems (SequencesFormPtr sqfp)

{
  IteM     exportItm;
  IteM     importItm;
  CharPtr  import_title = NULL;

  if (sqfp == NULL) return;

  importItm = FindFormMenuItem ((BaseFormPtr) sqfp, VIB_MSG_IMPORT);
  exportItm = FindFormMenuItem ((BaseFormPtr) sqfp, VIB_MSG_EXPORT);

  switch (sqfp->tagFromPage [sqfp->currentPage]) {
    case ORGANISM_PAGE :
      import_title = "Import Organism Modifiers From File";
      break;
    case MRNA_PAGE :
      import_title = "Import Transcript FASTA...";
      break;
    case PROTEIN_PAGE :
      import_title = "Import Protein FASTA...";
      break;
    case NUCLEOTIDE_PAGE :
      /* export starts out generic and disabled; FASTA may re-enable it */
      SafeSetTitle (exportItm, "Export...");
      SafeDisable (exportItm);
      if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
        SafeSetTitle (importItm, "Import Nucleotide Alignment...");
      } else if (sqfp->seqFormat == SEQ_FMT_FASTA
                 && sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
        SafeSetTitle (importItm, "Import Genomic FASTA...");
      } else {
        SafeSetTitle (importItm, "Import Nucleotide FASTA...");
        if (sqfp->seqFormat == SEQ_FMT_FASTA) {
          SafeSetTitle (exportItm, "Export Nucleotide FASTA...");
          SafeEnable (exportItm);
        }
      }
      SafeEnable (importItm);
      return;
    case ANNOTATE_PAGE :
      SafeSetTitle (importItm, "Import...");
      SafeSetTitle (exportItm, "Export...");
      SafeDisable (importItm);
      SafeDisable (exportItm);
      return;
    default :
      return;
  }

  /* organism, transcript and protein pages: import only */
  SafeSetTitle (importItm, import_title);
  SafeSetTitle (exportItm, "Export...");
  SafeEnable (importItm);
  SafeDisable (exportItm);
}
10952 
/*
 * ChangeSequencesPage
 *
 * Switches the visible page of the sequences form.
 *
 * data:   the SequencesFormPtr (nothing is done when NULL).
 * newval: index of the tab being shown.
 * oldval: index of the tab being hidden.
 * When the form's show_annot flag is set, both indices are offset by 2
 * before being used as page indices.
 *
 * The routine records the new current page, hides the old page, sends
 * VIB_MSG_ENTER to the sequence dialog of the new page (nucleotide,
 * transcript or protein), retitles the Prev/Next buttons for the first
 * and last page, refreshes the Import/Export menu items, shows the new
 * page and finally scrolls the help form to the matching section.
 */
static void ChangeSequencesPage (VoidPtr data, Int2 newval, Int2 oldval)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) data;

  if (sqfp == NULL) return;

  if (sqfp->show_annot) {
    newval += 2;
    oldval += 2;
  }

  sqfp->currentPage = newval;
  SafeHide (sqfp->pages [oldval]);
  Update ();

  /* give the sequence dialog of the incoming page an ENTER message */
  switch (sqfp->tagFromPage [newval]) {
    case NUCLEOTIDE_PAGE :
      SendMessageToDialog (sqfp->dnaseq, VIB_MSG_ENTER);
      break;
    case MRNA_PAGE :
      SendMessageToDialog (sqfp->mrnaseq, VIB_MSG_ENTER);
      break;
    case PROTEIN_PAGE :
      SendMessageToDialog (sqfp->protseq, VIB_MSG_ENTER);
      break;
    default :
      break;
  }

  SafeSetTitle (sqfp->prevBtn, (newval == 0) ? "<< Prev Form" : "<< Prev Page");
  SafeSetTitle (sqfp->nextBtn,
                (newval == sqfp->numPages - 1) ? "Next Form >>" : "Next Page >>");

  SetOrgNucProtImportExportItems (sqfp);
  SafeShow (sqfp->pages [newval]);
  Update ();

  /* bring the help window to the section for the incoming page */
  switch (sqfp->tagFromPage [newval]) {
    case ORGANISM_PAGE :
      SendHelpScrollMessage (helpForm, "Organism Page", NULL);
      break;
    case NUCLEOTIDE_PAGE :
      if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
        SendHelpScrollMessage (helpForm, "Organism and Sequences Form", "Genomic Page");
      } else if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
        SendHelpScrollMessage (helpForm, "Nucleotide Page", "Nucleotide Page for Aligned Data Formats");
      } else {
        SendHelpScrollMessage (helpForm, "Nucleotide Page", "Nucleotide Page for FASTA Data Format");
      }
      break;
    case SEQUENCING_METHOD_PAGE :
      SendHelpScrollMessage (helpForm, "Sequencing Method Page", "");
      break;
    case MRNA_PAGE :
      SendHelpScrollMessage (helpForm, "Organism and Sequences Form", "Transcripts Page");
      break;
    case PROTEIN_PAGE :
      SendHelpScrollMessage (helpForm, "Protein Page", NULL);
      break;
    case ANNOTATE_PAGE :
      SendHelpScrollMessage (helpForm, "Annotation Page", NULL);
      break;
    default :
      break;
  }
}
11033 
11034 static Boolean SequenceAssistantValidate (SeqEntryPtr seq_list);
FinalSequenceValidation(SequencesFormPtr sqfp)11035 static Boolean FinalSequenceValidation (SequencesFormPtr  sqfp)
11036 {
11037   FastaPagePtr fpp;
11038 
11039   if (sqfp == NULL)
11040   {
11041     return FALSE;
11042   }
11043   if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT || sqfp->seqPackage == SEQ_PKG_GAPPED)
11044   {
11045     /* we can't edit these, so there's no sense pestering the user...*/
11046     return TRUE;
11047   }
11048   fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
11049   if (fpp == NULL || fpp->list == NULL)
11050   {
11051     return FALSE;
11052   }
11053 
11054   return SequenceAssistantValidate (fpp->list);
11055 }
11056 
11057 
/*
 * NextSequencesFormBtn
 *
 * "Next" button callback for the sequences form.
 *
 * When leaving page 1 (the last page of the sequence tab group) the
 * routine first requires that sequences have been imported and that the
 * sequencing-method dialog validates; on failure it stays put (or quits
 * the wizard when the dialog data asks for that).  On success it swaps
 * the sequence tabs for the annotation tabs.
 *
 * It then advances one page within the appropriate tab group, or, on
 * the last page, checks organisms and sequences and hands control to
 * the form's goToNext callback.
 */
static void NextSequencesFormBtn (ButtoN b)

{
  SequencesFormPtr  sqfp;
  Boolean force_change = FALSE;
  SequencingMethodInfoPtr info;
  ValNodePtr seq_list;

  sqfp = (SequencesFormPtr) GetObjectExtra (b);
  if (sqfp != NULL) {
    if (sqfp->currentPage == 1) {
      /* validate before leaving sequence area */
      seq_list = GetSeqEntryFromSequencesForm (sqfp);
      if (seq_list == NULL) {
        Message (MSG_ERROR, "You must import sequences before continuing.");
        return;
      }
      info = DialogToPointer (sqfp->sequencing_method_dlg);
      if (!IsSequencingMethodInfoValid(info, CountSequencesAndSegments (seq_list, TRUE),
                                             sqfp->seqPackage == SEQ_PKG_TSA,
                                             0)) {
        /* info may be NULL if the dialog produced no data; do not
           dereference it in that case */
        if (info != NULL && info->quit_now) {
          QuitFromWizard (sqfp->form);
        }
        info = SequencingMethodInfoFree (info);
        return;
      }
      info = SequencingMethodInfoFree (info);

      /* now switch to other tab */
      Hide (sqfp->seq_tbs);
      Show (sqfp->annot_tbs);
      sqfp->show_annot = TRUE;
      force_change = TRUE;
    }

    if (sqfp->currentPage < 1) {
      SetValue (sqfp->seq_tbs, sqfp->currentPage + 1);
    } else if (sqfp->currentPage < sqfp->numPages - 1) {
      SetValue (sqfp->annot_tbs, sqfp->currentPage - 1);
      if (force_change) {
        /* explicit page change after swapping tab groups; with
           show_annot set, (0, -1) maps to pages 2 and 1 */
        ChangeSequencesPage (sqfp, 0, -1);
      }
    } else if (sqfp->goToNext != NULL) {
      if (!CheckSequencesForOrganisms (sqfp) || !FinalSequenceValidation (sqfp)) return;
      (sqfp->goToNext) (b);
    }
  }
}
11107 
/*
 * PrevSequencesFormBtn
 *
 * "Prev" button callback for the sequences form.  When on page 2 (the
 * first page of the annotation tab group) the annotation tabs are
 * swapped for the sequence tabs.  The routine then moves back one page
 * within the appropriate tab group, or, on page 0, hands control to the
 * form's goToPrev callback.
 */
static void PrevSequencesFormBtn (ButtoN b)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) GetObjectExtra (b);
  Boolean           leaving_annot;

  if (sqfp == NULL) return;

  leaving_annot = (Boolean) (sqfp->currentPage == 2);
  if (leaving_annot) {
    /* switch between tabs */
    Hide (sqfp->annot_tbs);
    Show (sqfp->seq_tbs);
    sqfp->show_annot = FALSE;
  }

  if (sqfp->currentPage > 2) {
    SetValue (sqfp->annot_tbs, sqfp->currentPage - 3);
  } else if (sqfp->currentPage > 0) {
    SetValue (sqfp->seq_tbs, sqfp->currentPage - 1);
    if (leaving_annot) {
      /* explicit page change after swapping tab groups */
      ChangeSequencesPage (sqfp, 1, 2);
    }
  } else if (sqfp->goToPrev != NULL) {
    (sqfp->goToPrev) (b);
  }
}
11135 
11136 static void SetModifierList (DoC doc, ValNodePtr mod_list);
11137 static void SeqEntryPtrToOrgDoc (SequencesFormPtr sqfp);
11138 
ClearOrganismModifiers(SequencesFormPtr sqfp)11139 static void ClearOrganismModifiers (SequencesFormPtr sqfp)
11140 {
11141   if (sqfp == NULL) return;
11142   Disable (sqfp->import_mod_btn);
11143   Disable (sqfp->source_assist_btn);
11144   Disable (sqfp->specify_orgs_btn);
11145   Disable (sqfp->specify_locs_btn);
11146   Disable (sqfp->specify_gcode_btn);
11147   Disable (sqfp->specify_mgcode_btn);
11148   Disable (sqfp->clear_mods_btn);
11149 
11150   SeqEntryPtrToOrgDoc (sqfp);
11151 }
11152 
11153 
/*
 * DeleteAllSequencesFromForm
 *
 * Discards the nucleotide sequences held by the form.  For FASTA format
 * the FASTA page is reset, its instructions are shown again and the
 * import/clear/molecule/topology/vecscreen buttons are put back into
 * their "no sequences" state; for alignment format the PHYLIP page is
 * reset and its instructions restored.  In both cases (and for any
 * other format) the organism modifier controls are cleared afterwards.
 */
static void DeleteAllSequencesFromForm (SequencesFormPtr sqfp)
{
  FastaPagePtr  fasta_page;
  PhylipPagePtr phylip_page;

  if (sqfp == NULL) return;

  switch (sqfp->seqFormat) {
    case SEQ_FMT_FASTA :
      fasta_page = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
      if (fasta_page == NULL) {
        break;
      }
      ResetFastaPage (fasta_page);
      fasta_page->path [0] = '\0';
      SafeHide (fasta_page->have_seq_instr_grp);
      Reset (fasta_page->doc);
      SafeShow (fasta_page->instructions);
      Update ();
      /* the import button title is left alone for genomic/cDNA packages */
      if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA) {
        SetTitle (fasta_page->import_btn, "Import Nucleotide FASTA");
      }
      Enable (fasta_page->import_btn);
      Disable (fasta_page->clear_btn);
      Disable (sqfp->molecule_btn);
      Disable (sqfp->topology_btn);
      Disable (sqfp->vecscreen_btn);
      break;
    case SEQ_FMT_ALIGNMENT :
      phylip_page = (PhylipPagePtr) GetObjectExtra (sqfp->dnaseq);
      if (phylip_page != NULL) {
        ResetPhylipPage (phylip_page);
        phylip_page->path [0] = '\0';
        SetPhylipDocInstructions (phylip_page);
      }
      break;
    default :
      break;
  }

  ClearOrganismModifiers (sqfp);
}
11191 
11192 
/*
 * SequencesFormDeleteProc
 *
 * Delete handler for the sequences form; the action depends on the page
 * currently shown:
 *   organism   - clears the currently visible text item;
 *   nucleotide - removes all nucleotide sequences from the form;
 *   mRNA       - (FASTA only) resets the transcript FASTA page;
 *   protein    - after the user confirms, resets the protein FASTA page
 *                (FASTA) or clears the currently visible text item.
 *
 * formDataPtr is the SequencesFormPtr; NULL is ignored.
 */
static void SequencesFormDeleteProc (Pointer formDataPtr)

{
  FastaPagePtr      fasta_page;
  SequencesFormPtr  sqfp = (SequencesFormPtr) formDataPtr;

  if (sqfp == NULL) return;

  switch (sqfp->tagFromPage [sqfp->currentPage]) {
    case ORGANISM_PAGE :
      ClearText (CurrentVisibleText ());
      break;
    case NUCLEOTIDE_PAGE :
      DeleteAllSequencesFromForm (sqfp);
      break;
    case MRNA_PAGE :
      if (sqfp->seqFormat != SEQ_FMT_FASTA) {
        break;
      }
      fasta_page = (FastaPagePtr) GetObjectExtra (sqfp->mrnaseq);
      if (fasta_page == NULL) {
        break;
      }
      ResetFastaPage (fasta_page);
      fasta_page->path [0] = '\0';
      SafeHide (fasta_page->have_seq_instr_grp);
      Reset (fasta_page->doc);
      SafeShow (fasta_page->instructions);
      Update ();
      break;
    case PROTEIN_PAGE :
      if (Message (MSG_YN, "Are you sure you want to remove all of the protein sequences?") != ANS_YES) {
        break;
      }
      if (sqfp->seqFormat != SEQ_FMT_FASTA) {
        ClearText (CurrentVisibleText ());
        break;
      }
      fasta_page = (FastaPagePtr) GetObjectExtra (sqfp->protseq);
      if (fasta_page == NULL) {
        break;
      }
      ResetFastaPage (fasta_page);
      fasta_page->path [0] = '\0';
      SafeHide (fasta_page->have_seq_instr_grp);
      Reset (fasta_page->doc);
      Disable (fasta_page->clear_btn);
      SafeShow (fasta_page->instructions);
      Update ();
      break;
    default :
      break;
  }
}
11245 
11246 static CharPtr  seqSegSeqFormTabs [] = {
11247   "Nucleotide", "Sequencing Method", NULL
11248 };
11249 
11250 static CharPtr  cdnaGenSeqFormTabs [] = {
11251   "Genomic",  "Sequencing Method", NULL
11252 };
11253 
11254 static CharPtr  popPhyMutSeqFormTabs [] = {
11255   "Nucleotide",  "Sequencing Method", NULL
11256 };
11257 
11258 static CharPtr  seqSegAnnotFormTabs [] = {
11259   "Organism", "Proteins", NULL
11260 };
11261 
11262 static CharPtr  cdnaGenAnnotFormTabs [] = {
11263   "Organism", "Transcripts", "Proteins", NULL
11264 };
11265 
11266 static CharPtr  popPhyMutAnnotFormTabs [] = {
11267   "Organism", "Proteins", "Annotation", NULL
11268 };
11269 
/*
 * PasteIntoDialog
 *
 * Pastes clipboard text into a sequence dialog by way of a temporary
 * file: the clipboard string is written to the file with every carriage
 * return replaced by a newline, the file is handed to ImportDialog, and
 * it is then removed.  Nothing happens when the clipboard holds no
 * string or the temporary file cannot be opened.
 */
static void PasteIntoDialog (DialoG seq)

{
  FILE     *out;
  Char     tmp_path [PATH_MAX];
  CharPtr  cursor;
  CharPtr  clip_text;

  if (! Nlm_ClipboardHasString ()) return;

  TmpNam (tmp_path);
  out = FileOpen (tmp_path, "w");
  if (out == NULL) return;

  clip_text = ClipboardToString ();
  if (clip_text != NULL) {
    for (cursor = clip_text; *cursor != '\0'; cursor++) {
      if (*cursor == '\r') {
        *cursor = '\n';
      }
    }
    FilePuts (clip_text, out);
    MemFree (clip_text);
  }
  FileClose (out);

  ImportDialog (seq, tmp_path);
  FileRemove (tmp_path);
}
11302 
/*
 * SequencesFormMessage
 *
 * Message dispatcher for the sequences form.  Import, export, cut, copy,
 * paste and delete messages are handled here; paste goes to the sequence
 * dialog of the current page (nucleotide, transcript or protein) and
 * falls back to the standard text paste on every other page.  Any other
 * message is forwarded to the form's appmessage callback when one is set.
 */
static void SequencesFormMessage (ForM f, Int2 mssg)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) GetObjectExtra (f);

  if (sqfp == NULL) return;

  switch (mssg) {
    case VIB_MSG_IMPORT :
      ImportSequencesForm (f, NULL);
      break;
    case VIB_MSG_EXPORT :
      ExportSequencesForm (f, NULL);
      break;
    case VIB_MSG_CUT :
      StdCutTextProc (NULL);
      break;
    case VIB_MSG_COPY :
      StdCopyTextProc (NULL);
      break;
    case VIB_MSG_PASTE :
      switch (sqfp->tagFromPage [sqfp->currentPage]) {
        case NUCLEOTIDE_PAGE :
          PasteIntoDialog (sqfp->dnaseq);
          break;
        case MRNA_PAGE :
          PasteIntoDialog (sqfp->mrnaseq);
          break;
        case PROTEIN_PAGE :
          PasteIntoDialog (sqfp->protseq);
          break;
        default :
          /* organism page and anything else: ordinary text paste */
          StdPasteTextProc (NULL);
          break;
      }
      break;
    case VIB_MSG_DELETE :
      SequencesFormDeleteProc (sqfp);
      break;
    default :
      if (sqfp->appmessage != NULL) {
        sqfp->appmessage (f, mssg);
      }
      break;
  }
}
11353 
/*
 * InitOrgNucProtFormActivate
 *
 * Window activation callback: invokes the form's own activate callback
 * (when set) and then refreshes the Import/Export menu items for the
 * current page.
 */
static void InitOrgNucProtFormActivate (WindoW w)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) GetObjectExtra (w);

  if (sqfp == NULL) return;

  if (sqfp->activate != NULL) {
    sqfp->activate (w);
  }
  SetOrgNucProtImportExportItems (sqfp);
}
11367 
/*
 * ChangeMrnaFlag
 *
 * Checkbox callback: copies the button state into the form's makeMRNA
 * flag and stores it as "TRUE"/"FALSE" under SEQUINCUSTOM / PREFERENCES /
 * CREATEMRNA via SetAppParam.
 */
static void ChangeMrnaFlag (ButtoN b)

{
  SequencesFormPtr  sqfp = (SequencesFormPtr) GetObjectExtra (b);

  if (sqfp == NULL) return;

  sqfp->makeMRNA = GetStatus (b);
  SetAppParam ("SEQUINCUSTOM", "PREFERENCES", "CREATEMRNA",
               sqfp->makeMRNA ? "TRUE" : "FALSE");
}
11383 
/*
 * CreateListMessage
 *
 * Builds a newly allocated message of the form
 *     <msg_before>[s] item1, item2, and item3<msg_after>
 * from the entries of id_list.
 *
 *   - msg_before, when not NULL, is followed by "s " if the list has
 *     more than one entry and by " " otherwise.
 *   - Each entry contributes its data.ptrvalue string, or, when that
 *     string has no text, the decimal value of its choice field.
 *   - Entries are separated by ", " when there are more than two and by
 *     " " otherwise; the last of several is preceded by "and ".
 *
 * Returns NULL when id_list is NULL or the allocation fails.
 * NOTE(review): presumably the caller frees the result - confirm.
 */
extern CharPtr CreateListMessage (CharPtr msg_before, CharPtr msg_after, ValNodePtr id_list)
{
  Int4       item_count;
  Int4       buf_len;
  ValNodePtr item;
  CharPtr    result;
  Char       num_buf [14];

  if (id_list == NULL) return NULL;

  item_count = ValNodeLen (id_list);

  /* surrounding text plus "s " and the terminator */
  buf_len = StringLen (msg_before) + StringLen (msg_after) + 3;
  for (item = id_list; item != NULL; item = item->next)
  {
    if (StringHasNoText (item->data.ptrvalue))
    {
      buf_len += PRINTED_INT_MAX_LEN;
    }
    else
    {
      /* the extra 5 leaves room for the separator / "and " */
      buf_len += StringLen (item->data.ptrvalue) + 5;
    }
  }

  result = (CharPtr) MemNew (buf_len * sizeof (Char));
  if (result == NULL) return NULL;

  result [0] = 0;
  if (msg_before != NULL)
  {
    StringCat (result, msg_before);
    StringCat (result, (num_buf [0] = 0, item_count > 1) ? "s " : " ");
  }

  for (item = id_list; item != NULL; item = item->next)
  {
    if (item->next == NULL && item_count > 1)
    {
      StringCat (result, "and ");
    }

    if (StringHasNoText (item->data.ptrvalue))
    {
      sprintf (num_buf, "%d", item->choice);
      StringCat (result, num_buf);
    }
    else
    {
      StringCat (result, item->data.ptrvalue);
    }

    if (item->next != NULL)
    {
      StringCat (result, (item_count > 2) ? ", " : " ");
    }
  }
  StringCat (result, msg_after);

  return result;
}
11453 
11454 
/*
 * ContinueWithErrorList
 *
 * Shows err_list in a modal window, with entries whose choice is
 * CREATE_FASTA_REQUIRED grouped under "Required" and entries whose
 * choice is CREATE_FASTA_WARNING grouped under "Warning".
 *
 *   - ask_for_continue FALSE: a single "OK" button is offered.
 *   - ask_for_continue TRUE and no required errors: the user is asked
 *     "Continue anyway?" with Yes / No buttons.
 *   - ask_for_continue TRUE with required errors: the user is told the
 *     errors must be resolved and gets a single "OK" button.
 *
 * Returns TRUE when err_list is NULL or the user pressed "Yes"; every
 * "OK" and "No" button is wired to the cancel handler, so those paths
 * return FALSE.
 */
static Boolean ContinueWithErrorList (ValNodePtr err_list, Boolean ask_for_continue)
{
  ValNodePtr            item;
  GrouP                 required_grp = NULL;
  GrouP                 warning_grp = NULL;
  GrouP                 outer_grp, list_grp, button_grp;
  PrompT                prompt = NULL;
  ButtoN                btn;
  WindoW                win;
  ModalAcceptCancelData acd;
  Boolean               has_required = FALSE;

  if (err_list == NULL) return TRUE;

  acd.accepted = FALSE;
  acd.cancelled = FALSE;

  win = ModalWindow(-20, -13, -10, -10, NULL);
  outer_grp = HiddenGroup (win, -1, 0, NULL);
  list_grp = HiddenGroup (outer_grp, 1, 0, NULL);

  /* create required list */
  item = err_list;
  while (item != NULL)
  {
    if (item->choice == CREATE_FASTA_REQUIRED)
    {
      has_required = TRUE;
      if (required_grp == NULL)
      {
        required_grp = NormalGroup (list_grp, 1, 0, "Required", systemFont, NULL);
      }
      MultiLinePrompt (required_grp, item->data.ptrvalue, 600, systemFont);
    }
    item = item->next;
  }

  /* create warning list */
  item = err_list;
  while (item != NULL)
  {
    if (item->choice == CREATE_FASTA_WARNING)
    {
      if (warning_grp == NULL)
      {
        warning_grp = NormalGroup (list_grp, 1, 0, "Warning", systemFont, NULL);
      }
      MultiLinePrompt (warning_grp, item->data.ptrvalue, 600, systemFont);
    }
    item = item->next;
  }

  if (ask_for_continue && ! has_required)
  {
    /* warnings only: let the user decide */
    prompt = StaticPrompt (outer_grp, "Continue anyway?",
                           0, dialogTextHeight, systemFont, 'c');
    button_grp = HiddenGroup (outer_grp, 2, 0, NULL);
    btn = PushButton (button_grp, "Yes", ModalAcceptButton);
    SetObjectExtra (btn, &acd, NULL);
    btn = PushButton (button_grp, "No", ModalCancelButton);
    SetObjectExtra (btn, &acd, NULL);
  }
  else
  {
    /* acknowledgement only; the explanatory prompt appears just when
       the caller asked about continuing */
    if (ask_for_continue)
    {
      prompt = StaticPrompt (outer_grp, "You must resolve the required errors before continuing.",
                             0, dialogTextHeight, systemFont, 'c');
    }
    button_grp = HiddenGroup (outer_grp, 1, 0, NULL);
    btn = PushButton (button_grp, "OK", ModalCancelButton);
    SetObjectExtra (btn, &acd, NULL);
  }

  if (ask_for_continue)
  {
    AlignObjects (ALIGN_CENTER, (HANDLE) list_grp, (HANDLE) prompt, (HANDLE) button_grp, NULL);
  }
  else
  {
    AlignObjects (ALIGN_CENTER, (HANDLE) list_grp, (HANDLE) button_grp, NULL);
  }

  Show(win);
  Select (win);
  /* run the event loop until one of the buttons records a decision */
  while (!acd.accepted && ! acd.cancelled)
  {
    ProcessExternalEvent ();
    Update ();
  }
  ProcessAnEvent ();
  Remove (win);

  return (Boolean) (acd.accepted ? TRUE : FALSE);
}
11556 
11557 static CharPtr valid_iupac_characters = "atgcbdhkmnrsuvwy";
11558 
SeqCharsOk(CharPtr seq_chars,Int4 seq_num,CharPtr local_id,ValNodePtr PNTR err_list)11559 static Boolean SeqCharsOk
11560 (CharPtr seq_chars,
11561  Int4 seq_num,
11562  CharPtr local_id,
11563  ValNodePtr PNTR err_list)
11564 {
11565   CharPtr cp;
11566   Char    ch;
11567   Boolean at_least_one = FALSE;
11568   Int4    len = StringLen (seq_chars);
11569   CharPtr badchars;
11570   CharPtr err_msg;
11571   CharPtr empty_fmt_d = "There are no sequence characters for sequence %d.  Please enter some.";
11572   CharPtr empty_fmt_s = "There are no sequence characters for sequence %s.  Please enter some.";
11573   CharPtr bad_char_fmt_d = "There were %d illegal characters were found in sequence %d: %s."
11574   	         "  Repeated characters are listed only once. "
11575   	         "  You may only have IUPAC characters in your sequence ";
11576   CharPtr bad_char_fmt_s = "There were %d illegal characters were found in sequence %s: %s."
11577   	         "  Repeated characters are listed only once. "
11578   	         "  You may only have IUPAC characters in your sequence ";
11579 
11580   if (StringHasNoText (seq_chars))
11581   {
11582     err_msg = (CharPtr) MemNew (sizeof (Char) *
11583                 (StringLen (empty_fmt_d) + PRINTED_INT_MAX_LEN + StringLen (local_id)));
11584     if (err_msg != NULL)
11585     {
11586       if (StringHasNoText (local_id))
11587       {
11588         sprintf (err_msg, empty_fmt_d, seq_num);
11589       }
11590       else
11591       {
11592         sprintf (err_msg, empty_fmt_s, local_id);
11593       }
11594       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED, err_msg);
11595     }
11596   	return FALSE;
11597   }
11598 
11599   badchars = (CharPtr) MemNew (sizeof (Char) * (len + 1));
11600   if (badchars == NULL) return FALSE;
11601   badchars[0] = 0;
11602   len = 0;
11603   for (cp = seq_chars; *cp != 0; cp++)
11604   {
11605     ch = TO_LOWER (*cp);
11606   	if (isspace ((Int4)ch))
11607   	{
11608   	  /* space allowed */
11609   	}
11610   	else if (StringChr (valid_iupac_characters, ch) == NULL)
11611   	{
11612   	  if (StringChr (badchars, *cp) == NULL)
11613   	  {
11614   	  	badchars [len] = ch;
11615   	  	len++;
11616   	  	badchars [len] = 0;
11617   	  }
11618   	}
11619   	else
11620   	{
11621   	  at_least_one = TRUE;
11622   	}
11623   }
11624   if (len > 0)
11625   {
11626     err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_char_fmt_d)
11627                + (2 * PRINTED_INT_MAX_LEN) + StringLen (local_id) + len
11628                + StringLen (valid_iupac_characters)));
11629     if (err_msg != NULL)
11630     {
11631       if (StringHasNoText (local_id))
11632       {
11633         sprintf (err_msg, bad_char_fmt_d, len, seq_num, badchars, valid_iupac_characters);
11634       }
11635       else
11636       {
11637         sprintf (err_msg, bad_char_fmt_s, len, local_id, badchars, valid_iupac_characters);
11638       }
11639       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED, err_msg);
11640     }
11641   	return FALSE;
11642   }
11643   if (!at_least_one)
11644   {
11645     err_msg = (CharPtr) MemNew (sizeof (Char) *
11646                 (StringLen (empty_fmt_d) + PRINTED_INT_MAX_LEN + StringLen (local_id)));
11647     if (err_msg != NULL)
11648     {
11649       if (StringHasNoText (local_id))
11650       {
11651         sprintf (err_msg, empty_fmt_d, seq_num);
11652       }
11653       else
11654       {
11655         sprintf (err_msg, empty_fmt_s, local_id);
11656       }
11657       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED, err_msg);
11658     }
11659   	return FALSE;
11660   }
11661 
11662   return TRUE;
11663 }
11664 
IsSequenceAllNs(CharPtr seq_str)11665 static Boolean IsSequenceAllNs (CharPtr seq_str)
11666 {
11667   CharPtr cp;
11668 
11669   if (StringHasNoText (seq_str)) return FALSE;
11670 
11671   for (cp = seq_str; *cp != 0; cp++)
11672   {
11673     if (isalpha ((Int4)(*cp)) && *cp != 'n' && *cp != 'N')
11674     {
11675       return FALSE;
11676     }
11677   }
11678   return TRUE;
11679 }
11680 
IsSequenceAllOneCharacter(CharPtr seq_str)11681 static Boolean IsSequenceAllOneCharacter (CharPtr seq_str)
11682 {
11683   CharPtr cp;
11684   Char    first_char = 0;
11685 
11686   if (StringHasNoText (seq_str)) return FALSE;
11687 
11688   for (cp = seq_str; *cp != 0; cp++)
11689   {
11690     if (isalpha ((Int4)(*cp)))
11691     {
11692       if (first_char == 0)
11693       {
11694         first_char = *cp;
11695       }
11696       else if (*cp != first_char)
11697       {
11698         return FALSE;
11699       }
11700     }
11701   }
11702   return TRUE;
11703 
11704 }
11705 
CountSeqChars(CharPtr seq_str)11706 static Int4 CountSeqChars (CharPtr seq_str)
11707 {
11708   CharPtr cp;
11709   Int4    num_chars = 0;
11710 
11711   if (StringHasNoText (seq_str)) return 0;
11712   for (cp = seq_str; *cp != 0; cp++)
11713   {
11714     if (isalpha ((Int4)(*cp)))
11715     {
11716       num_chars++;
11717     }
11718   }
11719   return num_chars;
11720 }
11721 
/* ReformatLocalId
 * Produces a cleaned-up copy of local_id: leading '>' characters and the
 * whitespace that follows them are dropped, and every remaining whitespace
 * character is replaced with '_'.
 *
 * local_id is freed and the new string is returned, so callers should
 * replace their pointer with the return value.  If the copy cannot be
 * allocated, local_id is returned untouched (and is not freed).
 * Returns NULL when local_id is NULL.
 */
static CharPtr ReformatLocalId (CharPtr local_id)
{
  CharPtr cp, new_local_id;

  if (local_id == NULL) return NULL;

  cp = local_id;
  while (*cp == '>')
  {
    cp ++;
  }
  /* <ctype.h> classifiers need an unsigned char value */
  while (isspace ((unsigned char) *cp))
  {
    cp++;
  }
  new_local_id = StringSave (cp);
  if (new_local_id == NULL)
  {
    /* keep the caller's string rather than dereferencing NULL below */
    return local_id;
  }
  for (cp = new_local_id; *cp != 0; cp++)
  {
    if (isspace ((unsigned char) *cp))
    {
      *cp = '_';
    }
  }
  MemFree (local_id);
  return new_local_id;
}
11750 
/* FindPreviousWhitespace
 * Scans backwards from str_end towards str_start and returns a pointer to
 * the first whitespace character encountered.  If none is found, str_start
 * itself is returned (whether or not it is whitespace).
 * Returns NULL if either pointer is NULL or str_end precedes str_start.
 */
static CharPtr FindPreviousWhitespace (CharPtr str_start, CharPtr str_end)
{
  CharPtr cp;
  if (str_start == NULL || str_end == NULL || str_end < str_start)
  {
    return NULL;
  }

  cp = str_end;
  /* <ctype.h> classifiers need an unsigned char value */
  while (cp > str_start && !isspace ((unsigned char) *cp))
  {
    cp--;
  }
  return cp;
}
11766 
/* GetModNameStartFromEqLoc
 * Walks backwards from an equals sign to find where the text immediately
 * preceding it (the modifier name) begins.
 *
 * eq_loc      - position of the equals sign
 * prev_eq_loc - lower bound for the backwards search; the search never
 *               moves before this position
 *
 * Returns NULL if either argument has no text or eq_loc precedes
 * prev_eq_loc.  Returns prev_eq_loc when only whitespace lies between
 * prev_eq_loc and the equals sign.  Otherwise returns a pointer into the
 * same string as the arguments.
 */
static CharPtr GetModNameStartFromEqLoc (CharPtr eq_loc, CharPtr prev_eq_loc)
{
  CharPtr cp, prev_quote;
  Char    match_quote;

  if (StringHasNoText (eq_loc) || StringHasNoText (prev_eq_loc) || eq_loc < prev_eq_loc)
  {
    return NULL;
  }

  cp = eq_loc - 1;
  /* skip over spaces between equals sign and modifier name */
  while (cp > prev_eq_loc && isspace (*cp))
  {
    cp--;
  }
  if (cp != prev_eq_loc)
  {
    /* now backtrack over name */
    /* a double quote not preceded by a backslash ends a quoted name */
    if (*cp == '"' && *(cp - 1) != '\\')
    {
      /* NOTE(review): match_quote is stored here but never read in this function */
      match_quote = *cp;
      /* take everything to previous matching quote */
      cp = FindPreviousUnescapedQuote (prev_eq_loc, cp - 1);
      if (cp == NULL)
      {
        /* no earlier quote reported: fall back to the lower bound */
        cp = prev_eq_loc;
      }
      if (isspace (*cp))
      {
        cp++;
      }
    }
    else
    {
      /* take everything up to the first space or quote character */

      prev_quote = FindPreviousUnescapedQuote (prev_eq_loc, cp);
      cp = FindPreviousWhitespace (prev_eq_loc, cp);
      /* use whichever boundary lies closer to the name */
      if (prev_quote != NULL && prev_quote > cp)
      {
        cp = prev_quote + 1;
      }

      /* step past the boundary character itself so cp lands on the name */
      if (isspace (*cp) || *cp == '"')
      {
        cp++;
      }
    }
  }
  return cp;
}
11819 
11820 static CharPtr fake_modifier_name = "modifier_name";
11821 static Int4    len_fake_modifier_name = 13;
11822 
InsertMissingModifierNames(CharPtr str)11823 static CharPtr InsertMissingModifierNames (CharPtr str)
11824 {
11825   CharPtr start_bracket, next_start_bracket, end_bracket, eq_loc;
11826   CharPtr new_str, tmp_new;
11827   Int4    offset;
11828 
11829   if (str == NULL) return NULL;
11830 
11831   new_str = StringSave (str);
11832   start_bracket = StringChr (new_str, '[');
11833   while (start_bracket != NULL)
11834   {
11835     next_start_bracket = StringChr (start_bracket + 1, '[');
11836     eq_loc = StringChr (start_bracket, '=');
11837     end_bracket = StringChr (start_bracket, ']');
11838 
11839     if (eq_loc == NULL || end_bracket == NULL
11840         || eq_loc > end_bracket
11841         || (next_start_bracket != NULL && end_bracket > next_start_bracket))
11842     {
11843       /* can't fix this pair, move along */
11844     }
11845     else if (StringSpn (start_bracket + 1, " \t") == eq_loc - start_bracket - 1)
11846     {
11847       offset = start_bracket - new_str;
11848       tmp_new = InsertStringAtOffset (new_str, fake_modifier_name, offset + 1);
11849       if (tmp_new != NULL)
11850       {
11851         new_str = MemFree (new_str);
11852         new_str = tmp_new;
11853       }
11854       start_bracket = new_str + offset;
11855       next_start_bracket = StringChr (start_bracket + 1, '[');
11856     }
11857     start_bracket = next_start_bracket;
11858   }
11859   return new_str;
11860 }
11861 
/* SuggestCorrectBracketing
 * Builds a corrected version of a string that is expected to contain
 * "[name=value]" pairs.  Working on a private copy, it repeatedly looks at
 * the next tokens reported by NextBracketToken and
 *   - skips over well-formed '[' '=' ']' triples,
 *   - removes doubled tokens separated only by spaces/tabs,
 *   - inserts a missing '[' before a name, or a missing ']' after a value,
 *   - removes stray brackets and empty bracket pairs,
 *   - turns "[name value]" into "[name=value]",
 * and finally fills in empty modifier names via InsertMissingModifierNames.
 *
 * str - text to repair; it is not modified
 *
 * Returns a newly allocated string, or NULL when str is NULL or the working
 * copy cannot be allocated.  If a later insertion fails, the repairs made
 * so far are returned.
 */
static CharPtr SuggestCorrectBracketing (CharPtr str)
{
  CharPtr cp, next_token;
  CharPtr new_str, tmp_new;
  Int4    offset, name_len, token_offset;
  CharPtr step_back, step_forward, next_next_token, next_next_next_token;
  Char    insert_buf [2];
  Boolean insert_failed = FALSE;

  if (str == NULL) return NULL;

  new_str = StringSave (str);
  if (new_str == NULL) return NULL;
  cp = new_str;
  next_token = NextBracketToken (cp);

  while (!insert_failed && *cp != 0 && next_token != NULL)
  {
    if (*next_token == '"')
    {
      /* a quote was handed back as a token (presumably an unmatched one -
       * TODO confirm against NextBracketToken): append a closing quote
       * at the end of the string and stop
       */
      insert_buf [0] = *next_token;
      insert_buf [1] = 0;
      tmp_new = InsertStringAtOffset (new_str, insert_buf, StringLen (new_str));
      if (tmp_new != NULL)
      {
        new_str = MemFree (new_str);
        new_str = tmp_new;
      }
      return new_str;
    }
    next_next_token = NextBracketToken (next_token + 1);
    if (next_next_token == NULL)
    {
      next_next_next_token = NULL;
    }
    else
    {
      next_next_next_token = NextBracketToken (next_next_token + 1);
    }

    /* skip over correctly formatted bits */
    if (*next_token == '['
        && next_next_token != NULL
        && *next_next_token == '='
        && next_next_next_token != NULL
        && *next_next_next_token == ']')
    {
      cp = next_next_next_token + 1;
    }
    /* remove repeated tokens ([[, ]], or ==) in first pair*/
    else if (next_next_token != NULL
             && *next_token == *next_next_token
             && next_next_token - next_token - 1== StringSpn (next_token + 1, " \t"))
    {
      ShiftString (next_token, next_next_token - next_token);
    }
    /* remove repeated tokens ([[, ]], or ==) in second pair*/
    else if (next_next_token != NULL
             && next_next_next_token != NULL
             && *next_next_token == *next_next_next_token
             && next_next_next_token - next_next_token - 1== StringSpn (next_next_token + 1, " \t"))
    {
      ShiftString (next_next_token, next_next_next_token - next_next_token);
    }
    /* no start - either remove end token or insert start */
    else
    {
      switch (*next_token)
      {
        case '=' :
          /* insert start before equals token */
          if (next_token == cp)
          {
            offset = cp - new_str;
          }
          else
          {
            step_back = GetModNameStartFromEqLoc (next_token, cp);
            offset = step_back - new_str;
          }
          tmp_new = InsertStringAtOffset (new_str, "[", offset);
          if (tmp_new == NULL)
          {
            /* nothing was changed, so retrying would never make progress */
            insert_failed = TRUE;
          }
          else
          {
            new_str = MemFree (new_str);
            new_str = tmp_new;
            cp = new_str + offset;
          }
          break;
        case ']' :
          /* remove lonely end bracket */
          ShiftString (next_token, 1);
          cp = next_token;
          break;
        case '[' :
          next_next_token = NextBracketToken (next_token + 1);
          if (next_next_token == NULL
              || *next_next_token == '[')
          {
            /* remove unwanted beginning bracket */
            ShiftString (next_token, 1);
            cp = next_token;
          }
          else if (*next_next_token == '=')
          {
            /* find the best place to put a closing bracket */
            next_next_next_token = NextBracketToken (next_next_token + 1);
            if (next_next_next_token == NULL)
            {
              offset = StringLen (new_str);
            }
            else
            {
              token_offset = next_next_token - new_str + 1;
              offset = next_next_next_token - new_str;
              /* back up over whitespace in front of the following token */
              while (offset > token_offset
                     && isspace ((unsigned char) new_str[offset - 1]))
              {
                offset--;
              }
              if (*next_next_next_token == '=')
              {
                step_back = GetModNameStartFromEqLoc (next_next_next_token, next_next_token);
                if (step_back == next_next_token) {
                  /* no name before second equals sign, put bracket after value */
                } else {
                  while (step_back > next_next_token + 1
                        && isspace ((unsigned char) *(step_back - 1)))
                  {
                    step_back --;
                  }
                  offset = step_back - new_str;
                }
              }
            }
            tmp_new = InsertStringAtOffset (new_str, "]", offset);
            if (tmp_new == NULL)
            {
              /* nothing was changed, so retrying would never make progress */
              insert_failed = TRUE;
            }
            else
            {
              new_str = MemFree (new_str);
              new_str = tmp_new;
              cp = new_str + offset + 1;
            }
          }
          else if (*next_next_token == ']')
          {
            /* see if we can insert an equals sign */
            /* skip over empty space after '[', if any */
            step_forward = next_token + 1 + StringSpn (next_token + 1, " \t");
            if (step_forward == next_next_token)
            {
              /* eliminate the empty bracket pair */
              ShiftString (next_token, next_next_token - next_token + 1);
              cp = next_token;
            }
            else
            {
              /* get length of first text token */
              name_len = StringCSpn (step_forward, " \t");
              if (next_next_token - step_forward < name_len)
              {
                name_len = next_next_token - step_forward;
              }
              if (step_forward + name_len < next_next_token
                  && isspace ((unsigned char) *(step_forward + name_len)))
              {
                /* "[name value]": turn the separating space into '=' */
                *(step_forward + name_len) = '=';
                cp = next_next_token + 1;
              }
              else if (StringNICmp (step_forward, "DNA", name_len) == 0
                       || StringNICmp (step_forward, "RNA", name_len) == 0
                       || StringNICmp (step_forward, "orf", name_len) == 0)
              {
                /* these bracketed words are accepted without a value */
                cp = next_next_token + 1;
              }
              else
              {
                /* drop the brackets around a single word, keeping the
                 * word separated from its neighbours
                 */
                if ((next_token > new_str && isspace ((unsigned char) *(next_token - 1)))
                    || isspace ((unsigned char) *(next_token + 1)))
                {
                  ShiftString (next_token, 1);
                  next_next_token --;
                }
                else
                {
                  *next_token = ' ';
                }
                if (isspace ((unsigned char) *(next_next_token - 1))
                    || isspace ((unsigned char) *(next_next_token + 1)))
                {
                  ShiftString (next_next_token, 1);
                }
                else
                {
                  *next_next_token = ' ';
                }
                cp = next_next_token;
              }
            }
          }
          else
          {
            /* any other following token: move on to it so the scan
             * keeps advancing instead of revisiting the same '['
             */
            cp = next_next_token;
          }
          break;
        default :
          /* unrecognized token: step past it so the loop terminates */
          cp = next_token + 1;
          break;
      }
    }

    next_token = NextBracketToken (cp);
  }

  tmp_new = InsertMissingModifierNames (new_str);
  if (tmp_new != NULL)
  {
    new_str = MemFree (new_str);
    new_str = tmp_new;
  }

  return new_str;
}
12068 
/* ClearSequencesButton
 * Button callback: fetches the SequencesForm stored as the button's extra
 * object data and, if there is one, hands it to SequencesFormDeleteProc.
 */
static void ClearSequencesButton (ButtoN b)
{
  SequencesFormPtr form = (SequencesFormPtr) GetObjectExtra (b);

  if (form != NULL)
  {
    SequencesFormDeleteProc (form);
  }
}
12077 
12078 static void
SetModifierList(DoC doc,ValNodePtr mod_list)12079 SetModifierList
12080 (DoC doc,
12081  ValNodePtr mod_list)
12082 {
12083   ValNodePtr vnp;
12084   Int4       num_modifiers = 0;
12085   Int4       text_len = 0;
12086   CharPtr    text;
12087   CharPtr    text_fmt = "Already have values for:\n";
12088 
12089   if (doc == NULL)
12090   {
12091     return;
12092   }
12093   Reset (doc);
12094   if (mod_list == NULL)
12095   {
12096     text = StringSave ("No modifiers are present.");
12097   }
12098   else
12099   {
12100     text_len = StringLen (text_fmt) + 1;
12101     for (vnp = mod_list; vnp != NULL; vnp = vnp->next)
12102     {
12103       num_modifiers ++;
12104       text_len += StringLen (vnp->data.ptrvalue);
12105     }
12106     text_len += num_modifiers * 6;
12107     text = (CharPtr) MemNew (text_len * sizeof (Char));
12108     if (text != NULL)
12109     {
12110       StringCpy (text, text_fmt);
12111       for (vnp = mod_list; vnp != NULL; vnp = vnp->next)
12112       {
12113         if (vnp->next == NULL && num_modifiers > 1)
12114         {
12115           StringCat (text, "and ");
12116         }
12117       	StringCat (text, vnp->data.ptrvalue);
12118       	if (vnp->next != NULL)
12119       	{
12120       	  if (num_modifiers > 2)
12121       	  {
12122       	  	StringCat (text, ", ");
12123       	  }
12124       	  else
12125       	  {
12126       	  	StringCat (text, " ");
12127       	  }
12128       	}
12129       }
12130     }
12131   }
12132   AppendText (doc, text, NULL, NULL, programFont);
12133   InvalDocRows (doc, 0, 0, 0);
12134   MemFree (text);
12135 }
12136 
12137 
12138 
GetValForEnumName(EnumFieldAssocPtr eap,CharPtr mod_value)12139 static Int4 GetValForEnumName (EnumFieldAssocPtr eap, CharPtr mod_value)
12140 {
12141   if (StringHasNoText (mod_value) || eap == NULL)
12142   {
12143     return 0;
12144   }
12145   while (eap != NULL && eap->name != NULL)
12146   {
12147     if (StringICmp (eap->name, mod_value) == 0)
12148     {
12149       return eap->value;
12150     }
12151     eap++;
12152   }
12153   return 0;
12154 }
12155 
12156 static CharPtr
TagListStringFromDefLineValue(CharPtr defline_val,Boolean is_nontext,Int2 mod_type)12157 TagListStringFromDefLineValue
12158 (CharPtr defline_val,
12159  Boolean is_nontext,
12160  Int2    mod_type)
12161 {
12162   CharPtr taglist_str = NULL;
12163   Char        text [128];
12164 
12165   if (is_nontext)
12166   {
12167     if (StringHasNoText (defline_val))
12168     {
12169       taglist_str = StringSave ("0");
12170     }
12171     else
12172     {
12173       taglist_str = StringSave ("1");
12174     }
12175   }
12176   else if (mod_type == eModifierType_Organism)
12177   {
12178     taglist_str = StringSave (defline_val);
12179   }
12180   else if (mod_type == eModifierType_Location)
12181   {
12182     if (StringHasNoText (defline_val))
12183     {
12184       taglist_str = StringSave ("1");
12185     }
12186     else
12187     {
12188       sprintf (text, "%d", GetValForEnumName (biosource_genome_simple_alist,
12189                                               defline_val));
12190       taglist_str = StringSave (text);
12191     }
12192   }
12193   else if (mod_type == eModifierType_Origin)
12194   {
12195     if (StringHasNoText (defline_val))
12196     {
12197       taglist_str = StringSave ("1");
12198     }
12199     else
12200     {
12201       sprintf (text, "%d", GetValForEnumName (biosource_origin_alist,
12202                                               defline_val));
12203       taglist_str = StringSave (text);
12204     }
12205   }
12206   else if (mod_type == eModifierType_NucGeneticCode
12207            || mod_type == eModifierType_MitoGeneticCode)
12208   {
12209     if (StringHasNoText (defline_val))
12210     {
12211       taglist_str = StringSave ("0");
12212     }
12213     else
12214     {
12215       sprintf (text, "%d", GeneticCodeFromString (defline_val));
12216       taglist_str = StringSave (text);
12217     }
12218   }
12219   else if (mod_type == eModifierType_MolType)
12220   {
12221     if (StringHasNoText (defline_val))
12222     {
12223       taglist_str = StringSave ("253");
12224     }
12225     else
12226     {
12227       sprintf (text, "%d", MolTypeFromString (defline_val));
12228       taglist_str = StringSave (text);
12229     }
12230   }
12231   else if (mod_type == eModifierType_Molecule)
12232   {
12233     if (StringICmp (defline_val, "dna") == 0)
12234     {
12235       sprintf (text, "%d", Seq_mol_dna);
12236       taglist_str = StringSave (text);
12237     }
12238     else if (StringICmp (defline_val, "rna") == 0)
12239     {
12240       sprintf (text, "%d", Seq_mol_rna);
12241       taglist_str = StringSave (text);
12242     }
12243     else
12244     {
12245       sprintf (text, "%d", Seq_mol_dna);
12246       taglist_str = StringSave (text);
12247     }
12248   }
12249   else if (mod_type == eModifierType_Topology)
12250   {
12251     if (StringHasNoText (defline_val))
12252     {
12253       sprintf (text, "%d", TopologyFromString (""));
12254     }
12255     else
12256     {
12257       sprintf (text, "%d", TopologyFromString (defline_val));
12258     }
12259     taglist_str = StringSave (text);
12260   }
12261   else
12262   {
12263     if (StringHasNoText (defline_val))
12264     {
12265       taglist_str = StringSave (" ");
12266     }
12267     else
12268     {
12269       taglist_str = StringSave (defline_val);
12270     }
12271   }
12272   return taglist_str;
12273 }
12274 
/* AddSeqIDAndValueToTagList
 * Appends one row of the form "<id>\t<value>\n" to the list *head, where
 * <value> is the tag-list representation of modifier mod_name as found in
 * the definition line title.
 *
 * id       - sequence identifier placed in the first column
 * title    - definition line searched for the modifier
 * mod_name - name of the modifier
 * head     - list receiving the new row; nothing happens when it is NULL
 */
static void AddSeqIDAndValueToTagList
(CharPtr id,
 CharPtr title,
 CharPtr mod_name,
 ValNodePtr PNTR   head)
{
  Boolean     nontext;
  Int4        type_code;
  Int4        row_len;
  CharPtr     raw_value;
  CharPtr     cell_value = NULL;
  CharPtr     row;

  if (head == NULL)
  {
    return;
  }

  nontext = IsNonTextModifier (mod_name);
  type_code = GetModifierType (mod_name);

  if (nontext)
  {
    /* for value-less modifiers only presence matters: "2" marks present */
    if (FindValuePairInDefLine (mod_name, title, NULL))
    {
      raw_value = StringSave ("2");
    }
    else
    {
      raw_value = StringSave ("");
    }
  }
  else
  {
    raw_value = FindValueFromPairInDefline (mod_name, title);
  }

  cell_value = TagListStringFromDefLineValue (raw_value, nontext, type_code);
  raw_value = MemFree (raw_value);

  row_len = StringLen (id) + StringLen (cell_value);
  row = MemNew (row_len + 4);
  if (row != NULL) {
    StringCpy (row, id);
    StringCat (row, "\t");
    StringCat (row, cell_value);
    StringCat (row, "\n");
  }

  cell_value = MemFree (cell_value);
  ValNodeAddPointer (head, 0, row);

}
12330 
GetValueFromTitle(CharPtr mod_name,CharPtr title)12331 NLM_EXTERN CharPtr GetValueFromTitle (CharPtr mod_name, CharPtr title)
12332 {
12333   Int4        mod_type;
12334   Boolean     is_nontext;
12335   CharPtr     valstr;
12336 
12337   if (StringHasNoText (mod_name) || StringHasNoText (title))
12338   {
12339     return NULL;
12340   }
12341   mod_type = GetModifierType (mod_name);
12342   is_nontext = IsNonTextModifier (mod_name);
12343 
12344   if (mod_type == eModifierType_Organism)
12345   {
12346     valstr = FindValueFromPairInDefline ("organism", title);
12347     if (StringHasNoText (valstr))
12348     {
12349       valstr = MemFree (valstr);
12350       valstr = StringSave (" ");
12351     }
12352   }
12353   else if (mod_type == eModifierType_Location)
12354   {
12355     valstr = NULL;
12356     if (FindValuePairInDefLine ("location", title, NULL) != NULL)
12357     {
12358       valstr = FindValueFromPairInDefline ("location", title);
12359     }
12360     else
12361     {
12362       valstr = StringSave ("genomic");
12363     }
12364   }
12365   else if (IsNonTextModifier (mod_name))
12366   {
12367     if (FindValuePairInDefLine (mod_name, title, NULL) != NULL)
12368     {
12369       valstr = StringSave ("TRUE");
12370     }
12371     else
12372     {
12373       valstr = StringSave ("FALSE");
12374     }
12375   }
12376   else
12377   {
12378     valstr = FindValueFromPairInDefline (mod_name, title);
12379     if (StringHasNoText (valstr))
12380     {
12381       valstr = MemFree (valstr);
12382       valstr = StringSave (" ");
12383     }
12384   }
12385   return valstr;
12386 }
12387 
12388 /* This function returns a string suitable for display in a table
12389  * using the values from the specified modifier name found in the
12390  * specified table.
12391  * non-text modifiers are displayed as either TRUE or FALSE,
12392  * multiple values are listed in parentheses with semicolons between them.
12393  */
GetDisplayValue(CharPtr mod_name,CharPtr title,BoolPtr multi_found)12394 static CharPtr GetDisplayValue (CharPtr mod_name, CharPtr title, BoolPtr multi_found)
12395 {
12396   CharPtr begin_bracket, end_bracket;
12397   CharPtr mod_value = NULL, tmp_value;
12398   Int4    mod_value_len = 0;
12399   ValNodePtr val_list = NULL, vnp;
12400   Boolean allow_multi;
12401 
12402   allow_multi = AllowMultipleValues (mod_name);
12403   if (allow_multi)
12404   {
12405     begin_bracket = FindValuePairInDefLine (mod_name, title, &end_bracket);
12406     while (begin_bracket != NULL)
12407     {
12408       tmp_value = GetValueFromTitle (mod_name, begin_bracket);
12409       if (!StringHasNoText (tmp_value))
12410       {
12411         mod_value_len += StringLen (tmp_value) + 1;
12412         ValNodeAddPointer (&val_list, 0, tmp_value);
12413       }
12414       else
12415       {
12416         tmp_value = MemFree (tmp_value);
12417       }
12418       begin_bracket = FindValuePairInDefLine (mod_name, end_bracket + 1, &end_bracket);
12419     }
12420     if (val_list == NULL)
12421     {
12422       mod_value = StringSave (" ");
12423     }
12424     else
12425     {
12426       if (val_list->next == NULL)
12427       {
12428         mod_value = val_list->data.ptrvalue;
12429         val_list->data.ptrvalue = NULL;
12430       }
12431       else
12432       {
12433         mod_value = (CharPtr) MemNew ((mod_value_len + 3) * sizeof (Char));
12434         if (mod_value != NULL)
12435         {
12436           mod_value [0] = '(';
12437           for (vnp = val_list; vnp != NULL; vnp = vnp->next)
12438           {
12439             StringCat (mod_value, vnp->data.ptrvalue);
12440             if (vnp->next == NULL)
12441             {
12442               StringCat (mod_value, ")");
12443             }
12444             else
12445             {
12446               StringCat (mod_value, ",");
12447             }
12448           }
12449           if (multi_found != NULL)
12450           {
12451             *multi_found = TRUE;
12452           }
12453         }
12454       }
12455       val_list = ValNodeFree (val_list);
12456     }
12457   }
12458   else
12459   {
12460     mod_value = GetValueFromTitle (mod_name, title);
12461   }
12462   return mod_value;
12463 }
12464 
12465 /* This function returns a string suitable for display in a table
12466  * using the values from the specified modifier name found in the
12467  * specified table.
12468  * non-text modifiers are displayed as either TRUE or FALSE,
12469  * multiple values are listed in parentheses with semicolons between them.
12470  */
GetDisplayValueFromModifierInfoList(CharPtr mod_name,ValNodePtr modifier_info_list,BoolPtr multi_found)12471 static CharPtr GetDisplayValueFromModifierInfoList (CharPtr mod_name, ValNodePtr modifier_info_list, BoolPtr multi_found)
12472 {
12473   CharPtr mod_value = NULL;
12474   Int4    mod_value_len = 0;
12475   ValNodePtr val_list = NULL, vnp;
12476   Boolean allow_multi;
12477   ModifierInfoPtr mip;
12478   Uint1           subtype = 0;
12479 
12480   allow_multi = AllowMultipleValues (mod_name);
12481 
12482   subtype = FindTypeForModNameText (mod_name);
12483 
12484   for (vnp = modifier_info_list; vnp != NULL; vnp = vnp->next) {
12485     mip = (ModifierInfoPtr) vnp->data.ptrvalue;
12486     if (mip != NULL
12487         && !StringHasNoText (mip->value)
12488         && ((mip->subtype != 0 && subtype == mip->subtype)
12489             || StringsAreEquivalent (mod_name, mip->name))) {
12490       ValNodeAddPointer (&val_list, 0, mip->value);
12491       mod_value_len += StringLen (mip->value) + 1;
12492       if (!allow_multi) {
12493         break;
12494       }
12495     }
12496   }
12497 
12498   if (val_list == NULL)
12499   {
12500     mod_value = StringSave (" ");
12501   }
12502   else
12503   {
12504     if (val_list->next == NULL)
12505     {
12506       mod_value = StringSave (val_list->data.ptrvalue);
12507     }
12508     else
12509     {
12510       mod_value = (CharPtr) MemNew ((mod_value_len + 3) * sizeof (Char));
12511       if (mod_value != NULL)
12512       {
12513         mod_value [0] = '(';
12514         for (vnp = val_list; vnp != NULL; vnp = vnp->next)
12515         {
12516           StringCat (mod_value, vnp->data.ptrvalue);
12517           if (vnp->next == NULL)
12518           {
12519             StringCat (mod_value, ")");
12520           }
12521           else
12522           {
12523             StringCat (mod_value, ",");
12524           }
12525         }
12526         if (multi_found != NULL)
12527         {
12528           *multi_found = TRUE;
12529         }
12530       }
12531     }
12532     val_list = ValNodeFree (val_list);
12533   }
12534   return mod_value;
12535 }
12536 
IntValueInValNodeList(Int4 ival,ValNodePtr vnp)12537 static Boolean IntValueInValNodeList (Int4 ival, ValNodePtr vnp)
12538 {
12539   Boolean found_int = FALSE;
12540 
12541   while (vnp != NULL && !found_int)
12542   {
12543     if (vnp->data.intvalue == ival)
12544     {
12545       found_int = TRUE;
12546     }
12547     vnp = vnp->next;
12548   }
12549 
12550   return found_int;
12551 }
12552 
12553 static Boolean
DoColumnlistsHaveIdenticalSourceInformation(ValNodePtr col1,ValNodePtr col2,ValNodePtr header)12554 DoColumnlistsHaveIdenticalSourceInformation
12555 (ValNodePtr col1,
12556  ValNodePtr col2,
12557  ValNodePtr header)
12558 {
12559   Boolean are_identical = TRUE;
12560   Int4    mod_type;
12561 
12562   while (col1 != NULL && col2 != NULL && header != NULL && are_identical)
12563   {
12564     mod_type = GetModifierType (header->data.ptrvalue);
12565     if (mod_type != eModifierType_Protein
12566         && mod_type != eModifierType_MolType
12567         && mod_type != eModifierType_Topology
12568         && mod_type != eModifierType_Molecule
12569         && StringCmp ((CharPtr) col1->data.ptrvalue, (CharPtr) col2->data.ptrvalue) != 0)
12570     {
12571       are_identical = FALSE;
12572     }
12573     col1 = col1->next;
12574     col2 = col2->next;
12575     header = header->next;
12576   }
12577   if ((col1 == NULL && col2 != NULL) || (col1 != NULL && col2 == NULL))
12578   {
12579     are_identical = FALSE;
12580   }
12581   return are_identical;
12582 }
12583 
12584 
GetSourceInformationColumns(ValNodePtr header,ValNodePtr columns)12585 static CharPtr GetSourceInformationColumns (ValNodePtr header, ValNodePtr columns)
12586 {
12587   Int4 len = 0;
12588   CharPtr rval = NULL;
12589   ValNodePtr col, hcol;
12590   Int4    mod_type;
12591 
12592   col = columns;
12593   hcol = header;
12594   while (col != NULL && hcol != NULL)
12595   {
12596     mod_type = GetModifierType (hcol->data.ptrvalue);
12597     if (mod_type != eModifierType_Protein
12598         && mod_type != eModifierType_MolType
12599         && mod_type != eModifierType_Topology
12600         && mod_type != eModifierType_Molecule)
12601     {
12602       len += StringLen (col->data.ptrvalue) + 1;
12603     }
12604     col = col->next;
12605     hcol = hcol->next;
12606   }
12607 
12608   rval = (CharPtr) MemNew (sizeof (Char) * (len + 1));
12609 
12610   col = columns;
12611   hcol = header;
12612   while (col != NULL && hcol != NULL)
12613   {
12614     mod_type = GetModifierType (hcol->data.ptrvalue);
12615     if (mod_type != eModifierType_Protein
12616         && mod_type != eModifierType_MolType
12617         && mod_type != eModifierType_Topology
12618         && mod_type != eModifierType_Molecule)
12619     {
12620       StringCat (rval, col->data.ptrvalue);
12621       StringCat (rval, ":");
12622     }
12623     col = col->next;
12624     hcol = hcol->next;
12625   }
12626   return rval;
12627 }
12628 
12629 
HasAnySourceInformation(ValNodePtr header_list,ValNodePtr column_list)12630 static Boolean HasAnySourceInformation (ValNodePtr header_list, ValNodePtr column_list)
12631 {
12632   Boolean has_any = FALSE;
12633   Int4    mod_type;
12634 
12635   if (header_list == NULL || column_list == NULL)
12636   {
12637     return FALSE;
12638   }
12639 
12640   /* skip over SeqID column */
12641   header_list = header_list->next;
12642   column_list = column_list->next;
12643   while (header_list != NULL && column_list != NULL && ! has_any)
12644   {
12645     if (!StringHasNoText (column_list->data.ptrvalue))
12646     {
12647       mod_type = GetModifierType (header_list->data.ptrvalue);
12648       if (mod_type != eModifierType_Protein
12649           && mod_type != eModifierType_MolType
12650           && mod_type != eModifierType_Molecule
12651           && mod_type != eModifierType_Topology)
12652       {
12653         has_any = TRUE;
12654       }
12655     }
12656     header_list = header_list->next;
12657     column_list = column_list->next;
12658   }
12659   return has_any;
12660 }
12661 
OrganismMatchesAnotherRow(Int4 row,ValNodePtr row_list,Pointer userdata)12662 static Boolean OrganismMatchesAnotherRow (Int4 row, ValNodePtr row_list, Pointer userdata)
12663 {
12664   ValNodePtr header_vnp, column_list, check_column_list, row_vnp;
12665   Int4       row_num;
12666 
12667   if (row_list == NULL || row < 1)
12668   {
12669     return FALSE;
12670   }
12671 
12672   /* we start with the header of the second column, because the first column
12673    * is the sequence ID */
12674   header_vnp = row_list->data.ptrvalue;
12675   if (header_vnp == NULL) return FALSE;
12676   header_vnp = header_vnp->next;
12677   if (header_vnp == NULL) return FALSE;
12678 
12679   /* find the row we're interested in */
12680   for (row_vnp = row_list->next, row_num = 1;
12681        row_vnp != NULL && row_num != row;
12682        row_vnp = row_vnp->next, row_num++)
12683   {
12684   }
12685   if (row_vnp == NULL)
12686   {
12687     return FALSE;
12688   }
12689 
12690   column_list = (ValNodePtr) row_vnp->data.ptrvalue;
12691   if (!HasAnySourceInformation (row_list->data.ptrvalue, column_list))
12692   {
12693     return FALSE;
12694   }
12695   if (column_list == NULL || column_list->next == NULL)
12696   {
12697     return FALSE;
12698   }
12699 
12700   /* don't check when organism name is missing */
12701   if (StringHasNoText (column_list->next->data.ptrvalue))
12702   {
12703     return FALSE;
12704   }
12705 
12706   /* now check it against the other rows */
12707   for (row_vnp = row_list->next, row_num = 1;
12708        row_vnp != NULL;
12709        row_vnp = row_vnp->next, row_num++)
12710   {
12711     if (row_num == row)
12712     {
12713       continue;
12714     }
12715 
12716     check_column_list = (ValNodePtr) row_vnp->data.ptrvalue;
12717     if (check_column_list == NULL || check_column_list->next == NULL)
12718     {
12719       continue;
12720     }
12721 
12722     /* we compare the column lists, starting with the second column
12723      * because the first column contains the sequence ID
12724      */
12725     if (DoColumnlistsHaveIdenticalSourceInformation (column_list->next,
12726                                                      check_column_list->next,
12727                                                      header_vnp))
12728     {
12729       return TRUE;
12730     }
12731   }
12732   return FALSE;
12733 }
12734 
12735 /* Sequence ID is always stored in the first column, organism name
12736  * is always stored in the second column.
12737  */
AnySequencesHaveMissingOrganisms(ValNodePtr row_list)12738 static Boolean AnySequencesHaveMissingOrganisms (ValNodePtr row_list)
12739 {
12740   Boolean have_missing = FALSE;
12741   ValNodePtr col_list;
12742 
12743   if (row_list == NULL || row_list->next == NULL)
12744   {
12745     return FALSE;
12746   }
12747   row_list = row_list->next;
12748 
12749   while (row_list != NULL && ! have_missing)
12750   {
12751     col_list = row_list->data.ptrvalue;
12752     if (col_list == NULL
12753         || col_list->next == NULL
12754         || StringHasNoText (col_list->next->data.ptrvalue))
12755     {
12756       have_missing = TRUE;
12757     }
12758     row_list = row_list->next;
12759   }
12760   return have_missing;
12761 }
12762 
AnySequencesHaveIdenticalOrganisms(ValNodePtr row_list)12763 static Boolean AnySequencesHaveIdenticalOrganisms (ValNodePtr row_list)
12764 {
12765   Boolean have_match = FALSE;
12766   ValNodePtr vnp;
12767   ValNodeBlock org_list;
12768   CharPtr prev;
12769 
12770   if (row_list == NULL || row_list->next == NULL || row_list->next->next == NULL)
12771   {
12772     return FALSE;
12773   }
12774   InitValNodeBlock (&org_list, NULL);
12775   for (vnp = row_list->next; vnp != NULL; vnp = vnp->next)
12776   {
12777     ValNodeAddPointerToEnd (&org_list, 0, GetSourceInformationColumns (row_list->data.ptrvalue, vnp->data.ptrvalue));
12778   }
12779 
12780   org_list.head = ValNodeSort (org_list.head, SortVnpByString);
12781   prev = org_list.head->data.ptrvalue;
12782   for (vnp = org_list.head->next; vnp != NULL && !have_match; vnp = vnp->next)
12783   {
12784     if (StringICmp (prev, vnp->data.ptrvalue) == 0)
12785     {
12786       have_match = TRUE;
12787     }
12788     prev = vnp->data.ptrvalue;
12789   }
12790   org_list.head = ValNodeFreeData (org_list.head);
12791 
12792   return have_match;
12793 }
12794 
12795 /* Sequence ID is always stored in the first column, organism name
12796  * is always stored in the second column.
12797  */
ReportMissingOrganisms(ValNodePtr row_list,DoC doc)12798 static void ReportMissingOrganisms (ValNodePtr row_list, DoC doc)
12799 {
12800   Int4       row_num, num_missing;
12801   ValNodePtr row_vnp;
12802   ValNodePtr column_list;
12803   ValNodeBlock missing_list;
12804   CharPtr    err_msg;
12805   CharPtr    missing_fmt = "%d sequences are missing organism names.";
12806 
12807   if (row_list == NULL || doc == NULL) return;
12808 
12809 
12810   InitValNodeBlock (&missing_list, NULL);
12811 
12812   for (row_vnp = row_list->next, row_num = 0;
12813        row_vnp != NULL;
12814        row_vnp = row_vnp->next, row_num++)
12815   {
12816     column_list = (ValNodePtr) row_vnp->data.ptrvalue;
12817     if (column_list == NULL)
12818     {
12819       continue;
12820     }
12821     if (column_list->next == NULL || StringHasNoText (column_list->next->data.ptrvalue))
12822     {
12823       /* organism is missing */
12824       ValNodeAddPointerToEnd (&missing_list, row_num, column_list->data.ptrvalue);
12825     }
12826   }
12827 
12828   if (missing_list.head != NULL)
12829   {
12830     num_missing = ValNodeLen (missing_list.head);
12831     if (num_missing > 100)
12832     {
12833       err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_fmt) + 15));
12834       sprintf (err_msg, missing_fmt, num_missing);
12835     }
12836     else
12837     {
12838       err_msg = CreateListMessage ("Sequence",
12839                                    missing_list.head->next == NULL ?
12840                                              " has no organism name."
12841                                              : " have no organism names.",
12842                                    missing_list.head);
12843     }
12844     AppendText (doc, err_msg, &faParFmt, &faColFmt, programFont);
12845     err_msg = MemFree (err_msg);
12846     missing_list.head = ValNodeFree (missing_list.head);
12847     AppendText (doc, "\n", &faParFmt, &faColFmt, programFont);
12848   }
12849 }
12850 
/* Appends to doc one message per group of data rows that share identical
 * source information (as judged by
 * DoColumnlistsHaveIdenticalSourceInformation, which is given the columns
 * after the sequence ID).  Rows with no source information or no organism
 * name are not used as the starting row of a group.  A trailing newline is
 * appended when at least one group was reported.
 *
 * row_list: first node holds the header column list, the remaining nodes
 *           hold one column list per sequence.
 * doc:      document that receives the messages.
 */
static void ReportIdenticalOrganisms (ValNodePtr row_list, DoC doc)
{
  ValNodePtr checked_list = NULL;
  Int4       row_num, check_row_num;
  ValNodePtr row_vnp, check_row_vnp;
  ValNodePtr column_list, check_column_list;
  ValNodePtr header_vnp;
  ValNodePtr this_match_list;
  CharPtr    err_msg;
  Boolean    any_data_reported = FALSE;

  if (row_list == NULL || doc == NULL) return;

  /* we start with the header of the second column, because the first column
   * is the sequence ID */
  header_vnp = row_list->data.ptrvalue;
  if (header_vnp == NULL) return;
  header_vnp = header_vnp->next;
  if (header_vnp == NULL) return;

  for (row_vnp = row_list->next, row_num = 0;
       row_vnp != NULL;
       row_vnp = row_vnp->next, row_num++)
  {
    /* don't need to check rows that have matched a previous row */
    if (IntValueInValNodeList (row_num, checked_list))
    {
      continue;
    }

    column_list = (ValNodePtr) row_vnp->data.ptrvalue;
    if (!HasAnySourceInformation (row_list->data.ptrvalue, column_list))
    {
      continue;
    }

    if (column_list == NULL || column_list->next == NULL)
    {
      continue;
    }

    if (StringHasNoText (column_list->next->data.ptrvalue))
    {
      /* skip - no organism name, will have already been reported */
      continue;
    }

    this_match_list = NULL;
    for (check_row_vnp = row_vnp->next, check_row_num = row_num + 1;
         check_row_vnp != NULL;
         check_row_vnp = check_row_vnp->next, check_row_num++)
    {
      /* Skip candidate rows already placed in an earlier group.  The test
       * must use the candidate's row number: the outer row was already
       * verified above to be absent from checked_list, so testing row_num
       * here could never skip anything.
       */
      if (IntValueInValNodeList (check_row_num, checked_list))
      {
        continue;
      }
      check_column_list = (ValNodePtr) check_row_vnp->data.ptrvalue;
      if (check_column_list == NULL || check_column_list->next == NULL)
      {
        continue;
      }

      /* we compare the column lists, starting with the second column
       * because the first column contains the sequence ID
       */
      if (DoColumnlistsHaveIdenticalSourceInformation (column_list->next,
                                                       check_column_list->next,
                                                       header_vnp))
      {
        /* be sure to put the first row to match the other rows in the list */
        if (this_match_list == NULL)
        {
          ValNodeAddPointer (&this_match_list, row_num, column_list->data.ptrvalue);
        }
        /* add the sequence ID for the check row to the list */
        ValNodeAddPointer (&this_match_list, check_row_num, check_column_list->data.ptrvalue);
        /* remember the row so it is neither re-grouped nor used as a start */
        ValNodeAddInt (&checked_list, 0, check_row_num);
      }
    }

    /* if anything matched this row, report the group */
    if (this_match_list != NULL)
    {
      err_msg = CreateListMessage ("Sequence",
                     " have identical source information.",
                     this_match_list);
      AppendText (doc, err_msg, &faParFmt, &faColFmt, programFont);
      err_msg = MemFree (err_msg);
      /* nodes only borrow the sequence ID strings */
      this_match_list = ValNodeFree (this_match_list);
      any_data_reported = TRUE;
    }
  }

  checked_list = ValNodeFree (checked_list);
  if (any_data_reported)
  {
    AppendText (doc, "\n", &faParFmt, &faColFmt, programFont);
  }
}
12953 
/* Builds a three-column summary table ("Modifier", "Status", "First Value")
 * with one row per modifier column of row_list and hands it to summary_dlg
 * via PointerToDialog.
 *
 * row_list:    first node holds the header column list (its first column is
 *              the sequence ID and is skipped); the remaining nodes hold one
 *              column list per sequence.
 * summary_dlg: dialog that receives the summary table.
 *
 * The choice field of each summary header node is used to carry a column
 * width: it starts at the length of the header text and grows with the
 * text placed in that column (the modifier name column is capped at 15).
 *
 * Does nothing when row_list has no data rows, when the header list is
 * missing, or when summary_dlg is NULL.  A data row that is too short to
 * have a cell for a given modifier is counted as missing that modifier.
 */
static void SummarizeModifiers (ValNodePtr row_list, DialoG summary_dlg)
{
  ValNodePtr header_vnp, row_vnp, column_vnp;
  Int4       column_offset, col_pos;
  Boolean    any_present;
  Boolean    all_present;
  Boolean    is_unique;
  CharPtr    first_value_seen;
  Boolean    all_unique;
  ValNodePtr values_seen;
  CharPtr    row_status;
  Int4       line_len;
  CharPtr    modifier_line = NULL;
  Int4       num_missing;
  ValNodePtr summary_row_list = NULL;
  ValNodePtr summary_col_list = NULL, summary_header_list = NULL;

  if (row_list == NULL || row_list->next == NULL || summary_dlg == NULL)
  {
    return;
  }

  /* check the header before allocating anything, so that a missing header
   * list neither crashes nor leaks the summary header row */
  header_vnp = row_list->data.ptrvalue;
  if (header_vnp == NULL)
  {
    return;
  }

  ValNodeAddPointer (&summary_col_list, 8, StringSave ("Modifier"));
  ValNodeAddPointer (&summary_col_list, 6, StringSave ("Status"));
  ValNodeAddPointer (&summary_col_list, 11, StringSave ("First Value"));
  ValNodeAddPointer (&summary_row_list, 0, summary_col_list);
  summary_header_list = summary_col_list;

  /* skip over sequence ID column */
  header_vnp = header_vnp->next;
  column_offset = 1;
  while (header_vnp != NULL)
  {
    any_present = FALSE;
    all_present = TRUE;
    is_unique = TRUE;
    all_unique = TRUE;
    first_value_seen = NULL;
    values_seen = NULL;
    num_missing = 0;

    /* skip over header line */
    for (row_vnp = row_list->next; row_vnp != NULL; row_vnp = row_vnp->next)
    {
      /* find this row's cell for the current modifier column */
      for (col_pos = 0, column_vnp = row_vnp->data.ptrvalue;
           col_pos < column_offset && column_vnp != NULL;
           col_pos++, column_vnp = column_vnp->next)
      {
      }
      if (column_vnp == NULL || StringHasNoText (column_vnp->data.ptrvalue))
      {
        /* a short row is treated like an empty cell; the for loop advances
         * to the next row, so a short row can no longer stall the scan */
        all_present = FALSE;
        num_missing++;
        continue;
      }

      any_present = TRUE;
      if (first_value_seen == NULL)
      {
        first_value_seen = StringSave (column_vnp->data.ptrvalue);
        ValNodeAddPointer (&values_seen, 0, first_value_seen);
      }
      else
      {
        if (StringCmp (first_value_seen, column_vnp->data.ptrvalue) != 0)
        {
          is_unique = FALSE;
        }

        if ( FindExactStringListMatch (values_seen, column_vnp->data.ptrvalue)
            == NULL)
        {
          ValNodeAddStr (&values_seen, 0, column_vnp->data.ptrvalue);
        }
        else
        {
          all_unique = FALSE;
        }
      }
    }

    /* choose the status text; formats containing %d are only selected when
     * all_present is FALSE and are expanded with num_missing below */
    if (! any_present)
    {
      row_status = "All missing (%d sequences)";
    }
    else if (all_present && all_unique)
    {
      row_status = "All present, all unique values";
    }
    else if (all_present && is_unique)
    {
      row_status = "All present, one unique value";
    }
    else if (all_present)
    {
      row_status = "All present, mixed values";
    }
    else if (all_unique)
    {
      row_status = "%d missing, all unique values";
    }
    else if (is_unique)
    {
      row_status = "%d missing, one unique value present";
    }
    else
    {
      row_status = "%d missing, mixed values";
    }

    /* room for the format plus the expanded count */
    line_len = StringLen (row_status) + 30;

    modifier_line = (CharPtr) MemNew (line_len * sizeof (Char));
    if (modifier_line != NULL)
    {

      /* add summary row for this modifier */
      summary_col_list = NULL;
      /* add modifier name */
      ValNodeAddPointer (&summary_col_list,
                         0,
                         StringSave (header_vnp->data.ptrvalue));
      /* show up to the first fifteen characters of the modifier name */
      summary_header_list->choice = MAX (summary_header_list->choice,
                                         StringLen (header_vnp->data.ptrvalue));
      summary_header_list->choice = MIN (summary_header_list->choice,
                                         15);

      /* add status */
      if (all_present)
      {
        ValNodeAddPointer (&summary_col_list,
                           0,
                           StringSave (row_status));
        summary_header_list->next->choice = MAX (summary_header_list->next->choice,
                                         StringLen (row_status));
      }
      else
      {
        /* cast so the argument matches %d whatever Int4 is defined as */
        sprintf (modifier_line, row_status, (int) num_missing);
        ValNodeAddPointer (&summary_col_list,
                           0,
                           StringSave (modifier_line));
        summary_header_list->next->choice = MAX (summary_header_list->next->choice,
                                         StringLen (modifier_line));
      }

      /* add sample value */
      if (StringHasNoText (first_value_seen))
      {
        ValNodeAddPointer (&summary_col_list, 0, StringSave (""));
      }
      else
      {
        ValNodeAddPointer (&summary_col_list,
                           0,
                           StringSave (first_value_seen));
        summary_header_list->next->next->choice = MAX (summary_header_list->next->next->choice,
                                         StringLen (first_value_seen));
      }
      ValNodeAddPointer (&summary_row_list, 0, summary_col_list);

      modifier_line = MemFree (modifier_line);
    }

    /* free up variables; values_seen only borrows its strings (the first
     * one is first_value_seen, the rest belong to row_list) */
    first_value_seen = MemFree (first_value_seen);
    values_seen = ValNodeFree (values_seen);

    header_vnp = header_vnp->next;
    column_offset++;
  }

  PointerToDialog (summary_dlg, summary_row_list);
  summary_row_list = FreeTableDisplayRowList (summary_row_list);

}
13141 
GetListOfCurrentSourceModifiers(IDAndTitleEditPtr iatep)13142 static ValNodePtr GetListOfCurrentSourceModifiers (IDAndTitleEditPtr iatep)
13143 {
13144   ValNodePtr  found_modifiers = NULL;
13145   Int4        seq_num;
13146 
13147   /* we always list organism, and list it first, whether it's present or not */
13148   ValNodeAddPointer (&found_modifiers, 0, StringSave ("Organism"));
13149   if (iatep != NULL)
13150   {
13151     /* get list of modifiers from titles */
13152     for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
13153     {
13154       /* only add modifiers from master sequences */
13155       if (iatep->is_seg == NULL || !iatep->is_seg [seq_num])
13156       {
13157         found_modifiers = BuildModifierTypeList (found_modifiers,
13158                                                  iatep->title_list [seq_num],
13159                                                  FALSE);
13160       }
13161     }
13162   }
13163   return found_modifiers;
13164 }
13165 
13166 
/* Note shown to the user in the organism document when GetDisplayValue
 * reported more than one value of the same modifier for a sequence
 * (appended by SeqEntryPtrToOrgDoc).
 */
static CharPtr multival_explanation = "Note: When there is more than one "
           "modifier of the same type for a single sequence, the value list will "
           "be presented in tables separated by semicolons and enclosed in parentheses.";
13170 
/* Refreshes the source-information controls of the sequences form:
 *   - org_doc receives the "missing organism" and "identical source
 *     information" reports, plus the multi-value note when applicable;
 *   - ident_org_grp is shown only while some rows have identical source
 *     information;
 *   - summary_dlg receives the per-modifier summary table.
 * When the form has no sequences, a hint is shown in org_doc and the other
 * two controls are hidden.
 *
 * Internally a table is built with one header row and one row per master
 * sequence: column 1 is the sequence ID, column 2 the organism, then one
 * column per modifier found in the titles.  The choice field of each header
 * node stores the widest text seen in that column.
 */
static void SeqEntryPtrToOrgDoc (SequencesFormPtr sqfp)
{
  SeqEntryPtr  seq_list;
  ValNodePtr   found_modifiers = NULL, vnp;
  CharPtr      mod_name;
  CharPtr      org_name;
  Int4         seq_num;
  ValNodeBlock column_list;
  ValNodeBlock row_list;
  ValNodePtr   row_vnp;
  ValNodePtr   header_vnp, header_list;
  Int4         column_width;
  CharPtr      mod_value;
  RecT              r;
  IDAndTitleEditPtr iatep;
  Boolean           multi_found = FALSE, have_missing, have_match;

  if (sqfp == NULL) return;
  Reset (sqfp->org_doc);
  ObjectRect (sqfp->org_doc, &r);
  InsetRect (&r, 4, 4);
  faColFmt.pixWidth = r.right - r.left;

  InitValNodeBlock (&row_list, NULL);

  seq_list = GetSeqEntryFromSequencesForm (sqfp);
  if (seq_list == NULL)
  {
    AppendText (sqfp->org_doc,
                "You must create sequences before you can add source information.",
                &faParFmt, &faColFmt, programFont);
    Show (sqfp->org_doc);
    Hide (sqfp->ident_org_grp);
    Hide (sqfp->summary_dlg);
  }
  else
  {
    Show (sqfp->summary_dlg);
    /* get list of modifiers */
    iatep = SeqEntryListToIDAndTitleEdit (seq_list);

    found_modifiers = GetListOfCurrentSourceModifiers (iatep);

    /* create header line for table */
    /* store max column width in choice */
    InitValNodeBlock (&column_list, NULL);
    ValNodeAddPointerToEnd (&column_list, 6, StringSave ("Seq ID"));
    ValNodeAddPointerToEnd (&column_list, 8, StringSave ("Organism"));
    for (vnp = found_modifiers->next; vnp != NULL; vnp = vnp->next)
    {
      ValNodeAddPointerToEnd (&column_list, StringLen (vnp->data.ptrvalue), StringSave ((CharPtr) vnp->data.ptrvalue));
    }

    ValNodeAddPointerToEnd (&row_list, 0, column_list.head);
    header_list = column_list.head;

    /* create data lines for table; without an ID/title set there are no
     * data rows, and the table then consists of the header row only */
    if (iatep != NULL)
    {
      for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
      {
        /* only add rows for master sequences */
        if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
        {
          continue;
        }

        /* add modifiers from this title */
        InitValNodeBlock (&column_list, NULL);
        header_vnp = header_list;

        column_width = MAX (StringLen (iatep->id_list [seq_num]), header_vnp->choice);
        header_vnp->choice = column_width;
        ValNodeAddPointerToEnd (&column_list, 0, StringSave (iatep->id_list [seq_num]));

        /* add organism name */
        header_vnp = header_vnp->next;
        org_name = GetDisplayValue ("organism", iatep->title_list [seq_num], &multi_found);
        column_width = MAX (StringLen (org_name), header_vnp->choice);
        header_vnp->choice = column_width;
        ValNodeAddPointerToEnd (&column_list, 0, org_name);

        /* get remaining modifiers */
        for (vnp = found_modifiers->next; vnp != NULL; vnp = vnp->next)
        {
          header_vnp = header_vnp->next;
          mod_name = (CharPtr) vnp->data.ptrvalue;
          mod_value = GetDisplayValue (mod_name, iatep->title_list [seq_num], &multi_found);
          column_width = MAX (StringLen (mod_value), header_vnp->choice);
          header_vnp->choice = column_width;
          ValNodeAddPointerToEnd (&column_list, 0, mod_value);
        }
        ValNodeAddPointerToEnd (&row_list, 0, column_list.head);
      }
    }
    have_missing = AnySequencesHaveMissingOrganisms (row_list.head);
    have_match = AnySequencesHaveIdenticalOrganisms (row_list.head);

    /* the identical-organism group follows have_match in both directions,
     * so it does not stay visible from an earlier refresh once the
     * duplicates are gone but organism names are still missing */
    if (have_match)
    {
      Show (sqfp->ident_org_grp);
    }
    else
    {
      Hide (sqfp->ident_org_grp);
    }
    if (have_match || have_missing)
    {
      Show (sqfp->org_doc);
    }
    else
    {
      Hide (sqfp->org_doc);
    }
    ReportMissingOrganisms (row_list.head, sqfp->org_doc);
    ReportIdenticalOrganisms (row_list.head, sqfp->org_doc);
    if (multi_found)
    {
      AppendText (sqfp->org_doc, multival_explanation, NULL, NULL, programFont);
      Show (sqfp->org_doc);
    }

    SummarizeModifiers (row_list.head, sqfp->summary_dlg);

    /* free table text */
    for (row_vnp = row_list.head; row_vnp != NULL; row_vnp = row_vnp->next)
    {
      column_list.head = (ValNodePtr) row_vnp->data.ptrvalue;
      row_vnp->data.ptrvalue = ValNodeFreeData (column_list.head);
    }
    row_list.head = ValNodeFree (row_list.head);

    ValNodeFreeData (found_modifiers);
    if (iatep != NULL)
    {
      iatep = IDAndTitleEditFree (iatep);
    }
  }
  /* update document */
  InvalDocRows (sqfp->org_doc, 0, 0, 0);
}
13302 
/* Refreshes the source tab of the sequences form; currently this only
 * forwards to SeqEntryPtrToOrgDoc.
 */
static void SeqEntryPtrToSourceTab (SequencesFormPtr sqfp)

{
  SeqEntryPtrToOrgDoc (sqfp);
}
13308 
GetFastaModifierList(Boolean allow_nuc,Boolean allow_prot)13309 static ValNodePtr GetFastaModifierList (Boolean allow_nuc, Boolean allow_prot)
13310 {
13311   ValNodePtr mod_choices = NULL;
13312   Int4       i;
13313 
13314   if (allow_nuc)
13315   {
13316     ValNodeAddPointer (&mod_choices, eModifierType_Organism, StringSave ("Organism"));
13317 
13318     ValNodeLink (&mod_choices, GetSourceQualDescListEx (TRUE, TRUE, FALSE, FALSE, FALSE));
13319     ValNodeAddPointer (&mod_choices, eModifierType_CommonName, StringSave ("Common Name"));
13320     ValNodeAddPointer (&mod_choices, eModifierType_Location, StringSave ("Location"));
13321     ValNodeAddPointer (&mod_choices, eModifierType_Origin, StringSave ("Origin"));
13322     ValNodeAddPointer (&mod_choices, eModifierType_Lineage, StringSave ("Lineage"));
13323     ValNodeAddPointer (&mod_choices, eModifierType_NucGeneticCode, StringSave ("gcode"));
13324     ValNodeAddPointer (&mod_choices, eModifierType_MitoGeneticCode, StringSave ("mgcode"));
13325     ValNodeAddPointer (&mod_choices, eModifierType_Molecule, StringSave ("moltype"));
13326     ValNodeAddPointer (&mod_choices, eModifierType_Molecule, StringSave ("molecule"));
13327     ValNodeAddPointer (&mod_choices, eModifierType_Technique, StringSave ("tech"));
13328   }
13329 
13330   if (allow_prot)
13331   {
13332     for (i = 0; i < num_protein_modifier_names; i++)
13333     {
13334       if (StringICmp (protein_modifier_names [i], "orf") != 0)
13335       {
13336         ValNodeAddPointer (&mod_choices, eModifierType_Protein, StringSave (protein_modifier_names[i]));
13337       }
13338     }
13339   }
13340 
13341   return mod_choices;
13342 }
13343 
/* Renames every orig_name modifier located by FindValuePairInDefLine in
 * title to repl_name.  Each replacement allocates a new title and frees
 * the previous one, so the caller must use the returned pointer in place
 * of the one passed in.
 * title is returned untouched when any argument has no text or when the
 * two names are equal ignoring case.  Processing stops, returning the
 * title built so far, when no '=' token follows the located bracket or
 * when allocation fails.
 */
static CharPtr ReplaceOneModifierName (CharPtr title, CharPtr orig_name, CharPtr repl_name)
{
  CharPtr open_bracket, equals_pos, rebuilt;
  Int4    rebuilt_size;
  Int4    resume_at = 0;

  if (StringHasNoText (title)
      || StringHasNoText (orig_name)
      || StringHasNoText (repl_name)
      || StringICmp (orig_name, repl_name) == 0)
  {
    return title;
  }

  for (open_bracket = FindValuePairInDefLine (orig_name, title, NULL);
       open_bracket != NULL;
       open_bracket = FindValuePairInDefLine (orig_name, title + resume_at, NULL))
  {
    equals_pos = NextBracketToken (open_bracket + 1);
    if (equals_pos == NULL || *equals_pos != '=')
    {
      break;
    }

    rebuilt_size = StringLen (title) + StringLen (repl_name) + 1;
    rebuilt = (CharPtr) MemNew (rebuilt_size * sizeof (Char));
    if (rebuilt == NULL)
    {
      break;
    }

    /* text up to and including the bracket, then the new name ... */
    StringNCpy (rebuilt, title, open_bracket - title + 1);
    StringCat (rebuilt, repl_name);
    /* ... the next search resumes just past the '=' appended below ... */
    resume_at = StringLen (rebuilt) + 1;
    /* ... then everything from the '=' onward */
    StringCat (rebuilt, equals_pos);

    MemFree (title);
    title = rebuilt;
  }
  return title;
}
13381 
13382 static void
ReplaceAliasInAllDefinitionLines(SeqEntryPtr sep,CharPtr alias,CharPtr real_val)13383 ReplaceAliasInAllDefinitionLines
13384 (SeqEntryPtr sep,
13385  CharPtr alias,
13386  CharPtr real_val)
13387 {
13388   BioseqSetPtr bssp;
13389   SeqDescrPtr  sdp;
13390 
13391   if (sep == NULL || StringHasNoText (alias) || StringHasNoText (real_val)) return;
13392 
13393   while (sep != NULL) {
13394     if (IS_Bioseq_set (sep))
13395     {
13396       sdp = SeqEntryGetSeqDescr(sep, Seq_descr_title, NULL);
13397       if (sdp != NULL)
13398       {
13399         sdp->data.ptrvalue = ReplaceOneModifierName (sdp->data.ptrvalue,
13400                                                      alias, real_val);
13401 
13402       }
13403 
13404       bssp = (BioseqSetPtr) sep->data.ptrvalue;
13405       if (bssp != NULL)
13406       {
13407         ReplaceAliasInAllDefinitionLines (bssp->seq_set, alias, real_val);
13408       }
13409     }
13410     else
13411     {
13412       sdp = SeqEntryGetSeqDescr(sep, Seq_descr_title, NULL);
13413       if (sdp != NULL)
13414       {
13415         sdp->data.ptrvalue = ReplaceOneModifierName (sdp->data.ptrvalue,
13416                                                      alias, real_val);
13417       }
13418     }
13419     sep = sep->next;
13420   }
13421 }
13422 
/* Applies each of the num_aliases entries of alias_list, in order, to the
 * definition lines reachable from sep.
 */
static void ReplaceAllAliases (SeqEntryPtr sep)
{
  Int4 idx = 0;

  while (idx < num_aliases)
  {
    ReplaceAliasInAllDefinitionLines (sep, alias_list[idx].alias, alias_list[idx].modifier);
    idx++;
  }
}
13432 
/* When title contains "[dna]" (then likewise "[rna]"), as found by
 * StringISearch, the bracketed text is excised, surrounding spaces are
 * trimmed, and the "molecule" value of the title is set through
 * ReplaceValueInOneDefLine.  Returns the title to use from now on, which
 * is whatever ReplaceValueInOneDefLine returned when a replacement was
 * made.
 */
static CharPtr ReplaceMolNameWithMolBracketsInOneDefinitionLine (CharPtr title)
{
  /* per molecule: text searched for, start of text to excise, new value */
  static CharPtr search_text [] = { "[dna]", "[rna]" };
  static CharPtr excise_from [] = { "[dna", "[rna" };
  static CharPtr mol_value []   = { "dna", "rna" };
  Int4           k;

  if (StringHasNoText (title))
  {
    return title;
  }

  for (k = 0; k < 2; k++)
  {
    if (StringISearch (title, search_text [k]) == NULL)
    {
      continue;
    }
    ExciseString (title, excise_from [k], "]");
    TrimSpacesAroundString (title);
    title = ReplaceValueInOneDefLine (title, "molecule", mol_value [k]);
  }

  return title;
}
13460 
/* For every entry in the chain starting at sep, passes the title descriptor
 * (when one is present) through
 * ReplaceMolNameWithMolBracketsInOneDefinitionLine.  For a set entry, the
 * members of the set are processed recursively.
 */
static void ReplaceMolNamesWithMolBracketsInDefinitionLines (SeqEntryPtr sep)
{
  SeqDescrPtr  title_sdp;
  BioseqSetPtr set_ptr;

  for (; sep != NULL; sep = sep->next)
  {
    /* the title handling is the same for sequences and sets */
    title_sdp = SeqEntryGetSeqDescr(sep, Seq_descr_title, NULL);
    if (title_sdp != NULL)
    {
      title_sdp->data.ptrvalue = ReplaceMolNameWithMolBracketsInOneDefinitionLine (title_sdp->data.ptrvalue);
    }

    if (IS_Bioseq_set (sep))
    {
      set_ptr = (BioseqSetPtr) sep->data.ptrvalue;
      if (set_ptr != NULL)
      {
        ReplaceMolNamesWithMolBracketsInDefinitionLines (set_ptr->seq_set);
      }
    }
  }
}
13493 
/* Button callback.  Fetches the sequence entry list from the sequences form
 * attached to the button, builds an ID/title edit from it and passes that to
 * ImportModifiersToIDAndTitleEdit.  When that call returns TRUE the edit is
 * applied back to the entry list and SeqEntryPtrToSourceTab is called for the
 * form.  The ID/title edit is freed in either case.
 */
static void ImportModifiersButtonProc (ButtoN b)
{
  SequencesFormPtr  form;
  SeqEntryPtr       entries;
  IDAndTitleEditPtr edit;

  form = (SequencesFormPtr) GetObjectExtra (b);
  if (form == NULL)
  {
    return;
  }

  entries = GetSeqEntryFromSequencesForm (form);
  edit = SeqEntryListToIDAndTitleEdit (entries);

  if (ImportModifiersToIDAndTitleEdit (edit))
  {
    ApplyIDAndTitleEditToSeqEntryList (entries, edit);
    SeqEntryPtrToSourceTab (form);
  }

  IDAndTitleEditFree (edit);
}
13514 
13515 
/* State carried by the source assistant.
 * defline_list and id_list are indexed together, one entry per sequence, for
 * indices 0 .. num_deflines - 1 (see the conversion helpers that follow this
 * declaration).
 */
typedef struct sourceassistant
{
  CharPtr PNTR    defline_list;   /* definition-line string for each sequence */
  CharPtr PNTR    id_list;        /* sequence ID string for each sequence */
  Int4            num_deflines;   /* number of entries in defline_list and id_list */
  Int2            seqPackage;     /* NOTE(review): presumably the submission package type - confirm */
  DialoG          mod_type_dlg;   /* NOTE(review): not used in the code visible here */
  DoC             mod_doc;        /* NOTE(review): not used in the code visible here */
  DialoG          orgmod_dlg;     /* NOTE(review): not used in the code visible here */
  Boolean         done;           /* NOTE(review): presumably set when the assistant is accepted - confirm */
  Boolean         cancelled;      /* NOTE(review): presumably set when the assistant is cancelled - confirm */
} SourceAssistantData, PNTR SourceAssistantPtr;
13528 
/* These functions are used for converting between a SourceAssistant structure
 * and an IDAndTitleEdit structure.
 */
SourceAssistantToIDAndTitleEdit(SourceAssistantPtr sap)13532 static IDAndTitleEditPtr SourceAssistantToIDAndTitleEdit (SourceAssistantPtr sap)
13533 {
13534   IDAndTitleEditPtr iatep;
13535   Int4              j;
13536 
13537   if (sap == NULL || sap->num_deflines < 1)
13538   {
13539     return NULL;
13540   }
13541 
13542   iatep = IDAndTitleEditNew ();
13543   if (iatep != NULL)
13544   {
13545     iatep->num_sequences = sap->num_deflines;
13546     iatep->id_list = (CharPtr PNTR) MemNew (iatep->num_sequences * sizeof (CharPtr));
13547     iatep->title_list = (CharPtr PNTR) MemNew (iatep->num_sequences * sizeof (CharPtr));
13548     for (j = 0; j < sap->num_deflines; j++)
13549     {
13550       iatep->id_list [j] = StringSave (sap->id_list [j]);
13551       iatep->title_list [j] = StringSave (sap->defline_list [j]);
13552     }
13553   }
13554   return iatep;
13555 }
13556 
/* Copies the IDs and titles held in iatep back into sap.
 *
 * Nothing happens when either pointer is NULL or when the two structures do
 * not hold the same number of sequences.  Each existing string in sap is
 * freed and replaced with a fresh copy of the corresponding iatep string.
 */
static void ApplyIDAndTitleEditToSourceAssistant (SourceAssistantPtr sap, IDAndTitleEditPtr iatep)
{
  Int4 idx;

  if (sap == NULL || iatep == NULL) return;
  if (sap->num_deflines != iatep->num_sequences) return;

  idx = 0;
  while (idx < iatep->num_sequences)
  {
    /* sequence ID */
    MemFree (sap->id_list [idx]);
    sap->id_list [idx] = StringSave (iatep->id_list [idx]);

    /* definition line */
    MemFree (sap->defline_list [idx]);
    sap->defline_list [idx] = StringSave (iatep->title_list [idx]);

    idx++;
  }
}
13577 
PrepareSourceAssistantTableData(SourceAssistantPtr sap,BoolPtr multi_found)13578 static ValNodePtr PrepareSourceAssistantTableData (SourceAssistantPtr sap, BoolPtr multi_found)
13579 {
13580   Int4               i;
13581   ValNodePtr         found_modifiers = NULL;
13582   ValNodePtr         vnp;
13583   ValNodePtr         column_list = NULL, row_list = NULL;
13584   ValNodePtr         header_list, header_vnp;
13585   Int4               column_width, num_columns = 0;
13586   CharPtr            org_name, mod_name, mod_value;
13587   Int4               max_column_width = 20;
13588 
13589   if (sap == NULL)
13590   {
13591     return NULL;
13592   }
13593 
13594   /* get list of modifiers */
13595   /* location will be listed whether present or not */
13596   ValNodeAddPointer (&found_modifiers, 0, StringSave ("location"));
13597   for (i = 0; i < sap->num_deflines; i++)
13598   {
13599     found_modifiers = BuildModifierTypeList (found_modifiers,
13600                                              sap->defline_list[i],
13601                                              FALSE);
13602   }
13603 
13604   /* create header line for table */
13605   /* store max column width in choice */
13606   ValNodeAddPointer (&column_list, 6, StringSave ("Seq ID"));
13607   ValNodeAddPointer (&column_list, 8, StringSave ("organism"));
13608   for (vnp = found_modifiers; vnp != NULL; vnp = vnp->next)
13609   {
13610     ValNodeAddPointer (&column_list, StringLen (vnp->data.ptrvalue), StringSave ((CharPtr) vnp->data.ptrvalue));
13611   }
13612 
13613   ValNodeAddPointer (&row_list, 0, column_list);
13614   header_list = column_list;
13615 
13616   num_columns = ValNodeLen (column_list);
13617 
13618   /* create data lines for table */
13619   for (i = 0; i < sap->num_deflines; i++)
13620   {
13621     column_list = NULL;
13622     header_vnp = header_list;
13623     /* add Sequence ID */
13624     column_width = MAX (StringLen (sap->id_list[i]), header_vnp->choice);
13625     column_width = MIN (column_width, max_column_width);
13626     header_vnp->choice = column_width;
13627     ValNodeAddPointer (&column_list, 0, StringSave (sap->id_list[i]));
13628 
13629     /* add organism name */
13630     header_vnp = header_vnp->next;
13631     org_name = GetDisplayValue ("organism", sap->defline_list[i], multi_found);
13632     column_width = MAX (StringLen (org_name), header_vnp->choice);
13633     column_width = MIN (column_width, max_column_width);
13634     header_vnp->choice = column_width;
13635     ValNodeAddPointer (&column_list, 0, org_name);
13636 
13637     /* get remaining modifiers */
13638     for (vnp = found_modifiers; vnp != NULL; vnp = vnp->next)
13639     {
13640       header_vnp = header_vnp->next;
13641       mod_name = (CharPtr) vnp->data.ptrvalue;
13642       mod_value = GetDisplayValue (mod_name, sap->defline_list[i], multi_found);
13643       if (StringICmp (mod_name, "location") == 0 && StringHasNoText (mod_value))
13644       {
13645         /* display default value for location */
13646         mod_value = MemFree (mod_value);
13647         mod_value = StringSave ("genomic");
13648       }
13649       column_width = MAX (StringLen (mod_value), header_vnp->choice);
13650       column_width = MIN (column_width, max_column_width);
13651       header_vnp->choice = column_width;
13652       ValNodeAddPointer (&column_list, 0, mod_value);
13653     }
13654     ValNodeAddPointer (&row_list, 0, column_list);
13655   }
13656   ValNodeFreeData (found_modifiers);
13657   return row_list;
13658 }
13659 
13660 
13661 /* code for scientific name selection controls */
13662 
/* Per-dialog state for the single-organism selection control: a text field
 * for the taxonomy name plus a document panel listing the names from orglist.
 */
typedef struct organismselectiondialog
{
  DIALOG_MESSAGE_BLOCK   /* dialog, todialog, fromdialog, dialogmessage, testdialog are set in OrganismSelectionDialog */
  TexT       tax_name_txt;   /* text field holding the taxonomy name */
  DoC        org_list;       /* panel with one row per organism name */
  Int4       org_row;        /* item highlighted in org_list, or -1 when none */
  CharPtr    tax_name_val;   /* heap copy of the text field contents; freed in the cleanup callback */
} OrganismSelectionDialogData, PNTR OrganismSelectionDialogPtr;
13671 
/* Cleanup callback for the organism selection dialog: releases the saved
 * taxonomy-name string, then defers to StdCleanupExtraProc.
 */
static void CleanupOrganismSelectionDialog (GraphiC g, VoidPtr data)
{
  OrganismSelectionDialogPtr osd = (OrganismSelectionDialogPtr) GetObjectExtra (g);

  if (osd != NULL)
  {
    osd->tax_name_val = MemFree (osd->tax_name_val);
  }
  StdCleanupExtraProc (g, data);
}
13685 
OrgNameHighlight(DoC doc,Int2 item,Int2 row,Int2 col)13686 static Boolean OrgNameHighlight (DoC doc, Int2 item, Int2 row, Int2 col)
13687 {
13688   OrganismSelectionDialogPtr dlg;
13689 
13690   dlg = (OrganismSelectionDialogPtr) GetObjectExtra (doc);
13691   if (dlg == NULL) return FALSE;
13692 
13693   if (item == dlg->org_row) return TRUE;
13694   return FALSE;
13695 }
13696 
GetTextForOrgPos(Int4 pos)13697 static CharPtr GetTextForOrgPos (Int4 pos)
13698 {
13699   ValNodePtr vnp;
13700   Int4       val;
13701   OrgInfoPtr oip;
13702 
13703   for (vnp = orglist, val = 1; vnp != NULL && val < pos; vnp = vnp->next, val++)
13704   {
13705   }
13706   if (vnp != NULL && vnp->data.ptrvalue != NULL)
13707   {
13708     oip = (OrgInfoPtr) vnp->data.ptrvalue;
13709   	return oip->taxname;;
13710   }
13711   else
13712   {
13713   	return NULL;
13714   }
13715 }
13716 
/* Finds where cp belongs among the taxonomy names in orglist.
 *
 * Entries are visited in list order and numbered from 1; nodes without data
 * are skipped and not numbered.  The scan stops at the first name that
 * compares greater than or equal to cp (StringCmp): *pos receives that
 * entry's number and *match tells whether the names were equal.  When no such
 * entry exists, *pos receives the number of the last counted entry (0 when
 * none were counted) and *match is FALSE.
 *
 * Nothing is written when cp, pos or match is NULL.
 */
static void GetOrgPosForText (CharPtr cp, Int4Ptr pos, Boolean PNTR match)
{
  ValNodePtr vnp;
  OrgInfoPtr oip;
  Int4       val = 1;
  Int4       res;

  if (cp == NULL || pos == NULL || match == NULL) return;

  for (vnp = orglist; vnp != NULL; vnp = vnp->next)
  {
    oip = (OrgInfoPtr) vnp->data.ptrvalue;
    if (oip == NULL)
    {
      continue;
    }

    res = StringCmp (cp, oip->taxname);
    if (res <= 0)
    {
      *pos = val;
      *match = (Boolean) (res == 0);
      return;
    }
    val++;
  }

  *pos = val - 1;
  *match = FALSE;
}
13752 
/* Key callback for the organism list.  The only key handled is "copy":
 * character code 3 when built with WIN_MSWIN, otherwise 'c' while ctrlKey is
 * set.  In that case the contents of the taxonomy name field are placed on
 * the clipboard.
 */
static void OrgNameOnKey (SlatE s, Char ch)
{
  OrganismSelectionDialogPtr osd;
  CharPtr                    text;
  Boolean                    wants_copy;

  osd = (OrganismSelectionDialogPtr) GetObjectExtra (s);
  if (osd == NULL) return;

  if ( (int) ch == 0 ) return;

  /* later, handle control key combos */
#ifdef WIN_MSWIN
  wants_copy = (Boolean) (ch == 3);
#else
  wants_copy = (Boolean) (ctrlKey && ch == 'c');
#endif

  if (!wants_copy) return;

  text = SaveStringFromText (osd->tax_name_txt);
  StringToClipboard (text);
  text = MemFree (text);
}
13780 
/* Text callback for the taxonomy name field.
 *
 * Replaces the saved name with a fresh copy of the field contents (first
 * character upper-cased), looks the name up with GetOrgPosForText, scrolls
 * the organism list to the reported position and updates the highlighted
 * row: an exact match highlights that row and writes the saved name back to
 * the field; otherwise the highlight is cleared.  Rows whose highlight state
 * may have changed are invalidated.
 */
static void SetOrganismText (TexT t)
{
  OrganismSelectionDialogPtr osd;
  CharPtr                    previous_val;
  Int4                       pos = -1;
  Int4                       prev_row;
  Boolean                    match = FALSE;

  osd = (OrganismSelectionDialogPtr) GetObjectExtra (t);
  if (osd == NULL) return;

  /* swap in the new text, then release the old copy */
  previous_val = osd->tax_name_val;
  osd->tax_name_val = SaveStringFromText (osd->tax_name_txt);
  if (osd->tax_name_val != NULL)
  {
    osd->tax_name_val [0] = TO_UPPER (osd->tax_name_val [0]);
  }
  MemFree (previous_val);

  GetOrgPosForText (osd->tax_name_val, &pos, &match);
  SetOffset (osd->org_list, 0, pos - 1);

  if (pos == osd->org_row)
  {
    /* same row as before; it only needs attention if it no longer matches */
    if (!match)
    {
      osd->org_row = -1;
      InvalDocRows (osd->org_list, pos, 1, 1);
    }
    return;
  }

  prev_row = osd->org_row;
  if (match)
  {
    osd->org_row = pos;
    SetTitle (osd->tax_name_txt, osd->tax_name_val);
  }
  else
  {
    osd->org_row = -1;
  }

  if (prev_row != -1)
  {
    InvalDocRows (osd->org_list, prev_row, 1, 1);
  }
  if (match)
  {
    InvalDocRows (osd->org_list, osd->org_row, 1, 1);
  }
}
13837 
/* Click callback for the organism list panel.
 *
 * Maps the click point to a list item.  When it lands on a valid item, that
 * item becomes the highlighted row; if this is a different row than before,
 * the old and new rows are invalidated, the item's name is written into the
 * taxonomy name field, and the saved copy of the name is refreshed.
 *
 * The previously saved name is released before it is replaced, so repeated
 * clicks do not leak one string each.
 */
static void SetOrganismDoc (DoC d, PoinT pt)
{
  Int2      item, row, prevrow;

  OrganismSelectionDialogPtr dlg;

  dlg = (OrganismSelectionDialogPtr) GetObjectExtra (d);
  if (dlg == NULL) return;

  MapDocPoint (d, pt, &item, &row, NULL, NULL);
  if (item > 0 && row > 0) {
    prevrow = dlg->org_row;
    dlg->org_row = item;
    if (item != prevrow)
    {
      if (prevrow != -1)
      {
        InvalDocRows (d, prevrow, 1, 1);
      }
      InvalDocRows (d, item, 1, 1);
      SetTitle (dlg->tax_name_txt, GetTextForOrgPos (item));
      /* tax_name_val owns its string; free the old one before replacing it */
      dlg->tax_name_val = MemFree (dlg->tax_name_val);
      dlg->tax_name_val = SaveStringFromText (dlg->tax_name_txt);
    }
  }
}
13866 
/* "To dialog" handler: data is treated as a taxonomy name string.  The name
 * field is set to it (or to the empty string when it has no text) and
 * SetOrganismText is then run on the field.
 */
static void DataToOrganismSelectionDialog (DialoG d, Pointer data)
{
  OrganismSelectionDialogPtr osd;

  osd = (OrganismSelectionDialogPtr) GetObjectExtra (d);
  if (osd == NULL) return;

  SetTitle (osd->tax_name_txt, StringHasNoText (data) ? "" : (CharPtr) data);
  SetOrganismText (osd->tax_name_txt);
}
13887 
OrganismSelectionDialogToData(DialoG d)13888 static Pointer OrganismSelectionDialogToData (DialoG d)
13889 {
13890   OrganismSelectionDialogPtr dlg;
13891 
13892   dlg = (OrganismSelectionDialogPtr) GetObjectExtra (d);
13893   if (dlg == NULL)
13894   {
13895     return NULL;
13896   }
13897   else
13898   {
13899     return SaveStringFromText (dlg->tax_name_txt);
13900   }
13901 }
13902 
/* Paragraph and column descriptors passed to AppendText for every row of the
 * organism list panel.  orgListCol.pixWidth is overwritten in
 * OrganismSelectionDialog with the panel's inset width before rows are added.
 * NOTE(review): the meaning of the individual initializer values is defined by
 * the ParData/ColData declarations, which are not visible here.
 */
static ParData orgListPar = {FALSE, FALSE, FALSE, FALSE, FALSE, 0, 0};
static ColData orgListCol = {0, 0, 160, 0, NULL, 'l', FALSE, FALSE, FALSE, FALSE, TRUE};
13905 
OrganismSelectionDialog(GrouP parent,CharPtr org_name)13906 extern DialoG OrganismSelectionDialog (GrouP parent, CharPtr org_name)
13907 {
13908   GrouP           grp;
13909   Int2            height;
13910   ValNodePtr      vnp;
13911   OrganismSelectionDialogPtr dlg;
13912   RecT                       r;
13913   OrgInfoPtr                 oip;
13914 
13915   dlg = (OrganismSelectionDialogPtr) MemNew (sizeof (OrganismSelectionDialogData));
13916 
13917   grp = HiddenGroup (parent, -1, 0, NULL);
13918   SetObjectExtra (grp, dlg, CleanupOrganismSelectionDialog);
13919   SetGroupSpacing (grp, 10, 10);
13920 
13921   dlg->dialog = (DialoG) grp;
13922   dlg->todialog = DataToOrganismSelectionDialog;
13923   dlg->fromdialog = OrganismSelectionDialogToData;
13924   dlg->dialogmessage = NULL;
13925   dlg->testdialog = NULL;
13926 
13927   LoadOrganismList ();
13928 
13929   dlg->tax_name_txt = DialogText (grp, "", 20, SetOrganismText);
13930   SetObjectExtra (dlg->tax_name_txt, dlg, NULL);
13931   dlg->org_row = -1;
13932   if (org_name != NULL)
13933   {
13934   	SetTitle (dlg->tax_name_txt, org_name);
13935   }
13936   SetOrganismText (dlg->tax_name_txt);
13937 
13938   SelectFont (programFont);
13939   height = LineHeight ();
13940   SelectFont (systemFont);
13941   dlg->org_list = DocumentPanel (grp, stdCharWidth * 25, height * 6);
13942   SetObjectExtra (dlg->org_list, dlg, NULL);
13943 
13944   ObjectRect (dlg->org_list, &r);
13945   InsetRect (&r, 4, 4);
13946   orgListCol.pixWidth = r.right - r.left;
13947 
13948   for (vnp = orglist; vnp != NULL; vnp = vnp->next)
13949   {
13950     oip = (OrgInfoPtr) vnp->data.ptrvalue;
13951     if (oip != NULL)
13952     {
13953   	  AppendText (dlg->org_list, oip->taxname, &orgListPar, &orgListCol, programFont);
13954     }
13955   }
13956   SetDocAutoAdjust (dlg->org_list, FALSE);
13957   SetDocProcs (dlg->org_list, SetOrganismDoc, NULL, NULL, NULL);
13958   SetDocShade (dlg->org_list, NULL, NULL, OrgNameHighlight, NULL);
13959   SetSlateChar ((SlatE) dlg->org_list, OrgNameOnKey);
13960 
13961   AlignObjects (ALIGN_CENTER, (HANDLE) dlg->tax_name_txt, (HANDLE) dlg->org_list, NULL);
13962   InvalDocument (dlg->org_list);
13963   return (DialoG) grp;
13964 }
13965 
/* Number of table rows that the multi-organism dialog shows at one time;
 * every per-row control array below has this many slots.
 */
#define NUM_ORGS_DISPLAYED 5
/* State for the multi-organism selection dialog.  The dialog shows a window
 * of NUM_ORGS_DISPLAYED consecutive rows of row_list; the scroll bar value is
 * the index of the first row shown.  In each row the cells are used in this
 * order: ID, tax name, location, genetic code (see DisplayPosition).
 */
typedef struct multiorganismselectiondialog
{
  DIALOG_MESSAGE_BLOCK
  DialoG       org_select_dlg;                       /* source of the name copied by the per-row copy buttons */
  TexT         tax_name_txt [NUM_ORGS_DISPLAYED];    /* editable tax name for each displayed row */
  ButtoN       copy_btn [NUM_ORGS_DISPLAYED];        /* per-row copy button */
  PrompT       id_txt [NUM_ORGS_DISPLAYED];          /* sequence ID label for each displayed row */
  DialoG       location_dlg [NUM_ORGS_DISPLAYED];    /* location selector for each displayed row */
  GrouP        gcode_grp [NUM_ORGS_DISPLAYED];       /* group shown or hidden together with the row's genetic code controls */
  ButtoN       gcode_btn [NUM_ORGS_DISPLAYED];       /* shown when the genetic code is determined from tax name and location */
  DialoG       gcode_dlg [NUM_ORGS_DISPLAYED];       /* shown when the genetic code is not determined that way */
  BaR          id_scroll;                            /* value = index in row_list of the first displayed row */
  ValNodePtr   row_list;                             /* dialog's own copy of the table; freed in the cleanup callback */
  Int4         num_vals;                             /* ValNodeLen (row_list) */
  ValNodePtr   geneticcodelist;                      /* list used to translate genetic code names and numbers */
} MultiOrganismSelectionDialogData, PNTR MultiOrganismSelectionDialogPtr;
13983 
/* Extra data attached to a per-row button of the multi-organism dialog. */
typedef struct multiorgcopybtn
{
  MultiOrganismSelectionDialogPtr dlg;   /* dialog the button belongs to */
  Int4                            pos;   /* MultiOrgCopy uses this as an index into the per-row control arrays */
} MultiOrgCopyBtnData, PNTR MultiOrgCopyBtnPtr;
13989 
/* Cleanup callback for the multi-organism dialog: frees the dialog's row
 * list and its genetic code list, then defers to StdCleanupExtraProc.
 */
static void CleanupMultiOrganismSelectionDialog (GraphiC g, VoidPtr data)
{
  MultiOrganismSelectionDialogPtr msd;

  msd = (MultiOrganismSelectionDialogPtr) GetObjectExtra (g);
  if (msd != NULL)
  {
    msd->row_list = FreeTableDisplayRowList (msd->row_list);
    msd->geneticcodelist = ValNodeFree (msd->geneticcodelist);
  }
  StdCleanupExtraProc (g, data);
}
14004 
GetTableDisplayCellValue(ValNodePtr row_list,Int4 row_num,Int4 col_num)14005 static CharPtr GetTableDisplayCellValue (ValNodePtr row_list, Int4 row_num, Int4 col_num)
14006 {
14007   ValNodePtr row_vnp, col_vnp;
14008   Int4       j;
14009 
14010   if (row_list == NULL)
14011   {
14012     return NULL;
14013   }
14014 
14015   for (row_vnp = row_list, j = 0;
14016        row_vnp != NULL && j < row_num;
14017        row_vnp = row_vnp->next, j++)
14018   {
14019   }
14020 
14021   if (row_vnp == NULL)
14022   {
14023     return NULL;
14024   }
14025 
14026   for (col_vnp = row_vnp->data.ptrvalue, j = 0;
14027        col_vnp != NULL && j < col_num;
14028        col_vnp = col_vnp->next, j++)
14029   {
14030   }
14031   if (col_vnp == NULL)
14032   {
14033     return NULL;
14034   }
14035   else
14036   {
14037     return col_vnp->data.ptrvalue;
14038   }
14039 }
14040 
14041 static void
UpdateTableDisplayCellValue(ValNodePtr row_list,Int4 row_num,Int4 col_num,CharPtr new_value)14042 UpdateTableDisplayCellValue
14043 (ValNodePtr row_list,
14044  Int4 row_num,
14045  Int4 col_num,
14046  CharPtr new_value)
14047 {
14048   ValNodePtr row_vnp, col_vnp;
14049   Int4       j;
14050 
14051   if (row_list == NULL)
14052   {
14053     return;
14054   }
14055 
14056   for (row_vnp = row_list, j = 0;
14057        row_vnp != NULL && j < row_num;
14058        row_vnp = row_vnp->next, j++)
14059   {
14060   }
14061 
14062   if (row_vnp == NULL)
14063   {
14064     return;
14065   }
14066 
14067   for (col_vnp = row_vnp->data.ptrvalue, j = 0;
14068        col_vnp != NULL && j < col_num;
14069        col_vnp = col_vnp->next, j++)
14070   {
14071   }
14072   if (col_vnp == NULL)
14073   {
14074     return;
14075   }
14076   else
14077   {
14078     col_vnp->data.ptrvalue = StringSave (new_value);
14079   }
14080 }
14081 
14082 static void
UpdateGeneticCodePosition(MultiOrganismSelectionDialogPtr dlg,Int4 row_num,CharPtr taxname,CharPtr location)14083 UpdateGeneticCodePosition
14084 (MultiOrganismSelectionDialogPtr dlg,
14085  Int4    row_num,
14086  CharPtr taxname,
14087  CharPtr location)
14088 {
14089   ValNode    vn;
14090   Int4       gcode = -1;
14091   Int4       offset;
14092   CharPtr    gcode_name;
14093 
14094   if (dlg == NULL || row_num < 0 || row_num >= NUM_ORGS_DISPLAYED)
14095   {
14096     return;
14097   }
14098 
14099   gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
14100 
14101   if (gcode < 0)
14102   {
14103     offset = GetBarValue (dlg->id_scroll);
14104     gcode_name = GetTableDisplayCellValue (dlg->row_list,
14105                                            offset + row_num, 3);
14106 
14107     vn.choice = GeneticCodeFromStringAndList (gcode_name, dlg->geneticcodelist);
14108     vn.next = NULL;
14109     vn.data.ptrvalue = gcode_name;
14110     PointerToDialog (dlg->gcode_dlg [row_num], &vn);
14111     Hide (dlg->gcode_btn [row_num]);
14112     Show (dlg->gcode_dlg [row_num]);
14113   }
14114   else
14115   {
14116     offset = GetBarValue (dlg->id_scroll);
14117     gcode_name = GeneticCodeStringFromIntAndList (gcode, dlg->geneticcodelist);
14118     UpdateTableDisplayCellValue (dlg->row_list, offset + row_num, 3, gcode_name);
14119     SetTitle (dlg->gcode_btn [row_num], gcode_name);
14120     Hide (dlg->gcode_dlg [row_num]);
14121     Show (dlg->gcode_btn [row_num]);
14122   }
14123 }
14124 
/* Loads the dialog's per-row controls from the table, starting at table row
 * pos.  Up to NUM_ORGS_DISPLAYED rows are shown; the controls of any slots
 * left over are hidden.
 *
 * For each shown row the cells are read in order: ID, tax name, location,
 * genetic code.  An empty location is presented as genomic.  The genetic
 * code is shown on the button when GetGeneticCodeForTaxNameAndLocation yields
 * a non-negative code, and through the selector (preset from the table cell)
 * otherwise.
 */
static void DisplayPosition (MultiOrganismSelectionDialogPtr dlg, Int4 pos)
{
  ValNodePtr row_vnp;
  ValNodePtr cell;
  ValNode    vn;
  CharPtr    taxname;
  CharPtr    location;
  Int4       to_skip;
  Int4       slot;
  Int4       gcode;

  if (dlg == NULL) return;

  /* advance to the first row to be displayed */
  row_vnp = dlg->row_list;
  for (to_skip = pos; to_skip > 0 && row_vnp != NULL; to_skip--)
  {
    row_vnp = row_vnp->next;
  }

  slot = 0;
  while (slot < NUM_ORGS_DISPLAYED && row_vnp != NULL)
  {
    /* ID */
    cell = row_vnp->data.ptrvalue;
    SetTitle (dlg->id_txt [slot], cell->data.ptrvalue);

    /* tax name */
    cell = cell->next;
    taxname = cell->data.ptrvalue;
    SetTitle (dlg->tax_name_txt [slot], taxname);

    /* location */
    cell = cell->next;
    location = cell->data.ptrvalue;
    vn.data.ptrvalue = NULL;
    vn.next = NULL;
    if (StringHasNoText (location))
    {
      vn.choice = Source_location_genomic;
    }
    else
    {
      vn.choice = SrcLocFromGenome (GenomeFromLocName (location));
    }
    PointerToDialog (dlg->location_dlg [slot], &vn);

    Show (dlg->copy_btn [slot]);
    Show (dlg->id_txt [slot]);
    Show (dlg->tax_name_txt [slot]);
    Show (dlg->location_dlg [slot]);

    /* genetic code */
    cell = cell->next;
    Show (dlg->gcode_grp [slot]);

    gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
    if (gcode >= 0)
    {
      SetTitle (dlg->gcode_btn [slot],
                GeneticCodeStringFromIntAndList (gcode, dlg->geneticcodelist));
      Hide (dlg->gcode_dlg [slot]);
      Show (dlg->gcode_btn [slot]);
    }
    else
    {
      vn.choice = GeneticCodeFromStringAndList (cell->data.ptrvalue, dlg->geneticcodelist);
      vn.next = NULL;
      vn.data.ptrvalue = cell->data.ptrvalue;
      PointerToDialog (dlg->gcode_dlg [slot], &vn);
      Hide (dlg->gcode_btn [slot]);
      Show (dlg->gcode_dlg [slot]);
    }

    slot++;
    row_vnp = row_vnp->next;
  }

  /* hide the controls of slots that have no row behind them */
  for (; slot < NUM_ORGS_DISPLAYED; slot++)
  {
    Hide (dlg->copy_btn [slot]);
    Hide (dlg->id_txt [slot]);
    Hide (dlg->tax_name_txt [slot]);
    Hide (dlg->location_dlg [slot]);
    Hide (dlg->gcode_grp [slot]);
  }
}
14208 
/* Stores the values currently shown in the dialog's per-row controls back
 * into the table, starting at table row pos, for up to NUM_ORGS_DISPLAYED
 * rows.
 *
 * For each row the tax name, location and genetic code cells are replaced
 * (the ID cell is left alone).  The location string is lower-cased.  The
 * strings obtained from the location and genetic code selectors are moved
 * into the table: only the ValNode that carried them is freed.
 */
static void CollectPositionValues (MultiOrganismSelectionDialogPtr dlg, Int4 pos)
{
  ValNodePtr row_vnp;
  ValNodePtr cell;
  ValNodePtr picked;
  CharPtr    taxname;
  CharPtr    location;
  CharPtr    gcode_name;
  Int4       to_skip;
  Int4       slot;
  Int4       gcode;

  if (dlg == NULL) return;

  /* advance to the first displayed row */
  row_vnp = dlg->row_list;
  for (to_skip = pos; to_skip > 0 && row_vnp != NULL; to_skip--)
  {
    row_vnp = row_vnp->next;
  }

  slot = 0;
  while (slot < NUM_ORGS_DISPLAYED && row_vnp != NULL)
  {
    /* the first cell is the ID, which cannot be edited */
    cell = row_vnp->data.ptrvalue;
    cell = cell->next;

    /* tax name */
    cell->data.ptrvalue = MemFree (cell->data.ptrvalue);
    cell->data.ptrvalue = SaveStringFromText (dlg->tax_name_txt [slot]);
    taxname = cell->data.ptrvalue;
    cell = cell->next;

    /* location: take over the selector's string */
    picked = DialogToPointer (dlg->location_dlg [slot]);
    location = (picked == NULL) ? NULL : picked->data.ptrvalue;
    StringToLower (location);
    picked = ValNodeFree (picked);
    cell->data.ptrvalue = MemFree (cell->data.ptrvalue);
    cell->data.ptrvalue = location;
    cell = cell->next;

    /* genetic code */
    gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
    if (gcode >= 0)
    {
      gcode_name = StringSave (GeneticCodeStringFromIntAndList (gcode, dlg->geneticcodelist));
    }
    else
    {
      /* not determined by tax name and location: use the selector's string */
      picked = DialogToPointer (dlg->gcode_dlg [slot]);
      gcode_name = (picked == NULL) ? NULL : picked->data.ptrvalue;
      ValNodeFree (picked);
    }
    cell->data.ptrvalue = MemFree (cell->data.ptrvalue);
    cell->data.ptrvalue = gcode_name;

    slot++;
    row_vnp = row_vnp->next;
  }
}
14281 
/* Scroll bar callback: saves the values of the rows shown at the old scroll
 * position into the table, then loads the controls for the new position.
 */
static void MultiOrgScroll (BaR sb, GraphiC g, Int4 newval, Int4 oldval)
{
  MultiOrganismSelectionDialogPtr msd;

  msd = (MultiOrganismSelectionDialogPtr) GetObjectExtra (sb);
  if (msd != NULL)
  {
    CollectPositionValues (msd, oldval);
    DisplayPosition (msd, newval);
  }
}
14298 
/* Per-row copy button callback: copies the name currently held by the
 * organism selection control into this row's tax name field.  If the row's
 * location selector returns a value, the row's genetic code controls are
 * refreshed for the new name and that location.
 */
static void MultiOrgCopy (ButtoN b)
{
  MultiOrgCopyBtnPtr              mp;
  MultiOrganismSelectionDialogPtr msd;
  ValNodePtr                      loc_vnp;
  CharPtr                         tax_name;
  Int4                            slot;

  mp = (MultiOrgCopyBtnPtr) GetObjectExtra (b);
  if (mp == NULL || mp->dlg == NULL) return;
  if (mp->pos < 0 || mp->pos >= NUM_ORGS_DISPLAYED) return;

  msd = mp->dlg;
  slot = mp->pos;

  tax_name = (CharPtr) DialogToPointer (msd->org_select_dlg);
  SetTitle (msd->tax_name_txt [slot], tax_name);

  /* the location string lives in loc_vnp and is freed together with it */
  loc_vnp = DialogToPointer (msd->location_dlg [slot]);
  if (loc_vnp != NULL)
  {
    UpdateGeneticCodePosition (msd, slot, tax_name, loc_vnp->data.ptrvalue);
    loc_vnp = ValNodeFreeData (loc_vnp);
  }

  tax_name = MemFree (tax_name);
}
14329 
/* "To dialog" handler: userdata is a table row list.  The dialog discards
 * its current table, keeps a copy of the one supplied, resets the scroll bar
 * and displays the rows from the top.
 */
static void DataToMultiOrganismSelectionDialog (DialoG d, Pointer userdata)
{
  MultiOrganismSelectionDialogPtr msd;

  msd = (MultiOrganismSelectionDialogPtr) GetObjectExtra (d);
  if (msd == NULL) return;

  msd->row_list = FreeTableDisplayRowList (msd->row_list);
  msd->row_list = CopyTableDisplayRowList ((ValNodePtr) userdata);
  msd->num_vals = ValNodeLen (msd->row_list);

  /* NOTE(review): the maximum passed here is negative when the table has
   * fewer than NUM_ORGS_DISPLAYED rows - confirm CorrectBarMax tolerates that.
   */
  CorrectBarMax (msd->id_scroll, msd->num_vals - NUM_ORGS_DISPLAYED);
  CorrectBarValue (msd->id_scroll, 0);
  DisplayPosition (msd, 0);
}
14350 
MultiOrganismSelectionDialogToData(DialoG d)14351 static Pointer MultiOrganismSelectionDialogToData (DialoG d)
14352 {
14353   MultiOrganismSelectionDialogPtr dlg;
14354   Int4                            pos;
14355   ValNodePtr                      row_list;
14356 
14357   dlg = (MultiOrganismSelectionDialogPtr) GetObjectExtra (d);
14358   if (dlg == NULL)
14359   {
14360     return NULL;
14361   }
14362 
14363   pos = GetBarValue (dlg->id_scroll);
14364   CollectPositionValues (dlg, pos);
14365   row_list = CopyTableDisplayRowList (dlg->row_list);
14366   return row_list;
14367 }
14368 
/* Sets the cell in the given column (counted from 0) of every row to a copy
 * of new_value, freeing the string previously held there.  Rows that do not
 * have that many columns are left untouched.
 */
static void SetRowListColumn (ValNodePtr row_list, Int4 column, CharPtr new_value)
{
  ValNodePtr row;
  ValNodePtr cell;
  Int4       remaining;

  for (row = row_list; row != NULL; row = row->next)
  {
    cell = row->data.ptrvalue;
    remaining = column;
    while (cell != NULL && remaining > 0)
    {
      cell = cell->next;
      remaining--;
    }
    if (cell == NULL)
    {
      continue;
    }
    cell->data.ptrvalue = MemFree (cell->data.ptrvalue);
    cell->data.ptrvalue = StringSave (new_value);
  }
}
14388 
/* Forward declarations: both functions are called by the button callbacks
 * that follow (for example SetAllGeneticCodes) before their definitions
 * appear in the file.
 */
static void
ApplyOrgModColumnOrCell
(CharPtr            mod_name,
 CharPtr            suggested_value,
 Int4               row,
 SourceAssistantPtr sap,
 SeqEntryPtr        seq_list,
 ValNodePtr         row_list,
 Int4               row_list_column,
 Int2               seqPackage);

static Boolean
ContinueWithAutopopulatedGeneticCodes
(SeqEntryPtr        seq_list,
 SourceAssistantPtr sap,
 ValNodePtr         row_list,
 Int4               affected_row);
14406 
/* Button callback: saves the displayed values into the table and, if
 * ContinueWithAutopopulatedGeneticCodes returns TRUE, runs
 * ApplyOrgModColumnOrCell for "genetic_code" on column 3 with the suggested
 * value "Standard" and redisplays the current scroll position.
 */
static void SetAllGeneticCodes (ButtoN b)
{
  MultiOrganismSelectionDialogPtr msd;
  Int4                            first_row;

  msd = (MultiOrganismSelectionDialogPtr) GetObjectExtra (b);
  if (msd == NULL) return;

  first_row = GetBarValue (msd->id_scroll);
  CollectPositionValues (msd, first_row);

  if (!ContinueWithAutopopulatedGeneticCodes (NULL, NULL, msd->row_list, -1))
  {
    return;
  }

  ApplyOrgModColumnOrCell ("genetic_code", "Standard", -1, NULL, NULL, msd->row_list, 3, 0);
  DisplayPosition (msd, first_row);
}
14427 
/* Button callback: saves the displayed values into the table, runs
 * ApplyOrgModColumnOrCell for "gencode_comment" on column 4 of the row given
 * by the button's pos (suggesting the value already in that cell), then
 * redisplays the current scroll position.
 */
static void AddGcodeCommentBtn (ButtoN b)
{
  MultiOrgCopyBtnPtr bp;
  CharPtr            current_comment;
  Int4               first_row;

  bp = (MultiOrgCopyBtnPtr) GetObjectExtra (b);
  if (bp == NULL || bp->dlg == NULL) return;

  /* bring the table up to date with what is on screen */
  first_row = GetBarValue (bp->dlg->id_scroll);
  CollectPositionValues (bp->dlg, first_row);

  /* NOTE(review): bp->pos is used as a table row here without adding the
   * scroll position, unlike UpdateGeneticCodePosition - confirm which kind of
   * index this button's pos holds.
   */
  current_comment = GetTableDisplayCellValue (bp->dlg->row_list, bp->pos, 4);
  ApplyOrgModColumnOrCell ("gencode_comment", current_comment, bp->pos, NULL, NULL,
                           bp->dlg->row_list, 4, 0);

  /* reload the controls from the table */
  DisplayPosition (bp->dlg, first_row);
}
14450 
/* Callback for the "Location" header button.
 * Collects the current control values, applies the "location" modifier
 * (suggested value "genomic", row argument -1) to column 2 of the row
 * list, and redisplays at the same scroll position.
 */
static void SetAllLocations (ButtoN b)
{
  MultiOrganismSelectionDialogPtr org_dlg;
  Int4                            bar_val;

  org_dlg = (MultiOrganismSelectionDialogPtr) GetObjectExtra (b);
  if (org_dlg != NULL)
  {
    /* pick up whatever is currently in the controls before editing the table */
    bar_val = GetBarValue (org_dlg->id_scroll);
    CollectPositionValues (org_dlg, bar_val);

    ApplyOrgModColumnOrCell ("location", "genomic", -1, NULL, NULL,
                             org_dlg->row_list, 2, 0);
    DisplayPosition (org_dlg, bar_val);
  }
}
14469 
/* Callback for the "Organism" header button.
 * Collects the current control values, applies the "organism" modifier
 * (no suggested value, row argument -1) to column 1 of the row list,
 * and redisplays at the same scroll position.
 */
static void SetAllOrganisms (ButtoN b)
{
  MultiOrganismSelectionDialogPtr org_dlg;
  Int4                            bar_val;

  org_dlg = (MultiOrganismSelectionDialogPtr) GetObjectExtra (b);
  if (org_dlg != NULL)
  {
    /* pick up whatever is currently in the controls before editing the table */
    bar_val = GetBarValue (org_dlg->id_scroll);
    CollectPositionValues (org_dlg, bar_val);

    ApplyOrgModColumnOrCell ("organism", NULL, -1, NULL, NULL,
                             org_dlg->row_list, 1, 0);
    DisplayPosition (org_dlg, bar_val);
  }
}
14487 
/* Called when the taxonomy name text or the location selector of one
 * displayed row changes.  Reads both values for row bp->pos and passes
 * them to UpdateGeneticCodePosition; a missing location is passed as NULL.
 *
 * bp - per-row data (owning dialog and row position); nothing is done
 *      when bp or bp->dlg is NULL.
 */
static void ChangeLocationOrTaxName (MultiOrgCopyBtnPtr bp)
{
  ValNodePtr           val_vnp;
  CharPtr              tax_name = NULL, location = NULL;

  if (bp == NULL || bp->dlg == NULL)
  {
    return;
  }

  /* get taxname for this row */
  tax_name = SaveStringFromText (bp->dlg->tax_name_txt [bp->pos]);

  /* get location for this row; the selector may return no node at all */
  val_vnp = DialogToPointer (bp->dlg->location_dlg [bp->pos]);
  if (val_vnp != NULL)
  {
    location = val_vnp->data.ptrvalue;
  }

  UpdateGeneticCodePosition (bp->dlg, bp->pos, tax_name, location);

  /* location is owned by val_vnp and is freed with it; the node is
   * released even when it carried no string so that it does not leak
   */
  val_vnp = ValNodeFreeData (val_vnp);
  location = NULL;

  tax_name = MemFree (tax_name);
}
14519 
/* Change callback installed on each row's location selector.
 * userdata is the row's MultiOrgCopyBtnPtr; the work is delegated to
 * ChangeLocationOrTaxName.
 */
static void ChangeLocationPopup (Pointer userdata)
{
  MultiOrgCopyBtnPtr row_btn = (MultiOrgCopyBtnPtr) userdata;

  if (row_btn != NULL)
  {
    ChangeLocationOrTaxName (row_btn);
  }
}
14532 
/* Callback for a row's taxonomy name text control.
 * Sends the current text to the organism selection dialog and then
 * runs ChangeLocationOrTaxName for the row.
 */
static void MultiOrgText (TexT t)
{
  MultiOrgCopyBtnPtr row_btn;
  CharPtr            typed_name;

  row_btn = (MultiOrgCopyBtnPtr) GetObjectExtra (t);
  if (row_btn != NULL)
  {
    typed_name = SaveStringFromText (t);
    PointerToDialog (row_btn->dlg->org_select_dlg, typed_name);
    MemFree (typed_name);

    ChangeLocationOrTaxName (row_btn);
  }
}
14548 
/* Change callback installed on each row's genetic code selector.
 * Stores the selected genetic code name (or NULL when the selector
 * returns nothing) in column 3 of the table row that corresponds to the
 * displayed row, i.e. the row position plus the scroll bar value.
 */
static void ChangeGeneticCodePopup (Pointer userdata)
{
  MultiOrgCopyBtnPtr   row_btn;
  ValNodePtr           choice_vnp;
  CharPtr              code_name = NULL;
  Int4                 first_row;

  row_btn = (MultiOrgCopyBtnPtr) userdata;
  if (row_btn == NULL)
  {
    return;
  }

  choice_vnp = DialogToPointer (row_btn->dlg->gcode_dlg [row_btn->pos]);
  if (choice_vnp != NULL)
  {
    code_name = choice_vnp->data.ptrvalue;
  }

  first_row = GetBarValue (row_btn->dlg->id_scroll);
  UpdateTableDisplayCellValue (row_btn->dlg->row_list, row_btn->pos + first_row,
                               3, code_name);

  /* code_name belongs to choice_vnp and goes away with it */
  ValNodeFreeData (choice_vnp);
}
14579 
MultiOrganismSelectionDialog(GrouP parent)14580 static DialoG MultiOrganismSelectionDialog (GrouP parent)
14581 {
14582   MultiOrganismSelectionDialogPtr dlg;
14583   GrouP                           grp, id_grp, scroll_grp;
14584   Int4                            k;
14585   MultiOrgCopyBtnPtr              bp;
14586   RecT                            r1, r2, r3;
14587   ValNodePtr                      gencodelist;
14588   ButtoN                          b;
14589   PrompT                          p1;
14590 #ifdef WIN_MAC
14591   Int2                            wid = 12;
14592 #else
14593   Int2                            wid = 20;
14594 #endif
14595 
14596   dlg = (MultiOrganismSelectionDialogPtr) MemNew (sizeof (MultiOrganismSelectionDialogData));
14597 
14598   grp = HiddenGroup (parent, -1, 0, NULL);
14599   SetObjectExtra (grp, dlg, CleanupMultiOrganismSelectionDialog);
14600   SetGroupSpacing (grp, 10, 10);
14601 
14602   dlg->dialog = (DialoG) grp;
14603   dlg->todialog = DataToMultiOrganismSelectionDialog;
14604   dlg->fromdialog = MultiOrganismSelectionDialogToData;
14605   dlg->dialogmessage = NULL;
14606   dlg->testdialog = NULL;
14607 
14608   dlg->num_vals = 0;
14609   dlg->row_list = NULL;
14610 
14611   dlg->geneticcodelist = GetGeneticCodeValNodeList ();
14612 
14613   dlg->org_select_dlg = OrganismSelectionDialog (grp, "");
14614   p1 = StaticPrompt (grp, "You can use the Copy buttons to populate the organism field from the selector above.",
14615                      0, 0, programFont, 'l');
14616   scroll_grp = NormalGroup (grp, 2, 0, "", NULL, NULL);
14617 
14618   id_grp = HiddenGroup (scroll_grp, 5, 0, NULL);
14619   SetGroupSpacing (id_grp, 10, 10);
14620   StaticPrompt (id_grp, "SeqID", 7 * stdCharWidth, 0, programFont, 'l');
14621   StaticPrompt (id_grp, "Copy", 0, 0, programFont, 'l');
14622   b = PushButton (id_grp, "Organism", SetAllOrganisms);
14623   SetObjectExtra (b, dlg, NULL);
14624   b = PushButton (id_grp, "Location", SetAllLocations);
14625   SetObjectExtra (b, dlg, NULL);
14626   b = PushButton (id_grp, "Genetic Code", SetAllGeneticCodes);
14627   SetObjectExtra (b, dlg, NULL);
14628 
14629   for (k = 0; k < NUM_ORGS_DISPLAYED; k++)
14630   {
14631     /* prompt with sequence ID */
14632      dlg->id_txt [k] = StaticPrompt (id_grp, "", 7 * stdCharWidth, 0, programFont, 'l');
14633     /* button for copying from organism selector */
14634     dlg->copy_btn [k] = PushButton (id_grp, "->", MultiOrgCopy);
14635     bp = (MultiOrgCopyBtnPtr) MemNew (sizeof (MultiOrgCopyBtnData));
14636     if (bp != NULL)
14637     {
14638       bp->dlg = dlg;
14639       bp->pos = k;
14640     }
14641     SetObjectExtra (dlg->copy_btn [k], bp, StdCleanupExtraProc);
14642     dlg->tax_name_txt [k] = DialogText (id_grp, "", wid, MultiOrgText);
14643     SetTextSelect (dlg->tax_name_txt [k], MultiOrgText, NULL);
14644     SetObjectExtra (dlg->tax_name_txt [k], bp, NULL);
14645 
14646     dlg->location_dlg [k] = ValNodeSelectionDialogExEx (id_grp,
14647                                                  GetLocListForBioSource (NULL), 6,
14648                                                  ValNodeStringName,
14649                                                  ValNodeSimpleDataFree,
14650                                                  ValNodeStringCopy,
14651                                                  ValNodeChoiceMatch,
14652                                                  "location code",
14653                                                   ChangeLocationPopup, bp, FALSE,
14654                                                   FALSE, TRUE, NULL);
14655 
14656     dlg->gcode_grp [k] = HiddenGroup (id_grp, 0, 0, NULL);
14657     dlg->gcode_btn [k] = PushButton (dlg->gcode_grp [k],
14658                                      "                                                ",
14659                                      AddGcodeCommentBtn);
14660     SetObjectExtra (dlg->gcode_btn [k], bp, NULL);
14661     Hide (dlg->gcode_btn [k]);
14662     /* NOTE - need separate list because ValNodeSelectionDialog will free this one */
14663     gencodelist = GetGeneticCodeValNodeList ();
14664     dlg->gcode_dlg [k] = ValNodeSelectionDialog (dlg->gcode_grp [k], gencodelist, 6,
14665                                                  ValNodeStringName,
14666                                                  ValNodeSimpleDataFree,
14667                                                  ValNodeStringCopy,
14668                                                  ValNodeChoiceMatch,
14669                                                  "genetic code",
14670                                                   ChangeGeneticCodePopup, bp, FALSE);
14671     Hide (dlg->gcode_dlg [k]);
14672     AlignObjects (ALIGN_LEFT, (HANDLE) dlg->gcode_btn [k], (HANDLE) dlg->gcode_dlg [k], NULL);
14673   }
14674 
14675   dlg->id_scroll = ScrollBar4 (scroll_grp, 0, 10, MultiOrgScroll);
14676   SetObjectExtra (dlg->id_scroll, dlg, NULL);
14677 
14678   AlignObjects (ALIGN_CENTER, (HANDLE) dlg->org_select_dlg, (HANDLE) p1, (HANDLE) scroll_grp, NULL);
14679 
14680   ObjectRect (dlg->copy_btn [0], &r1);
14681   ObjectRect (dlg->copy_btn [4], &r2);
14682   ObjectRect (dlg->id_scroll, &r3);
14683   r3.top = r1.top;
14684   r3.bottom = r2.bottom;
14685   SetPosition (dlg->id_scroll, &r3);
14686 
14687   return (DialoG) grp;
14688 }
14689 
14690 
/* Button callback: asks for an output file name and writes the source
 * assistant table (as built by PrepareSourceAssistantTableData) to it.
 * Does nothing if the user gives no file name; reports an error if the
 * file cannot be opened.
 */
static void SourceAssistantExport (ButtoN b)
{
  SourceAssistantPtr sap;
  ValNodePtr         table_rows;
  FILE               *out;
  Char               path [PATH_MAX];

  sap = (SourceAssistantPtr) GetObjectExtra (b);
  if (sap == NULL)
  {
    return;
  }
  if (! GetOutputFileName (path, sizeof (path), NULL))
  {
    return;
  }

  out = FileOpen (path, "w");
  if (out == NULL)
  {
    Message (MSG_ERROR, "Unable to open %s", path);
    return;
  }

  table_rows = PrepareSourceAssistantTableData (sap, NULL);
  PrintTableDisplayRowListToFile (table_rows, out);
  FreeTableDisplayRowList (table_rows);

  FileClose (out);
}
14715 
/* Button callback: marks the source assistant as finished and not
 * cancelled.
 */
static void SourceAssistantOk (ButtoN b)
{
  SourceAssistantPtr sap = (SourceAssistantPtr) GetObjectExtra (b);

  if (sap != NULL)
  {
    sap->cancelled = FALSE;
    sap->done = TRUE;
  }
}
14725 
/* Button callback: after the user confirms, marks the source assistant
 * as finished and cancelled.  Answering "no" leaves it untouched.
 */
static void SourceAssistantCancel (ButtoN b)
{
  SourceAssistantPtr sap = (SourceAssistantPtr) GetObjectExtra (b);

  if (sap == NULL)
  {
    return;
  }

  if (Message (MSG_YN, "You will lose your changes if you cancel.  Are you sure?")
      != ANS_NO)
  {
    sap->cancelled = TRUE;
    sap->done = TRUE;
  }
}
14741 
/* Walks the assistant's deflines in order and returns the first value
 * found for mod_name that has text.  Values without text are freed and
 * skipped.  Returns NULL when sap is NULL or no defline has such a value.
 */
static CharPtr GetFirstDeflineValue (SourceAssistantPtr sap, CharPtr mod_name)
{
  Int4         defline_num;
  CharPtr      candidate;

  if (sap == NULL)
  {
    return NULL;
  }

  for (defline_num = 0; defline_num < sap->num_deflines; defline_num++)
  {
    candidate = FindValueFromPairInDefline (mod_name, sap->defline_list [defline_num]);
    if (! StringHasNoText (candidate))
    {
      return candidate;
    }
    /* empty or missing value: discard it and keep looking */
    candidate = MemFree (candidate);
  }
  return NULL;
}
14762 
14763 static CharPtr
GetTagListValueEx(TagListPtr tlp,Int4 seq_num,Int4 col_num)14764 GetTagListValueEx (TagListPtr tlp, Int4 seq_num, Int4 col_num)
14765 {
14766   Int4         seq_pos;
14767   CharPtr      str = NULL;
14768   ValNodePtr   vnp;
14769 
14770   if (tlp == NULL) return NULL;
14771 
14772   for (vnp = tlp->vnp, seq_pos = 0;
14773        vnp != NULL && seq_pos != seq_num;
14774        vnp = vnp->next, seq_pos++)
14775   {
14776 
14777   }
14778   if (vnp != NULL)
14779   {
14780     str = ExtractTagListColumn ((CharPtr) vnp->data.ptrvalue, col_num);
14781   }
14782   return str;
14783 }
14784 
/* Replaces one column of the row-th entry (counting from 0) of the tag
 * list with new_value, using ReplaceTagListColumn.  If that call hands
 * back a different string, the old entry string is freed and the new one
 * stored.  Nothing happens when tlp is NULL or the row does not exist.
 */
static void SetTagListValue (TagListPtr tlp, Int4 row, Int4 column, CharPtr new_value)
{
  ValNodePtr entry;
  Int4       entry_num;
  CharPtr    replaced;

  if (tlp == NULL)
  {
    return;
  }

  /* step to the requested entry */
  entry = tlp->vnp;
  for (entry_num = 0; entry != NULL && entry_num != row; entry_num++)
  {
    entry = entry->next;
  }

  if (entry != NULL)
  {
    replaced = ReplaceTagListColumn (entry->data.ptrvalue, new_value, column);
    if (replaced != entry->data.ptrvalue)
    {
      MemFree (entry->data.ptrvalue);
      entry->data.ptrvalue = replaced;
    }
  }
}
14813 
/* Replaces the given column with new_value in every entry of the tag
 * list, using ReplaceTagListColumn.  Whenever that call hands back a
 * different string, the old entry string is freed and the new one stored.
 *
 * tlp       - tag list to modify; nothing is done when it is NULL
 * column    - column to replace in each entry
 * new_value - value to store in that column
 */
static void SetTagListColumnValue (TagListPtr tlp, Int4 column, CharPtr new_value)
{
  ValNodePtr vnp;
  CharPtr    new_val;

  /* same guard as SetTagListValue and GetTagListValueEx */
  if (tlp == NULL)
  {
    return;
  }

  for (vnp = tlp->vnp;
       vnp != NULL;
       vnp = vnp->next)
  {
    new_val = ReplaceTagListColumn (vnp->data.ptrvalue, new_value, column);
    if (new_val != vnp->data.ptrvalue)
    {
      vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
      vnp->data.ptrvalue = new_val;
    }
  }
}
14831 
/* Rebuilds the source assistant table, loads it into the organism
 * modifier dialog and refreshes the modifier list document.
 *
 * The modifier list is taken from the first table row with its first
 * column skipped; when no defline contains an "organism" value the
 * second column is skipped as well.  If any multi-value flag was reported
 * while building the table, the multi-value explanation is appended to
 * the document.
 *
 * sap - source assistant data; nothing is done when it is NULL.
 */
static void UpdateOrgModDlg (SourceAssistantPtr sap)
{
  Int4               j;
  Boolean            found_organism = FALSE;
  Boolean            multi_found = FALSE;
  ValNodePtr         row_list, header_list = NULL;

  if (sap == NULL)
  {
    return;
  }
  row_list = PrepareSourceAssistantTableData (sap, &multi_found);
  PointerToDialog (sap->orgmod_dlg, row_list);

  if (row_list == NULL)
  {
    SetModifierList (sap->mod_doc, NULL);
    return;
  }

  for (j = 0; j < sap->num_deflines && !found_organism; j++)
  {
    if (FindValuePairInDefLine ("organism", sap->defline_list[j], NULL) != NULL)
    {
      found_organism = TRUE;
    }
  }

  /* skip the leading column(s) of the header row, taking care not to
   * step past the end of a header that is shorter than expected
   */
  header_list = row_list->data.ptrvalue;
  if (header_list != NULL)
  {
    header_list = header_list->next;
  }
  if (!found_organism && header_list != NULL)
  {
    header_list = header_list->next;
  }
  SetModifierList (sap->mod_doc, header_list);
  row_list = FreeTableDisplayRowList (row_list);

  if (multi_found)
  {
    AppendText (sap->mod_doc, multival_explanation, NULL, NULL, programFont);
  }
}
14873 
BuildGeneticCodeEnum(void)14874 static EnumFieldAssocPtr BuildGeneticCodeEnum (void)
14875 {
14876   ValNodePtr        gencodelist = NULL;
14877   Int4              num_gencodes = 0, index;
14878   EnumFieldAssocPtr gencode_alist = NULL;
14879   ValNodePtr        vnp;
14880 
14881   gencodelist = GetGeneticCodeValNodeList ();
14882   num_gencodes = ValNodeLen (gencodelist);
14883   gencode_alist = (EnumFieldAssocPtr) MemNew ((num_gencodes + 2) * sizeof (EnumFieldAssoc));
14884 
14885   gencode_alist [0].name = StringSave (" ");
14886   gencode_alist [0].value = 0;
14887 
14888   for (index = 1, vnp = gencodelist;
14889        index <= num_gencodes && vnp != NULL;
14890        index++, vnp = vnp->next)
14891   {
14892     gencode_alist [index].name = StringSave (vnp->data.ptrvalue);
14893     gencode_alist [index].value = vnp->choice;
14894   }
14895 
14896   gencode_alist [index].name = NULL;
14897   ValNodeFreeData (gencodelist);
14898   return gencode_alist;
14899 }
14900 
/* Frees an array built by BuildGeneticCodeEnum: every name up to the
 * first NULL name is freed, then the array itself.  Accepts NULL.
 * Returns the result of MemFree on the array so that callers can clear
 * their pointer with the return value.
 */
static EnumFieldAssocPtr FreeGeneticCodeEnum (EnumFieldAssocPtr gcode_alist)
{
  EnumFieldAssocPtr entry;

  if (gcode_alist != NULL)
  {
    entry = gcode_alist;
    while (entry->name != NULL)
    {
      entry->name = MemFree (entry->name);
      entry++;
    }
  }
  return (EnumFieldAssocPtr) MemFree (gcode_alist);
}
14912 
/* Creates a one-column group under parent holding the instruction text
 * for entering organism names, and returns that group.
 */
static GrouP MakeSourceInstructionGroup (GrouP parent)
{
  GrouP text_grp = HiddenGroup (parent, 1, 0, NULL);

  StaticPrompt (text_grp,
                "Scientific names should not be abbreviated.",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "(use 'Drosophila melanogaster' instead of 'D. melanogaster')",
                0, 0, programFont, 'l');

  return text_grp;
}
14924 
/* Creates a one-column group under parent holding the instruction text
 * for choosing a genetic code, and returns that group.
 */
static GrouP MakeGeneticCodeInstructionGroup (GrouP parent)
{
  GrouP text_grp = HiddenGroup (parent, 1, 0, NULL);

  StaticPrompt (text_grp,
                "Please choose the translation table for your sequence.",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "Examples: Standard, Bacterial, Archaeal and Plant Plastid, Vertebrate Mitochondrial",
                0, 0, programFont, 'l');

  return text_grp;
}
14937 
/* Creates a one-column group under parent holding the instruction text
 * for the genetic code comment, and returns that group.
 */
static GrouP MakeGeneticCodeCommentInstructionGroup (GrouP parent)
{
  GrouP text_grp = HiddenGroup (parent, 1, 0, NULL);

  StaticPrompt (text_grp,
                "When a genetic code is determined automatically from the organism name and location,",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "you cannot edit the genetic code directly.  You may provide an alternate genetic code",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "and the evidence to support it here and it will be reviewed by the GenBank staff.",
                0, 0, programFont, 'l');

  return text_grp;
}
14952 
/* Creates a one-column group under parent holding the instruction text
 * for TRUE/FALSE modifiers, and returns that group.
 */
static GrouP MakeNontextInstructionGroup (GrouP parent)
{
  GrouP text_grp = HiddenGroup (parent, 1, 0, NULL);

  StaticPrompt (text_grp,
                "This modifier allows only TRUE/FALSE values.",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "The modifier will only appear in the file if it is set to TRUE,",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "otherwise it will be absent.",
                0, 0, programFont, 'l');

  return text_grp;
}
14967 
/* Creates a one-column group under parent holding the instruction text
 * for the location modifier, and returns that group.
 */
static GrouP MakeLocationInstructionGroup (GrouP parent)
{
  GrouP text_grp = HiddenGroup (parent, 1, 0, NULL);

  StaticPrompt (text_grp,
                "Use this to specify the subcellular location or viral origin of the sequences.",
                0, 0, programFont, 'l');
  StaticPrompt (text_grp,
                "Example: Use 'Genomic' for a sequence encoded by a nuclear gene.",
                0, 0, programFont, 'l');

  return text_grp;
}
14980 
/* Picks the instruction group that matches a modifier.
 * A nontext (TRUE/FALSE) modifier always gets the nontext instructions;
 * otherwise the choice depends on mod_type.  Returns NULL, creating
 * nothing, for modifier types that have no instructions.
 */
static GrouP MakeInstructionGroup (GrouP parent, Boolean is_nontext, Int2 mod_type)
{
  if (is_nontext)
  {
    return MakeNontextInstructionGroup (parent);
  }
  if (mod_type == eModifierType_Location)
  {
    return MakeLocationInstructionGroup (parent);
  }
  if (mod_type == eModifierType_Organism)
  {
    return MakeSourceInstructionGroup (parent);
  }
  if (mod_type == eModifierType_NucGeneticCode
      || mod_type == eModifierType_MitoGeneticCode
      || mod_type == eModifierType_GeneticCode)
  {
    return MakeGeneticCodeInstructionGroup (parent);
  }
  if (mod_type == eModifierType_GeneticCodeComment)
  {
    return MakeGeneticCodeCommentInstructionGroup (parent);
  }
  return NULL;
}
15009 
15010 
15011 /* This section of code prepares a dialog for editing one value for
15012  * the specified modifier type.  It can be used for setting the value
15013  * for a single sequence or the values for all sequences.
15014  */
/* Dialog data for the single modifier value editor created by
 * SingleModValDialog.  Only the control that matches the modifier is
 * created there; the other control members are left NULL.
 */
typedef struct singlemodvaldlg
{
  /* common dialog members (dialog, todialog, fromdialog, dialogmessage,
   * testdialog), which SingleModValDialog fills in
   */
  DIALOG_MESSAGE_BLOCK
  /* TRUE when the modifier only takes TRUE/FALSE values */
  Boolean is_nontext;
  /* one of the eModifierType_ values; selects which control is used */
  Int2    mod_type;
  /* NOTE(review): presumably the seqPackage passed to SingleModValDialog;
   * confirm where it is set and read
   */
  Int2    seqPackage;

  /* FALSE/TRUE popup, used when is_nontext is set */
  PopuP   nontext_popup;
  /* selection dialog used for location, origin, genetic code, moltype,
   * molecule and topology modifiers
   */
  DialoG  strvalue_dlg;
  /* organism selection dialog, used for the organism modifier */
  DialoG  org_dlg;
  /* free text control, used for every other modifier type */
  TexT    text_txt;
} SingleModValDlgData, PNTR SingleModValDlgPtr;
15027 
/* Loads a suggested value into the control that the single modifier
 * value dialog created for its modifier type.
 *
 * d        - dialog created by SingleModValDialog
 * userdata - the suggested value as a string (CharPtr); may be NULL or
 *            blank, in which case a per-type default choice is used
 *
 * For the selector-based modifier types the string is turned into a
 * choice number (directly when it starts with a digit, otherwise via the
 * matching name lookup) and passed to the selector in a temporary
 * ValNode together with the string itself.
 */
static void SingleModValToDialog (DialoG d, Pointer userdata)
{
  SingleModValDlgPtr dlg;
  CharPtr            suggested_value;
  ValNode            vn;
  ValNodePtr         gencodelist;

  dlg = (SingleModValDlgPtr) GetObjectExtra (d);

  if (dlg == NULL)
  {
    return;
  }

  suggested_value = (CharPtr) userdata;

  if (dlg->is_nontext)
  {
    /* popup item 1 is FALSE, item 2 is TRUE */
    if (StringICmp (suggested_value, "TRUE") == 0)
    {
      SetValue (dlg->nontext_popup, 2);
    }
    else
    {
      SetValue (dlg->nontext_popup, 1);
    }
  }
  else if (dlg->mod_type == eModifierType_Location)
  {
    if (StringHasNoText (suggested_value))
    {
      vn.choice = 1;
      vn.data.ptrvalue = "genomic";
    }
    else
    {
      vn.choice = GetValForEnumName (biosource_genome_simple_alist, suggested_value);
      vn.data.ptrvalue = suggested_value;
    }
    vn.next = NULL;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else if (dlg->mod_type == eModifierType_Origin)
  {
    vn.choice = GetValForEnumName (biosource_origin_alist, suggested_value);
    vn.data.ptrvalue = suggested_value;
    vn.next = NULL;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else if (dlg->mod_type == eModifierType_Organism)
  {
    PointerToDialog (dlg->org_dlg, suggested_value);
  }
  else if (dlg->mod_type == eModifierType_NucGeneticCode
           || dlg->mod_type == eModifierType_MitoGeneticCode
           || dlg->mod_type == eModifierType_GeneticCode)
  {
    if (StringHasNoText (suggested_value))
    {
      vn.choice = 0;
    }
    /* isdigit requires a value representable as unsigned char */
    else if (isdigit ((unsigned char) suggested_value[0]))
    {
      vn.choice = atoi (suggested_value);
    }
    else
    {
      /* the code list is only needed for the lookup by name */
      gencodelist = GetGeneticCodeValNodeList ();
      vn.choice = GeneticCodeFromStringAndList (suggested_value, gencodelist);
      gencodelist = ValNodeFreeData (gencodelist);
    }
    vn.next = NULL;
    vn.data.ptrvalue = suggested_value;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else if (dlg->mod_type == eModifierType_MolType)
  {
    if (StringHasNoText (suggested_value))
    {
      vn.choice = 253;
    }
    else if (isdigit ((unsigned char) suggested_value[0]))
    {
      vn.choice = atoi (suggested_value);
    }
    else
    {
      vn.choice = MolTypeFromString (suggested_value);
    }
    vn.next = NULL;
    vn.data.ptrvalue = suggested_value;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else if (dlg->mod_type == eModifierType_Molecule)
  {
    /* anything other than "rna" (including no value) is treated as DNA */
    if (StringICmp (suggested_value, "rna") == 0)
    {
      vn.choice = Seq_mol_rna;
    }
    else
    {
      vn.choice = Seq_mol_dna;
    }
    /* hand the choice to the selector, as the other selector-based
     * modifier types do
     */
    vn.next = NULL;
    vn.data.ptrvalue = suggested_value;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else if (dlg->mod_type == eModifierType_Topology)
  {
    if (StringHasNoText (suggested_value))
    {
      vn.choice = 1;
    }
    else if (isdigit ((unsigned char) suggested_value[0]))
    {
      vn.choice = atoi (suggested_value);
    }
    else
    {
      vn.choice = TopologyFromString (suggested_value);
    }
    vn.next = NULL;
    vn.data.ptrvalue = suggested_value;
    PointerToDialog (dlg->strvalue_dlg, &vn);
  }
  else
  {
    /* free text modifier */
    if (StringHasNoText (suggested_value))
    {
      SetTitle (dlg->text_txt, "");
    }
    else
    {
      SetTitle (dlg->text_txt, suggested_value);
    }
  }
}
15166 
DialogToSingleModVal(DialoG d)15167 static Pointer DialogToSingleModVal (DialoG d)
15168 {
15169   SingleModValDlgPtr dlg;
15170   CharPtr            new_value = NULL;
15171   ValNodePtr         value_vnp;
15172 
15173   dlg = (SingleModValDlgPtr) GetObjectExtra (d);
15174 
15175   if (dlg == NULL)
15176   {
15177     return NULL;
15178   }
15179 
15180   /* prepare value */
15181   if (dlg->is_nontext)
15182   {
15183     if (GetValue (dlg->nontext_popup) == 2)
15184     {
15185       new_value = StringSave ("2");
15186     }
15187   }
15188   else if (dlg->mod_type == eModifierType_Location
15189            || dlg->mod_type == eModifierType_Origin
15190            || dlg->mod_type == eModifierType_NucGeneticCode
15191            || dlg->mod_type == eModifierType_MitoGeneticCode
15192            || dlg->mod_type == eModifierType_GeneticCode
15193            || dlg->mod_type == eModifierType_MolType
15194            || dlg->mod_type == eModifierType_Molecule
15195            || dlg->mod_type == eModifierType_Topology)
15196   {
15197     value_vnp = DialogToPointer (dlg->strvalue_dlg);
15198     new_value = value_vnp->data.ptrvalue;
15199     if (dlg->mod_type == eModifierType_Location
15200         || dlg->mod_type == eModifierType_Origin)
15201     {
15202       StringToLower (new_value);
15203     }
15204     value_vnp = ValNodeFree (value_vnp);
15205     if (dlg->mod_type == eModifierType_MolType
15206         && StringICmp (new_value, "mRNA [cDNA]") == 0)
15207     {
15208       new_value = MemFree (new_value);
15209       new_value = StringSave ("mRNA");
15210     }
15211   }
15212   else if (dlg->mod_type == eModifierType_Organism)
15213   {
15214     new_value = DialogToPointer (dlg->org_dlg);
15215   }
15216   else
15217   {
15218     new_value = SaveStringFromText (dlg->text_txt);
15219   }
15220   return (Pointer) new_value;
15221 }
15222 
15223 
SingleModValDialog(GrouP parent,Boolean is_nontext,Int2 mod_type,Int2 seqPackage)15224 static DialoG SingleModValDialog (GrouP parent, Boolean is_nontext, Int2 mod_type, Int2 seqPackage)
15225 {
15226   SingleModValDlgPtr dlg;
15227   GrouP              grp;
15228   ValNodePtr         gencodelist;
15229 
15230   dlg = (SingleModValDlgPtr) MemNew (sizeof (SingleModValDlgData));
15231   if (dlg == NULL)
15232   {
15233     return NULL;
15234   }
15235 
15236   grp = HiddenGroup (parent, 1, 0, NULL);
15237   SetObjectExtra (grp, dlg, StdCleanupExtraProc);
15238   SetGroupSpacing (grp, 10, 10);
15239 
15240   dlg->dialog = (DialoG) grp;
15241   dlg->todialog = SingleModValToDialog;
15242   dlg->fromdialog = DialogToSingleModVal;
15243   dlg->dialogmessage = NULL;
15244   dlg->testdialog = NULL;
15245 
15246   dlg->is_nontext = is_nontext;
15247   dlg->mod_type = mod_type;
15248   dlg->nontext_popup = NULL;
15249   dlg->strvalue_dlg = NULL;
15250   dlg->org_dlg = NULL;
15251   dlg->text_txt = NULL;
15252 
15253 
15254   if (dlg->is_nontext)
15255   {
15256     dlg->nontext_popup = PopupList (grp, TRUE, NULL);
15257     PopupItem (dlg->nontext_popup, "FALSE");
15258     PopupItem (dlg->nontext_popup, "TRUE");
15259   }
15260   else if (dlg->mod_type == eModifierType_Location)
15261   {
15262     dlg->strvalue_dlg = ValNodeSelectionDialogExEx (grp,
15263                                                  GetLocListForBioSource (NULL), 6,
15264                                                  ValNodeStringName,
15265                                                  ValNodeSimpleDataFree,
15266                                                  ValNodeStringCopy,
15267                                                  ValNodeChoiceMatch,
15268                                                  "location code",
15269                                                   NULL, NULL, FALSE,
15270                                                   FALSE, TRUE, NULL);
15271 
15272   }
15273   else if (dlg->mod_type == eModifierType_Origin)
15274   {
15275     dlg->strvalue_dlg = EnumAssocSelectionDialog (grp, biosource_origin_alist,
15276                                                   "origin", FALSE, NULL, NULL);
15277   }
15278   else if (dlg->mod_type == eModifierType_Organism)
15279   {
15280     dlg->org_dlg = OrganismSelectionDialog (grp, "");
15281   }
15282   else if (dlg->mod_type == eModifierType_NucGeneticCode
15283            || dlg->mod_type == eModifierType_MitoGeneticCode
15284            || dlg->mod_type == eModifierType_GeneticCode)
15285   {
15286     gencodelist = GetGeneticCodeValNodeList ();
15287     dlg->strvalue_dlg = ValNodeSelectionDialog (grp, gencodelist, 6,
15288                                         ValNodeStringName,
15289                                         ValNodeSimpleDataFree,
15290                                         ValNodeStringCopy,
15291                                         ValNodeChoiceMatch,
15292                                         "genetic code",
15293                                         NULL, NULL, FALSE);
15294   }
15295   else if (dlg->mod_type == eModifierType_MolType)
15296   {
15297     if (seqPackage == SEQ_PKG_GENOMICCDNA)
15298     {
15299       dlg->strvalue_dlg = EnumAssocSelectionDialog (grp, biomol_nucGen_alist,
15300                                                "moltype", FALSE, NULL, NULL);
15301     }
15302     else
15303     {
15304       dlg->strvalue_dlg = EnumAssocSelectionDialog (grp, biomol_nucX_alist,
15305                                                "moltype", FALSE, NULL, NULL);
15306     }
15307   }
15308   else if (dlg->mod_type == eModifierType_Molecule)
15309   {
15310     dlg->strvalue_dlg = EnumAssocSelectionDialog (grp, molecule_alist,
15311                                                   "molecule", FALSE, NULL, NULL);
15312   }
15313   else if (dlg->mod_type == eModifierType_Topology)
15314   {
15315     dlg->strvalue_dlg = EnumAssocSelectionDialog (grp, topology_nuc_alist,
15316                                                "topology", FALSE, NULL, NULL);
15317   }
15318   else
15319   {
15320     if (dlg->mod_type == eModifierType_GeneticCodeComment)
15321     {
15322       dlg->text_txt = DialogText (grp, "", 40, NULL);
15323     }
15324     else
15325     {
15326       dlg->text_txt = DialogText (grp, "", 20, NULL);
15327     }
15328   }
15329 
15330   return (DialoG) grp;
15331 }
15332 
AddSeqIDAndValueToRowList(CharPtr id,CharPtr title,ValNodePtr PNTR row_list)15333 static void AddSeqIDAndValueToRowList
15334 (CharPtr         id,
15335  CharPtr         title,
15336  ValNodePtr PNTR row_list)
15337 {
15338   CharPtr      str = NULL;
15339   ValNodeBlock column_list;
15340   ValNodeBlock row_block;
15341   CharPtr      org_loc, org_end = NULL, next_org = NULL;
15342 
15343   if (row_list == NULL)
15344   {
15345     return;
15346   }
15347   InitValNodeBlock (&column_list, NULL);
15348   InitValNodeBlock (&row_block, *row_list);
15349 
15350   /* put ID in first location */
15351   ValNodeAddPointerToEnd (&column_list, 0, StringSave (id));
15352 
15353   /* get organism */
15354   org_loc = FindValuePairInDefLine ("organism", title, &org_end);
15355   str = FindValueFromPairInDefline ("organism", title);
15356   ValNodeAddPointerToEnd (&column_list, 0, str);
15357 
15358   if (org_end != NULL)
15359   {
15360     next_org = FindValuePairInDefLine ("organism", org_end + 1, &org_end);
15361   }
15362 
15363   /* get location */
15364   str = FindValueFromPairInDeflineBeforeCharPtr ("location", title, next_org);
15365   if (StringHasNoText (str))
15366   {
15367     str = MemFree (str);
15368     str = StringSave ("genomic");
15369   }
15370   ValNodeAddPointerToEnd (&column_list, 0, str);
15371 
15372   /* get genetic code */
15373   str = FindValueFromPairInDeflineBeforeCharPtr ("genetic_code", title, next_org);
15374   if (StringHasNoText (str))
15375   {
15376     str = MemFree (str);
15377     str = StringSave ("Standard");
15378   }
15379   ValNodeAddPointerToEnd (&column_list, 0, str);
15380 
15381   /* get genetic code comment */
15382   str = FindValueFromPairInDeflineBeforeCharPtr ("gencode_comment", title, next_org);
15383   ValNodeAddPointerToEnd (&column_list, 0, str);
15384 
15385   ValNodeAddPointerToEnd (&row_block, 0, column_list.head);
15386 
15387   while (next_org != NULL)
15388   {
15389     InitValNodeBlock (&column_list, NULL);
15390     /* put blank ID in first location */
15391     ValNodeAddPointerToEnd (&column_list, 0, StringSave (""));
15392 
15393     /* get organism */
15394     org_loc = FindValuePairInDefLine ("organism", next_org, &org_end);
15395     str = FindValueFromPairInDefline ("organism", next_org);
15396     ValNodeAddPointerToEnd (&column_list, 0, str);
15397 
15398     next_org = FindValuePairInDefLine ("organism", org_end + 1, &org_end);
15399 
15400     /* get location */
15401     str = FindValueFromPairInDeflineBeforeCharPtr ("location", org_loc, next_org);
15402     if (StringHasNoText (str))
15403     {
15404       str = MemFree (str);
15405       str = StringSave ("genomic");
15406     }
15407     ValNodeAddPointerToEnd (&column_list, 0, str);
15408 
15409     /* get genetic code */
15410     str = FindValueFromPairInDeflineBeforeCharPtr ("genetic_code", org_loc, next_org);
15411     if (StringHasNoText (str))
15412     {
15413       str = MemFree (str);
15414       str = StringSave ("Standard");
15415     }
15416     ValNodeAddPointerToEnd (&column_list, 0, str);
15417 
15418     /* get genetic code comment */
15419     str = FindValueFromPairInDeflineBeforeCharPtr ("gencode_comment", org_loc, next_org);
15420     ValNodeAddPointerToEnd (&column_list, 0, str);
15421 
15422     ValNodeAddPointerToEnd (&row_block, 0, column_list.head);
15423   }
15424   *row_list = row_block.head;
15425 }
15426 
/* Convenience wrapper: looks up "organism" pair number org_num in title via
 * FindNthValuePairInDefLine, forwarding p_org_end unchanged, and returns
 * whatever that lookup returns.
 */
static CharPtr FindNthOrgPair (CharPtr title, Int4 org_num, CharPtr PNTR p_org_end)
{
  CharPtr pair_start;

  pair_start = FindNthValuePairInDefLine (title, "organism", org_num, p_org_end);
  return pair_start;
}
15431 
/* Writes the rows of an organism table back into the titles held by iatep.
 *
 * row_list: one ValNode per row; data.ptrvalue is the head of that row's
 *           column list.  Columns are read in the order sequence ID,
 *           organism, location, genetic code, genetic code comment; a row
 *           may stop early, in which case the remaining values are left
 *           untouched.
 * iatep:    ID/title set to update; title_list entries are replaced by the
 *           strings returned from the Replace... helpers.
 *
 * A row whose ID has no text belongs to the most recent row that did have an
 * ID and addresses that sequence's next organism (org_num is incremented).
 * Rows whose ID does not match any entry in iatep->id_list are skipped.
 */
static void ApplyRowListToIDAndTitleEdit (ValNodePtr row_list, IDAndTitleEditPtr iatep)
{
  ValNodePtr row_vnp, col_vnp;
  CharPtr    last_id = NULL, id_txt;
  Int4       j, seq_num;
  Int4       org_num = 0;
  CharPtr    org_loc, org_end;

  if (row_list == NULL || iatep == NULL)
  {
    return;
  }

  for (row_vnp = row_list;
       row_vnp != NULL;
       row_vnp = row_vnp->next)
  {
    col_vnp = row_vnp->data.ptrvalue;
    seq_num = -1;
    /* read sequence ID */
    if (col_vnp != NULL)
    {
      id_txt = col_vnp->data.ptrvalue;
      if (StringHasNoText (id_txt))
      {
        /* continuation row: same sequence, next organism */
        id_txt = last_id;
        org_num++;
      }
      else
      {
        last_id = id_txt;
        org_num = 0;
      }
      for (j = 0; j < iatep->num_sequences && seq_num == -1; j++)
      {
        if (StringCmp (iatep->id_list [j], id_txt) == 0)
        {
          seq_num = j;
        }
      }
      col_vnp = col_vnp->next;
    }

    /* valid indexes are 0 .. num_sequences - 1 */
    if (seq_num < 0 || seq_num >= iatep->num_sequences)
    {
      continue;
    }

    /* find organism name # org_num.
     * org_end is cleared first so it never holds an indeterminate value if
     * the lookup does not set it.
     */
    org_end = NULL;
    org_loc = FindNthOrgPair (iatep->title_list [seq_num], org_num, &org_end);

    /* add tax name */
    if (col_vnp != NULL)
    {
      if (org_loc == NULL)
      {
        iatep->title_list [seq_num] = ReplaceValueInOneDefLine (iatep->title_list [seq_num],
                                                                "organism",
                                                                col_vnp->data.ptrvalue);
      }
      else
      {
        iatep->title_list [seq_num] = ReplaceValueInThisValuePair (iatep->title_list [seq_num],
                                                                   org_loc, "organism",
                                                                   org_end,
                                                                   col_vnp->data.ptrvalue);

      }
      col_vnp = col_vnp->next;
    }

    /* The title pointer may have been replaced above, so the organism pair
     * is located again before each of the following edits.
     */

    /* add location */
    if (col_vnp != NULL)
    {
      org_loc = FindNthOrgPair (iatep->title_list [seq_num], org_num, &org_end);
      iatep->title_list [seq_num] = ReplaceValueInOneDefLineForOrganism (iatep->title_list [seq_num],
                                                                         "location",
                                                                         col_vnp->data.ptrvalue,
                                                                         org_loc);
      col_vnp = col_vnp->next;
    }

    /* add genetic code */
    if (col_vnp != NULL)
    {
      org_loc = FindNthOrgPair (iatep->title_list [seq_num], org_num, &org_end);
      iatep->title_list [seq_num] = ReplaceValueInOneDefLineForOrganism (iatep->title_list [seq_num],
                                                                         "genetic_code",
                                                                         col_vnp->data.ptrvalue,
                                                                         org_loc);
      col_vnp = col_vnp->next;
    }

    /* add genetic code comment */
    if (col_vnp != NULL)
    {
      org_loc = FindNthOrgPair (iatep->title_list [seq_num], org_num, &org_end);
      iatep->title_list [seq_num] = ReplaceValueInOneDefLineForOrganism (iatep->title_list [seq_num],
                                                                         "gencode_comment",
                                                                         col_vnp->data.ptrvalue,
                                                                         org_loc);
      col_vnp = col_vnp->next;
    }
  }
}
15547 
15548 
15549 /* This function allows the user to edit the organisms, locations, and genetic codes for
15550  * all of the sequences in the set.
15551  */
EditOrganismColumn(SourceAssistantPtr sap,SeqEntryPtr seq_list)15552 static void EditOrganismColumn (SourceAssistantPtr sap, SeqEntryPtr seq_list)
15553 {
15554   WindoW                 w;
15555   GrouP                  h, instr_grp, c;
15556   DialoG                 dlg;
15557   ValNodePtr             row_list;
15558   ModalAcceptCancelData  acd;
15559   ButtoN                 b;
15560   Int4                   j;
15561   IDAndTitleEditPtr      iatep;
15562 
15563   if (sap == NULL && seq_list == NULL)
15564   {
15565     return;
15566   }
15567 
15568   if (sap == NULL)
15569   {
15570     iatep = SeqEntryListToIDAndTitleEdit (seq_list);
15571   }
15572   else
15573   {
15574     iatep = SourceAssistantToIDAndTitleEdit (sap);
15575   }
15576 
15577   if (iatep == NULL || iatep->num_sequences < 1)
15578   {
15579     iatep = IDAndTitleEditFree (iatep);
15580     return;
15581   }
15582 
15583   SendHelpScrollMessage (helpForm, "Organism Page", "Add Organisms, Locations, and Genetic Codes");
15584 
15585   if (iatep->num_sequences == 1)
15586   {
15587     w = MovableModalWindow (-20, -13, -10, -10, "Organism Editor", NULL);
15588   }
15589   else
15590   {
15591     w = MovableModalWindow (-20, -13, -10, -10, "Multiple Organism Editor", NULL);
15592   }
15593   h = HiddenGroup(w, -1, 0, NULL);
15594   SetGroupSpacing (h, 10, 10);
15595 
15596   dlg = MultiOrganismSelectionDialog (h);
15597 
15598   instr_grp = MakeSourceInstructionGroup (h);
15599 
15600   row_list = NULL;
15601 
15602   for (j = 0; j < iatep->num_sequences; j++)
15603   {
15604     if (iatep->is_seg != NULL && iatep->is_seg [j])
15605     {
15606       continue;
15607     }
15608     AddSeqIDAndValueToRowList (iatep->id_list[j], iatep->title_list[j],
15609                                  &row_list);
15610   }
15611 
15612   PointerToDialog (dlg, row_list);
15613   row_list = FreeTableDisplayRowList (row_list);
15614 
15615   c = HiddenGroup (h, 2, 0, NULL);
15616   b = PushButton (c, "Accept", ModalAcceptButton);
15617   SetObjectExtra (b, &acd, NULL);
15618   b = PushButton (c, "Cancel", ModalCancelButton);
15619   SetObjectExtra (b, &acd, NULL);
15620 
15621   AlignObjects (ALIGN_CENTER, (HANDLE) dlg, (HANDLE) instr_grp,
15622                               (HANDLE) c, (HANDLE) NULL);
15623 
15624   Show (w);
15625   Select (w);
15626   acd.accepted = FALSE;
15627   acd.cancelled = FALSE;
15628   while (!acd.accepted && ! acd.cancelled)
15629   {
15630     ProcessExternalEvent ();
15631     Update ();
15632   }
15633   ProcessAnEvent ();
15634   if (acd.cancelled)
15635   {
15636     Remove (w);
15637     return;
15638   }
15639   else
15640   {
15641     row_list = DialogToPointer (dlg);
15642     ApplyRowListToIDAndTitleEdit (row_list, iatep);
15643     if (sap == NULL)
15644     {
15645       ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
15646     }
15647     else
15648     {
15649       ApplyIDAndTitleEditToSourceAssistant (sap, iatep);
15650     }
15651     row_list = FreeTableDisplayRowList (row_list);
15652     UpdateOrgModDlg (sap);
15653     Remove (w);
15654   }
15655   iatep = IDAndTitleEditFree (iatep);
15656 }
15657 
/* State shared by the handlers that overwrite every row of a value column
 * (SetAllColumnValues, ClearColumnValues, EditOrgModApplyAll).
 */
typedef struct setcolumnvaluesdata
{
  TagListPtr tlp;          /* tag list whose last column is overwritten */
  DialoG     all_val_dlg;  /* dialog read with DialogToPointer for the value to apply to all rows */
  Boolean    is_nontext;   /* passed to TagListStringFromDefLineValue with the new value */
  Int2       mod_type;     /* passed to TagListStringFromDefLineValue with the new value */
} SetColumnValuesData, PNTR SetColumnValuesPtr;
15665 
/* Sets the last column of every row in scvp->tlp to new_value (converted
 * with TagListStringFromDefLineValue), then redraws the tag list and
 * recomputes its scroll bar range from the current number of rows.
 * Does nothing when scvp or its tag list is NULL.
 */
static void SetAllColumnValues (SetColumnValuesPtr scvp, CharPtr new_value)
{
  TagListPtr tlp;
  CharPtr    cell_str;
  ValNodePtr row;
  Int4       num_rows;

  if (scvp == NULL)
  {
    return;
  }
  tlp = scvp->tlp;
  if (tlp == NULL)
  {
    return;
  }

  cell_str = TagListStringFromDefLineValue (new_value, scvp->is_nontext, scvp->mod_type);
  SetTagListColumnValue (tlp, tlp->cols - 1, cell_str);
  cell_str = MemFree (cell_str);

  SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);

  /* count the rows currently held by the tag list */
  num_rows = 0;
  row = tlp->vnp;
  while (row != NULL)
  {
    num_rows++;
    row = row->next;
  }

  tlp->max = MAX ((Int2) 0, (Int2) (num_rows - tlp->rows));
  CorrectBarMax (tlp->bar, tlp->max);
  CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);

  /* the scroll bar is only shown when there is something to scroll */
  if (tlp->max <= 0)
  {
    SafeHide (tlp->bar);
  }
  else
  {
    SafeShow (tlp->bar);
  }
  SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
}
15694 
/* Handler for a "clear" button: b's extra data is a SetColumnValuesPtr.
 * After the user confirms, every row's value is reset by calling
 * SetAllColumnValues with a NULL value.
 * Returns without prompting when there is no tag list to clear, matching the
 * validation EditOrgModApplyAll performs before it asks for confirmation.
 */
static void ClearColumnValues (ButtoN b)
{
  SetColumnValuesPtr scvp;

  scvp = (SetColumnValuesPtr) GetObjectExtra (b);
  if (scvp == NULL || scvp->tlp == NULL)
  {
    /* nothing could be cleared, so do not bother the user with a question */
    return;
  }

  if (ANS_NO == Message (MSG_YN, "Are you sure you want to clear all the values?"))
  {
    return;
  }

  SetAllColumnValues (scvp, NULL);
}
15708 
/* Handler for an "apply to all" button: b's extra data is a
 * SetColumnValuesPtr.  When the data is complete and the user confirms, the
 * value read from all_val_dlg is copied into every row with
 * SetAllColumnValues and then released with MemFree.
 */
static void EditOrgModApplyAll (ButtoN b)
{
  SetColumnValuesPtr scvp = (SetColumnValuesPtr) GetObjectExtra (b);
  CharPtr            entered;

  /* the confirmation question is only asked once the data has been validated */
  if (scvp != NULL && scvp->all_val_dlg != NULL && scvp->tlp != NULL
      && Message (MSG_YN, "Are you sure you want to set all the values?") != ANS_NO)
  {
    entered = DialogToPointer (scvp->all_val_dlg);
    SetAllColumnValues (scvp, entered);
    entered = MemFree (entered);
  }
}
15729 
15730 
15731 
15732 /* The following functions are used for editing values for all of the sequences in
15733  * a record for a specified modifier name.
15734  * Some modifiers can have multiple values, so for these we list the original value
15735  * that will be replaced by the new value.
15736  */
15737 /* when id_str has no text, this creates one column,
15738  * otherwise it creates two columns, sequence ID and new value.
15739  * if there are multiple values, the sequence ID is only listed in the first row.
15740  */
15741 static void
AddRowsForModifierValuesForDefline(CharPtr mod_name,Boolean is_nontext,Int2 mod_type,CharPtr id_str,CharPtr defline,ValNodePtr PNTR list)15742 AddRowsForModifierValuesForDefline
15743 (CharPtr         mod_name,
15744  Boolean         is_nontext,
15745  Int2            mod_type,
15746  CharPtr         id_str,
15747  CharPtr         defline,
15748  ValNodePtr PNTR list)
15749 {
15750   Boolean added_any = FALSE;
15751   CharPtr valstr;
15752   CharPtr bracket_start, bracket_end;
15753   Int4    len;
15754   CharPtr taglist_str, tag_str;
15755   Boolean is_first = TRUE;
15756 
15757   if (list == NULL || StringHasNoText (mod_name))
15758   {
15759     return;
15760   }
15761 
15762   bracket_start = FindValuePairInDefLine (mod_name, defline, &bracket_end);
15763   while (bracket_start != NULL)
15764   {
15765     valstr = FindValueFromPairInDefline (mod_name, bracket_start);
15766     if (!StringHasNoText (valstr))
15767     {
15768       taglist_str = TagListStringFromDefLineValue (valstr, is_nontext, mod_type);
15769 
15770       len = StringLen (id_str) + StringLen (taglist_str) + 4;
15771       tag_str = (CharPtr) MemNew (len * sizeof (Char));
15772       if (tag_str != NULL)
15773       {
15774         if (StringHasNoText (id_str))
15775         {
15776           sprintf (tag_str, "%s\n", taglist_str);
15777         }
15778         else
15779         {
15780           if (is_first)
15781           {
15782             sprintf (tag_str, "%s\t%s\n", id_str, taglist_str);
15783             is_first = FALSE;
15784           }
15785           else
15786           {
15787             sprintf (tag_str, "\t%s\n", taglist_str);
15788           }
15789         }
15790         ValNodeAddPointer (list, 0, tag_str);
15791         added_any = TRUE;
15792       }
15793       taglist_str = MemFree (taglist_str);
15794     }
15795     valstr = MemFree (valstr);
15796     bracket_start = FindValuePairInDefLine (mod_name, bracket_end + 1, &bracket_end);
15797   }
15798 
15799   if (!added_any)
15800   {
15801     len = StringLen (id_str) + 4;
15802     tag_str = (CharPtr) MemNew (len * sizeof (Char));
15803     if (tag_str != NULL)
15804     {
15805       if (StringHasNoText (id_str))
15806       {
15807         sprintf (tag_str, "\n");
15808       }
15809       else
15810       {
15811         sprintf (tag_str, "%s\t\n", id_str);
15812       }
15813       ValNodeAddPointer (list, 0, tag_str);
15814     }
15815   }
15816 }
15817 
15818 static ValNodePtr
IDAndTitleEditToModifierColumnTagList(IDAndTitleEditPtr iatep,CharPtr mod_name,Boolean is_nontext,Int2 mod_type,Boolean allow_multi)15819 IDAndTitleEditToModifierColumnTagList
15820 (IDAndTitleEditPtr iatep,
15821  CharPtr           mod_name,
15822  Boolean           is_nontext,
15823  Int2              mod_type,
15824  Boolean           allow_multi)
15825 {
15826   ValNodePtr list = NULL;
15827   Int4       seq_num;
15828 
15829   if (iatep == NULL)
15830   {
15831     return NULL;
15832   }
15833 
15834   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
15835   {
15836     if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
15837     {
15838       continue;
15839     }
15840     if (allow_multi)
15841     {
15842       AddRowsForModifierValuesForDefline (mod_name, is_nontext, mod_type,
15843                                           iatep->id_list [seq_num],
15844                                           iatep->title_list [seq_num],
15845                                           &list);
15846     }
15847     else
15848     {
15849       AddSeqIDAndValueToTagList (iatep->id_list [seq_num],
15850                                  iatep->title_list [seq_num],
15851                                  mod_name, &list);
15852     }
15853   }
15854   return list;
15855 }
15856 
15857 static CharPtr
GetValueFromTagListString(CharPtr new_value,Boolean is_nontext,Int4 mod_type,Int2 seqPackage,EnumFieldAssocPtr gencode_alist)15858 GetValueFromTagListString
15859 (CharPtr           new_value,
15860  Boolean           is_nontext,
15861  Int4              mod_type,
15862  Int2              seqPackage,
15863  EnumFieldAssocPtr gencode_alist)
15864 {
15865   CharPtr tmp_value;
15866 
15867   if (is_nontext)
15868   {
15869     if (StringCmp (new_value, "1") != 0)
15870     {
15871       new_value = MemFree (new_value);
15872     }
15873   }
15874   else if (mod_type == eModifierType_Location)
15875   {
15876     tmp_value = GetEnumName (atoi(new_value), biosource_genome_simple_alist);
15877     StringToLower (tmp_value);
15878     new_value = MemFree (new_value);
15879     new_value = StringSave (tmp_value);
15880   }
15881   else if (mod_type == eModifierType_Origin)
15882   {
15883     tmp_value = GetEnumName (atoi (new_value), biosource_origin_alist);
15884     StringToLower (tmp_value);
15885     new_value = MemFree (new_value);
15886     new_value = StringSave (tmp_value);
15887   }
15888   else if (mod_type == eModifierType_NucGeneticCode
15889            || mod_type == eModifierType_MitoGeneticCode)
15890   {
15891     tmp_value = GetEnumName (atoi(new_value), gencode_alist);
15892     new_value = MemFree (new_value);
15893     new_value = StringSave (tmp_value);
15894   }
15895   else if (mod_type == eModifierType_MolType)
15896   {
15897     if (seqPackage == SEQ_PKG_GENOMICCDNA)
15898     {
15899       tmp_value = GetEnumName (atoi (new_value), biomol_nucGen_alist);
15900     }
15901     else
15902     {
15903       tmp_value = GetEnumName (atoi (new_value), biomol_nucX_alist);
15904       if (StringICmp (tmp_value, "mRNA [cDNA]") == 0)
15905       {
15906         tmp_value = StringSave ("mRNA");
15907       }
15908     }
15909     new_value = MemFree (new_value);
15910     new_value = StringSave (tmp_value);
15911   }
15912   else if (mod_type == eModifierType_Molecule)
15913   {
15914     tmp_value = GetEnumName (atoi (new_value), molecule_alist);
15915     new_value = MemFree (new_value);
15916     new_value = StringSave (tmp_value);
15917   }
15918   else if (mod_type == eModifierType_Topology)
15919   {
15920     tmp_value = GetEnumName (atoi (new_value), topology_nuc_alist);
15921     new_value = MemFree (new_value);
15922     new_value = StringSave (tmp_value);
15923   }
15924   return new_value;
15925 }
15926 
/* Removes every value_name pair from title, editing title in place.
 * Does nothing when either string has no text.
 */
static void RemoveAllValuePairs (CharPtr value_name, CharPtr title)
{
  CharPtr pair_start;
  CharPtr pair_end;

  if (!StringHasNoText (value_name) && !StringHasNoText (title))
  {
    /* after a removal, searching resumes at the position of the removed pair */
    for (pair_start = FindValuePairInDefLine (value_name, title, &pair_end);
         pair_start != NULL;
         pair_start = FindValuePairInDefLine (value_name, pair_start, &pair_end))
    {
      RemoveValuePairFromDefline (pair_start, pair_end, pair_start);
    }
  }
}
15944 
15945 static CharPtr
ApplyValueListToTitle(CharPtr orig_title,CharPtr value_name,ValNodePtr value_list)15946 ApplyValueListToTitle
15947 (CharPtr orig_title,
15948  CharPtr value_name,
15949  ValNodePtr value_list)
15950 {
15951   Int4       val_num;
15952   CharPtr    value_loc, end_loc = NULL;
15953   ValNodePtr vnp;
15954 
15955   if (StringHasNoText (value_name))
15956   {
15957     return orig_title;
15958   }
15959 
15960   /* if our value list is NULL, remove all values and done. */
15961   if (value_list == NULL)
15962   {
15963     RemoveAllValuePairs (value_name, orig_title);
15964     return orig_title;
15965   }
15966 
15967   /* if there are no values in the title, make sure the new value is added
15968    * to the first organism.
15969    * otherwise, replace values where they are in the title.
15970    */
15971   value_loc = FindValuePairInDefLine (value_name, orig_title, &end_loc);
15972   if (value_loc == NULL)
15973   {
15974     orig_title = ReplaceValueInOneDefLineForOrganism (orig_title, value_name,
15975                                                       value_list->data.ptrvalue,
15976                                                       NULL);
15977   }
15978   else
15979   {
15980     vnp = value_list;
15981     val_num = 0;
15982     while (value_loc != NULL && vnp != NULL)
15983     {
15984       orig_title = ReplaceValueInThisValuePair (orig_title, value_loc, value_name,
15985                                                 end_loc, vnp->data.ptrvalue);
15986       /* if the value was empty, it will have been removed */
15987       if (!StringHasNoText (vnp->data.ptrvalue))
15988       {
15989         val_num++;
15990       }
15991       vnp = vnp->next;
15992       value_loc = FindNthValuePairInDefLine (orig_title, value_name, val_num, &end_loc);
15993     }
15994   }
15995 
15996   return orig_title;
15997 }
15998 
GetRowForIDText(CharPtr id_txt,IDAndTitleEditPtr iatep)15999 static Int4 GetRowForIDText (CharPtr id_txt, IDAndTitleEditPtr iatep)
16000 {
16001   Int4 seq_num, row_num = -1;
16002 
16003   if (StringHasNoText (id_txt) || iatep == NULL)
16004   {
16005     return -1;
16006   }
16007 
16008   for (seq_num = 0; seq_num < iatep->num_sequences && row_num == -1; seq_num++)
16009   {
16010     if (StringCmp (id_txt, iatep->id_list [seq_num]) == 0)
16011     {
16012       row_num = seq_num;
16013     }
16014   }
16015   return row_num;
16016 }
16017 
16018 static void
ApplyModifierColumnTagListToIDAndTitleEdit(CharPtr mod_name,Boolean is_nontext,Int4 mod_type,Int2 seqPackage,EnumFieldAssocPtr gencode_alist,ValNodePtr list,Int4 num_columns,IDAndTitleEditPtr iatep,Int4 seq_num)16019 ApplyModifierColumnTagListToIDAndTitleEdit
16020 (CharPtr           mod_name,
16021  Boolean           is_nontext,
16022  Int4              mod_type,
16023  Int2              seqPackage,
16024  EnumFieldAssocPtr gencode_alist,
16025  ValNodePtr        list,
16026  Int4              num_columns,
16027  IDAndTitleEditPtr iatep,
16028  Int4              seq_num)
16029 {
16030   ValNodePtr vnp, value_list = NULL, row_list;
16031   CharPtr    id_txt, new_value, last_id = NULL;
16032   Int4       row_num;
16033 
16034   if (list == NULL || iatep == NULL)
16035   {
16036     return;
16037   }
16038 
16039   for (vnp = list; vnp != NULL; vnp = vnp->next)
16040   {
16041     if (seq_num < 0)
16042     {
16043       id_txt = ExtractTagListColumn ((CharPtr) vnp->data.ptrvalue, 0);
16044       if (StringHasNoText (id_txt))
16045       {
16046         id_txt = MemFree (id_txt);
16047         id_txt = StringSave (last_id);
16048       }
16049       else
16050       {
16051         last_id = MemFree (last_id);
16052         last_id = StringSave (id_txt);
16053       }
16054 
16055       /* find sequence that corresponds to this id */
16056       row_num = GetRowForIDText (id_txt, iatep);
16057       id_txt = MemFree (id_txt);
16058     }
16059     else
16060     {
16061       row_num = seq_num;
16062     }
16063     if (row_num >= iatep->num_sequences || row_num < 0)
16064     {
16065       continue;
16066     }
16067 
16068     /* extract new value from column */
16069     new_value = ExtractTagListColumn ((CharPtr) vnp->data.ptrvalue, num_columns - 1);
16070     /* translate from list value (may be number from popup) to real value */
16071     new_value = GetValueFromTagListString (new_value, is_nontext, mod_type,
16072                                            seqPackage, gencode_alist);
16073 
16074     /* add to list */
16075     /* add NULL if value is blank */
16076     if (StringHasNoText (new_value))
16077     {
16078       new_value = MemFree (new_value);
16079     }
16080     ValNodeAddPointer (&value_list, row_num, new_value);
16081   }
16082 
16083   if (seq_num < 0)
16084   {
16085     for (row_num = 0; row_num < iatep->num_sequences; row_num++)
16086     {
16087       row_list = ValNodeExtractList (&value_list, row_num);
16088       iatep->title_list [row_num] = ApplyValueListToTitle (iatep->title_list [row_num],
16089                                                          mod_name, row_list);
16090       row_list = ValNodeFreeData (row_list);
16091     }
16092   }
16093   else
16094   {
16095     iatep->title_list [seq_num] = ApplyValueListToTitle (iatep->title_list [seq_num],
16096                                                          mod_name, value_list);
16097 
16098   }
16099   value_list = ValNodeFreeData (value_list);
16100 
16101   if (seq_num < 0)
16102   {
16103     for (row_num = 0; row_num < iatep->num_sequences; row_num++)
16104     {
16105       iatep->title_list [row_num] = RemoveAllDuplicatePairsFromOneTitle (iatep->title_list [row_num]);
16106     }
16107   }
16108   else
16109   {
16110     iatep->title_list [seq_num] = RemoveAllDuplicatePairsFromOneTitle (iatep->title_list [seq_num]);
16111   }
16112 }
16113 
/* Returns the tag list column type used to edit a modifier: TAGLIST_POPUP
 * for non-text modifiers and for the location, origin, genetic code,
 * molecule type, molecule, and topology types; TAGLIST_TEXT for all others.
 */
static Int4 GetTaglistType (Boolean is_nontext, Int4 mod_type)
{
  if (is_nontext)
  {
    return TAGLIST_POPUP;
  }

  switch (mod_type)
  {
    case eModifierType_Location:
    case eModifierType_Origin:
    case eModifierType_NucGeneticCode:
    case eModifierType_MitoGeneticCode:
    case eModifierType_MolType:
    case eModifierType_Molecule:
    case eModifierType_Topology:
      return TAGLIST_POPUP;
    default:
      return TAGLIST_TEXT;
  }
}
16132 
GetTaglistAlist(Boolean is_nontext,Int4 mod_type,Int2 seqPackage)16133 static EnumFieldAssocPtr GetTaglistAlist (Boolean is_nontext, Int4 mod_type, Int2 seqPackage)
16134 {
16135   if (is_nontext)
16136   {
16137     return nontextmodedit_alist;
16138   }
16139   else if (mod_type == eModifierType_Location)
16140   {
16141     return  biosource_genome_simple_alist;
16142   }
16143   else if (mod_type == eModifierType_Origin)
16144   {
16145     return  biosource_origin_alist;
16146   }
16147   else if (mod_type == eModifierType_NucGeneticCode
16148            || mod_type == eModifierType_MitoGeneticCode)
16149   {
16150     return BuildGeneticCodeEnum ();
16151   }
16152   else if (mod_type == eModifierType_MolType)
16153   {
16154     if (seqPackage == SEQ_PKG_GENOMICCDNA)
16155     {
16156       return biomol_nucGen_alist;
16157     }
16158     else
16159     {
16160       return biomol_nucX_alist;
16161     }
16162   }
16163   else if (mod_type == eModifierType_Molecule)
16164   {
16165     return  molecule_alist;
16166   }
16167   else if (mod_type == eModifierType_Topology)
16168   {
16169     return  topology_nuc_alist;
16170   }
16171   else
16172   {
16173     return NULL;
16174   }
16175 }
16176 
/* Column descriptors for the two-column value list tag dialog (column types,
 * text widths, and popup enum lists).  The initializers are only
 * placeholders: CreateValueListDialog overwrites both entries of each array
 * before passing them to CreateTagListDialogEx.
 */
static Uint2 taglist_types [] =
{ TAGLIST_PROMPT, TAGLIST_PROMPT};

static Uint2 taglist_textWidths [] =
{ 10, 20};
static EnumFieldAssocPtr taglist_alists [] =
{ NULL, NULL};
16184 
/* Returns the length of the longest string found in column col_num over all
 * rows of taglist_list, or 0 when the list is empty.
 */
static Int4 GetMaxTagListValueWidth (ValNodePtr taglist_list, Int4 col_num)
{
  ValNodePtr row = taglist_list;
  CharPtr    cell;
  Int4       cell_len;
  Int4       widest = 0;

  while (row != NULL)
  {
    cell = ExtractTagListColumn ((CharPtr) row->data.ptrvalue, col_num);
    cell_len = (Int4) StringLen (cell);
    if (cell_len > widest)
    {
      widest = cell_len;
    }
    cell = MemFree (cell);
    row = row->next;
  }
  return widest;
}
16199 
16200 static DialoG
CreateValueListDialog(GrouP parent_grp,CharPtr mod_name,Int2 seqPackage,IDAndTitleEditPtr iatep,Int4 seq_num)16201 CreateValueListDialog
16202 (GrouP             parent_grp,
16203  CharPtr           mod_name,
16204  Int2              seqPackage,
16205  IDAndTitleEditPtr iatep,
16206  Int4              seq_num)
16207 {
16208   GrouP                  k, g;
16209   PrompT                 ppt1 = NULL, ppt2;
16210   Int4                   num_columns, j;
16211   ValNodePtr             row_list = NULL;
16212   Boolean                allow_multi;
16213   Boolean                is_nontext;
16214   Int4                   mod_type;
16215   Int4                   rows_shown;
16216   DialoG                 dlg = NULL;
16217   TagListPtr             tlp;
16218   Int4                   first_colwidth, val_width;
16219 
16220   if (iatep == NULL || seq_num >= iatep->num_sequences)
16221   {
16222     return NULL;
16223   }
16224 
16225   is_nontext = IsNonTextModifier (mod_name);
16226   mod_type = GetModifierType (mod_name);
16227 
16228   /* set up row list and number of columns */
16229   if (seq_num < 0)
16230   {
16231     allow_multi = AllowMultipleValues (mod_name);
16232     row_list = IDAndTitleEditToModifierColumnTagList (iatep, mod_name,
16233                                                       is_nontext, mod_type,
16234                                                       allow_multi);
16235 
16236     num_columns = 2;
16237   }
16238   else
16239   {
16240     allow_multi = TRUE;
16241     num_columns = 1;
16242     AddRowsForModifierValuesForDefline (mod_name, is_nontext, mod_type,
16243                                         NULL,
16244                                         iatep->title_list [seq_num],
16245                                         &row_list);
16246   }
16247 
16248   k = HiddenGroup (parent_grp, 1, 0, NULL);
16249   g = HiddenGroup (k, 2, 0, NULL);
16250 
16251   if (seq_num < 0)
16252   {
16253     ppt1 = StaticPrompt (g, "SeqID", 0, 0, programFont, 'l');
16254   }
16255   if (mod_type == eModifierType_MolType)
16256   {
16257     ppt2 = StaticPrompt (g, "molecule type", 0, 0, programFont, 'l');
16258   }
16259   else
16260   {
16261     ppt2 = StaticPrompt (g, mod_name, 0, 0, programFont, 'l');
16262   }
16263 
16264   rows_shown = ValNodeLen (row_list);
16265   rows_shown = MIN (rows_shown, 5);
16266 
16267   /* calculate appropriate column widths */
16268   if (seq_num < 0)
16269   {
16270     first_colwidth = 5;
16271     for (j = 0; j < iatep->num_sequences; j++)
16272     {
16273       first_colwidth = MAX (first_colwidth, (Int4)StringLen (iatep->id_list [j]));
16274     }
16275 
16276     if (mod_type == eModifierType_MolType)
16277     {
16278       val_width = StringLen ("molecule type");
16279     }
16280     else
16281     {
16282       val_width = StringLen (mod_name);
16283     }
16284     val_width = MAX (val_width, GetMaxTagListValueWidth (row_list, num_columns - 1));
16285   }
16286   else
16287   {
16288     val_width = MAX (14, GetMaxTagListValueWidth (row_list, num_columns - 1));
16289     first_colwidth = val_width;
16290   }
16291 
16292   taglist_textWidths [0] = first_colwidth;
16293   taglist_textWidths [1] = val_width;
16294 
16295   taglist_types [0] = TAGLIST_PROMPT; /* sequence ID */
16296   taglist_types [1] = TAGLIST_PROMPT;
16297 
16298   taglist_alists [0] = NULL;
16299   taglist_alists [1] = NULL;
16300 
16301   taglist_types [num_columns - 1] = GetTaglistType (is_nontext, mod_type);
16302   taglist_alists [num_columns - 1] = GetTaglistAlist (is_nontext, mod_type, seqPackage);
16303 
16304   dlg = CreateTagListDialogEx (k, rows_shown, num_columns, 2,
16305                                taglist_types, taglist_textWidths,
16306                                taglist_alists, TRUE, TRUE, NULL, NULL);
16307 
16308 
16309   tlp = (TagListPtr) GetObjectExtra (dlg);
16310   if (tlp == NULL) return NULL;
16311 
16312   SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
16313   tlp->vnp = row_list;
16314   SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
16315   tlp->max = MAX ((Int2) 0, (Int2) (ValNodeLen (row_list) - tlp->rows));
16316   CorrectBarMax (tlp->bar, tlp->max);
16317   CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
16318   if (tlp->max > 0) {
16319     SafeShow (tlp->bar);
16320   } else {
16321     SafeHide (tlp->bar);
16322   }
16323   SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
16324   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control[0], (HANDLE) ppt1, NULL);
16325   AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control[num_columns - 1], (HANDLE) ppt2, NULL);
16326   return dlg;
16327 }
16328 
16329 static void
EditOrgModColumn(CharPtr mod_name,SourceAssistantPtr sap,SeqEntryPtr seq_list,Int2 seqPackage)16330 EditOrgModColumn
16331 (CharPtr            mod_name,
16332  SourceAssistantPtr sap,
16333  SeqEntryPtr        seq_list,
16334  Int2               seqPackage)
16335 {
16336   WindoW                 w;
16337   GrouP                  h, k, c;
16338   DialoG                 dlg;
16339   TagListPtr             tlp;
16340   ModalAcceptCancelData  acd;
16341   ButtoN                 b;
16342   Boolean                is_nontext;
16343   Int4                   mod_type;
16344   GrouP                  instr_grp = NULL;
16345   SetColumnValuesData    scvd;
16346   ButtoN                 clear_btn;
16347   CharPtr                mod_label;
16348   ButtoN                 apply_all_btn;
16349   ValNodePtr             row_list = NULL;
16350   Int4                   num_columns;
16351   IDAndTitleEditPtr      iatep;
16352   Boolean                allow_multi;
16353 
16354   if (StringHasNoText (mod_name) || (sap == NULL && seq_list == NULL))
16355   {
16356     return;
16357   }
16358 
16359   if (StringICmp (mod_name, "moltype") == 0)
16360   {
16361     mod_label = StringSave ("molecule type");
16362   }
16363   else
16364   {
16365     mod_label = StringSave (mod_name);
16366   }
16367 
16368   is_nontext = IsNonTextModifier (mod_name);
16369   mod_type = GetModifierType (mod_name);
16370 
16371   if (mod_type == eModifierType_Organism
16372       || mod_type == eModifierType_Location
16373       || mod_type == eModifierType_GeneticCode)
16374   {
16375     EditOrganismColumn (sap, seq_list);
16376     return;
16377   }
16378 
16379   allow_multi = AllowMultipleValues (mod_name);
16380   if (sap == NULL)
16381   {
16382     iatep = SeqEntryListToIDAndTitleEdit (seq_list);
16383   }
16384   else
16385   {
16386     iatep = SourceAssistantToIDAndTitleEdit (sap);
16387   }
16388 
16389   row_list = IDAndTitleEditToModifierColumnTagList (iatep, mod_name,
16390                                                     is_nontext, mod_type,
16391                                                     allow_multi);
16392 
16393   num_columns = 2;
16394 
16395   w = MovableModalWindow (-20, -13, -10, -10, mod_label, NULL);
16396   h = HiddenGroup(w, -1, 0, NULL);
16397   SetGroupSpacing (h, 10, 10);
16398   mod_label = MemFree (mod_label);
16399 
16400   instr_grp = MakeInstructionGroup (h, is_nontext, mod_type);
16401 
16402   scvd.all_val_dlg = SingleModValDialog (h, is_nontext, mod_type, seqPackage);
16403   PointerToDialog (scvd.all_val_dlg, NULL);
16404   scvd.is_nontext = is_nontext;
16405   scvd.mod_type = mod_type;
16406   apply_all_btn = PushButton (h, "Apply above value to all sequences", EditOrgModApplyAll);
16407   SetObjectExtra (apply_all_btn, &scvd, NULL);
16408 
16409   k = HiddenGroup (h, -1, 0, NULL);
16410 
16411   dlg = CreateValueListDialog (k, mod_name, seqPackage, iatep, -1);
16412 
16413   scvd.tlp = (TagListPtr) GetObjectExtra (dlg);
16414 
16415   if (mod_type == eModifierType_MolType)
16416   {
16417     clear_btn = PushButton (h, "Reset All to Genomic DNA", ClearColumnValues);
16418   }
16419   else if (mod_type == eModifierType_Topology)
16420   {
16421     clear_btn = PushButton (h, "Reset All to Linear", ClearColumnValues);
16422   }
16423   else if (mod_type == eModifierType_Molecule)
16424   {
16425     clear_btn = PushButton (h, "Reset All to DNA", ClearColumnValues);
16426   }
16427   else
16428   {
16429     clear_btn = PushButton (h, "Clear All Values", ClearColumnValues);
16430   }
16431   SetObjectExtra (clear_btn, &scvd, NULL);
16432 
16433   c = HiddenGroup (h, 2, 0, NULL);
16434   b = PushButton (c, "Accept", ModalAcceptButton);
16435   SetObjectExtra (b, &acd, NULL);
16436   b = PushButton (c, "Cancel", ModalCancelButton);
16437   SetObjectExtra (b, &acd, NULL);
16438 
16439   AlignObjects (ALIGN_CENTER, (HANDLE) scvd.all_val_dlg,
16440                               (HANDLE) apply_all_btn,
16441                               (HANDLE) k,
16442                               (HANDLE) clear_btn,
16443                               (HANDLE) c,
16444                               (HANDLE) instr_grp,
16445                               NULL);
16446 
16447   Show (w);
16448   Select (w);
16449   acd.accepted = FALSE;
16450   acd.cancelled = FALSE;
16451   while (!acd.accepted && ! acd.cancelled)
16452   {
16453     ProcessExternalEvent ();
16454     Update ();
16455   }
16456   ProcessAnEvent ();
16457   if (! acd.cancelled)
16458   {
16459     tlp = GetObjectExtra (dlg);
16460 
16461     ApplyModifierColumnTagListToIDAndTitleEdit (mod_name, is_nontext,
16462                                                 mod_type, seqPackage,
16463                                                 taglist_alists [num_columns - 1],
16464                                                 tlp->vnp,
16465                                                 num_columns, iatep, -1);
16466     if (sap == NULL)
16467     {
16468       ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
16469     }
16470     else
16471     {
16472       ApplyIDAndTitleEditToSourceAssistant (sap, iatep);
16473     }
16474 
16475     UpdateOrgModDlg (sap);
16476   }
16477   Remove (w);
16478 
16479   if (mod_type == eModifierType_NucGeneticCode
16480       || mod_type == eModifierType_MitoGeneticCode)
16481   {
16482     taglist_alists [num_columns - 1] = FreeGeneticCodeEnum (taglist_alists [num_columns - 1]);
16483   }
16484   iatep = IDAndTitleEditFree (iatep);
16485 }
16486 
16487 static void
EditModsForOneSequence(CharPtr mod_name,SourceAssistantPtr sap,SeqEntryPtr seq_list,Int2 seqPackage,Int4 seq_num)16488 EditModsForOneSequence
16489 (CharPtr            mod_name,
16490  SourceAssistantPtr sap,
16491  SeqEntryPtr        seq_list,
16492  Int2               seqPackage,
16493  Int4               seq_num)
16494 {
16495   WindoW                 w;
16496   GrouP                  h, k, c;
16497   DialoG                 dlg = NULL;
16498   TagListPtr             tlp;
16499   ModalAcceptCancelData  acd;
16500   ButtoN                 b;
16501   Boolean                is_nontext;
16502   CharPtr                new_value = NULL;
16503   Int4                   mod_type;
16504   GrouP                  instr_grp = NULL;
16505   CharPtr                mod_label;
16506   Int4                   num_columns = 1;
16507   IDAndTitleEditPtr      iatep;
16508   Boolean                allow_multi;
16509   DialoG                 all_val_dlg = NULL;
16510 
16511   if (StringHasNoText (mod_name) || (sap == NULL && seq_list == NULL))
16512   {
16513     return;
16514   }
16515 
16516   if (StringICmp (mod_name, "moltype") == 0)
16517   {
16518     mod_label = StringSave ("molecule type");
16519   }
16520   else
16521   {
16522     mod_label = StringSave (mod_name);
16523   }
16524 
16525   is_nontext = IsNonTextModifier (mod_name);
16526   mod_type = GetModifierType (mod_name);
16527 
16528   allow_multi = AllowMultipleValues (mod_name);
16529   if (sap == NULL)
16530   {
16531     iatep = SeqEntryListToIDAndTitleEdit (seq_list);
16532   }
16533   else
16534   {
16535     iatep = SourceAssistantToIDAndTitleEdit (sap);
16536   }
16537 
16538   /* make sure sequence number is in range */
16539   if (seq_num > iatep->num_sequences || seq_num < 0)
16540   {
16541     iatep = IDAndTitleEditFree (iatep);
16542     return;
16543   }
16544 
16545   w = MovableModalWindow (-20, -13, -10, -10, mod_label, NULL);
16546   h = HiddenGroup(w, -1, 0, NULL);
16547   SetGroupSpacing (h, 10, 10);
16548 
16549   instr_grp = MakeInstructionGroup (h, is_nontext, mod_type);
16550 
16551   k = HiddenGroup (h, -1, 0, NULL);
16552 
16553   if (allow_multi)
16554   {
16555     dlg = CreateValueListDialog (k, mod_name, seqPackage, iatep, seq_num);
16556   }
16557   else
16558   {
16559     all_val_dlg = SingleModValDialog (h, is_nontext, mod_type, seqPackage);
16560     /* set initial value */
16561     new_value = FindValueFromPairInDefline (mod_name, iatep->title_list [seq_num]);
16562     PointerToDialog (all_val_dlg, new_value);
16563     new_value = MemFree (new_value);
16564   }
16565 
16566   c = HiddenGroup (h, 2, 0, NULL);
16567   b = PushButton (c, "Accept", ModalAcceptButton);
16568   SetObjectExtra (b, &acd, NULL);
16569   b = PushButton (c, "Cancel", ModalCancelButton);
16570   SetObjectExtra (b, &acd, NULL);
16571 
16572   AlignObjects (ALIGN_CENTER, (HANDLE) k,
16573                               (HANDLE) c,
16574                               (HANDLE) instr_grp,
16575                               NULL);
16576 
16577   Show (w);
16578   Select (w);
16579   acd.accepted = FALSE;
16580   acd.cancelled = FALSE;
16581   while (!acd.accepted && ! acd.cancelled)
16582   {
16583     ProcessExternalEvent ();
16584     Update ();
16585   }
16586   ProcessAnEvent ();
16587   if (! acd.cancelled)
16588   {
16589     if (allow_multi)
16590     {
16591       tlp = GetObjectExtra (dlg);
16592 
16593       ApplyModifierColumnTagListToIDAndTitleEdit (mod_name, is_nontext,
16594                                                   mod_type, seqPackage,
16595                                                   taglist_alists [num_columns - 1],
16596                                                   tlp->vnp,
16597                                                   num_columns, iatep, seq_num);
16598     }
16599     else
16600     {
16601       new_value = DialogToPointer (all_val_dlg);
16602       iatep->title_list [seq_num] = ReplaceValueInOneDefLine (iatep->title_list [seq_num],
16603                                                               mod_name, new_value);
16604       new_value = MemFree (new_value);
16605     }
16606     if (sap == NULL)
16607     {
16608       ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
16609     }
16610     else
16611     {
16612       ApplyIDAndTitleEditToSourceAssistant (sap, iatep);
16613     }
16614 
16615     UpdateOrgModDlg (sap);
16616   }
16617   Remove (w);
16618 
16619   if (mod_type == eModifierType_NucGeneticCode
16620       || mod_type == eModifierType_MitoGeneticCode)
16621   {
16622     taglist_alists [num_columns - 1] = FreeGeneticCodeEnum (taglist_alists [num_columns - 1]);
16623   }
16624   iatep = IDAndTitleEditFree (iatep);
16625 }
16626 
/* For every non-segment sequence in iatep, looks up the genetic code implied
 * by the "organism" and "location" values in its title (an empty location is
 * treated as "genomic") and, when a code greater than zero is found, stores
 * its name as the "genetic_code" value in that title.
 * Does nothing when iatep is NULL.
 */
static void UpdateGeneticCodesForIDAndTitleEdit (IDAndTitleEditPtr iatep)
{
  ValNodePtr  code_list;
  CharPtr     organism, where, code_label;
  Int4        idx, code;

  if (iatep != NULL)
  {
    code_list = GetGeneticCodeValNodeList ();

    for (idx = 0; idx < iatep->num_sequences; idx++)
    {
      /* segments are left untouched */
      if (iatep->is_seg == NULL || ! iatep->is_seg [idx])
      {
        organism = FindValueFromPairInDefline ("organism", iatep->title_list [idx]);
        where = FindValueFromPairInDefline ("location", iatep->title_list [idx]);
        if (StringHasNoText (where))
        {
          where = MemFree (where);
          where = StringSave ("genomic");
        }

        code = GetGeneticCodeForTaxNameAndLocation (organism, where);
        organism = MemFree (organism);
        where = MemFree (where);

        if (code > 0)
        {
          code_label = GeneticCodeStringFromIntAndList (code, code_list);
          iatep->title_list [idx] = ReplaceValueInOneDefLine (iatep->title_list [idx],
                                                              "genetic_code",
                                                              code_label);
        }
      }
    }

    code_list = ValNodeFreeData (code_list);
  }
}
16664 
16665 static Boolean
ContinueWithAutopopulatedGeneticCodes(SeqEntryPtr seq_list,SourceAssistantPtr sap,ValNodePtr row_list,Int4 affected_row)16666 ContinueWithAutopopulatedGeneticCodes
16667 (SeqEntryPtr        seq_list,
16668  SourceAssistantPtr sap,
16669  ValNodePtr         row_list,
16670  Int4               affected_row)
16671 {
16672   ValNodePtr  autopop_list = NULL, already_have = NULL;
16673   SeqEntryPtr sep, nuc_sep;
16674   BioseqPtr   bsp;
16675   Int4        j;
16676   CharPtr     list_msg = NULL;
16677   Int4        num_sequences = 0;
16678   Boolean     rval = TRUE;
16679   CharPtr     taxname, location, gcode_name;
16680   Int4        gcode;
16681   ValNodePtr  row_vnp, col_vnp;
16682 
16683   if (seq_list == NULL && sap == NULL && row_list == NULL)
16684   {
16685     return FALSE;
16686   }
16687 
16688   if (seq_list != NULL)
16689   {
16690     for (sep = seq_list, j = 0; sep != NULL; sep = sep->next, j++)
16691     {
16692       if (affected_row != -1 && affected_row != j)
16693       {
16694         continue;
16695       }
16696       bsp = NULL;
16697       if (IS_Bioseq (sep))
16698       {
16699         bsp = (BioseqPtr) sep->data.ptrvalue;
16700       }
16701       else if (IS_Bioseq_set (sep))
16702       {
16703         nuc_sep = FindNucSeqEntry (sep);
16704         if (nuc_sep != NULL && IS_Bioseq (nuc_sep))
16705         {
16706           bsp = (BioseqPtr) nuc_sep->data.ptrvalue;
16707         }
16708       }
16709       if (bsp == NULL)
16710       {
16711         continue;
16712       }
16713 
16714       taxname = GetModValueFromSeqEntry (sep, "organism");
16715       location = GetModValueFromSeqEntry (sep, "location");
16716       if (StringHasNoText (location))
16717       {
16718         location = MemFree (location);
16719         location = StringSave ("genomic");
16720       }
16721       gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
16722       taxname = MemFree (taxname);
16723       location = MemFree (location);
16724       if (gcode > 0)
16725       {
16726         if (bsp != NULL)
16727         {
16728           ValNodeAddPointer (&autopop_list, 0, SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT));
16729         }
16730       }
16731       else
16732       {
16733         gcode_name = GetModValueFromSeqEntry (sep, "genetic_code");
16734         if (!StringHasNoText (gcode_name))
16735         {
16736           ValNodeAddPointer (&already_have, 0, SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT));
16737         }
16738         gcode_name = MemFree (gcode_name);
16739       }
16740       num_sequences++;
16741     }
16742   }
16743   else if (sap != NULL)
16744   {
16745     for (j = 0; j < sap->num_deflines; j++)
16746     {
16747       if (affected_row != -1 && affected_row != j)
16748       {
16749         continue;
16750       }
16751       taxname = GetValueFromTitle ("organism", sap->defline_list [j]);
16752       location = GetValueFromTitle ("location", sap->defline_list [j]);
16753       gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
16754       taxname = MemFree (taxname);
16755       location = MemFree (location);
16756       if (gcode > 0)
16757       {
16758         ValNodeAddPointer (&autopop_list, 0, StringSave (sap->id_list[j]));
16759       }
16760       else
16761       {
16762         gcode_name = GetValueFromTitle ("genetic_code", sap->defline_list [j]);
16763         if (!StringHasNoText (gcode_name))
16764         {
16765           ValNodeAddPointer (&already_have, 0, StringSave (sap->id_list[j]));
16766         }
16767         gcode_name = MemFree (gcode_name);
16768       }
16769     }
16770     num_sequences = sap->num_deflines;
16771   }
16772   else if (row_list != NULL)
16773   {
16774     num_sequences = 0;
16775     for (row_vnp = row_list; row_vnp != NULL; row_vnp = row_vnp->next)
16776     {
16777       col_vnp = row_vnp->data.ptrvalue;
16778       if (col_vnp != NULL && col_vnp->next != NULL
16779           && col_vnp->next->next != NULL
16780           && col_vnp->next->next->next != NULL)
16781       {
16782         taxname = col_vnp->next->data.ptrvalue;
16783         location = col_vnp->next->next->data.ptrvalue;
16784         gcode = GetGeneticCodeForTaxNameAndLocation (taxname, location);
16785         if (gcode > 0)
16786         {
16787           ValNodeAddPointer (&autopop_list, 0, StringSave (col_vnp->data.ptrvalue));
16788         }
16789       }
16790       num_sequences++;
16791     }
16792   }
16793 
16794   if (autopop_list != NULL)
16795   {
16796     list_msg = CreateListMessage ("Sequence",
16797                (autopop_list->next == NULL ?
16798                " has a genetic code determined by the location and scientific name.  The genetic code for this sequence cannot be edited."
16799                : " have genetic codes determined by the location and scientific name.  The genetic code for these sequences cannot be edited."),
16800                autopop_list);
16801     if (ValNodeLen (autopop_list) == num_sequences || affected_row != -1)
16802     {
16803       Message (MSG_ERROR, list_msg);
16804       rval = FALSE;
16805     }
16806     else
16807     {
16808       if (ANS_NO == Message (MSG_YN,
16809                      "%s  Do you want to edit the genetic code for the remaining sequences?",
16810                      list_msg))
16811       {
16812         rval = FALSE;
16813       }
16814     }
16815     list_msg = MemFree (list_msg);
16816   }
16817   autopop_list = ValNodeFreeData (autopop_list);
16818 
16819   if (rval && already_have != NULL && affected_row == -1 && num_sequences > 1)
16820   {
16821     list_msg = CreateListMessage ("Sequence",
16822                      (already_have->next == NULL ?
16823                      " already has a genetic code.  Do you wish to overwrite this value?"
16824                      : "already have genetic codes.  Do you wish to overwrite these values?"),
16825                      already_have);
16826     if (ANS_NO == Message (MSG_YN, list_msg))
16827     {
16828       rval = FALSE;
16829     }
16830     list_msg = MemFree (list_msg);
16831   }
16832   return rval;
16833 }
16834 
16835 static Boolean
IDAndTitleEditHasAllDefaultValues(Int2 mod_type,CharPtr mod_name,IDAndTitleEditPtr iatep)16836 IDAndTitleEditHasAllDefaultValues
16837 (Int2              mod_type,
16838  CharPtr           mod_name,
16839  IDAndTitleEditPtr iatep)
16840 {
16841   Boolean               orig_all_default = TRUE;
16842   Int4                  seq_num;
16843   CharPtr               mod_value;
16844 
16845   if (mod_type != eModifierType_MolType
16846       && mod_type != eModifierType_Topology
16847       && mod_type != eModifierType_Location)
16848   {
16849     return FALSE;
16850   }
16851   if (iatep == NULL)
16852   {
16853     return FALSE;
16854   }
16855 
16856   for (seq_num = 0; seq_num < iatep->num_sequences && orig_all_default; seq_num++)
16857   {
16858     if (iatep->is_seg && iatep->is_seg [seq_num])
16859     {
16860       continue;
16861     }
16862     mod_value = FindValueFromPairInDefline (mod_name, iatep->title_list [seq_num]);
16863     if ((mod_type == eModifierType_MolType && StringICmp (mod_value, "Genomic DNA") != 0)
16864         || (mod_type == eModifierType_Topology && StringICmp (mod_value, "Linear") != 0)
16865         || (mod_type == eModifierType_Location && StringICmp (mod_value, "Genomic") != 0))
16866     {
16867       orig_all_default = FALSE;
16868     }
16869     mod_value = MemFree (mod_value);
16870   }
16871   return orig_all_default;
16872 }
16873 
16874 static Boolean
RowListHasAllDefaultValues(Int2 mod_type,CharPtr mod_name,ValNodePtr row_list,Int4 row_list_column)16875 RowListHasAllDefaultValues
16876 (Int2              mod_type,
16877  CharPtr           mod_name,
16878  ValNodePtr        row_list,
16879  Int4              row_list_column)
16880 {
16881   Boolean               orig_all_default = TRUE;
16882   Int4                  seq_num;
16883   CharPtr               mod_value;
16884   Int4                  num_rows;
16885 
16886 
16887   if (mod_type != eModifierType_MolType
16888       && mod_type != eModifierType_Topology
16889       && mod_type != eModifierType_Location)
16890   {
16891     return FALSE;
16892   }
16893   if (row_list == NULL)
16894   {
16895     return FALSE;
16896   }
16897 
16898   num_rows = ValNodeLen (row_list);
16899 
16900   for (seq_num = 0; seq_num < num_rows && orig_all_default; seq_num++)
16901   {
16902     mod_value = GetRowListCellText (row_list, seq_num, row_list_column);
16903     if ((mod_type == eModifierType_MolType && StringICmp (mod_value, "Genomic DNA") != 0)
16904         || (mod_type == eModifierType_Topology && StringICmp (mod_value, "Linear") != 0)
16905         || (mod_type == eModifierType_Location && StringICmp (mod_value, "Genomic") != 0))
16906     {
16907       orig_all_default = FALSE;
16908     }
16909     mod_value = MemFree (mod_value);
16910   }
16911   return orig_all_default;
16912 }
16913 
16914 static void
ApplyOrgModColumnOrCell(CharPtr mod_name,CharPtr suggested_value,Int4 row,SourceAssistantPtr sap,SeqEntryPtr seq_list,ValNodePtr row_list,Int4 row_list_column,Int2 seqPackage)16915 ApplyOrgModColumnOrCell
16916 (CharPtr            mod_name,
16917  CharPtr            suggested_value,
16918  Int4               row,
16919  SourceAssistantPtr sap,
16920  SeqEntryPtr        seq_list,
16921  ValNodePtr         row_list,
16922  Int4               row_list_column,
16923  Int2               seqPackage)
16924 {
16925   WindoW                w;
16926   GrouP                 h, c;
16927   GrouP                 instr_grp = NULL;
16928   Int4                  j;
16929   ModalAcceptCancelData acd;
16930   ButtoN                b;
16931   Boolean               is_nontext;
16932   CharPtr               new_value = NULL;
16933   CharPtr               title;
16934   CharPtr               all_seq_fmt = "%s (all sequences)";
16935   CharPtr               one_seq_fmt = "%s (Seq_ID %s)";
16936   Int4                  num_sequences = 0;
16937   Char                  id_txt[128];
16938   Int2                  mod_type;
16939   ValNodePtr            row_vnp = NULL, col_vnp;
16940   Int4                  row_num, col_num;
16941   CharPtr               mod_label;
16942   Boolean               done;
16943   DialoG                val_dlg;
16944   PrompT                apply_to_all_prompt = NULL;
16945   Boolean               orig_all_default = FALSE;
16946   IDAndTitleEditPtr     iatep = NULL;
16947   Int4                  seq_num;
16948 
16949   if (StringHasNoText (mod_name) || row < -1)
16950   {
16951     return;
16952   }
16953   if (sap == NULL && seq_list == NULL && row_list == NULL)
16954   {
16955     return;
16956   }
16957 
16958   if (sap != NULL)
16959   {
16960     iatep = SourceAssistantToIDAndTitleEdit (sap);
16961   }
16962   else if (seq_list != NULL)
16963   {
16964     iatep = SeqEntryListToIDAndTitleEdit (seq_list);
16965   }
16966 
16967   if (iatep != NULL)
16968   {
16969     num_sequences = 0;
16970     for (j = 0; j < iatep->num_sequences; j++)
16971     {
16972       if (iatep->is_seg != NULL && iatep->is_seg [j])
16973       {
16974         continue;
16975       }
16976       num_sequences++;
16977     }
16978   }
16979   else
16980   {
16981     num_sequences = ValNodeLen (row_list);
16982   }
16983 
16984   if (row >= num_sequences)
16985   {
16986     return;
16987   }
16988 
16989   is_nontext = IsNonTextModifier (mod_name);
16990   mod_type = GetModifierType (mod_name);
16991 
16992   if (row < 0)
16993   {
16994     if (iatep != NULL)
16995     {
16996       orig_all_default = IDAndTitleEditHasAllDefaultValues (mod_type, mod_name, iatep);
16997     }
16998     else
16999     {
17000       orig_all_default = RowListHasAllDefaultValues (mod_type, mod_name, row_list, row_list_column);
17001     }
17002   }
17003 
17004   /* get label to use in window */
17005   if (mod_type == eModifierType_MolType)
17006   {
17007     mod_label = StringSave ("molecule type");
17008   }
17009   else
17010   {
17011     mod_label = StringSave (mod_name);
17012   }
17013 
17014   if (row == -1)
17015   {
17016     title = (CharPtr) MemNew ((StringLen (mod_label) + StringLen (all_seq_fmt)) * sizeof (Char));
17017     sprintf (title, all_seq_fmt, mod_label);
17018   }
17019   else
17020   {
17021     if (iatep != NULL)
17022     {
17023       StringNCpy (id_txt, iatep->id_list [row], sizeof (id_txt) - 1);
17024       id_txt[sizeof(id_txt) - 1] = 0;
17025     }
17026     else if (row_list != NULL)
17027     {
17028       for (row_vnp = row_list, row_num = 0;
17029            row_vnp != NULL && row_num < row;
17030            row_vnp = row_vnp->next, row_num++)
17031       {
17032 
17033       }
17034       if (row_vnp != NULL)
17035       {
17036         col_vnp = row_vnp->data.ptrvalue;
17037         if (col_vnp != NULL)
17038         {
17039           StringNCpy (id_txt, col_vnp->data.ptrvalue, sizeof (id_txt) - 1);
17040           id_txt[sizeof(id_txt) - 1] = 0;
17041         }
17042         else
17043         {
17044           return;
17045         }
17046       }
17047       else
17048       {
17049         return;
17050       }
17051     }
17052     title = (CharPtr) MemNew ((StringLen (mod_label)
17053                                + StringLen (one_seq_fmt)
17054                                + StringLen (id_txt)) * sizeof (Char));
17055     sprintf (title, one_seq_fmt, mod_label, id_txt);
17056   }
17057 
17058   w = MovableModalWindow (-20, -13, -10, -10, title, NULL);
17059   title = MemFree (title);
17060 
17061   h = HiddenGroup(w, -1, 0, NULL);
17062   SetGroupSpacing (h, 10, 10);
17063 
17064   if (row == -1)
17065   {
17066     apply_to_all_prompt = StaticPrompt (h, "This will apply to all sequences in the record.",
17067                                         0, 0, programFont, 'l');
17068   }
17069 
17070   instr_grp = MakeInstructionGroup (h, is_nontext, mod_type);
17071 
17072   val_dlg = SingleModValDialog (h, is_nontext, mod_type, seqPackage);
17073   PointerToDialog (val_dlg, suggested_value);
17074 
17075   c = HiddenGroup (h, 2, 0, NULL);
17076   b = PushButton (c, "Accept", ModalAcceptButton);
17077   SetObjectExtra (b, &acd, NULL);
17078   b = PushButton (c, "Cancel", ModalCancelButton);
17079   SetObjectExtra (b, &acd, NULL);
17080 
17081   if (instr_grp == NULL)
17082   {
17083     AlignObjects (ALIGN_CENTER, (HANDLE) val_dlg,
17084                                 (HANDLE) c,
17085                                 (HANDLE) apply_to_all_prompt,
17086                                 NULL);
17087   }
17088   else
17089   {
17090     AlignObjects (ALIGN_CENTER, (HANDLE) val_dlg,
17091                                 (HANDLE) instr_grp,
17092                                 (HANDLE) c,
17093                                 (HANDLE) apply_to_all_prompt,
17094                                 NULL);
17095   }
17096 
17097   mod_label = MemFree (mod_label);
17098 
17099   Show (w);
17100   Select (w);
17101 
17102   done = FALSE;
17103   while (!done)
17104   {
17105     acd.accepted = FALSE;
17106     acd.cancelled = FALSE;
17107     while (!acd.accepted && ! acd.cancelled)
17108     {
17109       ProcessExternalEvent ();
17110       Update ();
17111     }
17112     ProcessAnEvent ();
17113     if (acd.cancelled)
17114     {
17115       done = TRUE;
17116     }
17117     else if (row < 0
17118              && ! orig_all_default
17119              && ANS_NO == Message (MSG_YN, "Are you sure you want to apply this value to all of your sequences?"))
17120     {
17121       /* do nothing - they'll be able to cancel from the dialog if they want to */
17122     }
17123     else
17124     {
17125       /* prepare value */
17126       new_value = DialogToPointer (val_dlg);
17127 
17128       /* apply values */
17129       if (iatep != NULL)
17130       {
17131         for (j = 0, seq_num = 0; j < iatep->num_sequences; j++)
17132         {
17133           /* don't apply modifiers to segments */
17134           if (iatep->is_seg != NULL && iatep->is_seg [j])
17135           {
17136             continue;
17137           }
17138 
17139           if (seq_num == row || row == -1)
17140           {
17141             iatep->title_list [j] = ReplaceValueInOneDefLine (iatep->title_list [j],
17142                                                               mod_name,
17143                                                               new_value);
17144           }
17145           seq_num++;
17146         }
17147         if (mod_type == eModifierType_Organism
17148             || mod_type == eModifierType_Location
17149             || mod_type == eModifierType_GeneticCode)
17150         {
17151           UpdateGeneticCodesForIDAndTitleEdit (iatep);
17152         }
17153         if (seq_list != NULL)
17154         {
17155           ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
17156         }
17157         else if (sap != NULL)
17158         {
17159           ApplyIDAndTitleEditToSourceAssistant (sap, iatep);
17160         }
17161       }
17162       else if (row_list != NULL)
17163       {
17164         if (row < 0)
17165         {
17166           SetRowListColumn (row_list, row_list_column, new_value);
17167         }
17168         else if (row_vnp != NULL)
17169         {
17170           for (col_vnp = row_vnp->data.ptrvalue, col_num = 0;
17171                col_vnp != NULL && col_num < row_list_column;
17172                col_vnp = col_vnp->next, col_num++)
17173           {
17174           }
17175           if (col_vnp != NULL)
17176           {
17177             col_vnp->data.ptrvalue = MemFree (col_vnp->data.ptrvalue);
17178             col_vnp->data.ptrvalue = StringSave (new_value);
17179           }
17180         }
17181       }
17182       new_value = MemFree (new_value);
17183       UpdateOrgModDlg (sap);
17184       done = TRUE;
17185     }
17186   }
17187   Remove (w);
17188   iatep = IDAndTitleEditFree (iatep);
17189 }
17190 
17191 static void
ApplyOrgModColumn(CharPtr mod_name,CharPtr suggested_value,SourceAssistantPtr sap)17192 ApplyOrgModColumn
17193 (CharPtr mod_name,
17194  CharPtr suggested_value,
17195  SourceAssistantPtr sap)
17196 {
17197   Int4    mod_type;
17198   Boolean is_nontext;
17199   IDAndTitleEditPtr iatep;
17200   Boolean           all_default;
17201 
17202   mod_type = GetModifierType (mod_name);
17203   is_nontext = IsNonTextModifier (mod_name);
17204 
17205   if (mod_type == eModifierType_GeneticCode)
17206   {
17207     if (! ContinueWithAutopopulatedGeneticCodes (NULL, sap, NULL, -1))
17208     {
17209       return;
17210     }
17211   }
17212   else if (!StringHasNoText (suggested_value)  && ! IsNonTextModifier (mod_name)
17213       && sap->num_deflines > 1)
17214   {
17215     iatep = SourceAssistantToIDAndTitleEdit (sap);
17216     all_default = IDAndTitleEditHasAllDefaultValues (mod_type, mod_name, iatep);
17217     iatep = IDAndTitleEditFree (iatep);
17218 
17219     if (!all_default
17220         && ANS_YES != Message (MSG_YN, "Warning!  Some sequences already contain "
17221                             "a value for %s.  Are you sure you want to "
17222                             "overwrite these values?", mod_name))
17223     {
17224       return;
17225     }
17226   }
17227   ApplyOrgModColumnOrCell (mod_name, suggested_value, -1, sap, NULL, NULL, 0, sap->seqPackage);
17228 }
17229 
/* ApplyEditOrgModColumnBtn
 * Shared worker for the "apply" and "edit" modifier buttons of the source
 * assistant.  The modifier name is taken from the current selection of
 * sap->mod_type_dlg: "organism", "location", or the name stored in the
 * selected SourceQualDesc.  When apply is TRUE a single value is applied to
 * the whole column (starting from "TRUE" for non-text modifiers, otherwise
 * from GetFirstDeflineValue); when FALSE the column editor is opened.
 */
static void ApplyEditOrgModColumnBtn (ButtoN b, Boolean apply)
{
  SourceAssistantPtr assistant;
  ValNodePtr         selection;
  CharPtr            qual_name = NULL;
  CharPtr            start_value;

  assistant = (SourceAssistantPtr) GetObjectExtra (b);
  if (assistant == NULL) {
    return;
  }

  /* translate the dialog selection into a modifier name */
  selection = DialogToPointer (assistant->mod_type_dlg);
  if (selection != NULL) {
    if (selection->choice == eModifierType_Organism) {
      qual_name = "organism";
    } else if (selection->choice == eModifierType_Location) {
      qual_name = "location";
    } else if (selection->choice == 0 && selection->data.ptrvalue != NULL) {
      qual_name = ((SourceQualDescPtr) selection->data.ptrvalue)->name;
    }
  }

  if (!StringHasNoText (qual_name)) {
    if (!apply) {
      EditOrgModColumn (qual_name, assistant, NULL, assistant->seqPackage);
    } else {
      if (IsNonTextModifier (qual_name)) {
        start_value = StringSave ("TRUE");
      } else {
        start_value = GetFirstDeflineValue (assistant, qual_name);
      }
      ApplyOrgModColumn (qual_name, start_value, assistant);
      start_value = MemFree (start_value);
    }
  }

  /* the selection is released only after qual_name is no longer needed */
  selection = ValNodeFreeData (selection);
}
17278 
/* Button callback: run ApplyEditOrgModColumnBtn in "apply" mode. */
static void ApplyOrgModColBtn (ButtoN b) {
  ApplyEditOrgModColumnBtn (b, TRUE);
}
17283 
/* Button callback: run ApplyEditOrgModColumnBtn in "edit" mode. */
static void EditOrgModColBtn (ButtoN b) {
  ApplyEditOrgModColumnBtn (b, FALSE);
}
17288 
/* SourceAssistantImportModsBtn
 * Button callback.  Converts the source assistant attached to the button
 * into an IDAndTitleEdit, runs ImportModifiersToIDAndTitleEdit on it and,
 * if that reports success, writes the result back to the assistant and
 * refreshes the modifier display.
 */
static void SourceAssistantImportModsBtn (ButtoN b)
{
  SourceAssistantPtr assistant;
  IDAndTitleEditPtr  edit;

  assistant = (SourceAssistantPtr) GetObjectExtra (b);
  if (assistant == NULL) {
    return;
  }

  edit = SourceAssistantToIDAndTitleEdit (assistant);
  if (ImportModifiersToIDAndTitleEdit (edit)) {
    ApplyIDAndTitleEditToSourceAssistant (assistant, edit);
    UpdateOrgModDlg (assistant);
  }
  edit = IDAndTitleEditFree (edit);
}
17308 
/* SourceAssistantClearAllModifiers
 * Button callback.  After the user confirms, collects the modifier names
 * present on any defline of the source assistant and removes each of them
 * from every defline.  The names genetic_code, organism, location,
 * gencode_comment, moltype and topology are left in place.
 */
static void SourceAssistantClearAllModifiers (ButtoN b)
{
  SourceAssistantPtr assistant;
  ValNodePtr         mod_names = NULL;
  ValNodePtr         cur;
  Int4               line;

  assistant = (SourceAssistantPtr) GetObjectExtra (b);
  if (assistant == NULL) {
    return;
  }

  if (ANS_NO == Message (MSG_YN,
      "Are you sure you want to remove all of the source qualifiers from all of your sequences?")) {
    return;
  }

  /* first pass: gather the modifier names seen on the deflines */
  line = 0;
  while (line < assistant->num_deflines) {
    mod_names = BuildModifierTypeList (mod_names, assistant->defline_list[line], FALSE);
    line++;
  }

  /* second pass: strip every removable modifier from every defline */
  line = 0;
  while (line < assistant->num_deflines) {
    for (cur = mod_names; cur != NULL; cur = cur->next) {
      if (StringICmp (cur->data.ptrvalue, "genetic_code") != 0
          && StringICmp (cur->data.ptrvalue, "organism") != 0
          && StringICmp (cur->data.ptrvalue, "location") != 0
          && StringICmp (cur->data.ptrvalue, "gencode_comment") != 0
          && StringICmp (cur->data.ptrvalue, "moltype") != 0
          && StringICmp (cur->data.ptrvalue, "topology") != 0) {
        RemoveValueFromDefline (cur->data.ptrvalue, assistant->defline_list [line]);
      }
    }
    line++;
  }

  mod_names = ValNodeFreeData (mod_names);
  UpdateOrgModDlg (assistant);
}
17352 
/* OrgModDblClick
 * Double-click handler for the source assistant's modifier table.
 *   x == 0           : ignored
 *   y == 0           : opens the column editor for the modifier named by
 *                      cell_text
 *   any other cell   : opens the single-sequence editor for row y - 1; for
 *                      the genetic code modifier this only happens if
 *                      ContinueWithAutopopulatedGeneticCodes agrees
 * userdata is the SourceAssistantPtr registered with the table.
 */
static void OrgModDblClick (PoinT cell_coord, CharPtr header_text, CharPtr cell_text, Pointer userdata)
{
  SourceAssistantPtr assistant = (SourceAssistantPtr) userdata;
  Int4               header_type;

  if (assistant == NULL) {
    return;
  }

  header_type = GetModifierType (header_text);

  if (cell_coord.x == 0) {
    return;
  }

  if (cell_coord.y == 0) {
    EditOrgModColumn (cell_text, assistant, NULL, assistant->seqPackage);
    return;
  }

  if (header_type == eModifierType_GeneticCode
      && !ContinueWithAutopopulatedGeneticCodes (NULL, assistant, NULL, cell_coord.y - 1)) {
    return;
  }

  EditModsForOneSequence (header_text, assistant, NULL, assistant->seqPackage, cell_coord.y - 1);
}
17380 
/* SeqEntryListToSourceAssistant
 * Fills sap->id_list, sap->defline_list and sap->num_deflines from the IDs
 * and titles of seq_list, skipping entries flagged in is_seg.
 *
 *   seq_list - sequences to read
 *   sap      - source assistant to fill; nothing is done if NULL
 *
 * On return sap->num_deflines is the number of entries stored.  It is 0 if
 * there was nothing to store or if the lists could not be allocated; in the
 * allocation-failure case both list pointers are set to NULL.  The strings
 * placed in the lists are copies made with StringSave.
 */
static void SeqEntryListToSourceAssistant (SeqEntryPtr seq_list, SourceAssistantPtr sap)
{
  IDAndTitleEditPtr iatep;
  Int4              seq_num, sap_num;

  if (sap == NULL)
  {
    return;
  }
  sap->num_deflines = 0;

  iatep = SeqEntryListToIDAndTitleEdit (seq_list);
  if (iatep == NULL || iatep->num_sequences < 1)
  {
    iatep = IDAndTitleEditFree (iatep);
    return;
  }

  /* count the sequences that are not segments */
  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->is_seg == NULL || !iatep->is_seg [seq_num])
    {
      sap->num_deflines ++;
    }
  }

  if (sap->num_deflines < 1)
  {
    iatep = IDAndTitleEditFree (iatep);
    return;
  }

  sap->defline_list = (CharPtr PNTR) MemNew (sizeof (CharPtr) * sap->num_deflines);
  sap->id_list = (CharPtr PNTR) MemNew (sizeof (CharPtr) * sap->num_deflines);
  if (sap->defline_list == NULL || sap->id_list == NULL)
  {
    sap->defline_list = MemFree (sap->defline_list);
    sap->id_list = MemFree (sap->id_list);
    /* report "nothing stored" so callers do not index the NULL lists,
     * and release the temporary edit structure instead of leaking it
     */
    sap->num_deflines = 0;
    iatep = IDAndTitleEditFree (iatep);
    return;
  }

  /* copy ID and title of each non-segment sequence */
  sap_num = 0;
  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
    {
      continue;
    }
    sap->id_list [sap_num] = StringSave (iatep->id_list [seq_num]);
    sap->defline_list [sap_num] = StringSave (iatep->title_list [seq_num]);
    sap_num++;
  }
  iatep = IDAndTitleEditFree (iatep);
}
17439 
/* ApplySourceAssistantToSeqEntryList
 * Writes the deflines held by the source assistant back onto seq_list.
 * Sequences flagged in is_seg are skipped; the n-th remaining sequence
 * receives a copy of sap->defline_list [n].
 *
 *   sap      - source assistant holding the edited deflines
 *   seq_list - sequences whose titles are replaced
 *
 * Nothing is done if either argument is NULL or sap has no defline list.
 * If seq_list holds more non-segment sequences than sap has deflines, the
 * extra sequences keep their current titles.
 */
static void ApplySourceAssistantToSeqEntryList (SourceAssistantPtr sap, SeqEntryPtr seq_list)
{
  IDAndTitleEditPtr iatep;
  Int4              seq_num, sap_num;

  if (sap == NULL || seq_list == NULL || sap->defline_list == NULL)
  {
    return;
  }

  iatep = SeqEntryListToIDAndTitleEdit (seq_list);
  if (iatep == NULL || iatep->num_sequences < 1)
  {
    /* release the (possibly empty) edit structure instead of leaking it */
    iatep = IDAndTitleEditFree (iatep);
    return;
  }

  sap_num = 0;

  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    if (iatep->is_seg != NULL && iatep->is_seg [seq_num])
    {
      continue;
    }
    /* never read past the end of the assistant's defline list */
    if (sap_num >= sap->num_deflines)
    {
      break;
    }
    iatep->title_list [seq_num] = MemFree (iatep->title_list [seq_num]);
    iatep->title_list [seq_num] = StringSave (sap->defline_list [sap_num]);
    sap_num++;
  }
  ApplyIDAndTitleEditToSeqEntryList (seq_list, iatep);
  iatep = IDAndTitleEditFree (iatep);
}
17471 
ShowRedSeqID(Int4 row,ValNodePtr row_list,Pointer userdata)17472 static Boolean ShowRedSeqID (Int4 row, ValNodePtr row_list, Pointer userdata)
17473 {
17474   ValNodePtr row_vnp, col_vnp;
17475   Int4       row_num;
17476 
17477   if (OrganismMatchesAnotherRow (row, row_list, userdata))
17478   {
17479     return TRUE;
17480   }
17481 
17482   /* find the row we're interested in */
17483   for (row_vnp = row_list->next, row_num = 1;
17484        row_vnp != NULL && row_num != row;
17485        row_vnp = row_vnp->next, row_num++)
17486   {
17487   }
17488   if (row_vnp == NULL || row_vnp->data.ptrvalue == NULL)
17489   {
17490     return TRUE;
17491   }
17492 
17493   /* the second column contains the organism names */
17494   col_vnp = row_vnp->data.ptrvalue;
17495   if (col_vnp == NULL
17496       || col_vnp->next == NULL
17497       || StringHasNoText (col_vnp->next->data.ptrvalue))
17498   {
17499     return TRUE;
17500   }
17501   else
17502   {
17503     return FALSE;
17504   }
17505 }
17506 
GetStandardTableDisplayDialogWidth(SequencesFormPtr sqfp)17507 static Int4 GetStandardTableDisplayDialogWidth (SequencesFormPtr sqfp)
17508 {
17509   Int4 doc_width = 0;
17510   RecT r;
17511 
17512   if (sqfp == NULL || sqfp->annot_tbs == NULL)
17513   {
17514     SelectFont (GetTableDisplayDefaultFont ());
17515     doc_width = CharWidth ('0') * 40;
17516   }
17517   else
17518   {
17519     GetPosition (sqfp->annot_tbs, &r);
17520     doc_width = r.right - r.left;
17521   }
17522   return doc_width;
17523 }
17524 
17525 
SourceAssistantForDeflines(SeqEntryPtr seq_list,Int4 doc_width,Int2 seqPackage)17526 NLM_EXTERN Boolean SourceAssistantForDeflines (SeqEntryPtr seq_list, Int4 doc_width, Int2 seqPackage)
17527 {
17528   SourceAssistantData sad;
17529   WindoW              w;
17530   GrouP               h, k, g, g2, mod_btn_grp, c;
17531   Int4                i;
17532   ButtoN              export_btn;
17533   PrompT              ppt1, ppt2;
17534   ValNodePtr          modifier_choice_list = NULL, qual_list = NULL;
17535   Boolean             rval = FALSE;
17536   ButtoN              b;
17537 
17538   SeqEntryListToSourceAssistant (seq_list, &sad);
17539   if (sad.num_deflines < 1)
17540   {
17541     return rval;
17542   }
17543 
17544   SendHelpScrollMessage (helpForm, "Organism Page", "Add Source Modifiers");
17545 
17546   sad.seqPackage = seqPackage;
17547 
17548   sad.done = FALSE;
17549   sad.cancelled = FALSE;
17550   modedit_widths [0] = 7;
17551   modedit_widths [1] = 18;
17552 
17553   w = MovableModalWindow (-20, -13, -10, -10, "Specify Source Modifiers", NULL);
17554   h = HiddenGroup(w, -1, 0, NULL);
17555   SetGroupSpacing (h, 10, 10);
17556 
17557   k = HiddenGroup (h, 2, 0, NULL);
17558   StaticPrompt (k, "Import source modifiers table", 0, popupMenuHeight, programFont, 'l');
17559   b = PushButton (k, "Select File", SourceAssistantImportModsBtn);
17560   SetObjectExtra (b, &sad, NULL);
17561 
17562   g = NormalGroup (h, -1, 0, "", programFont, NULL);
17563   g2 = HiddenGroup (g, 2, 0, NULL);
17564   StaticPrompt (g2, "Select Modifier", 0, popupMenuHeight, programFont, 'l');
17565 
17566   ValNodeAddPointer (&modifier_choice_list, eModifierType_Organism, StringSave ("Organism"));
17567   ValNodeAddPointer (&modifier_choice_list, eModifierType_Location, StringSave ("Location"));
17568   qual_list = GetSourceQualDescList (TRUE, TRUE, FALSE, FALSE);
17569   AddOneSourceQualDesc (&qual_list, "Fwd-PCR-primer-name", FALSE, SUBSRC_fwd_primer_name, 0);
17570   AddOneSourceQualDesc (&qual_list, "Fwd-PCR-primer-seq", FALSE, SUBSRC_fwd_primer_seq, 0);
17571   AddOneSourceQualDesc (&qual_list, "Rev-PCR-primer-name", FALSE, SUBSRC_rev_primer_name, 0);
17572   AddOneSourceQualDesc (&qual_list, "Rev-PCR-primer-seq", FALSE, SUBSRC_rev_primer_seq, 0);
17573   qual_list = ValNodeSort (qual_list, SortVnpBySourceQualDesc);
17574   ValNodeLink (&modifier_choice_list, qual_list);
17575 
17576   /* note - ValNodeSelectionDialog cleans up modifier_choice_list */
17577   sad.mod_type_dlg = ValNodeSelectionDialog (g2, modifier_choice_list, 6,
17578                                              SourceQualValNodeName,
17579                                              ValNodeSimpleDataFree,
17580                                              SourceQualValNodeDataCopy,
17581                                              SourceQualValNodeMatch,
17582                                              "modifier",
17583                                              NULL, NULL, FALSE);
17584   modifier_choice_list = NULL;
17585   qual_list = NULL;
17586 
17587   /* set default value for mod_type_dlg */
17588   ValNodeAddPointer (&qual_list, eModifierType_Organism, StringSave ("Organism"));
17589   PointerToDialog (sad.mod_type_dlg, qual_list);
17590   qual_list = ValNodeFreeData (qual_list);
17591 
17592   mod_btn_grp = HiddenGroup (g, 2, 0, NULL);
17593   b = PushButton (mod_btn_grp, "Apply One Value to All", ApplyOrgModColBtn);
17594   SetObjectExtra (b, &sad, NULL);
17595   b = PushButton (mod_btn_grp, "Edit Individual Values", EditOrgModColBtn);
17596   SetObjectExtra (b, &sad, NULL);
17597 
17598   AlignObjects (ALIGN_CENTER, (HANDLE) g2, (HANDLE) mod_btn_grp, NULL);
17599 
17600   sad.mod_doc = DocumentPanel (h, doc_width, stdLineHeight * 4);
17601   SetDocAutoAdjust (sad.mod_doc, TRUE);
17602 
17603   ppt1 = StaticPrompt (h, "Sequence IDs in red have missing organism names", 0, 0, programFont, 'l');
17604   ppt2 = StaticPrompt (h, "or have source information that matches at least one other sequence.", 0, 0, programFont, 'l');
17605 
17606   sad.orgmod_dlg = TableDisplayDialog (h, doc_width, stdLineHeight * 8, 1, 1,
17607                                        OrgModDblClick, &sad,
17608                                        ShowRedSeqID, NULL);
17609   UpdateOrgModDlg (&sad);
17610 
17611   export_btn = PushButton (h, "Export Source Modifier Table", SourceAssistantExport);
17612   SetObjectExtra (export_btn, &sad, NULL);
17613 
17614   c = HiddenGroup (h, 3, 0, NULL);
17615   b = PushButton(c, "OK", SourceAssistantOk);
17616   SetObjectExtra (b, &sad, NULL);
17617   b = PushButton (c, "Clear All Source Modifiers", SourceAssistantClearAllModifiers);
17618   SetObjectExtra (b, &sad, NULL);
17619   b = PushButton(c, "Cancel", SourceAssistantCancel);
17620   SetObjectExtra (b, &sad, NULL);
17621   AlignObjects (ALIGN_CENTER, (HANDLE) k,
17622                               (HANDLE) g,
17623                               (HANDLE) sad.mod_doc,
17624                               (HANDLE) sad.orgmod_dlg,
17625                               (HANDLE) ppt1,
17626                               (HANDLE) ppt2,
17627                               (HANDLE) export_btn,
17628                               (HANDLE) c,
17629                               NULL);
17630 
17631   Show(w);
17632   Select (w);
17633   while (!sad.done)
17634   {
17635     ProcessExternalEvent ();
17636     Update ();
17637   }
17638   ProcessAnEvent ();
17639   if (!sad.cancelled)
17640   {
17641     ApplySourceAssistantToSeqEntryList (&sad, seq_list);
17642     rval = TRUE;
17643   }
17644 
17645   for (i = 0; i < sad.num_deflines; i++)
17646   {
17647     sad.defline_list[i] = MemFree (sad.defline_list[i]);
17648     sad.id_list[i] = MemFree (sad.id_list[i]);
17649   }
17650   sad.defline_list = MemFree (sad.defline_list);
17651   sad.id_list = MemFree (sad.id_list);
17652 
17653   Remove (w);
17654   return rval;
17655 }
17656 
17657 
/* SourceAssistant
 * Button callback on the sequences form.  Runs SourceAssistantForDeflines
 * on the form's sequences and, if it returns TRUE, refreshes the source tab
 * and reformats the FASTA page document attached to sqfp->dnaseq.
 */
static void SourceAssistant (ButtoN b)
{
  SequencesFormPtr form;
  SeqEntryPtr      entries;
  FastaPagePtr     page;
  Int4             width;

  form = (SequencesFormPtr) GetObjectExtra (b);
  if (form == NULL) {
    return;
  }

  width = GetStandardTableDisplayDialogWidth (form);
  entries = GetSeqEntryFromSequencesForm (form);

  if (!SourceAssistantForDeflines (entries, width, form->seqPackage)) {
    return;
  }

  SeqEntryPtrToSourceTab (form);
  page = (FastaPagePtr) GetObjectExtra (form->dnaseq);
  if (page != NULL) {
    Reset (page->doc);
    FormatFastaDoc (page);
  }
}
17683 
/* ApplyOneValueToAllSequencesDialog
 * Applies one value of the modifier mod_name to all sequences of the
 * sequences form attached to button b, then refreshes the source tab and
 * the FASTA page.
 *
 *   b        - button whose extra data is the SequencesFormPtr
 *   mod_name - name of the modifier to apply
 *
 * For the genetic code modifier the user must first agree to continue via
 * ContinueWithAutopopulatedGeneticCodes.  For text modifiers with an
 * existing first value and more than one sequence, the user is asked to
 * confirm the overwrite unless all values are still default.
 *
 * If the button lives in a window other than the form itself, that window
 * is removed before the value editor is opened.  mod_name may be owned by
 * that window (a caller can pass the window's extra data, registered with a
 * cleanup procedure), so a private copy of the name is made before the
 * window goes away and only the copy is used afterwards.
 */
static void ApplyOneValueToAllSequencesDialog (ButtoN b, CharPtr mod_name)
{
  WindoW               w;
  SequencesFormPtr     sqfp;
  CharPtr              value;
  CharPtr              name_copy;
  SeqEntryPtr          seq_list;
  FastaPagePtr         fpp;
  Int4                 mod_type;
  Boolean              is_nontext;
  IDAndTitleEditPtr    iatep;
  Boolean              all_default;

  sqfp = (SequencesFormPtr) GetObjectExtra (b);
  if (sqfp == NULL)
  {
    return;
  }

  seq_list = GetSeqEntryFromSequencesForm (sqfp);
  if (seq_list == NULL)
  {
    return;
  }

  is_nontext = IsNonTextModifier (mod_name);
  mod_type = GetModifierType (mod_name);
  value = GetFirstModValueFromSeqEntryTitles (seq_list, mod_name);

  if (mod_type == eModifierType_GeneticCode)
  {
    if (! ContinueWithAutopopulatedGeneticCodes (seq_list, NULL, NULL, -1))
    {
      value = MemFree (value);
      return;
    }
  }
  else if (!StringHasNoText (value)  && ! is_nontext
      && seq_list->next != NULL)
  {
    iatep = SeqEntryListToIDAndTitleEdit (seq_list);
    all_default = IDAndTitleEditHasAllDefaultValues (mod_type, mod_name, iatep);
    iatep = IDAndTitleEditFree (iatep);

    if (!all_default
        && ANS_YES != Message (MSG_YN, "Warning!  Some sequences already contain "
                            "a value for %s.  Are you sure you want to "
                            "overwrite these values?", mod_name))
    {
      value = MemFree (value);
      return;
    }
  }

  /* copy the name while the window that may own it still exists */
  name_copy = StringSave (mod_name);

  w = ParentWindow ((Nlm_GraphiC) b);
  if (w != (WindoW)sqfp->form)
  {
    Remove (w);
  }

  /* from here on neither b nor mod_name may be touched */
  ApplyOrgModColumnOrCell (name_copy, value, -1, NULL, seq_list, NULL, 0, sqfp->seqPackage);
  value = MemFree (value);
  name_copy = MemFree (name_copy);

  SeqEntryPtrToSourceTab (sqfp);
  fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
  if (fpp != NULL)
  {
    Reset (fpp->doc);
    FormatFastaDoc (fpp);
  }
}
17755 
/* ApplyModValuesIndividually
 * Opens the column editor for modifier mod_name on the sequences of the
 * form attached to button b, then refreshes the source tab and reformats
 * the FASTA page document.  Does nothing if the button has no form or the
 * form has no sequences.
 */
static void ApplyModValuesIndividually (ButtoN b, CharPtr mod_name)
{
  SequencesFormPtr form;
  SeqEntryPtr      entries;
  FastaPagePtr     page;

  form = (SequencesFormPtr) GetObjectExtra (b);
  if (form == NULL) {
    return;
  }

  entries = GetSeqEntryFromSequencesForm (form);
  if (entries == NULL) {
    return;
  }

  EditOrgModColumn (mod_name, NULL, entries, form->seqPackage);
  SeqEntryPtrToSourceTab (form);

  page = (FastaPagePtr) GetObjectExtra (form->dnaseq);
  if (page == NULL) {
    return;
  }
  Reset (page->doc);
  FormatFastaDoc (page);
}
17783 
/* ApplyModValuesAllBtn
 * Button callback for the "apply one value to all sequences" choice.  The
 * modifier name is read from the extra data of the button's parent window;
 * if it is empty nothing happens, otherwise the value is applied to all
 * sequences and the parent window is removed.
 * NOTE(review): ApplyOneValueToAllSequencesDialog can itself remove the
 * parent window, so the Remove below may act on an already removed window -
 * confirm that this is harmless.
 */
static void ApplyModValuesAllBtn (ButtoN b)
{
  WindoW  parent;
  CharPtr qual_name;

  parent = ParentWindow ((Nlm_GraphiC) b);
  qual_name = GetObjectExtra (parent);
  if (!StringHasNoText (qual_name)) {
    ApplyOneValueToAllSequencesDialog (b, qual_name);
    Remove (parent);
  }
}
17798 
/* ApplyModValuesIndividuallyBtn
 * Button callback for the "apply to sequences individually" choice.  The
 * modifier name is read from the extra data of the button's parent window;
 * if it is empty nothing happens, otherwise the per-sequence editor is run
 * and the parent window is removed afterwards.
 */
static void ApplyModValuesIndividuallyBtn (ButtoN b)
{
  WindoW  parent;
  CharPtr qual_name;

  parent = ParentWindow ((Nlm_GraphiC) b);
  qual_name = GetObjectExtra (parent);
  if (!StringHasNoText (qual_name)) {
    ApplyModValuesIndividually (b, qual_name);
    Remove (parent);
  }
}
17813 
/* SpecifyModValueButton
 * Opens a small modal window offering to apply the modifier mod_name either
 * as one value for all sequences or individually per sequence.
 *
 *   b        - button whose extra data is the SequencesFormPtr
 *   mod_name - modifier name; "moltype" is shown to the user as
 *              "molecule type"
 *
 * Nothing happens if the form or the name is missing.  If the form has no
 * sequences an error message is shown instead of the window.  A copy of
 * mod_name is attached to the window (released by StdCleanupExtraProc) so
 * the button callbacks can retrieve it.
 */
static void SpecifyModValueButton (ButtoN b, CharPtr mod_name)
{
  SequencesFormPtr sqfp;
  WindoW           w;
  GrouP            h;
  SeqEntryPtr      seq_list;
  Char             title[255];
  ButtoN           apply_all_btn;
  ButtoN           apply_each_btn;
  ButtoN           cancel_btn;
  CharPtr          mod_label = NULL;

  sqfp = (SequencesFormPtr) GetObjectExtra (b);
  if (sqfp == NULL || StringHasNoText (mod_name))
  {
    return;
  }

  if (StringICmp (mod_name, "moltype") == 0)
  {
    mod_label = StringSave ("molecule type");
  }
  else
  {
    mod_label = StringSave (mod_name);
  }

  seq_list = GetSeqEntryFromSequencesForm (sqfp);
  if (seq_list == NULL)
  {
    Message (MSG_ERROR, "You must add sequences before you can add %s information!", mod_label);
    mod_label = MemFree (mod_label);
    return;
  }

  /* the label is limited to 200 characters in every format below so that
   * the result always fits into title, whatever name the caller passes
   */
  sprintf (title, "Edit %.200s Information", mod_label);
  /* capitalize the first letter of the label (it follows "Edit ") */
  title [5] = TO_UPPER (title [5]);

  w = MovableModalWindow (-20, -13, -10, -10, title, NULL);
  SetObjectExtra (w, StringSave (mod_name), StdCleanupExtraProc);
  h = HiddenGroup(w, -1, 0, NULL);
  SetGroupSpacing (h, 10, 10);

  sprintf (title, "Apply one %.200s to all sequences", mod_label);
  apply_all_btn = PushButton (h, title, ApplyModValuesAllBtn);
  SetObjectExtra (apply_all_btn, sqfp, NULL);
  sprintf (title, "Apply %.200s to sequences individually", mod_label);
  apply_each_btn = PushButton (h, title, ApplyModValuesIndividuallyBtn);
  SetObjectExtra (apply_each_btn, sqfp, NULL);
  cancel_btn = PushButton (h, "Cancel", StdCancelButtonProc);
  mod_label = MemFree (mod_label);

  AlignObjects (ALIGN_CENTER, (HANDLE) apply_all_btn,
                             (HANDLE) apply_each_btn,
                             (HANDLE) cancel_btn,
                             NULL);
  Show(w);
  Select (w);

}
17874 
/* SpecifyOrganismLocationGeneticCodeButton
 * Button callback.  Shows an error if the form has no sequences yet;
 * otherwise opens the per-sequence editor starting with the "organism"
 * modifier.
 */
static void SpecifyOrganismLocationGeneticCodeButton (ButtoN b)
{
  SequencesFormPtr form;

  form = (SequencesFormPtr) GetObjectExtra (b);
  if (form == NULL) {
    return;
  }

  if (GetSeqEntryFromSequencesForm (form) != NULL) {
    ApplyModValuesIndividually (b, "organism");
  } else {
    Message (MSG_ERROR, "You must add sequences before you can add organisms, locations, or genetic codes!");
  }
}
17896 
/* ClearAllSequenceModifiers
 * Button callback.  After the user confirms, walks every sequence of the
 * form, collects the modifiers found in the first title of its nucleotide
 * entry and clears each of them by applying a NULL value.  Modifiers of
 * type protein, location, origin, moltype, molecule, topology, genetic
 * code, genetic code comment and organism are left untouched.  Finally the
 * source tab and the FASTA page document are refreshed.
 */
static void ClearAllSequenceModifiers (ButtoN b)
{
  SequencesFormPtr form;
  SeqEntryPtr      entries, cur, nuc;
  CharPtr          title;
  ValNodePtr       mods = NULL, m;
  FastaPagePtr     page;
  Int4             kind;

  form = (SequencesFormPtr) GetObjectExtra (b);
  if (form == NULL) {
    return;
  }

  entries = GetSeqEntryFromSequencesForm (form);
  if (entries == NULL) {
    return;
  }

  if (Message (MSG_YN, "Are you sure you want to clear all of the modifiers on all of your sequences?") != ANS_YES) {
    return;
  }

  for (cur = entries; cur != NULL; cur = cur->next) {
    title = NULL;
    nuc = FindNucSeqEntry (cur);
    SeqEntryExplore (nuc, (Pointer) (&title), FindFirstTitle);
    mods = BuildModifierTypeList (mods, title, FALSE);

    for (m = mods; m != NULL; m = m->next) {
      kind = GetModifierType (m->data.ptrvalue);
      /* these modifier types are never cleared */
      if (kind == eModifierType_Protein
          || kind == eModifierType_Location
          || kind == eModifierType_Origin
          || kind == eModifierType_MolType
          || kind == eModifierType_Molecule
          || kind == eModifierType_Topology
          || kind == eModifierType_GeneticCode
          || kind == eModifierType_GeneticCodeComment
          || kind == eModifierType_Organism) {
        continue;
      }
      ApplyOneModToSeqEntry (nuc, m->data.ptrvalue, NULL);
    }

    /* the modifier list is rebuilt from scratch for the next sequence */
    mods = ValNodeFreeData (mods);
  }

  SeqEntryPtrToSourceTab (form);
  page = (FastaPagePtr) GetObjectExtra (form->dnaseq);
  if (page != NULL) {
    Reset (page->doc);
    FormatFastaDoc (page);
  }
}
17955 
/* SummaryDblClick
 * Double-click handler for the source summary table.  Row y (counted from
 * 1) selects the y-th entry of the current source modifier list; the column
 * editor is opened for that modifier and the source tab and FASTA page are
 * refreshed afterwards.  Clicks with y < 1, a missing form (userdata) or a
 * form without sequences are ignored.
 */
static void SummaryDblClick (PoinT cell_coord, CharPtr header_text, CharPtr cell_text, Pointer userdata)
{
  SequencesFormPtr  form;
  SeqEntryPtr       entries;
  IDAndTitleEditPtr edit;
  ValNodePtr        mods = NULL, picked;
  FastaPagePtr      page;
  Int4              pos;

  if (cell_coord.y < 1 || userdata == NULL) {
    return;
  }

  form = (SequencesFormPtr) userdata;
  entries = GetSeqEntryFromSequencesForm (form);
  if (entries == NULL) {
    return;
  }

  edit = SeqEntryListToIDAndTitleEdit (entries);

  /* list of modifiers currently in use */
  mods = GetListOfCurrentSourceModifiers (edit);

  /* advance to the entry matching the clicked row */
  picked = mods;
  pos = 1;
  while (picked != NULL && pos < cell_coord.y) {
    picked = picked->next;
    pos++;
  }

  if (picked != NULL) {
    EditOrgModColumn (picked->data.ptrvalue, NULL, entries, form->seqPackage);
    SeqEntryPtrToSourceTab (form);
    page = (FastaPagePtr) GetObjectExtra (form->dnaseq);
    if (page != NULL) {
      Reset (page->doc);
      FormatFastaDoc (page);
    }
  }

  ValNodeFreeData (mods);
  edit = IDAndTitleEditFree (edit);
}
18002 
/* CreateSourceTab
 * Builds the source tab of the sequences form inside group h and returns
 * the group holding it, or NULL if either argument is NULL.
 *
 * The tab contains, from top to bottom: the organism document panel, a
 * group of prompts about identical source information, the summary table
 * (double-click handled by SummaryDblClick), and the buttons for adding
 * organisms/locations/genetic codes, importing modifiers, running the
 * source assistant and clearing all modifiers.  Every button is created
 * disabled; SeqEntryPtrToSourceTab is called once all controls exist.
 */
static GrouP CreateSourceTab (GrouP h, SequencesFormPtr sqfp)
{
  GrouP              mod_grp;
  GrouP              src_btns_grp;
  GrouP              k;
  Int4               doc_width;

  if (h == NULL || sqfp == NULL)
  {
    return NULL;
  }

  modedit_widths [0] = 7;
  modedit_widths [1] = 18;

  mod_grp = HiddenGroup (h, -1, 0, NULL);
  SetGroupSpacing (mod_grp, 10, 20);

  doc_width = GetStandardTableDisplayDialogWidth (sqfp);

  sqfp->org_doc = DocumentPanel (mod_grp, doc_width, stdLineHeight * 3);
  SetDocAutoAdjust (sqfp->org_doc, TRUE);
  sqfp->ident_org_grp = HiddenGroup (mod_grp, -1, 0, NULL);
  StaticPrompt (sqfp->ident_org_grp, "Some sequences have identical source information.", 0, 0, programFont, 'c');
  StaticPrompt (sqfp->ident_org_grp, "Edit the source information using the", 0, 0, programFont, 'c');
  StaticPrompt (sqfp->ident_org_grp, "'Add Source Modifiers' button below.", 0, 0, programFont, 'c');
  /* source_assist_btn does not exist yet at this point; it is disabled
   * right after it is created further down, so it is not touched here
   */

  sqfp->summary_dlg = TableDisplayDialog (mod_grp, doc_width, stdLineHeight * 8, 1, 1,
                                       SummaryDblClick, sqfp,
                                       NULL, NULL);


  sqfp->specify_orgs_btn = PushButton (mod_grp,
                                       "Add Organisms, Locations, and Genetic Codes",
                                       SpecifyOrganismLocationGeneticCodeButton);
  SetObjectExtra (sqfp->specify_orgs_btn, sqfp, NULL);
  Disable (sqfp->specify_orgs_btn);
  k = HiddenGroup (mod_grp, -1, 0, NULL);
  SetGroupSpacing (k, 10, 10);
  src_btns_grp = HiddenGroup (k, 2, 0, NULL);
  SetGroupSpacing (src_btns_grp, 10, 10);
  sqfp->import_mod_btn = PushButton (src_btns_grp, "Import Source Modifiers", ImportModifiersButtonProc);
  SetObjectExtra (sqfp->import_mod_btn, sqfp, NULL);
  Disable (sqfp->import_mod_btn);
  sqfp->source_assist_btn = PushButton (src_btns_grp, "Add Source Modifiers", SourceAssistant);
  SetObjectExtra (sqfp->source_assist_btn, sqfp, NULL);
  Disable (sqfp->source_assist_btn);
  /* no separate location / genetic code buttons are created on this tab */
  sqfp->specify_locs_btn = NULL;
  sqfp->specify_gcode_btn = NULL;
  sqfp->specify_mgcode_btn = NULL;

  sqfp->clear_mods_btn = PushButton (k, "Clear All Source Modifiers", ClearAllSequenceModifiers);
  SetObjectExtra (sqfp->clear_mods_btn, sqfp, NULL);
  Disable (sqfp->clear_mods_btn);

  AlignObjects (ALIGN_CENTER, (HANDLE) src_btns_grp, (HANDLE) sqfp->clear_mods_btn, NULL);

  SeqEntryPtrToSourceTab (sqfp);
  AlignObjects (ALIGN_CENTER, (HANDLE) sqfp->org_doc,
                              (HANDLE) sqfp->ident_org_grp,
                              (HANDLE) sqfp->summary_dlg,
                              (HANDLE) sqfp->specify_orgs_btn,
                              (HANDLE) k,
                              NULL);

  return mod_grp;
}
18071 
18072 typedef struct fastasummary
18073 {
18074   DIALOG_MESSAGE_BLOCK
18075   PrompT summary_ppt;
18076 } FastaSummaryData, PNTR FastaSummaryPtr;
18077 
/* SequencesToFastaSummary
 * "todialog" handler of the FASTA summary dialog.  userdata is a SeqEntry
 * list; the prompt is set to "<count> sequences, total length <sum>".
 * Every list element counts as one sequence.  For a Bioseq element its own
 * length is added; for a Bioseq-set element the length of the entry
 * returned by FindNucSeqEntry is added, if that is a Bioseq.
 */
static void SequencesToFastaSummary (DialoG d, Pointer userdata)
{
  FastaSummaryPtr dlg;
  SeqEntryPtr     seq_list, sep, nsep;
  Int4            num_sequences = 0, tot_len = 0;
  BioseqPtr       bsp;
  CharPtr         str_format = "%d sequences, total length %d";
  Char            str[255];

  dlg = (FastaSummaryPtr) GetObjectExtra (d);
  if (dlg == NULL)
  {
    return;
  }

  seq_list = (SeqEntryPtr) userdata;
  for (sep = seq_list; sep != NULL; sep = sep->next)
  {
    num_sequences++;
    if (IS_Bioseq (sep))
    {
      bsp = (BioseqPtr) sep->data.ptrvalue;
      if (bsp != NULL)
      {
        tot_len += bsp->length;
      }
    }
    else if (IS_Bioseq_set (sep))
    {
      nsep = FindNucSeqEntry (sep);
      if (nsep != NULL && IS_Bioseq (nsep))
      {
        bsp = (BioseqPtr) nsep->data.ptrvalue;
        if (bsp != NULL)
        {
          tot_len += bsp->length;
        }
      }
    }
  }
  /* %d expects int; cast explicitly because Int4 is not int on every
   * platform the toolkit is built for
   */
  sprintf (str, str_format, (int) num_sequences, (int) tot_len);
  SetTitle (dlg->summary_ppt, str);
}
18121 
FastaSummaryDialog(GrouP parent)18122 static DialoG FastaSummaryDialog (GrouP parent)
18123 {
18124   FastaSummaryPtr dlg;
18125   GrouP           p;
18126 
18127   dlg = (FastaSummaryPtr) MemNew (sizeof (FastaSummaryData));
18128   p = HiddenGroup (parent, -1, 0, NULL);
18129   SetObjectExtra (p, dlg, StdCleanupExtraProc);
18130   SetGroupSpacing (p, 10, 10);
18131 
18132   dlg->dialog = (DialoG) p;
18133   dlg->todialog = SequencesToFastaSummary;
18134   dlg->fromdialog = NULL;
18135   dlg->dialogmessage = NULL;
18136   dlg->testdialog = NULL;
18137 
18138   dlg->summary_ppt = StaticPrompt (p, NULL, stdCharWidth * 20,
18139                                    popupMenuHeight, programFont, 'l');
18140   return (DialoG) p;
18141 }
18142 
18143 /* The Sequence Assistant will allow users to paste in FASTA or create sequences one at a time.
18144  * The list control for selecting a sequence to view and edit should be a DocPanel, so that we
18145  * can add and remove sequences without needing to destroy and recreate the dialog.
18146  * The sequences created should be stored as a SeqEntry list.
18147  */
typedef struct sequenceassistant
{
  DoC            sequence_selector;  /* filled with one entry per sequence by PopulateSequenceSelector */
  DialoG         summary_dlg;        /* receives seq_list in UpdateSequenceAssistant */
  DialoG         sequence_table;     /* receives the rows built by PrepareSequenceAssistantTableData */
  ButtoN         edit_btn;           /* disabled whenever the selector is repopulated */
  ButtoN         delete_btn;         /* disabled whenever the selector is repopulated */
  ButtoN         delete_all_btn;     /* enabled only while seq_list is not NULL */
  ButtoN         import_btn;         /* title is set by UpdateSequenceAssistant */

  Int2           sequence_row;       /* reset to -1 when the selector is repopulated;
                                      * presumably the currently selected row - TODO confirm */

  SeqEntryPtr    seq_list;           /* list of sequences shown by the assistant */

  Int2           seqPackage;         /* NOTE(review): presumably the submission package type - confirm */

  Boolean        done;               /* NOTE(review): presumably set when the assistant is accepted - confirm */
  Boolean        cancelled;          /* NOTE(review): presumably set when the assistant is cancelled - confirm */
} SequenceAssistantData, PNTR SequenceAssistantPtr;
18167 
PrepareSequenceAssistantTableData(SequenceAssistantPtr sap)18168 static ValNodePtr PrepareSequenceAssistantTableData (SequenceAssistantPtr sap)
18169 {
18170   ValNodePtr         column_list = NULL, row_list = NULL;
18171   ValNodePtr         header_list, header_vnp;
18172   Int4               column_width, num_columns = 0;
18173   Int4               max_column_width = 20;
18174   SeqEntryPtr        sep, nsep;
18175   BioseqPtr          bsp;
18176   Char               tmp[128];
18177   CharPtr            ttl = NULL, id;
18178   CharPtr            valstr;
18179 
18180   if (sap == NULL)
18181   {
18182     return NULL;
18183   }
18184   AddDefaultModifierValues (sap->seq_list);
18185 
18186   /* create header line for table */
18187   /* store max column width in choice */
18188   ValNodeAddPointer (&column_list, 6, StringSave ("Seq ID"));
18189   ValNodeAddPointer (&column_list, 6, StringSave ("Length"));
18190   ValNodeAddPointer (&column_list, 8, StringSave ("Molecule"));
18191   ValNodeAddPointer (&column_list, 8, StringSave ("Topology"));
18192   ValNodeAddPointer (&column_list, 11, StringSave ("Title"));
18193 
18194   ValNodeAddPointer (&row_list, 0, column_list);
18195   header_list = column_list;
18196 
18197   num_columns = 3;
18198 
18199   /* create data lines for table */
18200   for (sep = sap->seq_list; sep != NULL; sep = sep->next)
18201   {
18202     bsp = NULL;
18203 
18204     if (IS_Bioseq (sep))
18205     {
18206       bsp = (BioseqPtr) sep->data.ptrvalue;
18207     }
18208     else if (IS_Bioseq_set (sep))
18209     {
18210       nsep = FindNucSeqEntry (sep);
18211       if (nsep != NULL && IS_Bioseq (nsep))
18212       {
18213         bsp = (BioseqPtr) nsep->data.ptrvalue;
18214       }
18215     }
18216 
18217     if (bsp == NULL) continue;
18218 
18219     column_list = NULL;
18220 
18221     /* add Sequence ID */
18222     header_vnp = header_list;
18223     id = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
18224     column_width = MAX (StringLen (id), header_vnp->choice);
18225     column_width = MIN (column_width, max_column_width);
18226     header_vnp->choice = column_width;
18227     ValNodeAddPointer (&column_list, 0, id);
18228 
18229     /* add length */
18230     header_vnp = header_vnp->next;
18231     sprintf (tmp, "%d", bsp->length);
18232     column_width = MAX (StringLen (tmp), header_vnp->choice);
18233     column_width = MIN (column_width, max_column_width);
18234     header_vnp->choice = column_width;
18235     ValNodeAddPointer (&column_list, 0, StringSave (tmp));
18236 
18237     ttl = NULL;
18238     SeqEntryExplore (sep, (Pointer) (&ttl), FindFirstTitle);
18239 
18240     /* add molecule */
18241     header_vnp = header_vnp->next;
18242     valstr = FindValueFromPairInDefline ("moltype", ttl);
18243     if (StringHasNoText (valstr))
18244     {
18245       valstr = MemFree (valstr);
18246       valstr = StringSave ("Genomic DNA");
18247     }
18248     column_width = MAX (StringLen (tmp), header_vnp->choice);
18249     column_width = MIN (column_width, max_column_width);
18250     header_vnp->choice = column_width;
18251     ValNodeAddPointer (&column_list, 0, valstr);
18252 
18253     /* add topology */
18254     header_vnp = header_vnp->next;
18255     valstr = FindValueFromPairInDefline ("topology", ttl);
18256     if (StringHasNoText (valstr))
18257     {
18258       valstr = MemFree (valstr);
18259       valstr = StringSave ("Linear");
18260     }
18261     column_width = MAX (StringLen (tmp), header_vnp->choice);
18262     column_width = MIN (column_width, max_column_width);
18263     header_vnp->choice = column_width;
18264     ValNodeAddPointer (&column_list, 0, valstr);
18265 
18266     /* add title */
18267     header_vnp = header_vnp->next;
18268     column_width = MAX (StringLen (ttl), header_vnp->choice);
18269     column_width = MIN (column_width, max_column_width);
18270     header_vnp->choice = column_width;
18271     ValNodeAddPointer (&column_list, 0, StringSave (ttl));
18272 
18273     ValNodeAddPointer (&row_list, 0, column_list);
18274   }
18275   return row_list;
18276 }
18277 
/* Rebuilds the contents of the sequence selector document.
 * The selector is reset and then receives one text item, the label written
 * by SeqIdWrite for the ID chosen by SeqIdFindWorst, for every entry of
 * sap->seq_list that yields a Bioseq (directly, or through FindNucSeqEntry
 * for a Bioseq-set).  Afterwards the remembered row is reset to -1 and the
 * Edit and Delete buttons are disabled.
 * Does nothing when sap or selector is NULL.
 */
static void PopulateSequenceSelector (SequenceAssistantPtr sap, DoC selector)
{
  SeqEntryPtr entry, nuc_entry;
  BioseqPtr   bsp;
  Char        label[128];

  if (sap == NULL || selector == NULL) return;

  Reset (selector);

  for (entry = sap->seq_list; entry != NULL; entry = entry->next) {
    bsp = NULL;
    if (IS_Bioseq (entry)) {
      bsp = entry->data.ptrvalue;
    } else if (IS_Bioseq_set (entry)) {
      nuc_entry = FindNucSeqEntry (entry);
      if (nuc_entry != NULL && IS_Bioseq (nuc_entry)) {
        bsp = nuc_entry->data.ptrvalue;
      }
    }
    if (bsp == NULL) continue;

    /* one selector item per sequence */
    SeqIdWrite (SeqIdFindWorst (bsp->id), label, PRINTID_REPORT, sizeof (label) - 1);
    AppendText (selector, label, &faParFmt, &faColFmt, programFont);
  }

  InvalDocRows (selector, 0, 0, 0);

  /* nothing is selected after a rebuild */
  sap->sequence_row = -1;
  Disable (sap->edit_btn);
  Disable (sap->delete_btn);
}
18321 
/* Refreshes every control of the Sequence Assistant from sap->seq_list:
 * the sequence table, the sequence selector, the summary dialog, the
 * title of the import button and the enabled state of the Delete All
 * button.  Does nothing when sap is NULL.
 */
static void UpdateSequenceAssistant (SequenceAssistantPtr sap)
{
  ValNodePtr table_rows;
  CharPtr    import_title;

  if (sap == NULL) return;

  /* table: build the rows, hand them to the dialog, release our copy */
  table_rows = PrepareSequenceAssistantTableData (sap);
  PointerToDialog (sap->sequence_table, table_rows);
  FreeTableDisplayRowList (table_rows);

  PopulateSequenceSelector (sap, sap->sequence_selector);
  PointerToDialog (sap->summary_dlg, sap->seq_list);

  /* set title for import button */
  import_title = (sap->seq_list != NULL && sap->seq_list->next != NULL)
                 ? "Import Additional Nucleotide FASTA"
                 : "Import Nucleotide FASTA";
  SetTitle (sap->import_btn, import_title);

  /* Delete All only makes sense when there is something to delete */
  if (sap->seq_list != NULL) {
    Enable (sap->delete_all_btn);
  } else {
    Disable (sap->delete_all_btn);
  }
}
18357 
18358 
/* Imports sequences supplied as text into the Sequence Assistant.
 *
 * seq_str is written to a temporary file, which is then read with
 * GetSequencesFromFile; any sequences read are appended to sap->seq_list
 * and the assistant's controls are refreshed.  The temporary file is
 * removed before returning.
 *
 * Does nothing when sap is NULL, when seq_str contains no text, or when the
 * temporary file cannot be created.
 */
static void ImportSequenceAssistantEditData (SequenceAssistantPtr sap, CharPtr seq_str)
{
  Char         path [PATH_MAX];
  FILE         *fp;
  SeqEntryPtr  new_sep_list;

  if (sap == NULL || StringHasNoText (seq_str))
  {
    return;
  }

  /* the reader works on files, so the text has to be saved to one first */
  TmpNam (path);
  fp = FileOpen (path, "w");
  if (fp == NULL)
  {
    return;
  }
  fprintf (fp, "%s", seq_str);
  FileClose (fp);

  new_sep_list = GetSequencesFromFile (path, sap->seq_list);
  if (new_sep_list != NULL)
  {
    ValNodeLink (&sap->seq_list, new_sep_list);
  }
  UpdateSequenceAssistant (sap);

  FileRemove (path);
}
18379 
/* Returns a copy of a SeqEntry list.
 * Each entry is duplicated individually with AsnIoMemCopy and the copies are
 * chained in the same order.  Copying stops at the first entry that fails to
 * copy, so the result may be shorter than the input (or NULL).  The message
 * level is raised to SEV_MAX for the duration of the copy and restored
 * afterwards.  Returns NULL for an empty input list.
 */
static SeqEntryPtr CopySeqEntryList (SeqEntryPtr seq_list)
{
  SeqEntryPtr      head = NULL;
  SeqEntryPtr PNTR tail = &head;
  SeqEntryPtr      src, dup;
  ErrSev           saved_level;

  if (seq_list == NULL) return NULL;

  saved_level = ErrSetMessageLevel (SEV_MAX);

  for (src = seq_list; src != NULL; src = src->next) {
    dup = AsnIoMemCopy ((Pointer) src,
                        (AsnReadFunc) SeqEntryAsnRead,
                        (AsnWriteFunc) SeqEntryAsnWrite);
    if (dup == NULL) break;

    /* append the copy and advance the tail */
    *tail = dup;
    tail = &dup->next;
  }

  ErrSetMessageLevel (saved_level);

  return head;
}
18414 
18415 
18416 /* This section of code is used for correcting errors in the sequence IDs and
18417  * titles.
18418  */
18419 
18420 /* This structure contains the data used for editing and updating the new
18421  * and existing sequence lists.
18422  */
/* NOTE(review): none of these fields is used in the portion of the file
 * reviewed here; the per-field notes below are inferred from the field names
 * and from the comment above, and should be confirmed against the code that
 * creates and uses this structure.
 */
typedef struct seqidedit
{
  WindoW            w;
  IDAndTitleEditPtr iatep_new;                /* presumably the IDs and titles of the new sequences */
  IDAndTitleEditPtr iatep_current;            /* presumably the IDs and titles of the existing sequences */
  DialoG            new_dlg;
  DialoG            current_dlg;
  GrouP             show_all_grp;
  DoC               auto_correct_doc;
  DialoG            bracket_dlg;
  PaneL             badvalue_pnl;
  PaneL             unrec_mod_pnl;
  ButtoN            auto_correct_btn;
  Boolean           auto_correct_ids;
  Boolean           auto_correct_bracketing;
  Boolean           auto_correct_modnames;
  ButtoN            accept_btn;
  ButtoN            refresh_err_btn;
  ButtoN            refresh_error_list_btn;

  Boolean           seqid_edit_phase;
  Boolean           is_nuc;                   /* presumably TRUE for nucleotide sequences, FALSE for protein */
} SeqIdEditData, PNTR SeqIdEditPtr;
18446 
18447 /* This section of code is used for detecting errors in lists of sequence IDs
18448  * and titles.
18449  */
/* Per-column settings for a four-column ID/title editing list: the first two
 * columns are prompts and the last two are text fields.
 * NOTE(review): presumably passed to a tag list dialog creator - confirm at
 * the point of use.
 */
static Uint2 idedit_types [] = {
  TAGLIST_PROMPT, TAGLIST_PROMPT, TAGLIST_TEXT, TAGLIST_TEXT
};

/* Widths for the same four columns, in the same order.
 * NOTE(review): units are not shown here - confirm at the point of use.
 */
static Uint2 idedit_widths [] = {
  6, 5, 10, 40,
};
18457 
HasMissingIDs(IDAndTitleEditPtr iatep)18458 static Boolean HasMissingIDs (IDAndTitleEditPtr iatep)
18459 {
18460   Int4 i;
18461 
18462   if (iatep == NULL)
18463   {
18464     return FALSE;
18465   }
18466   for (i = 0; i < iatep->num_sequences; i++)
18467   {
18468     if (StringHasNoText (iatep->id_list [i]))
18469     {
18470       return TRUE;
18471     }
18472   }
18473   return FALSE;
18474 }
18475 
IsDuplicateEditID(IDAndTitleEditPtr iatep_new,Int4 new_pos,IDAndTitleEditPtr iatep_current)18476 static Boolean IsDuplicateEditID (IDAndTitleEditPtr iatep_new, Int4 new_pos, IDAndTitleEditPtr iatep_current)
18477 {
18478   Int4 j;
18479 
18480   if (iatep_new == NULL || iatep_new->num_sequences == 0
18481       || new_pos < 0 || new_pos >= iatep_new->num_sequences)
18482   {
18483     return FALSE;
18484   }
18485 
18486   for (j = 0; j < iatep_new->num_sequences; j++)
18487   {
18488     if (j == new_pos)
18489     {
18490       continue;
18491     }
18492     if (StringICmp (iatep_new->id_list [new_pos], iatep_new->id_list [j]) == 0)
18493     {
18494       return TRUE;
18495     }
18496   }
18497   if (iatep_current != NULL)
18498   {
18499     for (j = 0; j < iatep_current->num_sequences; j++)
18500     {
18501       if (StringICmp (iatep_new->id_list [new_pos], iatep_current->id_list [j]) == 0)
18502       {
18503         return TRUE;
18504       }
18505     }
18506   }
18507   return FALSE;
18508 }
18509 
EditHasDuplicateIDs(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current)18510 static Boolean EditHasDuplicateIDs (IDAndTitleEditPtr iatep_new, IDAndTitleEditPtr iatep_current)
18511 {
18512   Int4 i;
18513   ValNodeBlock block;
18514   ValNodePtr vnp;
18515   CharPtr prev = NULL;
18516   Boolean found_duplicate = FALSE;
18517 
18518   if (iatep_new == NULL || iatep_new->num_sequences == 0)
18519   {
18520     return FALSE;
18521   }
18522 
18523   InitValNodeBlock (&block, NULL);
18524   for (i = 0; i < iatep_new->num_sequences; i++)
18525   {
18526     ValNodeAddPointerToEnd (&block, 0, iatep_new->id_list[i]);
18527   }
18528   if (iatep_current != NULL)
18529   {
18530     for (i = 0; i < iatep_current->num_sequences; i++)
18531     {
18532       ValNodeAddPointerToEnd (&block, 0, iatep_current->id_list[i]);
18533     }
18534   }
18535   if (block.head == NULL || block.head->next == NULL)
18536   {
18537     block.head = ValNodeFree (block.head);
18538     return FALSE;
18539   }
18540 
18541   block.head = ValNodeSort (block.head, SortVnpByString);
18542   prev = block.head->data.ptrvalue;
18543   for (vnp = block.head->next; vnp != NULL && !found_duplicate; vnp = vnp->next)
18544   {
18545     if (StringICmp (prev, vnp->data.ptrvalue) == 0)
18546     {
18547       found_duplicate = TRUE;
18548     }
18549     prev = vnp->data.ptrvalue;
18550   }
18551   block.head = ValNodeFree (block.head);
18552   return found_duplicate;
18553 }
18554 
18555 
StringHasSpace(CharPtr str)18556 static Boolean StringHasSpace (CharPtr str)
18557 {
18558   CharPtr cp;
18559   Boolean has_space = FALSE;
18560 
18561   /* look for space in ID */
18562   for (cp = str; cp != NULL && *cp != 0 && !has_space; cp++)
18563   {
18564     if (isspace (*cp)) {
18565       has_space = TRUE;
18566     }
18567   }
18568   return has_space;
18569 }
18570 
18571 
EditHasSpaceInIDs(IDAndTitleEditPtr iatep)18572 static Boolean EditHasSpaceInIDs (IDAndTitleEditPtr iatep)
18573 {
18574   Int4 i;
18575   Boolean has_space = FALSE;
18576 
18577   if (iatep != NULL)
18578   {
18579     for (i = 0; i < iatep->num_sequences && !has_space; i++)
18580     {
18581       has_space = StringHasSpace (iatep->id_list[i]);
18582     }
18583   }
18584   return has_space;
18585 }
18586 
18587 
18588 /* This function creates a list of suggested IDs and titles
18589  * based on errors in the IDs of the original lists.
18590  * The errors that can be auto-corrected are:
18591  *     * spaces in the sequence IDs
18592  *     * brackets in the sequence IDs
18593  */
SuggestCorrectionForLocalIDs(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current)18594 static IDAndTitleEditPtr SuggestCorrectionForLocalIDs (IDAndTitleEditPtr iatep_new, IDAndTitleEditPtr iatep_current)
18595 {
18596   CharPtr           add_str, cp;
18597   Int4              len, add_str_len;
18598   Int4              seq_num;
18599   IDAndTitleEditPtr iatep_corrected = NULL;
18600   CharPtr           new_title_start;
18601 
18602   if (iatep_new == NULL || iatep_new->num_sequences < 1)
18603   {
18604     return NULL;
18605   }
18606 
18607   iatep_corrected = IDAndTitleEditCopy (iatep_new);
18608 
18609   for (seq_num = 0; seq_num < iatep_corrected->num_sequences; seq_num++)
18610   {
18611     if (!StringHasNoText (iatep_new->id_list [seq_num])
18612         && IsDuplicateEditID (iatep_new, seq_num, iatep_current)
18613         && !StringHasNoText (iatep_new->title_list [seq_num])
18614         && iatep_new->title_list [seq_num][0] != '[')
18615     {
18616       len = StringCSpn (iatep_corrected->title_list [seq_num], " \t[]");
18617       if (len > 0)
18618       {
18619         add_str_len = len + StringLen (iatep_corrected->id_list [seq_num]);
18620         add_str = (CharPtr) MemNew ((add_str_len + 1) * sizeof (Char));
18621         if (add_str != NULL)
18622         {
18623           StringCpy (add_str, iatep_corrected->id_list [seq_num]);
18624           StringNCat (add_str, iatep_corrected->title_list [seq_num], len);
18625           add_str [add_str_len] = 0;
18626           iatep_corrected->id_list [seq_num] = MemFree (iatep_corrected->id_list [seq_num]);
18627           iatep_corrected->id_list [seq_num] = add_str;
18628 
18629           new_title_start = iatep_corrected->title_list [seq_num] + len;
18630           new_title_start += StringSpn (new_title_start, " \t");
18631           new_title_start = StringSave (new_title_start);
18632           iatep_corrected->title_list [seq_num] = MemFree (iatep_corrected->title_list [seq_num]);
18633           iatep_corrected->title_list [seq_num] = new_title_start;
18634         }
18635       }
18636     }
18637 
18638     /* suggest correction for brackets in sequence IDs */
18639     if ((cp = StringChr (iatep_corrected->id_list [seq_num], '[')) != NULL)
18640     {
18641       len = StringLen (cp);
18642       if (StringNCmp (cp, iatep_corrected->title_list [seq_num], len) != 0)
18643       {
18644         add_str_len = len + StringLen (iatep_corrected->title_list [seq_num]);
18645         add_str = (CharPtr) MemNew ((add_str_len + 2) * sizeof (Char));
18646         if (add_str != NULL)
18647         {
18648           StringCpy (add_str, cp);
18649           StringCat (add_str, " ");
18650           StringCat (add_str, iatep_corrected->title_list [seq_num]);
18651           iatep_corrected->title_list [seq_num] = MemFree (iatep_corrected->title_list [seq_num]);
18652           iatep_corrected->title_list [seq_num] = add_str;
18653           *cp = 0;
18654         }
18655       }
18656       *cp = 0;
18657     }
18658 
18659     /* suggest correction for spaces in sequence IDs */
18660     cp = iatep_corrected->id_list [seq_num];
18661     while (*cp != 0) {
18662       if (isspace (*cp)) {
18663         *cp = '_';
18664       }
18665       cp++;
18666     }
18667 
18668   }
18669   return iatep_corrected;
18670 }
18671 
18672 /* This function creates a list of suggested titles based on bracketing errors
18673  * in the original list.
18674  */
SuggestCorrectionForTitleBracketing(IDAndTitleEditPtr iatep_orig)18675 static IDAndTitleEditPtr SuggestCorrectionForTitleBracketing (IDAndTitleEditPtr iatep_orig)
18676 {
18677   IDAndTitleEditPtr iatep_corrected;
18678   Int4              seq_num, msg_num;
18679 
18680   if (iatep_orig == NULL || iatep_orig->num_sequences < 1)
18681   {
18682     return NULL;
18683   }
18684 
18685   iatep_corrected = IDAndTitleEditCopy (iatep_orig);
18686 
18687   for (seq_num = 0; seq_num < iatep_corrected->num_sequences; seq_num++)
18688   {
18689     msg_num = DetectBadBracketing (iatep_corrected->title_list[seq_num]);
18690     if (msg_num != 0)
18691     {
18692       iatep_corrected->title_list[seq_num] = MemFree (iatep_corrected->title_list[seq_num]);
18693       iatep_corrected->title_list[seq_num] = SuggestCorrectBracketing(iatep_orig->title_list[seq_num]);
18694     }
18695   }
18696   return iatep_corrected;
18697 }
18698 
18699 /* This function indicates whether there are bracketing errors in the supplied list
18700  * of sequence IDs and titles.
18701  */
EditNeedsBracketingFixes(IDAndTitleEditPtr iatep_orig)18702 static Boolean EditNeedsBracketingFixes (IDAndTitleEditPtr iatep_orig)
18703 {
18704   Int4    seq_num;
18705   Boolean needs_fix = FALSE;
18706 
18707   if (iatep_orig == NULL || iatep_orig->num_sequences < 1)
18708   {
18709     return FALSE;
18710   }
18711 
18712   for (seq_num = 0; seq_num < iatep_orig->num_sequences && ! needs_fix; seq_num++)
18713   {
18714     if (DetectBadBracketing (iatep_orig->title_list[seq_num]) != 0)
18715     {
18716       needs_fix = TRUE;
18717     }
18718   }
18719   return needs_fix;
18720 }
18721 
18722 /* These functions are used to find and list unrecognized modifier names in
18723  * definition lines.
18724  */
18725 
/* Decides whether a parsed modifier should be reported as unrecognized.
 * The answer is TRUE when:
 *   - mip is NULL, or
 *   - the modifier is a source qualifier with subtype 255 whose name is
 *     not accepted by s_IsAllowedUnmapped, or
 *   - is_nuc is FALSE and the modifier is neither a protein modifier nor
 *     named "note-orgmod" (compared case-insensitively).
 * Otherwise the answer is FALSE.
 */
static Boolean IsUnrecognizedModifierName (ModifierInfoPtr mip, Boolean is_nuc)
{
  if (mip == NULL) {
    return TRUE;
  }

  if (mip->modtype == eModifierType_SourceQual
      && mip->subtype == 255
      && !s_IsAllowedUnmapped(mip->name)) {
    return TRUE;
  }

  if (!is_nuc
      && mip->modtype != eModifierType_Protein
      && StringICmp (mip->name, "note-orgmod") != 0) {
    return TRUE;
  }

  return FALSE;
}
18742 
18743 /* This function searches a single definition line for unrecognized modifiers
18744  * and adds them to the list if they are not already on the list.
18745  */
18746 static void
AddUnrecognizedModifiersForOneDefinitionLine(CharPtr defline,ValNodePtr PNTR unrecognized_list,Boolean is_nuc)18747 AddUnrecognizedModifiersForOneDefinitionLine
18748 (CharPtr         defline,
18749  ValNodePtr PNTR unrecognized_list,
18750  Boolean         is_nuc)
18751 {
18752   ValNodePtr      modifier_info_list;
18753   ValNodePtr      info_vnp, type_vnp;
18754   ModifierInfoPtr mip;
18755 
18756   if (StringHasNoText (defline) || unrecognized_list == NULL)
18757   {
18758     return;
18759   }
18760 
18761   modifier_info_list = ParseAllBracketedModifiers (defline);
18762   for (info_vnp = modifier_info_list; info_vnp != NULL; info_vnp = info_vnp->next)
18763   {
18764     mip = (ModifierInfoPtr)info_vnp->data.ptrvalue;
18765     if (mip == NULL || !IsUnrecognizedModifierName (mip, is_nuc))
18766     {
18767       mip = ModifierInfoFree (mip);
18768       continue;
18769     }
18770     for (type_vnp = *unrecognized_list;
18771          type_vnp != NULL && StringICmp (mip->name, type_vnp->data.ptrvalue) != 0;
18772          type_vnp = type_vnp->next)
18773     {
18774     }
18775     if (type_vnp == NULL)
18776     {
18777       ValNodeAddPointer (unrecognized_list, 0, StringSave (mip->name));
18778     }
18779   }
18780 }
18781 
18782 /* This function searches all of the titles in the supplied list of sequence IDs and titles
18783  * for unrecognized modifier names and adds them to the unrecognized_list if they are
18784  * not already in the list.
18785  */
18786 static void
AddUnrecognizedModifiers(IDAndTitleEditPtr iatep,ValNodePtr PNTR unrecognized_list,Boolean is_nuc)18787 AddUnrecognizedModifiers
18788 (IDAndTitleEditPtr iatep,
18789  ValNodePtr PNTR   unrecognized_list,
18790  Boolean           is_nuc)
18791 {
18792   Int4            seq_num;
18793 
18794   if (iatep == NULL || unrecognized_list == NULL)
18795   {
18796     return;
18797   }
18798 
18799   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
18800   {
18801     AddUnrecognizedModifiersForOneDefinitionLine (iatep->title_list [seq_num],
18802                                                   unrecognized_list,
18803                                                   is_nuc);
18804   }
18805 }
18806 
18807 /* This function searches all of the titles in the new and existing sets of sequences
18808  * for unrecognized modifier names and generates a list of unique unrecognized modifier names.
18809  */
18810 static ValNodePtr
ListUnrecognizedModifiers(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc)18811 ListUnrecognizedModifiers
18812 (IDAndTitleEditPtr iatep_new,
18813  IDAndTitleEditPtr iatep_current,
18814  Boolean           is_nuc)
18815 {
18816   ValNodePtr      unrecognized_list = NULL;
18817 
18818   AddUnrecognizedModifiers (iatep_new, &unrecognized_list, is_nuc);
18819   AddUnrecognizedModifiers (iatep_current, &unrecognized_list, is_nuc);
18820 
18821   return unrecognized_list;
18822 }
18823 
18824 /* This section of code will look for inappropriate values in definition line pairs */
/* One inappropriate value found in a definition line.
 * All three strings are owned by the structure: BadValueNew stores copies
 * and BadValueFree releases them.
 */
typedef struct badvalue
{
  CharPtr seq_id;     /* copy of the seq_id passed to BadValueNew */
  CharPtr mod_name;   /* copy of the mod_name passed to BadValueNew */
  CharPtr value;      /* copy of the value, or NULL when the value had no text */
} BadValueData, PNTR BadValuePtr;
18831 
/* Releases a BadValue structure together with the three strings it owns.
 * Accepts NULL.  Returns the result of the final MemFree so that callers
 * can write "bvp = BadValueFree (bvp);".
 */
static BadValuePtr BadValueFree (BadValuePtr bvp)
{
  if (bvp == NULL) {
    return bvp;
  }

  bvp->seq_id = MemFree (bvp->seq_id);
  bvp->mod_name = MemFree (bvp->mod_name);
  bvp->value = MemFree (bvp->value);

  return MemFree (bvp);
}
18843 
/* Releases a ValNode list whose data pointers are BadValue structures:
 * each structure is freed with BadValueFree and each node with ValNodeFree.
 * Accepts NULL.  Returns NULL.
 */
static ValNodePtr BadValueListFree (ValNodePtr list)
{
  ValNodePtr following;

  while (list != NULL) {
    /* detach the node so that ValNodeFree releases only this one */
    following = list->next;
    list->next = NULL;

    list->data.ptrvalue = BadValueFree (list->data.ptrvalue);
    ValNodeFree (list);

    list = following;
  }
  return NULL;
}
18854 
BadValueNew(CharPtr seq_id,CharPtr mod_name,CharPtr value)18855 static BadValuePtr BadValueNew (CharPtr seq_id, CharPtr mod_name, CharPtr value)
18856 {
18857   BadValuePtr bvp;
18858 
18859   bvp = (BadValuePtr) MemNew (sizeof (BadValueData));
18860   if (bvp != NULL)
18861   {
18862     bvp->seq_id = StringSave (seq_id);
18863     bvp->mod_name = StringSave (mod_name);
18864     if (StringHasNoText (value))
18865     {
18866       bvp->value = NULL;
18867     }
18868     else
18869     {
18870       bvp->value = StringSave (value);
18871     }
18872   }
18873   return bvp;
18874 }
18875 
18876 /* The FixModName structure and the functions SetFixModNameAccept
18877  * and FixOneModifierName are used to present a dialog that allows
18878  * a user to replace a modifier name, either for one sequence or
18879  * for all sequences.
18880  * SetFixModNameAccept is used to prevent the user from clicking on
18881  * Accept before choosing a new modifier name.
18882  */
typedef struct fixmodname
{
  DialoG  name_list;    /* selection dialog queried by SetFixModNameAccept */
  ButtoN  accept_btn;   /* enabled only while name_list yields a selection */
} FixModNameData, PNTR FixModNamePtr;
18888 
/* Change callback for the modifier name list.
 * userdata is the FixModName structure.  The Accept button is enabled when
 * the name list dialog currently yields a selection and disabled when it
 * does not.  The selection returned by the dialog is freed here.
 * Does nothing when userdata is NULL.
 */
static void SetFixModNameAccept (Pointer userdata)
{
  FixModNamePtr fix_data = (FixModNamePtr) userdata;
  ValNodePtr    selection;
  Boolean       have_choice;

  if (fix_data == NULL) return;

  /* only the presence of a selection matters, not its content */
  selection = DialogToPointer (fix_data->name_list);
  have_choice = (selection != NULL) ? TRUE : FALSE;
  selection = ValNodeFreeData (selection);

  if (have_choice) {
    Enable (fix_data->accept_btn);
  } else {
    Disable (fix_data->accept_btn);
  }
}
18918 
/* Forward declarations of static functions whose definitions are not in
 * this part of the file.
 */
static CharPtr
ReplaceOneModifierValue
(CharPtr title,
 CharPtr orig_name,
 CharPtr orig_value,
 CharPtr repl_value,
 Boolean is_nontext,
 Boolean copy_to_note);
static void
UpdateIdAndTitleEditDialog
(DialoG            d,
 IDAndTitleEditPtr iatep_new,
 IDAndTitleEditPtr iatep_current,
 Boolean           seqid_edit_phase,
 Boolean           show_all,
 Boolean           is_nuc);
static void ShowErrorInstructions (Pointer userdata);
static void ScrollTagListToSeqId (DialoG d, CharPtr seq_id);
18937 
18938 
18939 static Boolean
FixOneModifierName(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,CharPtr seq_id,CharPtr orig_mod_name,Boolean is_nuc)18940 FixOneModifierName
18941 (IDAndTitleEditPtr iatep_new,
18942  IDAndTitleEditPtr iatep_current,
18943  CharPtr           seq_id,
18944  CharPtr           orig_mod_name,
18945  Boolean           is_nuc)
18946 {
18947   ValNodePtr vnp;
18948   WindoW     w;
18949   ValNodePtr mod_choices;
18950   GrouP      h, action_type, c;
18951   PrompT       p;
18952   FixModNamePtr fmp;
18953   ModalAcceptCancelData acd;
18954   Boolean               rval = FALSE;
18955   ButtoN                b;
18956   CharPtr               prompt_txt;
18957   CharPtr               prompt_fmt = "Please choose a valid modifier name to replace %s:";
18958   CharPtr               radio_txt;
18959   CharPtr               radio_fmt = "For sequence '%s' only";
18960   CharPtr               repl_name;
18961   Int4                  action_type_val, seq_num;
18962 
18963 
18964   if ((iatep_new == NULL && iatep_current == NULL)
18965       || StringHasNoText (orig_mod_name))
18966   {
18967     return FALSE;
18968   }
18969 
18970   fmp = (FixModNamePtr) MemNew (sizeof (FixModNameData));
18971   if (fmp == NULL)
18972   {
18973     return FALSE;
18974   }
18975 
18976   w = MovableModalWindow (-20, -13, -10, -10, "Replace Modifier Name", NULL);
18977   h = HiddenGroup(w, -1, 0, NULL);
18978   SetGroupSpacing (h, 10, 10);
18979   SetObjectExtra (w, fmp, StdCleanupExtraProc);
18980 
18981   prompt_txt = (CharPtr) MemNew ((StringLen (prompt_fmt) + StringLen (orig_mod_name)) * sizeof (Char));
18982   if (prompt_txt != NULL)
18983   {
18984     sprintf (prompt_txt, prompt_fmt, orig_mod_name);
18985   }
18986   p = StaticPrompt (h, prompt_txt,
18987                     0, 0, programFont, 'l');
18988   prompt_txt = MemFree (prompt_txt);
18989 
18990   mod_choices = GetFastaModifierList (is_nuc, !is_nuc);
18991   fmp->name_list = ValNodeSelectionDialog (h, mod_choices, 6,
18992                                           SourceQualValNodeName,
18993                                           ValNodeSimpleDataFree,
18994                                           SourceQualValNodeDataCopy,
18995                                           SourceQualValNodeMatch,
18996                                           "modifier",
18997                                           SetFixModNameAccept, fmp, FALSE);
18998 
18999   if (StringHasNoText (seq_id)
19000       || (iatep_new == NULL && iatep_current->num_sequences == 1)
19001       || (iatep_current == NULL && iatep_new->num_sequences == 1))
19002   {
19003     action_type = NULL;
19004   }
19005   else
19006   {
19007     action_type = HiddenGroup (h, 0, 2, NULL);
19008     SetGroupSpacing (action_type, 10, 10);
19009     RadioButton (action_type, "For all sequences");
19010     radio_txt = (CharPtr) MemNew ((StringLen (radio_fmt) + StringLen (seq_id)) * sizeof (Char));
19011     if (radio_txt != NULL)
19012     {
19013       sprintf (radio_txt, radio_fmt, seq_id);
19014     }
19015     RadioButton (action_type, radio_txt);
19016     radio_txt = MemFree (radio_txt);
19017     SetValue (action_type, 1);
19018   }
19019 
19020   c = HiddenGroup (h, 2, 0, NULL);
19021   fmp->accept_btn = PushButton (c, "Accept", ModalAcceptButton);
19022   SetObjectExtra (fmp->accept_btn, &acd, NULL);
19023   Disable (fmp->accept_btn);
19024   b = PushButton (c, "Cancel", ModalCancelButton);
19025   SetObjectExtra (b, &acd, NULL);
19026 
19027   AlignObjects (ALIGN_CENTER, (HANDLE) p,
19028                               (HANDLE) fmp->name_list,
19029                               (HANDLE) c,
19030                               (HANDLE) action_type,
19031                               NULL);
19032 
19033   Show (w);
19034   Select (w);
19035 
19036   acd.cancelled = FALSE;
19037   acd.accepted = FALSE;
19038   while (!acd.accepted && ! acd.cancelled)
19039   {
19040     ProcessExternalEvent ();
19041     Update ();
19042   }
19043   ProcessAnEvent ();
19044   if (acd.cancelled)
19045   {
19046     rval = FALSE;
19047   }
19048   else
19049   {
19050     vnp = DialogToPointer (fmp->name_list);
19051     repl_name = SourceQualValNodeName (vnp);
19052     if (action_type == NULL)
19053     {
19054       action_type_val = 1;
19055     }
19056     else
19057     {
19058       action_type_val = GetValue (action_type);
19059     }
19060 
19061     for (seq_num = 0; iatep_new != NULL && seq_num < iatep_new->num_sequences; seq_num++)
19062     {
19063       if (action_type_val == 1 /* replace value for all sequences */
19064           || StringCmp (iatep_new->id_list [seq_num], seq_id) == 0)
19065       {
19066         iatep_new->title_list [seq_num] = ReplaceOneModifierName (iatep_new->title_list [seq_num],
19067                                                                   orig_mod_name,
19068                                                                   repl_name);
19069       }
19070     }
19071     for (seq_num = 0; iatep_current != NULL && seq_num < iatep_current->num_sequences; seq_num++)
19072     {
19073       if (action_type_val == 1 /* replace value for all sequences */
19074           || StringCmp (iatep_current->id_list [seq_num], seq_id) == 0)
19075       {
19076         iatep_current->title_list [seq_num] = ReplaceOneModifierName (iatep_current->title_list [seq_num],
19077                                                                       orig_mod_name,
19078                                                                       repl_name);
19079       }
19080     }
19081     repl_name = MemFree (repl_name);
19082     vnp = ValNodeFreeData (vnp);
19083 
19084     rval = TRUE;
19085   }
19086 
19087   Remove (w);
19088 
19089   return rval;
19090 }
19091 
19092 /* This function presents a dialog that allows a user to replace a value for a
19093  * modifier, either in a single sequence or for every sequence.
19094  * The user also has the option to copy the original value into a note.
19095  */
19096 static Boolean
FixOneModifierValue(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,CharPtr seq_id,CharPtr orig_mod_name,CharPtr orig_mod_value,Int4 mod_type)19097 FixOneModifierValue
19098 (IDAndTitleEditPtr iatep_new,
19099  IDAndTitleEditPtr iatep_current,
19100  CharPtr           seq_id,
19101  CharPtr           orig_mod_name,
19102  CharPtr           orig_mod_value,
19103  Int4              mod_type)
19104 {
19105   WindoW     w;
19106   GrouP      h, action_type, c, instr_grp;
19107   PrompT       p;
19108   ModalAcceptCancelData acd;
19109   Boolean               rval = FALSE;
19110   ButtoN                b, accept_btn;
19111   CharPtr               prompt_txt;
19112   CharPtr               prompt_fmt = "Please choose a valid value for %s to replace %s where %s=%s:";
19113   CharPtr               radio_txt;
19114   CharPtr               radio_fmt = "For sequence '%s' only";
19115   Int4                  action_type_val, seq_num;
19116   ButtoN                copy_to_note_btn;
19117   DialoG                new_value_dlg;
19118   Boolean               is_nontext = FALSE;
19119   CharPtr               new_value;
19120   Boolean               copy_to_note;
19121 
19122   if ((iatep_new == NULL && iatep_current == NULL)
19123       || StringHasNoText (orig_mod_name)
19124       || StringHasNoText (seq_id))
19125   {
19126     return FALSE;
19127   }
19128 
19129   w = MovableModalWindow (-20, -13, -10, -10, "Replace Modifier Value", NULL);
19130   h = HiddenGroup(w, -1, 0, NULL);
19131   SetGroupSpacing (h, 10, 10);
19132 
19133   prompt_txt = (CharPtr) MemNew ((StringLen (prompt_fmt)
19134                                   + 2 * StringLen (orig_mod_name)
19135                                   + 2 * StringLen (orig_mod_value)) * sizeof (Char));
19136   if (prompt_txt != NULL)
19137   {
19138     sprintf (prompt_txt, prompt_fmt, orig_mod_name,
19139                                      orig_mod_value == NULL ? "" : orig_mod_value,
19140                                      orig_mod_name,
19141                                      orig_mod_value == NULL ? "" : orig_mod_value);
19142   }
19143   p = StaticPrompt (h, prompt_txt,
19144                     0, 0, programFont, 'l');
19145   prompt_txt = MemFree (prompt_txt);
19146 
19147   if (mod_type == eModifierType_SourceQual)
19148   {
19149     is_nontext = IsNonTextModifier (orig_mod_name);
19150   }
19151 
19152   instr_grp = MakeInstructionGroup (h, is_nontext, mod_type);
19153 
19154   new_value_dlg = SingleModValDialog (h, is_nontext, mod_type, 0);
19155 
19156   action_type = HiddenGroup (h, 0, 2, NULL);
19157   SetGroupSpacing (action_type, 10, 10);
19158   RadioButton (action_type, "For all sequences");
19159   radio_txt = (CharPtr) MemNew ((StringLen (radio_fmt) + StringLen (seq_id)) * sizeof (Char));
19160   if (radio_txt != NULL)
19161   {
19162     sprintf (radio_txt, radio_fmt, seq_id);
19163   }
19164   RadioButton (action_type, radio_txt);
19165   radio_txt = MemFree (radio_txt);
19166   SetValue (action_type, 1);
19167 
19168   copy_to_note_btn = CheckBox (h, "Copy original value to note", NULL);
19169   SetStatus (copy_to_note_btn, TRUE);
19170 
19171   c = HiddenGroup (h, 2, 0, NULL);
19172   accept_btn = PushButton (c, "Accept", ModalAcceptButton);
19173   SetObjectExtra (accept_btn, &acd, NULL);
19174   b = PushButton (c, "Cancel", ModalCancelButton);
19175   SetObjectExtra (b, &acd, NULL);
19176 
19177   AlignObjects (ALIGN_CENTER, (HANDLE) p,
19178                               (HANDLE) new_value_dlg,
19179                               (HANDLE) action_type,
19180                               (HANDLE) copy_to_note_btn,
19181                               (HANDLE) c,
19182                               (HANDLE) instr_grp,
19183                               NULL);
19184 
19185   Show (w);
19186   Select (w);
19187 
19188   acd.cancelled = FALSE;
19189   acd.accepted = FALSE;
19190   while (!acd.accepted && ! acd.cancelled)
19191   {
19192     ProcessExternalEvent ();
19193     Update ();
19194   }
19195   ProcessAnEvent ();
19196   if (acd.cancelled)
19197   {
19198     rval = FALSE;
19199   }
19200   else
19201   {
19202     new_value = DialogToPointer (new_value_dlg);
19203     action_type_val = GetValue (action_type);
19204     copy_to_note = GetStatus (copy_to_note_btn);
19205 
19206     for (seq_num = 0; iatep_new != NULL && seq_num < iatep_new->num_sequences; seq_num++)
19207     {
19208       if (action_type_val == 1 /* replace value for all sequences */
19209           || StringCmp (iatep_new->id_list [seq_num], seq_id) == 0)
19210       {
19211         iatep_new->title_list [seq_num] = ReplaceOneModifierValue (iatep_new->title_list [seq_num],
19212                                                                    orig_mod_name,
19213                                                                    orig_mod_value,
19214                                                                    new_value,
19215                                                                    is_nontext,
19216                                                                    copy_to_note);
19217       }
19218     }
19219     for (seq_num = 0; iatep_current != NULL && seq_num < iatep_current->num_sequences; seq_num++)
19220     {
19221       if (action_type_val == 1 /* replace value for all sequences */
19222           || StringCmp (iatep_current->id_list [seq_num], seq_id) == 0)
19223       {
19224         iatep_current->title_list [seq_num] = ReplaceOneModifierValue (iatep_current->title_list [seq_num],
19225                                                                        orig_mod_name,
19226                                                                        orig_mod_value,
19227                                                                        new_value,
19228                                                                        is_nontext,
19229                                                                        copy_to_note);
19230       }
19231     }
19232     new_value = MemFree (new_value);
19233 
19234     rval = TRUE;
19235   }
19236 
19237   Remove (w);
19238 
19239   return rval;
19240 }
19241 
/* Examines the "location" modifier in title.  When the modifier is present
 * and its value is empty or is not found in biosource_genome_simple_alist,
 * a BadValue record (choice eModifierType_Location) is added to *badlist.
 * An empty value is recorded with a NULL value string.
 */
static void FindBadLocationInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  CharPtr loc_val;
  CharPtr reported;
  Boolean is_bad;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title has no location modifier */
  if (FindValuePairInDefLine ("location", title, NULL) == NULL)
  {
    return;
  }

  loc_val = FindValueFromPairInDefline ("location", title);
  if (StringHasNoText (loc_val))
  {
    is_bad = TRUE;
    reported = NULL;
  }
  else
  {
    is_bad = (Boolean) !IsValueInEnumAssoc (loc_val, biosource_genome_simple_alist);
    reported = loc_val;
  }

  if (is_bad)
  {
    ValNodeAddPointer (badlist, eModifierType_Location,
                       BadValueNew (seq_id, "location", reported));
  }
  loc_val = MemFree (loc_val);
}
19263 
/* Examines the "origin" modifier in title.  When the modifier is present
 * and its value is empty or is not found in biosource_origin_alist, a
 * BadValue record (choice eModifierType_Origin) is added to *badlist.
 * An empty value is recorded with a NULL value string.
 */
static void FindBadOriginInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  CharPtr origin_val;
  CharPtr reported;
  Boolean is_bad;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title has no origin modifier */
  if (FindValuePairInDefLine ("origin", title, NULL) == NULL)
  {
    return;
  }

  origin_val = FindValueFromPairInDefline ("origin", title);
  if (StringHasNoText (origin_val))
  {
    is_bad = TRUE;
    reported = NULL;
  }
  else
  {
    is_bad = (Boolean) !IsValueInEnumAssoc (origin_val, biosource_origin_alist);
    reported = origin_val;
  }

  if (is_bad)
  {
    ValNodeAddPointer (badlist, eModifierType_Origin,
                       BadValueNew (seq_id, "origin", reported));
  }
  origin_val = MemFree (origin_val);
}
19285 
/* Examines the "topology" modifier in title.  When the modifier is present
 * and its value is empty or is not found in topology_nuc_alist, a BadValue
 * record (choice eModifierType_Topology) is added to *badlist.
 * An empty value is recorded with a NULL value string.
 */
static void FindBadTopologyInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  CharPtr topo_val;
  CharPtr reported;
  Boolean is_bad;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title has no topology modifier */
  if (FindValuePairInDefLine ("topology", title, NULL) == NULL)
  {
    return;
  }

  topo_val = FindValueFromPairInDefline ("topology", title);
  if (StringHasNoText (topo_val))
  {
    is_bad = TRUE;
    reported = NULL;
  }
  else
  {
    is_bad = (Boolean) !IsValueInEnumAssoc (topo_val, topology_nuc_alist);
    reported = topo_val;
  }

  if (is_bad)
  {
    ValNodeAddPointer (badlist, eModifierType_Topology,
                       BadValueNew (seq_id, "topology", reported));
  }
  topo_val = MemFree (topo_val);
}
19307 
/* Examines the "moltype" modifier in title.  When the modifier is present
 * and MolTypeFromString yields 0 for its value, a BadValue record (choice
 * eModifierType_MolType) holding that value is added to *badlist.
 */
static void FindBadMolTypeInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  CharPtr moltype_txt;
  Int4    parsed;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title has no moltype modifier */
  if (FindValuePairInDefLine ("moltype", title, NULL) == NULL)
  {
    return;
  }

  moltype_txt = FindValueFromPairInDefline ("moltype", title);
  parsed = MolTypeFromString (moltype_txt);
  if (parsed == 0)
  {
    ValNodeAddPointer (badlist, eModifierType_MolType,
                       BadValueNew (seq_id, "moltype", moltype_txt));
  }
  moltype_txt = MemFree (moltype_txt);
}
19327 
/* Examines the "molecule" modifier in title.  When the modifier is present
 * and its value is neither "dna" nor "rna" (compared without regard to
 * case), a BadValue record (choice eModifierType_Molecule) holding that
 * value is added to *badlist.
 */
static void FindBadMoleculeInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  CharPtr molecule_txt;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title has no molecule modifier */
  if (FindValuePairInDefLine ("molecule", title, NULL) == NULL)
  {
    return;
  }

  molecule_txt = FindValueFromPairInDefline ("molecule", title);
  if (!(StringICmp (molecule_txt, "dna") == 0 || StringICmp (molecule_txt, "rna") == 0))
  {
    ValNodeAddPointer (badlist, eModifierType_Molecule,
                       BadValueNew (seq_id, "molecule", molecule_txt));
  }
  molecule_txt = MemFree (molecule_txt);
}
19345 
19346 static void
FindBadGeneticCodeInTitle(CharPtr seq_id,CharPtr title,CharPtr mod_name,ValNodePtr PNTR badlist)19347 FindBadGeneticCodeInTitle
19348 (CharPtr seq_id,
19349  CharPtr title,
19350  CharPtr mod_name,
19351  ValNodePtr PNTR badlist)
19352 {
19353   CharPtr value;
19354   Int4    gcode;
19355 
19356   if (StringHasNoText (seq_id) || StringHasNoText (title)
19357       || StringHasNoText (mod_name)
19358       || badlist == NULL
19359       || FindValuePairInDefLine (mod_name, title, NULL) == NULL)
19360   {
19361     return;
19362   }
19363 
19364   value = FindValueFromPairInDefline (mod_name, title);
19365   gcode = GeneticCodeFromString (value);
19366   if (gcode == 0)
19367   {
19368     ValNodeAddPointer (badlist, GetModifierType (mod_name), BadValueNew (seq_id, mod_name, value));
19369   }
19370   value = MemFree (value);
19371 }
19372 
/* Examines the modifier named mod_name in title.  An empty value is
 * accepted; a non-empty value must be "TRUE" or "FALSE" (compared without
 * regard to case).  Any other value is added to *badlist as a BadValue
 * record with choice eModifierType_SourceQual.
 */
static void FindBadNonTextValueInTitle (CharPtr seq_id, CharPtr title, CharPtr mod_name, ValNodePtr PNTR badlist)
{
  CharPtr flag_txt;
  Boolean acceptable;

  if (StringHasNoText (seq_id) || StringHasNoText (title) || StringHasNoText (mod_name))
  {
    return;
  }
  if (badlist == NULL)
  {
    return;
  }
  /* nothing to validate when the title lacks this modifier */
  if (FindValuePairInDefLine (mod_name, title, NULL) == NULL)
  {
    return;
  }

  flag_txt = FindValueFromPairInDefline (mod_name, title);
  acceptable = (Boolean) (StringHasNoText (flag_txt)
                          || StringICmp (flag_txt, "TRUE") == 0
                          || StringICmp (flag_txt, "FALSE") == 0);
  if (!acceptable)
  {
    ValNodeAddPointer (badlist, eModifierType_SourceQual,
                       BadValueNew (seq_id, mod_name, flag_txt));
  }
  flag_txt = MemFree (flag_txt);
}
19392 
/* Runs every per-modifier value check on one title and collects the
 * findings in *badlist.  The checks run in a fixed order: location, origin,
 * the three genetic-code modifier names, moltype, molecule, topology, and
 * finally the modifiers checked by FindBadNonTextValueInTitle.
 */
static void FindBadValuesInTitle (CharPtr seq_id, CharPtr title, ValNodePtr PNTR badlist)
{
  static CharPtr gcode_mods [] = { "gcode", "mgcode", "genetic_code" };
  static CharPtr nontext_mods [] = { "transgenic",
                                     "germline",
                                     "metagenomic",
                                     "environmental-sample",
                                     "rearranged" };
  Int4 i;
  Int4 num_gcode = (Int4) (sizeof (gcode_mods) / sizeof (gcode_mods [0]));
  Int4 num_nontext = (Int4) (sizeof (nontext_mods) / sizeof (nontext_mods [0]));

  if (StringHasNoText (seq_id) || StringHasNoText (title) || badlist == NULL)
  {
    return;
  }

  FindBadLocationInTitle (seq_id, title, badlist);
  FindBadOriginInTitle (seq_id, title, badlist);
  for (i = 0; i < num_gcode; i++)
  {
    FindBadGeneticCodeInTitle (seq_id, title, gcode_mods [i], badlist);
  }
  FindBadMolTypeInTitle (seq_id, title, badlist);
  FindBadMoleculeInTitle (seq_id, title, badlist);
  FindBadTopologyInTitle (seq_id, title, badlist);

  /* check nontext modifiers */
  for (i = 0; i < num_nontext; i++)
  {
    FindBadNonTextValueInTitle (seq_id, title, nontext_mods [i], badlist);
  }
}
19416 
/* Applies FindBadValuesInTitle to every ID/title pair held in iatep,
 * collecting the findings in *bad_list.  A NULL iatep is ignored.
 */
static void FindBadValuesInIDsAndTitles (IDAndTitleEditPtr iatep, ValNodePtr PNTR bad_list)
{
  Int4 idx = 0;

  if (iatep != NULL)
  {
    while (idx < iatep->num_sequences)
    {
      FindBadValuesInTitle (iatep->id_list [idx], iatep->title_list [idx], bad_list);
      idx++;
    }
  }
}
19431 
19432 static CharPtr
GetIDAndTitleErrorMessage(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Int4 seq_num,Boolean has_dups,Boolean has_missing,Boolean seqid_edit_phase,Boolean has_bracket,Boolean has_unrec_mods,Boolean is_nuc)19433 GetIDAndTitleErrorMessage
19434 (IDAndTitleEditPtr iatep_new,
19435  IDAndTitleEditPtr iatep_current,
19436  Int4              seq_num,
19437  Boolean           has_dups,
19438  Boolean           has_missing,
19439  Boolean           seqid_edit_phase,
19440  Boolean           has_bracket,
19441  Boolean           has_unrec_mods,
19442  Boolean           is_nuc)
19443 {
19444   ValNodePtr unrec_mod_list = NULL, bad_value_list;
19445   Boolean    is_dup, has_id_bracket = FALSE, has_space = FALSE;
19446   Int4       msg_num = 0;
19447   CharPtr    err_msg = "";
19448   BadValuePtr bvp;
19449 
19450   /* get appropriate error message */
19451   unrec_mod_list = NULL;
19452   bad_value_list = NULL;
19453 
19454   /* determine whether this is a duplicate */
19455   if (is_nuc)
19456   {
19457     is_dup = IsDuplicateEditID (iatep_new, seq_num, iatep_current);
19458   }
19459   else
19460   {
19461     is_dup = FALSE;
19462   }
19463 
19464   /* look for bracket in ID */
19465   if (StringChr (iatep_new->id_list [seq_num], '['))
19466   {
19467     has_id_bracket = TRUE;
19468   }
19469 
19470   /* look for space in ID */
19471   has_space = StringHasSpace (iatep_new->id_list [seq_num]);
19472 
19473   if (has_dups || has_missing || has_id_bracket || has_space)
19474   {
19475     if (StringHasNoText (iatep_new->id_list [seq_num]))
19476     {
19477       err_msg = "Missing ID";
19478     }
19479     else if (is_dup)
19480     {
19481       err_msg = "Duplicate ID";
19482     }
19483     else if (has_id_bracket)
19484     {
19485       err_msg = "Bracket in ID";
19486     }
19487     else if (has_space)
19488     {
19489       err_msg = "Space in ID";
19490     }
19491   }
19492   else if (seqid_edit_phase)
19493   {
19494     err_msg = "";
19495   }
19496   else if (has_bracket)
19497   {
19498     msg_num = DetectBadBracketing (iatep_new->title_list [seq_num]);
19499     /* we have bracketing problems */
19500     switch (msg_num)
19501     {
19502       case BRACKET_ERR_MISMATCHED_BRACKETS:
19503         err_msg = "Mismatched []";
19504         break;
19505       case BRACKET_ERR_MISSING_EQUALS:
19506         err_msg = "Missing '='";
19507         break;
19508       case BRACKET_ERR_MULT_EQUALS:
19509         err_msg = "Too many '='";
19510         break;
19511       case BRACKET_ERR_NO_MOD_NAME:
19512         err_msg = "Missing name";
19513         break;
19514       case BRACKET_ERR_MISMATCHED_QUOTES:
19515         err_msg = "Mismatched \" or '";
19516         break;
19517     }
19518   }
19519   else if (has_unrec_mods)
19520   {
19521     AddUnrecognizedModifiersForOneDefinitionLine (iatep_new->title_list [seq_num],
19522                                                   &unrec_mod_list,
19523                                                   is_nuc);
19524     if (unrec_mod_list != NULL)
19525     {
19526       err_msg = unrec_mod_list->data.ptrvalue;
19527     }
19528   }
19529   else
19530   {
19531     FindBadValuesInTitle (iatep_new->id_list [seq_num], iatep_new->title_list [seq_num], &bad_value_list);
19532     if (bad_value_list != NULL && (bvp = bad_value_list->data.ptrvalue) != NULL)
19533     {
19534       err_msg = bvp->mod_name;
19535     }
19536   }
19537 
19538   err_msg = StringSave (err_msg);
19539   ValNodeFreeData (unrec_mod_list);
19540   BadValueListFree (bad_value_list);
19541   return err_msg;
19542 }
19543 
GetTagListErrValueForSeqNum(TagListPtr tlp,Int4 seq_num)19544 static CharPtr GetTagListErrValueForSeqNum (TagListPtr tlp, Int4 seq_num)
19545 {
19546   Char       seq_str [15];
19547   ValNodePtr vnp;
19548   Int4       row_num;
19549   CharPtr    pos_str;
19550 
19551   if (tlp == NULL)
19552   {
19553     return NULL;
19554   }
19555 
19556   sprintf (seq_str, "%d", seq_num + 1);
19557   for (vnp = tlp->vnp, row_num = 0; vnp != NULL; vnp = vnp->next, row_num++)
19558   {
19559     pos_str = GetTagListValueEx (tlp, row_num, 1);
19560     if (StringCmp (pos_str, seq_str) == 0)
19561     {
19562       pos_str = MemFree (pos_str);
19563       return GetTagListValueEx (tlp, row_num, 0);
19564     }
19565     pos_str = MemFree (pos_str);
19566   }
19567   return NULL;
19568 }
19569 
19570 /* This function displays information from a list of sequence IDs and titles
19571  * in a TagList dialog.
19572  * The TagList dialog has four columns: Error, Position, Sequence ID, and Title.
19573  */
19574 static void
UpdateIdAndTitleEditDialog(DialoG d,IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean seqid_edit_phase,Boolean show_all,Boolean is_nuc)19575 UpdateIdAndTitleEditDialog
19576 (DialoG            d,
19577  IDAndTitleEditPtr iatep_new,
19578  IDAndTitleEditPtr iatep_current,
19579  Boolean           seqid_edit_phase,
19580  Boolean           show_all,
19581  Boolean           is_nuc)
19582 {
19583   TagListPtr tlp;
19584   Int4       seq_num, len;
19585   CharPtr    str;
19586   CharPtr    str_format = "%s\t%d\t%s\t%s\n";
19587   Int4       num_shown;
19588   CharPtr    err_msg, old_err_msg;
19589   ValNodePtr taglist_data = NULL;
19590   Int4       row_to_show, row_to_hide;
19591   Boolean    has_dups, has_missing, has_bracket, has_space, has_unrec_mods = FALSE;
19592   ValNodePtr unrec_mods = NULL;
19593 
19594   tlp = (TagListPtr) GetObjectExtra (d);
19595 
19596   if (tlp == NULL || iatep_new == NULL || iatep_new->num_sequences == 0)
19597   {
19598     return;
19599   }
19600 
19601   has_dups = EditHasDuplicateIDs (iatep_new, iatep_current);
19602   has_missing = HasMissingIDs (iatep_new) || HasMissingIDs (iatep_current);
19603   has_bracket = EditNeedsBracketingFixes (iatep_new)
19604                 || EditNeedsBracketingFixes (iatep_current);
19605   has_space = EditHasSpaceInIDs (iatep_new) || EditHasSpaceInIDs (iatep_current);
19606 
19607   if (!has_bracket)
19608   {
19609     unrec_mods = ListUnrecognizedModifiers (iatep_new, iatep_current, is_nuc);
19610   }
19611   if (unrec_mods != NULL)
19612   {
19613     has_unrec_mods = TRUE;
19614     unrec_mods = ValNodeFreeData (unrec_mods);
19615   }
19616 
19617   num_shown = 0;
19618   for (seq_num = 0; seq_num < iatep_new->num_sequences; seq_num++)
19619   {
19620     err_msg = GetIDAndTitleErrorMessage (iatep_new, iatep_current,
19621                                          seq_num, has_dups,
19622                                          has_missing, seqid_edit_phase,
19623                                          has_bracket,
19624                                          has_unrec_mods, is_nuc);
19625     if (seqid_edit_phase && StringHasNoText (err_msg))
19626     {
19627       old_err_msg = GetTagListErrValueForSeqNum (tlp, seq_num);
19628       if (StringCmp (old_err_msg, "Duplicate ID") == 0
19629           || StringCmp (old_err_msg, "Missing ID") == 0
19630           || StringCmp (old_err_msg, "Space in ID") == 0
19631           || StringCmp (old_err_msg, "Fixed") == 0)
19632       {
19633         err_msg = MemFree (err_msg);
19634         err_msg = StringSave ("Fixed");
19635       }
19636       old_err_msg = MemFree (old_err_msg);
19637     }
19638 
19639     if (StringHasNoText (err_msg) && !show_all)
19640     {
19641       err_msg = MemFree (err_msg);
19642       continue;
19643     }
19644 
19645     len = StringLen (str_format) + StringLen (err_msg) + 20
19646                      + StringLen (iatep_new->id_list [seq_num])
19647                      + StringLen (iatep_new->title_list [seq_num]);
19648     str = MemNew (len * sizeof (Char));
19649     if (str != NULL)
19650     {
19651       sprintf (str, str_format,
19652                err_msg,
19653                seq_num + 1,
19654                StringHasNoText (iatep_new->id_list [seq_num]) ? "" : iatep_new->id_list [seq_num],
19655                StringHasNoText (iatep_new->title_list [seq_num]) ? "" : iatep_new->title_list [seq_num]);
19656       ValNodeAddPointer (&taglist_data, 0, StringSave (str));
19657     }
19658     err_msg = MemFree (err_msg);
19659     num_shown ++;
19660   }
19661 
19662   SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
19663   tlp->vnp = taglist_data;
19664   SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
19665   tlp->max = MAX ((Int2) 0, (Int2) (num_shown - tlp->rows));
19666   CorrectBarMax (tlp->bar, tlp->max);
19667   CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
19668   CorrectBarMax (tlp->left_bar, tlp->max);
19669   CorrectBarPage (tlp->left_bar, tlp->rows - 1, tlp->rows - 1);
19670   for (row_to_show = 0; row_to_show < MIN (num_shown, tlp->rows); row_to_show ++)
19671   {
19672     SafeShow (tlp->control [row_to_show * MAX_TAGLIST_COLS + 2]);
19673     SafeShow (tlp->control [row_to_show * MAX_TAGLIST_COLS + 3]);
19674   }
19675   if (tlp->max > 0) {
19676     SafeShow (tlp->bar);
19677     SafeShow (tlp->left_bar);
19678   } else {
19679     SafeHide (tlp->bar);
19680     SafeHide (tlp->left_bar);
19681     for (row_to_hide = num_shown; row_to_hide < tlp->rows; row_to_hide ++)
19682     {
19683       SafeHide (tlp->control [row_to_hide * MAX_TAGLIST_COLS + 2]);
19684       SafeHide (tlp->control [row_to_hide * MAX_TAGLIST_COLS + 3]);
19685     }
19686   }
19687 }
19688 
19689 /* This function copies the contents of a TagList dialog into a list
19690  * of Sequence IDs and titles.  The first two columns of the TagList
19691  * dialog, Error and Position, are ignored.
19692  */
UpdateIdAndTitleData(DialoG d,IDAndTitleEditPtr iatep)19693 static void UpdateIdAndTitleData (DialoG d, IDAndTitleEditPtr iatep)
19694 {
19695   CharPtr      str;
19696   Int4         num_rows, row_num, seq_pos;
19697   TagListPtr   tlp;
19698 
19699   tlp = (TagListPtr) GetObjectExtra (d);
19700   if (tlp == NULL || iatep == NULL)
19701   {
19702     return;
19703   }
19704 
19705   num_rows = ValNodeLen (tlp->vnp);
19706   for (row_num = 0; row_num < num_rows; row_num++)
19707   {
19708     /* get position for this sequence */
19709     str = GetTagListValueEx (tlp, row_num, 1);
19710     seq_pos = atoi (str);
19711     str = MemFree (str);
19712     if (seq_pos < 1 || seq_pos > iatep->num_sequences)
19713     {
19714       continue;
19715     }
19716     seq_pos --;
19717 
19718     /* collect ID */
19719     iatep->id_list [seq_pos] = MemFree (iatep->id_list [seq_pos]);
19720     iatep->id_list [seq_pos] = GetTagListValueEx (tlp, row_num, 2);
19721     TrimSpacesAroundString (iatep->id_list [seq_pos]);
19722 
19723     /* collect title */
19724     iatep->title_list [seq_pos] = MemFree (iatep->title_list [seq_pos]);
19725     iatep->title_list [seq_pos] = GetTagListValueEx (tlp, row_num, 3);
19726   }
19727 }
19728 
/* Recomputes the error column (column 0) of every row in the TagList
 * attached to dialog d.  Each row's position column (column 1) selects the
 * sequence in iatep_new to check; rows with no position text or a position
 * outside 1..num_sequences are left untouched.
 * During the sequence ID editing phase, a row that is now clean but
 * previously showed "Duplicate ID", "Missing ID", "Space in ID" or "Fixed"
 * is labelled "Fixed".
 */
static void SetIDAndTitleEditDialogErrorColumn
(DialoG            d,
 IDAndTitleEditPtr iatep_new,
 IDAndTitleEditPtr iatep_current,
 Boolean           seqid_edit_phase,
 Boolean           is_nuc)
{
  static CharPtr fixable_msgs [] = { "Duplicate ID", "Missing ID", "Space in ID", "Fixed" };
  TagListPtr     tlp;
  ValNodePtr     row;
  ValNodePtr     unrec = NULL;
  Int4           row_idx, position, which;
  Int4           num_fixable = (Int4) (sizeof (fixable_msgs) / sizeof (fixable_msgs [0]));
  CharPtr        pos_txt, msg, previous;
  Boolean        any_dups, any_missing, any_bracket, any_space;
  Boolean        any_unrec = FALSE;

  tlp = (TagListPtr) GetObjectExtra (d);
  if (tlp == NULL || iatep_new == NULL)
  {
    return;
  }
  if (iatep_new->num_sequences == 0)
  {
    return;
  }

  /* set-wide conditions that decide which category of error is reported */
  any_dups = EditHasDuplicateIDs (iatep_new, iatep_current);
  any_missing = HasMissingIDs (iatep_new) || HasMissingIDs (iatep_current);
  any_bracket = EditNeedsBracketingFixes (iatep_new)
                || EditNeedsBracketingFixes (iatep_current);
  any_space = EditHasSpaceInIDs (iatep_new) || EditHasSpaceInIDs (iatep_current);

  /* unrecognized modifiers are only looked for once bracketing is clean */
  if (!any_bracket)
  {
    unrec = ListUnrecognizedModifiers (iatep_new, iatep_current, is_nuc);
    if (unrec != NULL)
    {
      any_unrec = TRUE;
      unrec = ValNodeFreeData (unrec);
    }
  }

  for (row = tlp->vnp, row_idx = 0; row != NULL; row = row->next, row_idx++)
  {
    /* get position for this sequence */
    pos_txt = GetTagListValueEx (tlp, row_idx, 1);
    if (pos_txt == NULL)
    {
      continue;
    }
    position = atoi (pos_txt);
    pos_txt = MemFree (pos_txt);
    if (position < 1 || position > iatep_new->num_sequences)
    {
      continue;
    }

    /* get appropriate error message (positions are 1-based) */
    msg = GetIDAndTitleErrorMessage (iatep_new, iatep_current,
                                     position - 1, any_dups,
                                     any_missing, seqid_edit_phase,
                                     any_bracket,
                                     any_unrec,
                                     is_nuc);

    if (seqid_edit_phase && StringHasNoText (msg))
    {
      /* a row that previously showed an ID problem is marked as fixed */
      previous = GetTagListValueEx (tlp, row_idx, 0);
      for (which = 0; which < num_fixable; which++)
      {
        if (StringCmp (previous, fixable_msgs [which]) == 0)
        {
          msg = MemFree (msg);
          msg = StringSave ("Fixed");
          break;
        }
      }
      previous = MemFree (previous);
    }

    SetTagListValue (tlp, row_idx, 0, msg);

    msg = MemFree (msg);
  }
  SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
}
19812 
/* Sets column 0 (the error column) of every row in the TagList attached to
 * dialog d to the empty string.  Does nothing when d has no TagList.
 */
static void ClearIDAndTitleEditDialogErrorColumn (DialoG d)
{
  TagListPtr tlp = (TagListPtr) GetObjectExtra (d);
  ValNodePtr row;
  Int4       row_idx = 0;

  if (tlp != NULL)
  {
    for (row = tlp->vnp; row != NULL; row = row->next)
    {
      SetTagListValue (tlp, row_idx, 0, "");
      row_idx++;
    }
  }
}
19831 
19832 
19833 static void
UpdateIDAndTitleEditDialogErrorColumns(DialoG d_new,DialoG d_current,IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean seqid_edit_phase,Boolean is_nuc)19834 UpdateIDAndTitleEditDialogErrorColumns
19835 (DialoG            d_new,
19836  DialoG            d_current,
19837  IDAndTitleEditPtr iatep_new,
19838  IDAndTitleEditPtr iatep_current,
19839  Boolean           seqid_edit_phase,
19840  Boolean           is_nuc)
19841 {
19842   if (d_new == NULL)
19843   {
19844     iatep_new = NULL;
19845   }
19846   else
19847   {
19848     iatep_new = IDAndTitleEditCopy (iatep_new);
19849     UpdateIdAndTitleData (d_new, iatep_new);
19850   }
19851 
19852   if (d_current == NULL)
19853   {
19854     iatep_current = NULL;
19855   }
19856   else
19857   {
19858     iatep_current = IDAndTitleEditCopy (iatep_current);
19859     UpdateIdAndTitleData (d_current, iatep_current);
19860   }
19861 
19862   SetIDAndTitleEditDialogErrorColumn (d_new, iatep_new, iatep_current,
19863                                       seqid_edit_phase, is_nuc);
19864   SetIDAndTitleEditDialogErrorColumn (d_current, iatep_current, iatep_new,
19865                                       seqid_edit_phase, is_nuc);
19866   iatep_new = IDAndTitleEditFree (iatep_new);
19867   iatep_current = IDAndTitleEditFree (iatep_current);
19868 }
19869 
19870 
/* State shared between the "Choose Valid Modifiers" window and its callbacks. */
typedef struct unrecmods
{
  DialoG PNTR unrec_dlg;          /* array of replacement-selection dialogs, allocated with num_unrecognized slots */
  ValNodePtr unrecognized_list;   /* unrecognized modifier names; released with ValNodeFreeData in CleanupUnrecMods */
  Int4        num_unrecognized;   /* ValNodeLen of unrecognized_list */
  ButtoN      accept_btn;         /* Accept button, enabled/disabled by SetUnrecAccept */
} UnrecModsData, PNTR UnrecModsPtr;
19878 
/* Cleanup callback for the UnrecModsData attached to the "Choose Valid
 * Modifiers" window (registered with SetObjectExtra).
 *
 * g    - the object being cleaned up (unused).
 * data - the UnrecModsPtr that was attached; may be NULL.
 *
 * Releases everything the structure owns: the list of unrecognized modifier
 * names, the array of dialog handles, and the structure itself.
 */
static void CleanupUnrecMods (GraphiC g, VoidPtr data)

{
  UnrecModsPtr ump;

  ump = (UnrecModsPtr) data;
  if (ump != NULL)
  {
    ump->unrecognized_list = ValNodeFreeData (ump->unrecognized_list);
    /* The handle array is allocated with MemNew by the code that builds the
     * window and was previously never released.  Only the array is freed
     * here, not the dialogs whose handles it holds.
     */
    ump->unrec_dlg = MemFree (ump->unrec_dlg);
  }
  MemFree (ump);
}
19891 
/* Change callback for the replacement-selection dialogs.
 *
 * userdata - the UnrecModsPtr for the window; ignored when NULL.
 *
 * Enables the Accept button only when every displayed dialog (at most the
 * first three) currently yields a non-NULL value from DialogToPointer;
 * otherwise the button is disabled.
 */
static void SetUnrecAccept (Pointer userdata)
{
  UnrecModsPtr mods = (UnrecModsPtr) userdata;
  ValNodePtr   choice;
  Int4         idx;
  Int4         limit;
  Boolean      all_chosen = TRUE;

  if (mods == NULL)
  {
    return;
  }

  /* never look at more than three dialogs */
  limit = mods->num_unrecognized;
  if (limit > 3)
  {
    limit = 3;
  }

  idx = 0;
  while (all_chosen && idx < limit)
  {
    choice = DialogToPointer (mods->unrec_dlg [idx]);
    if (choice == NULL)
    {
      all_chosen = FALSE;
    }
    /* the value returned by the dialog is only needed for the NULL test */
    choice = ValNodeFreeData (choice);
    idx++;
  }

  if (all_chosen)
  {
    Enable (mods->accept_btn);
  }
  else
  {
    Disable (mods->accept_btn);
  }
}
19924 
/* Applies the replacements chosen in the window to every title in iatep.
 *
 * For each of the first (at most three) unrecognized names in
 * ump->unrecognized_list that has text and for which the matching dialog
 * yields a selection, ReplaceOneModifierName is run over every entry of
 * iatep->title_list, substituting the name of the selected modifier.
 * Does nothing when either argument is NULL.
 */
static void ReplaceThreeUnrecognizedModifiers (IDAndTitleEditPtr iatep, UnrecModsPtr ump)
{
  ValNodePtr bad_name_vnp, chosen_vnp;
  Int4       slot, title_idx;
  CharPtr    new_name;

  if (iatep == NULL || ump == NULL)
  {
    return;
  }

  slot = 0;
  bad_name_vnp = ump->unrecognized_list;
  while (slot < ump->num_unrecognized && bad_name_vnp != NULL && slot < 3)
  {
    if (! StringHasNoText (bad_name_vnp->data.ptrvalue))
    {
      chosen_vnp = DialogToPointer (ump->unrec_dlg [slot]);
      if (chosen_vnp != NULL)
      {
        new_name = SourceQualValNodeName (chosen_vnp);
        for (title_idx = 0; title_idx < iatep->num_sequences; title_idx++)
        {
          iatep->title_list [title_idx] =
            ReplaceOneModifierName (iatep->title_list [title_idx],
                                    bad_name_vnp->data.ptrvalue,
                                    new_name);
        }
        /* both the name string and the selection are released here */
        new_name = MemFree (new_name);
        chosen_vnp = ValNodeFreeData (chosen_vnp);
      }
    }
    slot++;
    bad_name_vnp = bad_name_vnp->next;
  }
}
19960 
19961 static Boolean
FixThreeUnrecognizedModifiers(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,ValNodePtr unrecognized_list)19962 FixThreeUnrecognizedModifiers
19963 (IDAndTitleEditPtr iatep_new,
19964  IDAndTitleEditPtr iatep_current,
19965  ValNodePtr        unrecognized_list)
19966 {
19967   ValNodePtr vnp;
19968   Int4       repl_num;
19969   WindoW     w;
19970   ValNodePtr mod_choices;
19971   GrouP      h, g, k, c;
19972   PrompT       p;
19973   UnrecModsPtr ump;
19974   ModalAcceptCancelData acd;
19975   Boolean               rval = FALSE;
19976   ButtoN                b;
19977 
19978   if (unrecognized_list == NULL || (iatep_new == NULL && iatep_current == NULL))
19979   {
19980     return FALSE;
19981   }
19982 
19983   ump = (UnrecModsPtr) MemNew (sizeof (UnrecModsData));
19984   if (ump == NULL)
19985   {
19986     return FALSE;
19987   }
19988 
19989   ump->unrecognized_list = unrecognized_list;
19990   ump->num_unrecognized = ValNodeLen(ump->unrecognized_list);
19991 
19992   ump->unrec_dlg = (DialoG PNTR) MemNew (sizeof (DialoG) * ump->num_unrecognized);
19993 
19994   w = MovableModalWindow (-20, -13, -10, -10, "Choose Valid Modifiers", NULL);
19995   h = HiddenGroup(w, -1, 0, NULL);
19996   SetGroupSpacing (h, 10, 10);
19997   SetObjectExtra (w, ump, CleanupUnrecMods);
19998 
19999   p = StaticPrompt (h, "Please choose a valid modifier name to replace these invalid names.",
20000                     0, 0, programFont, 'l');
20001 
20002   g = HiddenGroup (h, 3, 0, NULL);
20003   SetGroupSpacing (g, 10, 10);
20004   for (repl_num = 0, vnp = ump->unrecognized_list;
20005        repl_num < ump->num_unrecognized && repl_num < 3 && vnp != NULL;
20006        repl_num++, vnp = vnp->next)
20007   {
20008     k = HiddenGroup (g, 2, 0, NULL);
20009     SetGroupSpacing (k, 2, 2);
20010     mod_choices = GetFastaModifierList (TRUE, TRUE);
20011     StaticPrompt (k, vnp->data.ptrvalue, 0, 0, programFont, 'l');
20012     ump->unrec_dlg [repl_num] = ValNodeSelectionDialog (k, mod_choices, 6,
20013                                           SourceQualValNodeName,
20014                                           ValNodeSimpleDataFree,
20015                                           SourceQualValNodeDataCopy,
20016                                           SourceQualValNodeMatch,
20017                                           "modifier",
20018                                           SetUnrecAccept, ump, FALSE);
20019 
20020   }
20021 
20022   c = HiddenGroup (h, 2, 0, NULL);
20023   ump->accept_btn = PushButton (c, "Accept", ModalAcceptButton);
20024   SetObjectExtra (ump->accept_btn, &acd, NULL);
20025   Disable (ump->accept_btn);
20026   b = PushButton (c, "Cancel", ModalCancelButton);
20027   SetObjectExtra (b, &acd, NULL);
20028 
20029   AlignObjects (ALIGN_CENTER, (HANDLE) p,
20030                               (HANDLE) g,
20031                               (HANDLE) c,
20032                               NULL);
20033 
20034   Show (w);
20035   Select (w);
20036 
20037   acd.cancelled = FALSE;
20038   acd.accepted = FALSE;
20039   while (!acd.accepted && ! acd.cancelled)
20040   {
20041     ProcessExternalEvent ();
20042     Update ();
20043   }
20044   ProcessAnEvent ();
20045   if (acd.cancelled)
20046   {
20047     rval = FALSE;
20048   }
20049   else
20050   {
20051     ReplaceThreeUnrecognizedModifiers (iatep_new, ump);
20052     ReplaceThreeUnrecognizedModifiers (iatep_current, ump);
20053 
20054     rval = TRUE;
20055   }
20056 
20057   Remove (w);
20058 
20059   return rval;
20060 }
20061 
20062 static Boolean
FixAllUnrecognizedModifiers(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,SeqIdEditPtr siep)20063 FixAllUnrecognizedModifiers
20064 (IDAndTitleEditPtr iatep_new,
20065  IDAndTitleEditPtr iatep_current,
20066  SeqIdEditPtr siep)
20067 {
20068   ValNodePtr unrecognized_list;
20069   Boolean    rval = TRUE;
20070   Boolean    show_all;
20071 
20072   if (siep == NULL)
20073   {
20074     return FALSE;
20075   }
20076   if (GetValue (siep->show_all_grp) == 2)
20077   {
20078     show_all = TRUE;
20079   }
20080   else
20081   {
20082     show_all = FALSE;
20083   }
20084 
20085   unrecognized_list = ListUnrecognizedModifiers (iatep_new, iatep_current, siep->is_nuc);
20086   while (unrecognized_list != NULL && rval)
20087   {
20088     rval = FixThreeUnrecognizedModifiers (iatep_new, iatep_current, unrecognized_list);
20089     unrecognized_list = ListUnrecognizedModifiers (iatep_new, iatep_current, siep->is_nuc);
20090     UpdateIdAndTitleEditDialog (siep->new_dlg, siep->iatep_new, siep->iatep_current,
20091                                 siep->seqid_edit_phase, show_all, siep->is_nuc);
20092     UpdateIdAndTitleEditDialog (siep->current_dlg, siep->iatep_current, siep->iatep_new,
20093                                 siep->seqid_edit_phase, show_all, siep->is_nuc);
20094     ShowErrorInstructions (siep);
20095   }
20096   return rval;
20097 }
20098 
20099 
/* Paragraph and column formats used for the ID-correction table that
 * ShowExtendedIDCorrections and ListIDCorrections append to a DoC.
 * The four column entries line up with the table header
 * "Position / Suggested ID / Original ID / Original Title".
 * The pixWidth of each column is overwritten by ShowExtendedIDCorrections
 * from the document's rectangle before the table is written.
 */
static ParData     extendedIDParFmt = {FALSE, FALSE, FALSE, FALSE, FALSE, 0, 0};
static ColData     extendedIDColFmt[] =
  {
    {0, 0, 40, 0, NULL, 'l', TRUE, FALSE, FALSE, FALSE, FALSE},
    {0, 0, 40, 0, NULL, 'l', TRUE, FALSE, FALSE, FALSE, FALSE},
    {0, 0, 40, 0, NULL, 'l', TRUE, FALSE, FALSE, FALSE, FALSE},
    {0, 0, 40, 0, NULL, 'l', TRUE, FALSE, FALSE, FALSE, TRUE}
  };
20108 
AnyBracketsInIDs(IDAndTitleEditPtr iatep)20109 static Boolean AnyBracketsInIDs (IDAndTitleEditPtr iatep)
20110 {
20111   Int4 seq_num;
20112   Boolean rval = FALSE;
20113 
20114   if (iatep == NULL)
20115   {
20116     return FALSE;
20117   }
20118   for (seq_num = 0; seq_num < iatep->num_sequences && !rval; seq_num++)
20119   {
20120     if (StringChr (iatep->id_list [seq_num], '['))
20121     {
20122       rval = TRUE;
20123     }
20124   }
20125   return rval;
20126 }
20127 
AnyIDCorrectionsToList(IDAndTitleEditPtr iatep,IDAndTitleEditPtr iatep_current,BoolPtr space_corr,BoolPtr bracket_corr)20128 static Boolean AnyIDCorrectionsToList
20129 (IDAndTitleEditPtr iatep,
20130  IDAndTitleEditPtr iatep_current,
20131  BoolPtr           space_corr,
20132  BoolPtr           bracket_corr)
20133 {
20134   IDAndTitleEditPtr suggested;
20135   Boolean           any_to_show = FALSE;
20136   Int4              seq_num;
20137 
20138   if (iatep == NULL || space_corr == NULL || bracket_corr == NULL)
20139   {
20140     return FALSE;
20141   }
20142 
20143   suggested = SuggestCorrectionForLocalIDs (iatep, iatep_current);
20144   if (suggested == NULL || iatep->num_sequences != suggested->num_sequences)
20145   {
20146     suggested = IDAndTitleEditFree (suggested);
20147     return FALSE;
20148   }
20149 
20150   for (seq_num = 0;
20151        seq_num < iatep->num_sequences && (!any_to_show || !*space_corr || !*bracket_corr);
20152        seq_num++)
20153   {
20154     if (! StringHasNoText (suggested->id_list [seq_num])
20155         && ! StringHasNoText (iatep->title_list [seq_num])
20156         && StringCmp (iatep->id_list [seq_num], suggested->id_list [seq_num]) != 0)
20157     {
20158       any_to_show = TRUE;
20159       if (StringChr (iatep->id_list [seq_num], '[') != NULL)
20160       {
20161         *bracket_corr = TRUE;
20162       }
20163       else
20164       {
20165         *space_corr = TRUE;
20166       }
20167     }
20168   }
20169   suggested = IDAndTitleEditFree (suggested);
20170 
20171   return any_to_show;
20172 }
20173 
ListIDCorrections(IDAndTitleEditPtr iatep,IDAndTitleEditPtr iatep_current,CharPtr str,DoC doc)20174 static Boolean ListIDCorrections
20175 (IDAndTitleEditPtr iatep,
20176  IDAndTitleEditPtr iatep_current,
20177  CharPtr           str,
20178  DoC               doc)
20179 {
20180   IDAndTitleEditPtr suggested;
20181   CharPtr           doc_txt;
20182   CharPtr           doc_txt_fmt = "%s%d\t%s\t%s\t%s\n";
20183   Int4              len;
20184   Boolean           any_to_show = FALSE;
20185   Int4              seq_num;
20186 
20187 
20188   if (iatep == NULL || doc == NULL)
20189   {
20190     return FALSE;
20191   }
20192 
20193   suggested = SuggestCorrectionForLocalIDs (iatep, iatep_current);
20194   if (suggested == NULL || iatep->num_sequences != suggested->num_sequences)
20195   {
20196     suggested = IDAndTitleEditFree (suggested);
20197     return FALSE;
20198   }
20199 
20200   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
20201   {
20202     if (StringHasNoText (suggested->id_list [seq_num])
20203         || StringHasNoText (iatep->title_list [seq_num])
20204         || StringCmp (iatep->id_list [seq_num], suggested->id_list [seq_num]) == 0)
20205     {
20206       continue;
20207     }
20208 
20209     len = StringLen (doc_txt_fmt)
20210                      + StringLen (suggested->id_list [seq_num])
20211                      + StringLen (iatep->id_list[seq_num])
20212                      + StringLen (str)
20213                      + 15
20214                      + StringLen (iatep->title_list [seq_num]);
20215     doc_txt = (CharPtr) MemNew (len * sizeof (Char));
20216     if (doc_txt != NULL)
20217     {
20218       sprintf (doc_txt, doc_txt_fmt,
20219                str == NULL ? "" : str,
20220                seq_num + 1,
20221                suggested->id_list [seq_num],
20222                iatep->id_list [seq_num] == NULL ? "" : iatep->id_list [seq_num],
20223                iatep->title_list [seq_num]);
20224       AppendText (doc, doc_txt, &extendedIDParFmt, extendedIDColFmt, programFont);
20225       doc_txt = MemFree (doc_txt);
20226       any_to_show = TRUE;
20227     }
20228   }
20229 
20230   suggested = IDAndTitleEditFree (suggested);
20231 
20232   return any_to_show;
20233 }
20234 
20235 static Boolean
ShowExtendedIDCorrections(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,DoC doc)20236 ShowExtendedIDCorrections
20237 (IDAndTitleEditPtr iatep_new,
20238  IDAndTitleEditPtr iatep_current,
20239  DoC               doc)
20240 {
20241   Boolean     any_to_show = FALSE, space_corr = FALSE, bracket_corr = FALSE;
20242   RecT        r;
20243 
20244   if (doc == NULL || iatep_new == NULL)
20245   {
20246     return FALSE;
20247   }
20248 
20249   if (! AnyIDCorrectionsToList (iatep_new, iatep_current, &space_corr, &bracket_corr)
20250       && ! AnyIDCorrectionsToList (iatep_current, iatep_new, &space_corr, &bracket_corr))
20251   {
20252     return FALSE;
20253   }
20254 
20255   ObjectRect (doc, &r);
20256   InsetRect (&r, 4, 4);
20257   extendedIDColFmt[0].pixWidth = (r.right - r.left) / 10;
20258   extendedIDColFmt[1].pixWidth = (r.right - r.left) / 4;
20259   extendedIDColFmt[2].pixWidth = (r.right - r.left) / 4;
20260   extendedIDColFmt[3].pixWidth = (r.right - r.left)
20261                                   - extendedIDColFmt[0].pixWidth
20262                                   - extendedIDColFmt[1].pixWidth
20263                                   - extendedIDColFmt[2].pixWidth;
20264 
20265   if (space_corr)
20266   {
20267     AppendText (doc, "Your sequence IDs are not unique.  Did you try to put spaces in your sequence IDs?  This is not allowed.\n", NULL, NULL, programFont);
20268   }
20269   if (AnyBracketsInIDs (iatep_new) || AnyBracketsInIDs (iatep_current))
20270   {
20271     AppendText (doc, "Did you forget to put spaces between your sequence IDs and your titles?  This is not allowed.\n", NULL, NULL, programFont);
20272   }
20273 
20274   AppendText (doc, "\nPosition\tSuggested ID\tOriginal ID\tOriginal Title\n", &extendedIDParFmt, extendedIDColFmt, programFont);
20275 
20276   if (iatep_current == NULL)
20277   {
20278     any_to_show = ListIDCorrections (iatep_new, iatep_current, "", doc);
20279   }
20280   else
20281   {
20282     any_to_show = ListIDCorrections (iatep_new, iatep_current, "new:", doc);
20283     any_to_show |= ListIDCorrections (iatep_current, iatep_new, "existing:", doc);
20284   }
20285 
20286   return any_to_show;
20287 }
20288 
SequenceIDsHaveNonFixableBrackets(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,DoC doc)20289 static Boolean SequenceIDsHaveNonFixableBrackets
20290 (IDAndTitleEditPtr iatep_new,
20291  IDAndTitleEditPtr iatep_current,
20292  DoC               doc)
20293 {
20294   if (doc == NULL)
20295   {
20296     return FALSE;
20297   }
20298 
20299   if (! AnyBracketsInIDs (iatep_new) && ! AnyBracketsInIDs (iatep_current))
20300   {
20301     return FALSE;
20302   }
20303 
20304   Reset (doc);
20305   AppendText (doc, "Your sequence IDs contain brackets ('[' and/or ']').  This is not allowed.\n", NULL, NULL, programFont);
20306 
20307   return TRUE;
20308 }
20309 
20310 /* This section of code is used to display bracketing errors in an AutonomousPanel.
20311  * The panel has a frozen title row and a frozen column with sequence IDs in it.
20312  * The panel scrolling affects only the sequence titles.
20313  * There are two rows for each sequence.  The top row displays the original title;
20314  * the bottom row displays the suggested bracketing corrections.  The differences
20315  * between the original and suggested titles will be colored in red.
20316  * Both rows in a pair will have the same background color which alternates between gray
20317  * and white for each pair.
20318  * Clicking on a sequence will scroll to the next difference for that pair.
20319  */
/* Per-dialog state for the bracketing-difference panel. */
typedef struct diffdlg
{
  DIALOG_MESSAGE_BLOCK
  PaneL             pnl;                /* the panel the table is drawn in */
  IDAndTitleEditPtr new_original;       /* titles of the new set as entered */
  IDAndTitleEditPtr new_suggested;      /* suggested corrections for the new set */
  IDAndTitleEditPtr existing_original;  /* titles of the existing set as entered */
  IDAndTitleEditPtr existing_suggested; /* suggested corrections for the existing set */
  FonT              display_font;       /* font selected before drawing table rows */
  Int4              char_width;         /* horizontal advance used per painted character */
  Int4              descent;            /* offset added to y when computing row rectangles */
  Int4              num_header_rows;    /* rows subtracted from the visible-row count when sizing the scroll bar */
  Int4              max_title_length;   /* horizontal scroll maximum is this minus one */
  Int4              max_id_length;      /* width, in characters, reserved for the ID column */
  Int4              table_inset;        /* inset applied to the panel rectangle before drawing */

} DiffDlgData, PNTR DiffDlgPtr;
20337 
/* Paints the two explanatory text lines at the top of the difference panel.
 *
 * x, y      - position of the first line; the second is drawn
 *             stdLineHeight below it.
 * descent   - offset added to y when computing the rectangle that is erased
 *             behind each line.
 * win_width - width of the erased rectangle.
 */
static void DrawDiffDlgExplanation (Int4 x, Int4 y, Int4 descent, Int4 win_width)
{
  RecT line_rect;
  Int4 right_edge = x + win_width;
  Int4 second_y = y + stdLineHeight;

  /* first line: clear its band, then paint */
  LoadRect (&line_rect, x, y + descent, right_edge, y - stdLineHeight + descent);
  EraseRect (&line_rect);
  PaintStringEx ("Some of your titles have bracketing errors.", x, y);

  /* second line, one line height further down */
  LoadRect (&line_rect, x, second_y + descent, right_edge, second_y - stdLineHeight + descent);
  EraseRect (&line_rect);
  PaintStringEx ("Double-click on 'original' to scroll to the next error for that pair.", x, second_y);
}
20357 
/* Paints the column-heading row ("Sequence ID" / "Title") of the difference
 * panel using a dark gray / white colour setup, then switches the drawing
 * colours back (InvertColors followed by Black).
 *
 * x, y          - position of the row.
 * char_width    - horizontal advance per character.
 * descent       - offset added to y for the erased rectangle.
 * max_id_length - width of the ID column in characters.
 * win_width     - width of the erased rectangle.
 */
static void DrawDiffDlgTitle (Int4 x, Int4 y, Int4 char_width, Int4 descent, Int4 max_id_length, Int4 win_width)
{
  RecT heading_rect;
  Int4 title_x;

  /* colour setup for the heading band */
  DkGray ();
  InvertColors ();
  White ();

  LoadRect (&heading_rect, x, y + descent, x + win_width, y - stdLineHeight + descent);
  EraseRect (&heading_rect);

  PaintStringEx ("Sequence ID", x, y);

  /* skip the ID column (plus one) and a further ten characters */
  title_x = x + (max_id_length + 1) * char_width + 10 * char_width;
  PaintStringEx ("Title", title_x, y);

  InvertColors ();
  Black ();
}
20378 
20379 static void
PaintColorizedString(Int4 x,Int4 y,Int4 char_width,CharPtr paintstring,Int4 string_offset,Boolean shade,ValNodePtr diff_list,Int4 diff_choice)20380 PaintColorizedString
20381 (Int4       x,
20382  Int4       y,
20383  Int4       char_width,
20384  CharPtr    paintstring,
20385  Int4       string_offset,
20386  Boolean    shade,
20387  ValNodePtr diff_list,
20388  Int4       diff_choice)
20389 {
20390   CharPtr    cp;
20391   Char       buf [2];
20392   ValNodePtr diff_vnp;
20393 
20394   if (shade)
20395   {
20396     White ();
20397   }
20398   else
20399   {
20400     Black ();
20401   }
20402   if (paintstring == NULL || (Int4) StringLen (paintstring) <= string_offset)
20403   {
20404     PaintStringEx (" ", x, y);
20405   }
20406   else
20407   {
20408     cp = paintstring + string_offset;
20409     diff_vnp = diff_list;
20410     buf [1] = 0;
20411 
20412     while (*cp != 0)
20413     {
20414       while (diff_vnp != NULL && (diff_vnp->choice != diff_choice || diff_vnp->data.intvalue < string_offset))
20415       {
20416         diff_vnp = diff_vnp->next;
20417       }
20418       if (diff_vnp != NULL && diff_vnp->choice == diff_choice && string_offset == diff_vnp->data.intvalue)
20419       {
20420         Red ();
20421       }
20422 
20423       buf [0] = *cp;
20424       PaintStringEx (buf, x, y);
20425       x += char_width;
20426       string_offset++;
20427       cp++;
20428 
20429       if (shade)
20430       {
20431         White ();
20432       }
20433       else
20434       {
20435         Black ();
20436       }
20437     }
20438   }
20439 }
20440 
20441 static void
DrawDiffDlgRow(Int4 x,Int4 y,Int4 char_width,Int4 descent,Int4 max_id_length,Int4 win_width,CharPtr id_str,CharPtr title_str,Int4 offset,ValNodePtr diff_list,Int4 choice_num,Boolean shade)20442 DrawDiffDlgRow
20443 (Int4 x,
20444  Int4 y,
20445  Int4 char_width,
20446  Int4 descent,
20447  Int4 max_id_length,
20448  Int4 win_width,
20449  CharPtr id_str,
20450  CharPtr title_str,
20451  Int4       offset,
20452  ValNodePtr diff_list,
20453  Int4       choice_num,
20454  Boolean    shade)
20455 {
20456   RecT rct;
20457   PoinT      pt1, pt2;
20458 
20459   if (shade)
20460   {
20461     Gray ();
20462     InvertColors ();
20463     White ();
20464   }
20465   LoadRect (&rct, x, y + descent,
20466             x + win_width,
20467             y - stdLineHeight + descent);
20468   EraseRect (&rct);
20469 
20470   if (id_str != NULL)
20471   {
20472     PaintStringEx (id_str, x, y);
20473   }
20474   x += (max_id_length + 1) * char_width;
20475   if (choice_num == 1)
20476   {
20477     PaintStringEx (" original", x, y);
20478   }
20479   else
20480   {
20481     PaintStringEx ("suggested", x, y);
20482   }
20483   x += 9 * char_width;
20484   pt1.x = x + 2;
20485   pt2.x = x + 2;
20486   pt1.y = y + descent;
20487   pt2.y = y - stdLineHeight + descent;
20488   Black ();
20489   DrawLine (pt1, pt2);
20490 
20491   x += char_width;
20492 
20493   PaintColorizedString (x, y, char_width, title_str, offset, shade, diff_list,
20494                         choice_num);
20495 
20496   if (shade)
20497   {
20498     InvertColors ();
20499     Black ();
20500   }
20501 }
20502 
20503 /* This function produces a ValNode list of integers.
20504  * The choice value (1 or 2) indicates whether the difference is in string 1
20505  * or string 2; the integer value indicates the offset in that string of the
20506  * difference.
20507  * Space characters are ignored when computing differences.
20508  * The output from this function is used for displaying the differences between
20509  * the original definition line and a suggested bracketing correction.
20510  */
GetTextDifferences(CharPtr str1,CharPtr str2)20511 static ValNodePtr GetTextDifferences (CharPtr str1, CharPtr str2)
20512 {
20513   ValNodePtr diff_list = NULL;
20514   CharPtr    cp1, cp2, diff_end1, diff_end2;
20515   Int4       offset1, offset2, j;
20516 
20517   if (str1 == NULL && str2 == NULL)
20518   {
20519     return NULL;
20520   }
20521   cp1 = str1;
20522   cp2 = str2;
20523 
20524   offset1 = 0;
20525   offset2 = 0;
20526 
20527   while (cp1 != NULL || cp2 != NULL)
20528   {
20529     /* skip over spaces in cp1 */
20530     while (cp1 != NULL && *cp1 != 0 && isspace (*cp1))
20531     {
20532       cp1 ++;
20533       offset1 ++;
20534     }
20535     if (cp1 != NULL && *cp1 == 0)
20536     {
20537       cp1 = NULL;
20538     }
20539     /* skip over spaces in cp2 */
20540     while (cp2 != NULL && *cp2 != 0 && isspace (*cp2))
20541     {
20542       cp2 ++;
20543       offset2 ++;
20544     }
20545     if (cp2 != NULL && *cp2 == 0)
20546     {
20547       cp2 = NULL;
20548     }
20549 
20550     if (cp1 == NULL && cp2 == NULL)
20551     {
20552       /* both NULL, do nothing */
20553     }
20554     else if (cp1 == NULL)
20555     {
20556       ValNodeAddInt (&diff_list, 2, offset2);
20557     }
20558     else if (cp2 == NULL)
20559     {
20560       ValNodeAddInt (&diff_list, 1, offset1);
20561     }
20562     else
20563     {
20564       if (*cp1 != *cp2)
20565       {
20566         if ((diff_end1 = StringSearch (cp1, cp2)) != NULL)
20567         {
20568           while (diff_end1 != cp1)
20569           {
20570             if (!isspace (*cp1))
20571             {
20572               ValNodeAddInt (&diff_list, 1, offset1);
20573             }
20574             offset1++;
20575             cp1++;
20576           }
20577         }
20578         else if ((diff_end2 = StringSearch (cp2, cp1))!= NULL)
20579         {
20580           while (diff_end2 != cp2)
20581           {
20582             if (!isspace (*cp2))
20583             {
20584               ValNodeAddInt (&diff_list, 2, offset2);
20585             }
20586             offset2++;
20587             cp2++;
20588           }
20589         }
20590         else if (*(cp1 + 1) == *cp2)
20591         {
20592           if (!isspace (*cp1))
20593           {
20594             ValNodeAddInt (&diff_list, 1, offset1);
20595           }
20596           offset1++;
20597           cp1++;
20598         }
20599         else if (*(cp2 + 1) == *cp1)
20600         {
20601           if (!isspace (*cp2))
20602           {
20603             ValNodeAddInt (&diff_list, 2, offset2);
20604           }
20605           offset2++;
20606           cp2++;
20607         }
20608         else if ((Int4)StringLen (cp1) > len_fake_modifier_name
20609                  && StringNCmp (cp1, fake_modifier_name, len_fake_modifier_name) == 0
20610                  && *(cp1 + len_fake_modifier_name) == *cp2)
20611         {
20612           /* show all of fake modifier name in red */
20613           for (j = 0; j < len_fake_modifier_name; j++)
20614           {
20615             ValNodeAddInt (&diff_list, 1, offset1);
20616             cp1++;
20617             offset1++;
20618           }
20619         }
20620         else if ((Int4) StringLen (cp2) > len_fake_modifier_name
20621                  && StringNCmp (cp2, fake_modifier_name, len_fake_modifier_name) == 0
20622                  && *(cp2 + len_fake_modifier_name) == *cp1)
20623         {
20624           /* show all of fake modifier name in red */
20625           for (j = 0; j < len_fake_modifier_name; j++)
20626           {
20627             ValNodeAddInt (&diff_list, 2, offset2);
20628             cp2++;
20629             offset2++;
20630           }
20631         }
20632         else
20633         {
20634           diff_end1 = StringChr (cp1, *cp2);
20635           diff_end2 = StringChr (cp2, *cp1);
20636           if (diff_end1 == NULL || diff_end2 == NULL)
20637           {
20638             ValNodeAddInt (&diff_list, 1, offset1);
20639             ValNodeAddInt (&diff_list, 2, offset2);
20640           }
20641           else if (diff_end1 - cp1 < diff_end2 - cp2)
20642           {
20643             while (diff_end1 != cp1)
20644             {
20645               if (!isspace (*cp1))
20646               {
20647                 ValNodeAddInt (&diff_list, 1, offset1);
20648               }
20649               offset1++;
20650               cp1++;
20651             }
20652           }
20653           else
20654           {
20655             while (diff_end2 != cp2)
20656             {
20657               if (!isspace (*cp2))
20658               {
20659                 ValNodeAddInt (&diff_list, 2, offset2);
20660               }
20661               offset2++;
20662               cp2++;
20663             }
20664           }
20665         }
20666       }
20667     }
20668 
20669     if (cp1 != NULL)
20670     {
20671       cp1++;
20672       offset1++;
20673     }
20674 
20675     if (cp2 != NULL)
20676     {
20677       cp2++;
20678       offset2++;
20679     }
20680   }
20681   return diff_list;
20682 }
20683 
20684 static Int4
CountTitleCorrectionRows(IDAndTitleEditPtr original,IDAndTitleEditPtr suggested)20685 CountTitleCorrectionRows
20686 (IDAndTitleEditPtr original, IDAndTitleEditPtr suggested)
20687 {
20688   Int4 num_rows = 0, seq_num;
20689 
20690   if (original == NULL || suggested == NULL
20691       || original->num_sequences != suggested->num_sequences)
20692   {
20693     return 0;
20694   }
20695 
20696   for (seq_num = 0; seq_num < original->num_sequences; seq_num++)
20697   {
20698     if (StringCmp (original->title_list [seq_num], suggested->title_list [seq_num]) != 0)
20699     {
20700       num_rows += 2;
20701     }
20702   }
20703   return num_rows;
20704 }
20705 
20706 static Int4
DrawDiffPair(Int4 x,Int4 y,Int4 last_y,DiffDlgPtr dlg,IDAndTitleEditPtr original,IDAndTitleEditPtr suggested,Int4 row_length,Int4Ptr start_row,Int4 start_col,BoolPtr shade)20707 DrawDiffPair
20708 (Int4              x,
20709  Int4              y,
20710  Int4              last_y,
20711  DiffDlgPtr        dlg,
20712  IDAndTitleEditPtr original,
20713  IDAndTitleEditPtr suggested,
20714  Int4              row_length,
20715  Int4Ptr           start_row,
20716  Int4              start_col,
20717  BoolPtr           shade)
20718 {
20719   ValNodePtr diff_list;
20720   Int4       row, seq_num, visible_row;
20721 
20722   if (dlg == NULL || start_row == NULL || shade == NULL
20723       || y > last_y
20724       || original == NULL || suggested == NULL
20725       || original->num_sequences != suggested->num_sequences)
20726   {
20727     return y;
20728   }
20729 
20730   SelectFont (dlg->display_font);
20731 
20732   /* draw difference rows */
20733   diff_list = NULL;
20734   visible_row = 0;
20735   for (row = 0;
20736        row < original->num_sequences * 2 && y <= last_y;
20737        row++)
20738   {
20739     seq_num = row / 2;
20740 
20741     if (row % 2 == 0)
20742     {
20743       /* draw original */
20744       if (StringCmp (original->title_list [seq_num], suggested->title_list [seq_num]) != 0)
20745       {
20746         diff_list = ValNodeFree (diff_list);
20747         diff_list = GetTextDifferences (original->title_list [seq_num],
20748                                     suggested->title_list [seq_num]);
20749         if (visible_row == *start_row)
20750         {
20751           DrawDiffDlgRow (x, y, dlg->char_width, dlg->descent, dlg->max_id_length, row_length,
20752                           original->id_list [seq_num],
20753                           original->title_list [seq_num],
20754                           start_col, diff_list, 1, *shade);
20755           y += stdLineHeight;
20756           (*start_row) ++;
20757         }
20758         visible_row++;
20759       }
20760     }
20761     else
20762     {
20763       /* draw suggested */
20764       if (StringCmp (original->title_list [seq_num], suggested->title_list [seq_num]) != 0)
20765       {
20766         if (diff_list == NULL)
20767         {
20768           /* only calculate if it was NULL, otherwise use same as previous diff_list */
20769           diff_list = GetTextDifferences (original->title_list [seq_num],
20770                                           suggested->title_list [seq_num]);
20771         }
20772         if (visible_row == *start_row)
20773         {
20774           DrawDiffDlgRow (x, y, dlg->char_width, dlg->descent, dlg->max_id_length, row_length,
20775                           suggested->id_list [seq_num],
20776                           suggested->title_list [seq_num],
20777                           start_col, diff_list, 2, *shade);
20778           y += stdLineHeight;
20779           (*start_row) ++;
20780         }
20781         visible_row++;
20782         /* toggle the shading */
20783         *shade = !(*shade);
20784       }
20785     }
20786   }
20787   diff_list = ValNodeFree (diff_list);
20788   return y;
20789 }
20790 
/* Draw callback for the bracketing-difference panel.
 *
 * Brings both scroll bars in line with the amount of content, paints the
 * explanation and the column-title row, then paints the rows for the new
 * sequences followed by the rows for the existing sequences.
 * Returns without drawing when no dialog data is attached to the panel or
 * when neither original set needs bracketing fixes.
 */
static void OnDrawDiffDlg (PaneL p)
{
  DiffDlgPtr dlg;
  BaR        vbar, hbar;
  RecT       area;
  Int4       first_row, first_col;
  Int4       left, baseline, width, bottom_limit;
  Int4       rows_new, rows_existing, rows_total, rows_fit;
  Int4       vmax_wanted, hmax_wanted, vmax_current, hmax_current;
  Boolean    shade = TRUE;

  dlg = (DiffDlgPtr) GetObjectExtra (p);
  if (dlg == NULL)
  {
    return;
  }

  rows_new = CountTitleCorrectionRows (dlg->new_original, dlg->new_suggested);
  rows_existing = CountTitleCorrectionRows (dlg->existing_original, dlg->existing_suggested);
  rows_total = rows_new + rows_existing;

  /* nothing to show unless at least one of the two sets needs fixing */
  if (! (EditNeedsBracketingFixes (dlg->new_original)
         || EditNeedsBracketingFixes (dlg->existing_original)))
  {
    return;
  }

  SelectFont (dlg->display_font);

  vbar = GetSlateVScrollBar ((SlatE) p);
  Enable (vbar);
  hbar = GetSlateHScrollBar ((SlatE) p);
  Enable (hbar);

  first_row = GetBarValue (vbar);
  first_col = GetBarValue (hbar);

  /* drawing area is the panel rectangle shrunk by the table inset */
  ObjectRect (p, &area);
  InsetRect (&area, dlg->table_inset, dlg->table_inset);
  left = area.left + 1;
  baseline = area.top + stdLineHeight;
  SelectFont (programFont);

  width = area.right - area.left - 2;

  /* scroll ranges: rows that do not fit vertically, title columns horizontally */
  rows_fit = (area.bottom - area.top - 2 * dlg->table_inset) / stdLineHeight - dlg->num_header_rows;
  vmax_wanted = rows_total - rows_fit;
  if (vmax_wanted < 0)
  {
    vmax_wanted = 0;
  }
  hmax_wanted = dlg->max_title_length - 1;
  if (hmax_wanted < 0)
  {
    hmax_wanted = 0;
  }
  vmax_current = GetBarMax (vbar);
  hmax_current = GetBarMax (hbar);

  if (vmax_current != vmax_wanted)
  {
    CorrectBarMax (vbar, vmax_wanted);
    if (first_row > vmax_wanted)
    {
      first_row = vmax_wanted;
    }
    CorrectBarValue (vbar, first_row);
    CorrectBarPage (vbar, 1, 1);
  }

  if (hmax_current != hmax_wanted)
  {
    CorrectBarMax (hbar, hmax_wanted);
    if (first_col > hmax_wanted)
    {
      first_col = hmax_wanted;
    }
    CorrectBarValue (hbar, first_col);
    CorrectBarPage (hbar, 1, 1);
  }

  bottom_limit = area.bottom - 2 * dlg->table_inset;

  /* explanatory text takes two lines */
  DrawDiffDlgExplanation (left, baseline, dlg->descent, width);
  baseline += 2 * stdLineHeight;

  /* column titles take one line */
  DrawDiffDlgTitle (left, baseline, dlg->char_width, dlg->descent, dlg->max_id_length, width);
  baseline += stdLineHeight;

  /* rows for the new sequences first ... */
  baseline = DrawDiffPair (left, baseline, bottom_limit, dlg,
                           dlg->new_original, dlg->new_suggested,
                           width, &first_row, first_col, &shade);

  /* ... then make the starting row relative to the existing set */
  first_row -= rows_new;

  DrawDiffPair (left, baseline, bottom_limit, dlg,
                dlg->existing_original, dlg->existing_suggested,
                width, &first_row, first_col, &shade);
}
20890 
/* Vertical scroll callback for the difference panel: invalidates the whole
 * slate rectangle so that it is redrawn.  The bar and the old/new values
 * are not used here.
 */
static void OnVScrollDiffDlg (BaR sb, SlatE s, Int4 newval, Int4 oldval)
{
  RecT slate_rect;

  ObjectRect (s, &slate_rect);
  InvalRect (&slate_rect);
}
20898 
/* Horizontal scroll callback for the difference panel: invalidates the whole
 * slate rectangle so that it is redrawn.  The bar and the old/new values
 * are not used here.
 */
static void OnHScrollDiffDlg (BaR sb, SlatE s, Int4 newval, Int4 oldval)
{
  RecT slate_rect;

  ObjectRect (s, &slate_rect);
  InvalRect (&slate_rect);
}
20906 
/* Converts a pixel position inside the difference panel into a cell
 * coordinate.
 *
 * dlg  dialog data; when NULL the result is (-1, -1)
 * pt   position in the same coordinate space as the panel rectangle
 *
 * Returns: y is the text line under the point; lines at or below the header
 * rows have the vertical scroll position added.  x is the character column
 * under the point; columns at or beyond max_id_length + 10 have the
 * horizontal scroll position added.
 */
static PoinT GetDiffDlgCoord (DiffDlgPtr dlg, PoinT pt)
{
  Int4  start_row, start_col;
  RecT  r;
  PoinT cell_coord;

  cell_coord.x = -1;
  cell_coord.y = -1;

  if (dlg == NULL)
  {
    return cell_coord;
  }

  /* read each scroll position once and reuse it below */
  start_row = GetBarValue (GetSlateVScrollBar ((SlatE) dlg->pnl));
  start_col = GetBarValue (GetSlateHScrollBar ((SlatE) dlg->pnl));

  ObjectRect (dlg->pnl, &r);
  InsetRect (&r, dlg->table_inset, dlg->table_inset);

  cell_coord.y = (pt.y - r.top) / stdLineHeight;
  if (cell_coord.y >= dlg->num_header_rows)
  {
    /* header rows do not scroll; data rows do */
    cell_coord.y += start_row;
  }

  cell_coord.x = (pt.x - r.left) / dlg->char_width;
  if (cell_coord.x >= dlg->max_id_length + 10)
  {
    /* only the part to the right of the fixed columns scrolls */
    cell_coord.x += start_col;
  }

  return cell_coord;
}
20950 
20951 static void
ScrollForDiffInRow(Int4 row,IDAndTitleEditPtr new_original,IDAndTitleEditPtr new_suggested,IDAndTitleEditPtr existing_original,IDAndTitleEditPtr existing_suggested,BaR sb_horiz)20952 ScrollForDiffInRow
20953 (Int4              row,
20954  IDAndTitleEditPtr new_original,
20955  IDAndTitleEditPtr new_suggested,
20956  IDAndTitleEditPtr existing_original,
20957  IDAndTitleEditPtr existing_suggested,
20958  BaR               sb_horiz)
20959 {
20960   Int4 seq_num, displayed_row;
20961   ValNodePtr diff_list = NULL, vnp;
20962   Boolean    found_row = FALSE;
20963   Int4       scroll_val = 0;
20964 
20965   if (sb_horiz == NULL
20966       || (new_original == NULL && existing_original == NULL)
20967       || (new_original == NULL && new_suggested != NULL)
20968       || (new_original != NULL && new_suggested == NULL)
20969       || (new_original != NULL && new_original->num_sequences != new_suggested->num_sequences)
20970       || (existing_original == NULL && existing_suggested != NULL)
20971       || (existing_original != NULL && existing_suggested == NULL)
20972       || (existing_original != NULL && existing_original->num_sequences != existing_suggested->num_sequences))
20973   {
20974     return;
20975   }
20976 
20977   displayed_row = 0;
20978 
20979   if (new_original != NULL)
20980   {
20981     for (seq_num = 0;
20982          seq_num < new_original->num_sequences && ! found_row;
20983          seq_num++)
20984     {
20985       if (StringCmp (new_original->title_list [seq_num],
20986                      new_suggested->title_list [seq_num]) != 0)
20987       {
20988         if (displayed_row == row)
20989         {
20990           found_row = TRUE;
20991           diff_list = GetTextDifferences (new_original->title_list [seq_num],
20992                                           new_suggested->title_list [seq_num]);
20993         }
20994         else
20995         {
20996           displayed_row += 2;
20997         }
20998       }
20999     }
21000   }
21001 
21002   if (existing_original != NULL)
21003   {
21004     for (seq_num = 0;
21005          seq_num < existing_original->num_sequences && ! found_row;
21006          seq_num++)
21007     {
21008       if (StringCmp (existing_original->title_list [seq_num],
21009                      existing_suggested->title_list [seq_num]) != 0)
21010       {
21011         if (row == displayed_row || row == displayed_row + 1 )
21012         {
21013           found_row = TRUE;
21014           diff_list = GetTextDifferences (existing_original->title_list [seq_num],
21015                                           existing_suggested->title_list [seq_num]);
21016         }
21017         else
21018         {
21019           displayed_row += 2;
21020         }
21021       }
21022     }
21023   }
21024 
21025   if (diff_list == NULL)
21026   {
21027     scroll_val = 0;
21028   }
21029   else
21030   {
21031     scroll_val = GetBarValue (sb_horiz);
21032     vnp = diff_list;
21033     while (vnp != NULL && vnp->data.intvalue <= scroll_val)
21034     {
21035       vnp = vnp->next;
21036     }
21037     if (vnp != NULL)
21038     {
21039       scroll_val = vnp->data.intvalue;
21040     }
21041     else
21042     {
21043       scroll_val = diff_list->data.intvalue;
21044     }
21045   }
21046   SetBarValue (sb_horiz, scroll_val);
21047   diff_list = ValNodeFree (diff_list);
21048 }
21049 
/* Click callback for the difference panel.  Only double clicks on a data
 * row (at or below the header rows) are acted upon: the horizontal scroll
 * bar is moved by ScrollForDiffInRow for the row that was hit.
 */
static void OnClickDiffDlg (PaneL p, PoinT pt)
{
  DiffDlgPtr dlg;
  PoinT      cell;

  dlg = (DiffDlgPtr) GetObjectExtra (p);
  if (dlg == NULL || ! dblClick)
  {
    return;
  }

  cell = GetDiffDlgCoord (dlg, pt);
  if (cell.y < dlg->num_header_rows)
  {
    /* the click landed on the explanation or on the column titles */
    return;
  }

  /* row index is passed relative to the first data row */
  cell.y -= dlg->num_header_rows;
  ScrollForDiffInRow (cell.y,
                      dlg->new_original, dlg->new_suggested,
                      dlg->existing_original, dlg->existing_suggested,
                      GetSlateHScrollBar ((SlatE) dlg->pnl));
}
21074 
/* Bundle of the four ID/title sets handed to the difference dialog through
 * its "to dialog" function (SetToDiffDlg).  A set may be absent, but an
 * original and its suggested counterpart must be both present or both NULL
 * and must hold the same number of sequences; at least one original must be
 * present.  SetToDiffDlg ignores bundles that violate these rules.
 */
typedef struct diffset
{
  IDAndTitleEditPtr new_original;       /* new sequences, titles as entered */
  IDAndTitleEditPtr new_suggested;      /* new sequences, suggested titles */
  IDAndTitleEditPtr existing_original;  /* existing sequences, titles as entered */
  IDAndTitleEditPtr existing_suggested; /* existing sequences, suggested titles */
} DiffSetData, PNTR DiffSetPtr;
21082 
SetToDiffDlg(DialoG d,Pointer userdata)21083 static void SetToDiffDlg (DialoG d, Pointer userdata)
21084 {
21085   DiffDlgPtr  dlg;
21086   DiffSetPtr  dsp;
21087   Int4        seq_num;
21088   RecT        r;
21089 
21090   dlg = (DiffDlgPtr) GetObjectExtra (d);
21091   dsp = (DiffSetPtr) userdata;
21092   if (dlg == NULL || dsp == NULL
21093       || (dsp->new_original == NULL && dsp->existing_original == NULL)
21094       || (dsp->new_original == NULL && dsp->new_suggested != NULL)
21095       || (dsp->new_original != NULL && dsp->new_suggested == NULL)
21096       || (dsp->new_original != NULL && dsp->new_original->num_sequences != dsp->new_suggested->num_sequences)
21097       || (dsp->existing_original == NULL && dsp->existing_suggested != NULL)
21098       || (dsp->existing_original != NULL && dsp->existing_suggested == NULL)
21099       || (dsp->existing_original != NULL && dsp->existing_original->num_sequences != dsp->existing_suggested->num_sequences))
21100   {
21101     return;
21102   }
21103 
21104   dlg->new_original = IDAndTitleEditCopy (dsp->new_original);
21105   dlg->new_suggested = IDAndTitleEditCopy (dsp->new_suggested);
21106   dlg->existing_original = IDAndTitleEditCopy (dsp->existing_original);
21107   dlg->existing_suggested = IDAndTitleEditCopy (dsp->existing_suggested);
21108 
21109   dlg->max_id_length = 0;
21110   dlg->max_title_length = 0;
21111 
21112   /* get max lengths from new set */
21113   if (dsp->new_original != NULL)
21114   {
21115     for (seq_num = 0; seq_num < dsp->new_original->num_sequences; seq_num++)
21116     {
21117       /* we want the maximum length only for those rows we'll actually display */
21118       if (StringCmp (dsp->new_original->title_list [seq_num],
21119                      dsp->new_suggested->title_list [seq_num]) == 0)
21120       {
21121         continue;
21122       }
21123       /* max ID length */
21124       dlg->max_id_length = MAX (dlg->max_id_length, (Int4) StringLen (dsp->new_original->id_list [seq_num]));
21125       dlg->max_id_length = MAX (dlg->max_id_length, (Int4) StringLen (dsp->new_suggested->id_list [seq_num]));
21126 
21127       /* max title length */
21128       dlg->max_title_length = MAX (dlg->max_title_length, (Int4) StringLen (dsp->new_original->title_list [seq_num]));
21129       dlg->max_title_length = MAX (dlg->max_title_length, (Int4) StringLen (dsp->new_suggested->title_list [seq_num]));
21130     }
21131   }
21132   /* get max lengths from existing set */
21133   if (dsp->existing_original != NULL)
21134   {
21135     for (seq_num = 0; seq_num < dsp->existing_original->num_sequences; seq_num++)
21136     {
21137       /* we want the maximum length only for those rows we'll actually display */
21138       if (StringCmp (dsp->existing_original->title_list [seq_num],
21139                      dsp->existing_suggested->title_list [seq_num]) == 0)
21140       {
21141         continue;
21142       }
21143       /* max ID length */
21144       dlg->max_id_length = MAX (dlg->max_id_length, (Int4) StringLen (dsp->existing_original->id_list [seq_num]));
21145       dlg->max_id_length = MAX (dlg->max_id_length, (Int4) StringLen (dsp->existing_suggested->id_list [seq_num]));
21146 
21147       /* max title length */
21148       dlg->max_title_length = MAX (dlg->max_title_length, (Int4) StringLen (dsp->existing_original->title_list [seq_num]));
21149       dlg->max_title_length = MAX (dlg->max_title_length, (Int4) StringLen (dsp->existing_suggested->title_list [seq_num]));
21150     }
21151   }
21152   ObjectRect (dlg->pnl, &r);
21153   InvalRect (&r);
21154 }
21155 
/* Cleanup callback registered with the difference dialog's group: releases
 * the four ID/title sets held by the dialog data and then the dialog data
 * itself.  A NULL data pointer is ignored; g is not used.
 */
static void CleanupDifferenceDialog (GraphiC g, Pointer data)
{
  DiffDlgPtr dlg = (DiffDlgPtr) data;

  if (dlg == NULL)
  {
    return;
  }

  dlg->new_original = IDAndTitleEditFree (dlg->new_original);
  dlg->new_suggested = IDAndTitleEditFree (dlg->new_suggested);
  dlg->existing_original = IDAndTitleEditFree (dlg->existing_original);
  dlg->existing_suggested = IDAndTitleEditFree (dlg->existing_suggested);
  MemFree (dlg);
}
21170 
21171 static DialoG
ShowDifferenceDialog(GrouP parent,Int4 width,Int4 height)21172 ShowDifferenceDialog
21173 (GrouP parent,
21174  Int4  width,
21175  Int4  height)
21176 {
21177   DiffDlgPtr dlg;
21178   GrouP        p;
21179 
21180   dlg = (DiffDlgPtr) MemNew (sizeof (DiffDlgData));
21181   if (dlg == NULL)
21182   {
21183     return NULL;
21184   }
21185 
21186   p = HiddenGroup (parent, 1, 0, NULL);
21187   SetObjectExtra (p, dlg, CleanupDifferenceDialog);
21188 
21189   dlg->dialog = (DialoG) p;
21190   dlg->todialog = SetToDiffDlg;
21191   dlg->fromdialog = NULL;
21192   dlg->dialogmessage = NULL;
21193   dlg->testdialog = NULL;
21194 
21195   dlg->new_original = NULL;
21196   dlg->new_suggested = NULL;
21197   dlg->existing_original = NULL;
21198   dlg->existing_suggested = NULL;
21199 
21200 #ifdef WIN_MAC
21201   dlg->display_font = ParseFont ("Monaco, 9");
21202 #endif
21203 #ifdef WIN_MSWIN
21204   dlg->display_font = ParseFont ("Courier, 9");
21205 #endif
21206 #ifdef WIN_MOTIF
21207   dlg->display_font = ParseFont ("fixed, 12");
21208 #endif
21209   SelectFont (dlg->display_font);
21210   dlg->char_width  = CharWidth ('0');
21211   dlg->descent = Descent ();
21212   dlg->table_inset = 4;
21213 
21214   dlg->max_title_length = 0;
21215 
21216   dlg->num_header_rows = 3;
21217 
21218   dlg->pnl = AutonomousPanel4 (p, width, height, OnDrawDiffDlg,
21219                                OnVScrollDiffDlg, OnHScrollDiffDlg,
21220                                sizeof (DiffDlgData), NULL, NULL);
21221   SetObjectExtra (dlg->pnl, dlg, NULL);
21222   SetPanelClick(dlg->pnl, OnClickDiffDlg, NULL, NULL, NULL);
21223 
21224   return (DialoG) p;
21225 }
21226 
21227 static Boolean
ShowBracketingCorrections(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_existing,DialoG dlg)21228 ShowBracketingCorrections
21229 (IDAndTitleEditPtr iatep_new,
21230  IDAndTitleEditPtr iatep_existing,
21231  DialoG dlg)
21232 {
21233   IDAndTitleEditPtr  suggested_new, suggested_existing;
21234   Boolean            any_to_show = FALSE;
21235   DiffSetData        dsd;
21236   Int4               num_to_show;
21237 
21238   if (dlg == NULL)
21239   {
21240     return FALSE;
21241   }
21242 
21243   suggested_new = SuggestCorrectionForTitleBracketing (iatep_new);
21244   suggested_existing = SuggestCorrectionForTitleBracketing (iatep_existing);
21245 
21246   num_to_show = CountTitleCorrectionRows (iatep_new, suggested_new)
21247                 + CountTitleCorrectionRows (iatep_existing, suggested_existing);
21248 
21249   if (num_to_show > 0)
21250   {
21251     dsd.new_original = iatep_new;
21252     dsd.new_suggested = suggested_new;
21253     dsd.existing_original = iatep_existing;
21254     dsd.existing_suggested = suggested_existing;
21255     PointerToDialog (dlg, &dsd);
21256     any_to_show = TRUE;
21257   }
21258   suggested_new = IDAndTitleEditFree (suggested_new);
21259   suggested_new = IDAndTitleEditFree (suggested_new);
21260 
21261   return any_to_show;
21262 }
21263 
/* Draws the explanatory text at the top of a colorized defline panel.
 * Called from the draw callback as (x, y, char_width, descent, row_length);
 * the return value is used as the number of text lines the explanation
 * occupies.
 */
typedef Int4 (*DrawExplanationFunc) PROTO ((Int4, Int4, Int4, Int4, Int4));

/* Produces the list of colored positions for one title.
 * Called as (title, colorize_data, is_nuc).  A NULL result means the title
 * has nothing to color and its row is not displayed; a non-NULL result is
 * released by the caller with ValNodeFree.
 */
typedef ValNodePtr (*ColorizeStringFunc) PROTO ((CharPtr, Pointer, Boolean));

/* Callback taking one opaque pointer.
 * NOTE(review): presumably notifies the parent that the panel's data
 * changed; it is not called in the part of the file examined here - confirm.
 */
typedef void (*UpdateColorizedPanelParentProc) PROTO ((Pointer));

/* Callback taking an Int4 and one opaque pointer.
 * NOTE(review): presumably asks the parent to scroll to a position; it is
 * not called in the part of the file examined here - confirm.
 */
typedef void (*ScrollParentProc) PROTO ((Int4, Pointer));
21271 
/* State of a colorized defline panel: a scrolling table of sequence IDs and
 * titles in which only the titles that colorize_title returns a non-NULL
 * list for are displayed.  Rows from iatep_new are drawn first, followed by
 * rows from iatep_current.
 */
typedef struct colorizeddeflinedlg
{
  PaneL                          pnl;                /* panel that is drawn and whose scroll bars are read */
  IDAndTitleEditPtr              iatep_new;          /* first set of IDs/titles; may be NULL */
  IDAndTitleEditPtr              iatep_current;      /* second set of IDs/titles; may be NULL */
  Boolean                        is_nuc;             /* passed through to colorize_title */
  FonT                           display_font;       /* font selected at the start of drawing */
  Int4                           char_width;         /* width of one character cell, in pixels */
  Int4                           descent;            /* font descent used when positioning row rectangles */
  Int4                           max_title_length;   /* recomputed on every draw; at least 5 */
  Int4                           max_id_length;      /* recomputed on every draw; at least 10 */
  Int4                           table_inset;        /* margin between the panel rectangle and the table */
  Int4                           num_header_rows;    /* recomputed on every draw: explanation lines plus the title row */
  DrawExplanationFunc            draw_explanation;   /* optional; draws the explanation and returns its line count */
  Boolean                        edit_values;        /* NOTE(review): not read in the functions examined here - confirm meaning */
  ColorizeStringFunc             colorize_title;     /* decides which titles are shown and how they are colored */
  Pointer                        colorize_data;      /* opaque argument for colorize_title */
  UpdateColorizedPanelParentProc update_parent;      /* NOTE(review): not called in the functions examined here */
  Pointer                        update_parent_data; /* presumably the argument for update_parent - confirm */
  ScrollParentProc               scroll_parent;      /* NOTE(review): not called in the functions examined here */
  Pointer                        scroll_parent_data; /* presumably the argument for scroll_parent - confirm */
} ColorizedDeflineDlgData, PNTR ColorizedDeflineDlgPtr;
21294 
CountRowsWithColor(IDAndTitleEditPtr iatep,Int4Ptr max_id_length,Int4Ptr max_title_length,ColorizeStringFunc colorize_title,Pointer colorize_data,Boolean is_nuc)21295 static Int4 CountRowsWithColor
21296 (IDAndTitleEditPtr  iatep,
21297  Int4Ptr            max_id_length,
21298  Int4Ptr            max_title_length,
21299  ColorizeStringFunc colorize_title,
21300  Pointer            colorize_data,
21301  Boolean            is_nuc)
21302 {
21303   ValNodePtr diff_list;
21304   Int4       seq_num, num_rows_with_color = 0;
21305 
21306   if (iatep == NULL || colorize_title == NULL)
21307   {
21308     return 0;
21309   }
21310 
21311   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
21312   {
21313     diff_list = colorize_title (iatep->title_list [seq_num], colorize_data, is_nuc);
21314     if (diff_list != NULL)
21315     {
21316       num_rows_with_color ++;
21317       diff_list = ValNodeFree (diff_list);
21318       if (max_id_length != NULL)
21319       {
21320         *max_id_length = MAX (*max_id_length, (Int4) StringLen (iatep->id_list [seq_num]));
21321       }
21322       if (max_title_length != NULL)
21323       {
21324         *max_title_length = MAX (*max_title_length, (Int4) StringLen (iatep->title_list [seq_num]));
21325       }
21326     }
21327   }
21328   return num_rows_with_color;
21329 }
21330 
/* Paints the column-title row ("Sequence ID" and "Title") of a colorized
 * defline panel with inverted colors, then restores the colors and selects
 * black.
 *
 * x, y           left edge and text position of the row
 * char_width     width of one character cell
 * descent        font descent, used to place the background rectangle
 * max_id_length  width of the ID column in characters
 * win_width      width of the background rectangle
 */
static void DrawDeflineDlgTitle (Int4 x, Int4 y, Int4 char_width, Int4 descent, Int4 max_id_length, Int4 win_width)
{
  RecT background;
  Int4 title_x;

  /* the "Title" heading starts two cells past the ID column */
  title_x = x + (max_id_length + 2) * char_width;

  DkGray ();
  InvertColors ();
  White ();

  /* fill the whole row before writing the headings on it */
  LoadRect (&background, x, y + descent,
            x + win_width,
            y - stdLineHeight + descent);
  EraseRect (&background);

  PaintStringEx ("Sequence ID", x, y);
  PaintStringEx ("Title", title_x, y);

  InvertColors ();
  Black ();
}
21350 
21351 static void
DrawDeflineDlgRow(Int4 x,Int4 y,Int4 char_width,Int4 descent,Int4 max_id_length,Int4 win_width,CharPtr id_str,CharPtr title_str,Int4 offset,ValNodePtr diff_list,Int4 choice_num)21352 DrawDeflineDlgRow
21353 (Int4 x,
21354  Int4 y,
21355  Int4 char_width,
21356  Int4 descent,
21357  Int4 max_id_length,
21358  Int4 win_width,
21359  CharPtr id_str,
21360  CharPtr title_str,
21361  Int4       offset,
21362  ValNodePtr diff_list,
21363  Int4       choice_num)
21364 {
21365   RecT rct;
21366   PoinT      pt1, pt2;
21367 
21368   LoadRect (&rct, x, y + descent,
21369             x + win_width,
21370             y - stdLineHeight + descent);
21371   EraseRect (&rct);
21372 
21373   if (id_str != NULL)
21374   {
21375     PaintStringEx (id_str, x, y);
21376   }
21377   x += (max_id_length + 1) * char_width;
21378   pt1.x = x + 2;
21379   pt2.x = x + 2;
21380   pt1.y = y + descent;
21381   pt2.y = y - stdLineHeight + descent;
21382   Black ();
21383   DrawLine (pt1, pt2);
21384 
21385   x += char_width;
21386 
21387   PaintColorizedString (x, y, char_width, title_str, offset, FALSE, diff_list,
21388                         choice_num);
21389 }
21390 
21391 static void
DrawColorizedDeflinesInSet(Int4 x,Int4Ptr y,Int4 last_y,Int4 char_width,Int4 descent,Int4 max_id_length,Int4 row_length,Int4Ptr start_row,Int4 start_col,IDAndTitleEditPtr iatep,ColorizeStringFunc colorize_title,Pointer colorize_data,Boolean is_nuc)21392 DrawColorizedDeflinesInSet
21393 (Int4                x,
21394  Int4Ptr             y,
21395  Int4                last_y,
21396  Int4                char_width,
21397  Int4                descent,
21398  Int4                max_id_length,
21399  Int4                row_length,
21400  Int4Ptr             start_row,
21401  Int4                start_col,
21402  IDAndTitleEditPtr   iatep,
21403  ColorizeStringFunc  colorize_title,
21404  Pointer             colorize_data,
21405  Boolean             is_nuc)
21406 {
21407   Int4       row, visible_row;
21408   ValNodePtr diff_list;
21409 
21410   if (iatep == NULL || y == NULL || start_row == NULL || colorize_title == NULL)
21411   {
21412     return;
21413   }
21414 
21415   visible_row = 0;
21416   for (row = 0;
21417        row < iatep->num_sequences && *y <= last_y;
21418        row++)
21419   {
21420     diff_list = NULL;
21421     diff_list = colorize_title (iatep->title_list [row], colorize_data, is_nuc);
21422     if (diff_list != NULL)
21423     {
21424       if (visible_row == *start_row)
21425       {
21426         DrawDeflineDlgRow (x, *y, char_width, descent, max_id_length, row_length,
21427                           iatep->id_list [row],
21428                           iatep->title_list [row],
21429                           start_col, diff_list, 1);
21430         (*y) += stdLineHeight;
21431         (*start_row) ++;
21432       }
21433       visible_row++;
21434       diff_list = ValNodeFree (diff_list);
21435     }
21436   }
21437 
21438 }
21439 
/* Draw callback for a colorized defline panel.
 *
 * Recomputes the column widths and the number of displayed rows, paints the
 * optional explanation and the column-title row, brings both scroll bars in
 * line with the amount of content, and paints the displayed rows of the new
 * set followed by those of the current set.
 * Returns without drawing when no dialog data is attached to the panel.
 */
static void OnDrawColorizedDeflineDlg (PaneL p)
{
  ColorizedDeflineDlgPtr dlg;
  BaR                    vbar, hbar;
  RecT                   area;
  Int4                   first_row, first_col;
  Int4                   left, baseline, width, bottom_limit;
  Int4                   rows_new, rows_current, rows_total, rows_fit;
  Int4                   vmax_wanted, hmax_wanted, vmax_current, hmax_current;

  dlg = (ColorizedDeflineDlgPtr) GetObjectExtra (p);
  if (dlg == NULL)
  {
    return;
  }

  /* minimum column widths; widened by the rows that are displayed */
  dlg->max_id_length = 10;
  dlg->max_title_length = 5;
  rows_new = CountRowsWithColor (dlg->iatep_new,
                                 &(dlg->max_id_length), &(dlg->max_title_length),
                                 dlg->colorize_title, dlg->colorize_data, dlg->is_nuc);
  rows_current = CountRowsWithColor (dlg->iatep_current,
                                     &(dlg->max_id_length), &(dlg->max_title_length),
                                     dlg->colorize_title, dlg->colorize_data, dlg->is_nuc);
  rows_total = rows_new + rows_current;

  SelectFont (dlg->display_font);

  vbar = GetSlateVScrollBar ((SlatE) p);
  Enable (vbar);
  hbar = GetSlateHScrollBar ((SlatE) p);
  Enable (hbar);

  first_row = GetBarValue (vbar);
  first_col = GetBarValue (hbar);

  /* drawing area is the panel rectangle shrunk by the table inset */
  ObjectRect (p, &area);
  InsetRect (&area, dlg->table_inset, dlg->table_inset);
  left = area.left + 1;
  baseline = area.top + stdLineHeight;
  SelectFont (programFont);

  width = area.right - area.left - 2;

  /* explanatory text, if any, reports how many lines it used */
  dlg->num_header_rows = 0;
  if (dlg->draw_explanation != NULL)
  {
    dlg->num_header_rows = (dlg->draw_explanation) (left, baseline,
                                                    dlg->char_width,
                                                    dlg->descent,
                                                    width);
    baseline += stdLineHeight * dlg->num_header_rows;
  }

  /* the column titles count as one more header row */
  DrawDeflineDlgTitle (left, baseline, dlg->char_width, dlg->descent, dlg->max_id_length, width);
  baseline += stdLineHeight;
  dlg->num_header_rows += 1;

  /* scroll ranges: rows that do not fit vertically, title columns horizontally */
  rows_fit = (area.bottom - area.top - 2 * dlg->table_inset) / stdLineHeight - dlg->num_header_rows;
  vmax_wanted = rows_total - rows_fit;
  if (vmax_wanted < 0)
  {
    vmax_wanted = 0;
  }
  hmax_wanted = dlg->max_title_length - 1;
  if (hmax_wanted < 0)
  {
    hmax_wanted = 0;
  }
  vmax_current = GetBarMax (vbar);
  hmax_current = GetBarMax (hbar);

  if (vmax_current != vmax_wanted)
  {
    CorrectBarMax (vbar, vmax_wanted);
    if (first_row > vmax_wanted)
    {
      first_row = vmax_wanted;
    }
    CorrectBarValue (vbar, first_row);
    CorrectBarPage (vbar, 1, 1);
  }

  if (hmax_current != hmax_wanted)
  {
    CorrectBarMax (hbar, hmax_wanted);
    if (first_col > hmax_wanted)
    {
      first_col = hmax_wanted;
    }
    CorrectBarValue (hbar, first_col);
    CorrectBarPage (hbar, 1, 1);
  }

  bottom_limit = area.bottom - 2 * dlg->table_inset;

  /* rows of the new set first ... */
  DrawColorizedDeflinesInSet (left, &baseline, bottom_limit,
                              dlg->char_width, dlg->descent,
                              dlg->max_id_length, width, &first_row, first_col,
                              dlg->iatep_new,
                              dlg->colorize_title, dlg->colorize_data,
                              dlg->is_nuc);

  /* ... then make the starting row relative to the current set */
  first_row -= rows_new;

  DrawColorizedDeflinesInSet (left, &baseline, bottom_limit,
                              dlg->char_width, dlg->descent,
                              dlg->max_id_length, width, &first_row, first_col,
                              dlg->iatep_current,
                              dlg->colorize_title, dlg->colorize_data,
                              dlg->is_nuc);
}
21554 
/* Converts a pixel position inside a colorized defline panel into a
 * (title column, sequence index) pair.
 *
 * dlg  dialog data; when NULL the result is (-1, -1)
 * pt   position in the same coordinate space as the panel rectangle
 *
 * Returns:
 *   y  -1 for a position in the header rows (or when no coloring function
 *      is set, in which case no rows are displayed).  Otherwise the index
 *      of the sequence shown on the clicked row: 0 .. N-1 selects a
 *      sequence of iatep_new (N = iatep_new->num_sequences, 0 when the set
 *      is absent) and N .. N+M-1 selects sequence y - N of iatep_current.
 *      A position below the last displayed row gives N + M.
 *   x  -1 for a position in the ID column or on the separator, otherwise
 *      the zero-based character offset within the title, horizontal scroll
 *      position included.
 */
static PoinT GetColorizedDeflineCoord (ColorizedDeflineDlgPtr dlg, PoinT pt)
{
  BaR sb_horiz;
  BaR sb_vert;
  Int4 start_row, start_col;
  RecT r;
  PoinT cell_coord;
  Int4  x, y, apparent_x, apparent_y, vis_row, num_new = 0;
  ValNodePtr diff_list;

  cell_coord.x = -1;
  cell_coord.y = -1;

  if (dlg == NULL)
  {
    return cell_coord;
  }

  sb_vert  = GetSlateVScrollBar ((SlatE) dlg->pnl);
  sb_horiz = GetSlateHScrollBar ((SlatE) dlg->pnl);

  start_row = GetBarValue (sb_vert);
  start_col = GetBarValue (sb_horiz);

  ObjectRect (dlg->pnl, &r);
  InsetRect (&r, dlg->table_inset, dlg->table_inset);
  x = pt.x - r.left;
  y = pt.y - r.top;

  apparent_y = y / stdLineHeight;

  if (apparent_y < dlg->num_header_rows || dlg->colorize_title == NULL)
  {
    /* header row, or no coloring function and therefore no displayed rows */
    cell_coord.y = -1;
  }
  else
  {
    /* index of the clicked row among the displayed rows */
    apparent_y = apparent_y - dlg->num_header_rows + start_row;
    cell_coord.y = 0;
    vis_row = -1;

    /* walk the new set, counting only the sequences that are displayed */
    while (vis_row < apparent_y && dlg->iatep_new != NULL && cell_coord.y < dlg->iatep_new->num_sequences)
    {
      diff_list = dlg->colorize_title (dlg->iatep_new->title_list [cell_coord.y],
                                       dlg->colorize_data,
                                       dlg->is_nuc);
      if (diff_list != NULL)
      {
        vis_row++;
        if (vis_row < apparent_y)
        {
          cell_coord.y ++;
        }
      }
      else
      {
        cell_coord.y ++;
      }
      diff_list = ValNodeFree (diff_list);
    }

    /* indices for the current set are offset by the size of the new set,
     * not by the number of displayed new rows: that is how the functions
     * consuming the coordinate decode it
     */
    if (dlg->iatep_new != NULL)
    {
      num_new = dlg->iatep_new->num_sequences;
    }

    while (vis_row < apparent_y && dlg->iatep_current != NULL
           && cell_coord.y - num_new < dlg->iatep_current->num_sequences)
    {
      diff_list = dlg->colorize_title (dlg->iatep_current->title_list [cell_coord.y - num_new],
                                       dlg->colorize_data,
                                       dlg->is_nuc);
      if (diff_list != NULL)
      {
        vis_row++;
        if (vis_row < apparent_y)
        {
          cell_coord.y ++;
        }
      }
      else
      {
        cell_coord.y ++;
      }
      diff_list = ValNodeFree (diff_list);
    }
  }

  apparent_x = x / dlg->char_width;
  if (apparent_x <= dlg->max_id_length + 1)
  {
    /* ID column or separator */
    cell_coord.x = -1;
  }
  else
  {
    cell_coord.x = apparent_x - dlg->max_id_length - 2 + start_col;
  }

  return cell_coord;
}
21651 
GetModValuePairForCoord(ColorizedDeflineDlgPtr dlg,PoinT coord,BoolPtr is_value)21652 static BadValuePtr GetModValuePairForCoord (ColorizedDeflineDlgPtr dlg, PoinT coord, BoolPtr is_value)
21653 {
21654   Int4                   current_num;
21655   CharPtr                seq_id = NULL, title = NULL, mod_name = NULL, mod_value = NULL;
21656   CharPtr                cp, eq_loc, start_bracket, end_bracket;
21657   BadValuePtr            bvp = NULL;
21658   ModifierInfoPtr        mip;
21659 
21660   if (dlg == NULL || coord.x < 0 || coord.y < 0)
21661   {
21662     return NULL;
21663   }
21664 
21665   if (dlg->iatep_new != NULL && coord.y < dlg->iatep_new->num_sequences)
21666   {
21667     seq_id = dlg->iatep_new->id_list [coord.y];
21668     title = dlg->iatep_new->title_list [coord.y];
21669   }
21670   else if (dlg->iatep_current != NULL)
21671   {
21672     current_num = coord.y;
21673     if (dlg->iatep_new != NULL)
21674     {
21675       current_num -= dlg->iatep_new->num_sequences;
21676     }
21677     if (current_num < dlg->iatep_current->num_sequences)
21678     {
21679       seq_id = dlg->iatep_current->id_list [current_num];
21680       title = dlg->iatep_current->id_list [current_num];
21681     }
21682   }
21683   if (seq_id == NULL || title == NULL || (Int4) StringLen (title) < coord.x)
21684   {
21685     return NULL;
21686   }
21687 
21688   cp = title + coord.x;
21689 
21690   mip = ParseOneBracketedModifier (title, &start_bracket, &end_bracket);
21691   while (mip != NULL && start_bracket != NULL && end_bracket != NULL
21692          && cp > end_bracket)
21693   {
21694     mip = ModifierInfoFree (mip);
21695     mip = ParseOneBracketedModifier (end_bracket + 1, &start_bracket, &end_bracket);
21696   }
21697   mip = ModifierInfoFree (mip);
21698 
21699   mod_name = NULL;
21700   mod_value = NULL;
21701 
21702   if (start_bracket <= cp && end_bracket >= cp)
21703   {
21704     eq_loc = NextBracketToken (start_bracket + 1);
21705     if (eq_loc != NULL && *eq_loc == '=')
21706     {
21707       mod_name = (CharPtr) MemNew ((eq_loc - start_bracket) * sizeof (Char));
21708       if (mod_name != NULL)
21709       {
21710         StringNCpy (mod_name, start_bracket + 1, eq_loc - start_bracket - 1);
21711         mod_name [eq_loc - start_bracket - 1] = 0;
21712         TrimSpacesAroundString (mod_name);
21713       }
21714 
21715       mod_value = (CharPtr) MemNew (end_bracket - eq_loc);
21716       if (mod_value != NULL)
21717       {
21718         StringNCpy (mod_value, eq_loc + 1, end_bracket - eq_loc - 1);
21719         mod_value [end_bracket - eq_loc - 1] = 0;
21720         TrimSpacesAroundString (mod_value);
21721       }
21722     }
21723   }
21724 
21725   if (mod_name == NULL || mod_value == NULL)
21726   {
21727     mod_name = MemFree (mod_name);
21728     mod_value = MemFree (mod_value);
21729     return NULL;
21730   }
21731 
21732   bvp = BadValueNew (seq_id, mod_name, mod_value);
21733   mod_name = MemFree (mod_name);
21734   mod_value = MemFree (mod_value);
21735 
21736   if (is_value != NULL)
21737   {
21738     if (title + coord.x < eq_loc)
21739     {
21740       *is_value = FALSE;
21741     }
21742     else if (title + coord.x >= eq_loc)
21743     {
21744       *is_value = TRUE;
21745     }
21746   }
21747   return bvp;
21748 }
21749 
/* ScrollToColor
 * Moves the horizontal scroll bar of the colorized defline panel to the next
 * highlighted character position in the title shown on row coord.y.
 *
 * Rows are numbered with the sequences of dlg->iatep_new first, followed by
 * the sequences of dlg->iatep_current.  The highlighted positions come from
 * dlg->colorize_title.  Positions at or before the current scroll value are
 * skipped, as is any run of positions directly contiguous with it; the bar is
 * then set to the next position found (clamped to the bar maximum), or to 0
 * when no further position exists.
 *
 * dlg    dialog data for the panel; nothing happens when NULL or when it has
 *        no colorize_title callback
 * coord  cell coordinate; only coord.y (the row) is used
 */
static void ScrollToColor (ColorizedDeflineDlgPtr dlg, PoinT coord)
{
  BaR        sb_horiz;
  Int4       current_scroll_pos, current_row, new_scroll_pos = 0;
  ValNodePtr diff_list, vnp;
  CharPtr    title = NULL;
  Boolean    found_in_new = FALSE;

  if (dlg == NULL || dlg->colorize_title == NULL || coord.y < 0)
  {
    return;
  }

  sb_horiz = GetSlateHScrollBar ((SlatE) dlg->pnl);
  current_scroll_pos = GetBarValue (sb_horiz);

  /* translate the panel row into an index into the new or current list */
  current_row = coord.y;

  if (dlg->iatep_new != NULL)
  {
    if (coord.y < dlg->iatep_new->num_sequences)
    {
      title = dlg->iatep_new->title_list [coord.y];
      found_in_new = TRUE;
    }
    else
    {
      current_row = coord.y - dlg->iatep_new->num_sequences;
    }
  }

  if (!found_in_new && dlg->iatep_current != NULL && current_row < dlg->iatep_current->num_sequences)
  {
    title = dlg->iatep_current->title_list [current_row];
  }

  if (title == NULL)
  {
    return;
  }

  diff_list = (dlg->colorize_title) (title, dlg->colorize_data, dlg->is_nuc);

  /* skip everything at or to the left of the current scroll position */
  vnp = diff_list;
  while (vnp != NULL && vnp->data.intvalue <= current_scroll_pos)
  {
    vnp = vnp->next;
  }
  /* skip the rest of the highlighted run that touches the current position */
  while (vnp != NULL && vnp->data.intvalue == current_scroll_pos + 1)
  {
    vnp = vnp->next;
    current_scroll_pos ++;
  }
  if (vnp != NULL)
  {
    new_scroll_pos = vnp->data.intvalue;
  }
  if (new_scroll_pos > GetBarMax (sb_horiz))
  {
    new_scroll_pos = GetBarMax (sb_horiz);
  }
  SetBarValue (sb_horiz, new_scroll_pos);

  /* The colorizers build a new list of integer nodes on every call, so the
   * list must be released here.  The nodes carry int values, not pointers,
   * hence ValNodeFree rather than ValNodeFreeData.
   */
  diff_list = ValNodeFree (diff_list);
}
21813 
/* OnClickColorizedDeflinePanel
 * Click handler for the colorized defline panel.  Only double-clicks are
 * acted upon.
 *   - a click whose cell has a negative x asks the parent (if a scroll_parent
 *     callback is set) to scroll to that row and then scrolls this panel to
 *     the next highlighted position;
 *   - a click on a modifier name offers to fix the name;
 *   - a click on a modifier value offers to fix the value, but only when the
 *     panel was created with edit_values set.
 * When a fix reports success the update_parent callback, if any, is invoked.
 */
static void OnClickColorizedDeflinePanel (PaneL p, PoinT pt)
{
  ColorizedDeflineDlgPtr dlg;
  PoinT                  cell;
  BadValuePtr            clicked;
  Boolean                on_value = FALSE;
  Boolean                changed = FALSE;

  dlg = (ColorizedDeflineDlgPtr) GetObjectExtra (p);
  if (dlg == NULL || ! dblClick)
  {
    return;
  }

  cell = GetColorizedDeflineCoord (dlg, pt);
  if (cell.y < 0)
  {
    return;
  }

  if (cell.x < 0)
  {
    if (dlg->scroll_parent != NULL)
    {
      (dlg->scroll_parent) (cell.y, dlg->scroll_parent_data);
    }
    ScrollToColor (dlg, cell);
    return;
  }

  /* click landed inside a title: find the modifier under the cursor */
  clicked = GetModValuePairForCoord (dlg, cell, &on_value);
  if (clicked != NULL)
  {
    if (!on_value)
    {
      changed = FixOneModifierName (dlg->iatep_new,
                                    dlg->iatep_current,
                                    clicked->seq_id,
                                    clicked->mod_name,
                                    dlg->is_nuc);
    }
    else if (dlg->edit_values)
    {
      changed = FixOneModifierValue (dlg->iatep_new,
                                     dlg->iatep_current,
                                     clicked->seq_id,
                                     clicked->mod_name,
                                     clicked->value,
                                     GetModifierType (clicked->mod_name));
    }
  }
  clicked = BadValueFree (clicked);

  if (changed && dlg->update_parent != NULL)
  {
    (dlg->update_parent) (dlg->update_parent_data);
  }
}
21873 
21874 static void
UpdateColorizedDeflinePanelData(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,PaneL pnl)21875 UpdateColorizedDeflinePanelData
21876 (IDAndTitleEditPtr iatep_new,
21877  IDAndTitleEditPtr iatep_current,
21878  PaneL             pnl)
21879 {
21880   ColorizedDeflineDlgPtr dlg;
21881   RecT                   r;
21882 
21883   dlg = (ColorizedDeflineDlgPtr) GetObjectExtra (pnl);
21884   if (dlg == NULL)
21885   {
21886     return;
21887   }
21888   dlg->iatep_new = iatep_new;
21889   dlg->iatep_current = iatep_current;
21890 
21891 
21892   ObjectRect ((SlatE) pnl, &r);
21893   InvalRect (&r);
21894 }
21895 
21896 static PaneL
ColorizedDeflinePanel(GrouP parent,Int4 width,Int4 height,IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc,DrawExplanationFunc draw_explanation,Boolean edit_values,ColorizeStringFunc colorize_title,Pointer colorize_data,UpdateColorizedPanelParentProc update_parent,Pointer update_parent_data,ScrollParentProc scroll_parent,Pointer scroll_parent_data)21897 ColorizedDeflinePanel
21898 (GrouP parent,
21899  Int4  width,
21900  Int4  height,
21901  IDAndTitleEditPtr   iatep_new,
21902  IDAndTitleEditPtr   iatep_current,
21903  Boolean             is_nuc,
21904  DrawExplanationFunc draw_explanation,
21905  Boolean             edit_values,
21906  ColorizeStringFunc  colorize_title,
21907  Pointer             colorize_data,
21908  UpdateColorizedPanelParentProc update_parent,
21909  Pointer                        update_parent_data,
21910  ScrollParentProc               scroll_parent,
21911  Pointer                        scroll_parent_data)
21912 {
21913   ColorizedDeflineDlgPtr dlg;
21914 
21915   dlg = (ColorizedDeflineDlgPtr) MemNew (sizeof (ColorizedDeflineDlgData));
21916   if (dlg == NULL)
21917   {
21918     return NULL;
21919   }
21920 
21921 #ifdef WIN_MAC
21922   dlg->display_font = ParseFont ("Monaco, 9");
21923 #endif
21924 #ifdef WIN_MSWIN
21925   dlg->display_font = ParseFont ("Courier, 9");
21926 #endif
21927 #ifdef WIN_MOTIF
21928   dlg->display_font = ParseFont ("fixed, 12");
21929 #endif
21930   SelectFont (dlg->display_font);
21931   dlg->char_width  = CharWidth ('0');
21932   dlg->descent = Descent ();
21933   dlg->table_inset = 4;
21934 
21935   dlg->max_title_length = 0;
21936   dlg->max_id_length = 0;
21937 
21938   dlg->draw_explanation = draw_explanation;
21939   dlg->edit_values = edit_values;
21940   dlg->colorize_title = colorize_title;
21941   dlg->colorize_data = colorize_data;
21942   dlg->iatep_new = iatep_new;
21943   dlg->iatep_current = iatep_current;
21944   dlg->is_nuc = is_nuc;
21945   dlg->update_parent = update_parent;
21946   dlg->update_parent_data = update_parent_data;
21947   dlg->scroll_parent = scroll_parent;
21948   dlg->scroll_parent_data = scroll_parent_data;
21949 
21950   dlg->pnl = AutonomousPanel4 (parent, width, height, OnDrawColorizedDeflineDlg,
21951                                OnVScrollDiffDlg, OnHScrollDiffDlg,
21952                                sizeof (ColorizedDeflineDlgData), NULL, NULL);
21953   SetObjectExtra (dlg->pnl, dlg, NULL);
21954 
21955   SetPanelClick(dlg->pnl, OnClickColorizedDeflinePanel, NULL, NULL, NULL);
21956 
21957   return dlg->pnl;
21958 }
21959 
/* UpdateSeqIdEditForColorizedPanel
 * Refreshes both ID/title edit dialogs of the sequence ID editor and then
 * redisplays the error instructions.
 * userdata is the SeqIdEditPtr for the editor; NULL is ignored.
 */
static void UpdateSeqIdEditForColorizedPanel (Pointer userdata)
{
  SeqIdEditPtr siep = (SeqIdEditPtr) userdata;
  Boolean      show_all = FALSE;

  if (siep == NULL)
  {
    return;
  }

  /* the second choice of the show_all group means "show all" */
  if (GetValue (siep->show_all_grp) == 2)
  {
    show_all = TRUE;
  }

  UpdateIdAndTitleEditDialog (siep->new_dlg,
                              siep->iatep_new, siep->iatep_current,
                              siep->seqid_edit_phase, show_all, siep->is_nuc);
  UpdateIdAndTitleEditDialog (siep->current_dlg,
                              siep->iatep_current, siep->iatep_new,
                              siep->seqid_edit_phase, show_all, siep->is_nuc);
  ShowErrorInstructions (siep);
}
21994 
/* ScrollSeqIdEditForColorizedPanel
 * Scrolls the matching tag list of the sequence ID editor to the sequence
 * with the given number.  Numbers below the count of new sequences refer to
 * the new set; larger numbers are offset into the current set.
 * seq_num   zero-based sequence number; negative values are ignored
 * userdata  the SeqIdEditPtr for the editor; NULL is ignored
 */
static void ScrollSeqIdEditForColorizedPanel (Int4 seq_num, Pointer userdata)
{
  SeqIdEditPtr siep = (SeqIdEditPtr) userdata;
  Int4         row_in_current;

  if (seq_num < 0 || siep == NULL)
  {
    return;
  }

  row_in_current = seq_num;
  if (siep->iatep_new != NULL)
  {
    if (seq_num < siep->iatep_new->num_sequences)
    {
      ScrollTagListToSeqId (siep->new_dlg, siep->iatep_new->id_list [seq_num]);
      return;
    }
    /* past the end of the new set: index into the current set instead */
    row_in_current -= siep->iatep_new->num_sequences;
  }

  if (siep->iatep_current == NULL)
  {
    return;
  }

  if (row_in_current < siep->iatep_current->num_sequences)
  {
    ScrollTagListToSeqId (siep->current_dlg,
                          siep->iatep_current->id_list [row_in_current]);
  }
}
22029 
22030 static Int4
DrawExplanation(Int4 x,Int4 y,Int4 char_width,Int4 descent,Int4 win_width,CharPtr line1,CharPtr exp_part1,CharPtr exp_red,CharPtr exp_part2)22031 DrawExplanation
22032 (Int4 x,
22033  Int4 y,
22034  Int4 char_width,
22035  Int4 descent,
22036  Int4 win_width,
22037  CharPtr line1,
22038  CharPtr exp_part1,
22039  CharPtr exp_red,
22040  CharPtr exp_part2)
22041 {
22042   RecT rct;
22043   Int4 num_lines = 0, tmp_x;
22044   CharPtr  dbl_click = "Double-click on a sequence ID to scroll to the next invalid ";
22045 
22046 
22047   /* draw first explanation row */
22048   LoadRect (&rct, x, y + descent,
22049             x + win_width,
22050             y - stdLineHeight + descent);
22051   EraseRect (&rct);
22052 
22053   if (!StringHasNoText (line1))
22054   {
22055     LoadRect (&rct, x, y + descent,
22056               x + win_width,
22057               y - stdLineHeight + descent);
22058     EraseRect (&rct);
22059     PaintStringEx (line1, x, y);
22060     y += stdLineHeight;
22061     num_lines ++;
22062   }
22063 
22064   if (!StringHasNoText (exp_part1) || !StringHasNoText (exp_red) || !StringHasNoText (exp_part2))
22065   {
22066     LoadRect (&rct, x, y + descent,
22067               x + win_width,
22068               y - stdLineHeight + descent);
22069     EraseRect (&rct);
22070 
22071     tmp_x = x;
22072     PaintStringEx (exp_part1, tmp_x, y);
22073     tmp_x += StringLen (exp_part1) * char_width;
22074 
22075     Red ();
22076     PaintStringEx (exp_red, tmp_x, y);
22077     Black ();
22078     tmp_x += StringLen (exp_red) * char_width;
22079 
22080     PaintStringEx (exp_part2, tmp_x, y);
22081 
22082     y += stdLineHeight;
22083     num_lines ++;
22084   }
22085 
22086   if (!StringHasNoText (exp_red))
22087   {
22088     /* draw second row */
22089     LoadRect (&rct, x, y + descent,
22090               x + win_width,
22091               y - stdLineHeight + descent);
22092     EraseRect (&rct);
22093     PaintStringEx (dbl_click, x, y);
22094     tmp_x = x + StringLen (dbl_click) * char_width;
22095     Red ();
22096     PaintStringEx (exp_red, tmp_x, y);
22097     Black ();
22098     tmp_x += StringLen (exp_red) * char_width;
22099     PaintStringEx (".", tmp_x, y);
22100     y += stdLineHeight;
22101     num_lines ++;
22102   }
22103 
22104 
22105   return num_lines;
22106 }
22107 
22108 static Int4
DrawInvalidNameExplanation(Int4 x,Int4 y,Int4 char_width,Int4 descent,Int4 win_width)22109 DrawInvalidNameExplanation
22110 (Int4 x,
22111  Int4 y,
22112  Int4 char_width,
22113  Int4 descent,
22114  Int4 win_width)
22115 {
22116   /* draw explanation rows */
22117   return DrawExplanation (x, y, char_width, descent, win_width,
22118                           "Some of your modifiers have invalid names.",
22119                           "Double-click on a ",
22120                           "modifier name",
22121                           " to change the name.");
22122 }
22123 
ColorizeUnrecognizedNames(CharPtr title,Pointer userdata,Boolean is_nuc)22124 static ValNodePtr ColorizeUnrecognizedNames (CharPtr title, Pointer userdata, Boolean is_nuc)
22125 {
22126   Int4            offset;
22127   CharPtr         stop, cp;
22128   ModifierInfoPtr mip;
22129   ValNodePtr      diff_list = NULL;
22130 
22131   if (StringHasNoText (title))
22132   {
22133     return NULL;
22134   }
22135 
22136   cp = StringChr (title, '[');
22137   mip = ParseOneBracketedModifier (cp, NULL, &stop);
22138   while (mip != NULL && stop != NULL)
22139   {
22140   	if (IsUnrecognizedModifierName (mip, is_nuc))
22141   	{
22142   	  cp++;
22143       offset = cp - title;
22144       while (*cp != 0 && *cp != '=')
22145       {
22146         if (!isspace (*cp))
22147         {
22148           ValNodeAddInt (&diff_list, 1, offset);
22149         }
22150         cp++;
22151         offset++;
22152       }
22153   	}
22154   	mip = ModifierInfoFree (mip);
22155   	cp = StringChr (stop + 1, '[');
22156   	mip = ParseOneBracketedModifier (cp, NULL, &stop);
22157   }
22158   mip = ModifierInfoFree (mip);
22159   return diff_list;
22160 }
22161 
22162 static PaneL
UnrecognizedModifiersPanel(GrouP parent,Int4 width,Int4 height,IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc,UpdateColorizedPanelParentProc update_parent,Pointer update_parent_data,ScrollParentProc scroll_parent,Pointer scroll_parent_data)22163 UnrecognizedModifiersPanel
22164 (GrouP                          parent,
22165  Int4                           width,
22166  Int4                           height,
22167  IDAndTitleEditPtr              iatep_new,
22168  IDAndTitleEditPtr              iatep_current,
22169  Boolean                        is_nuc,
22170  UpdateColorizedPanelParentProc update_parent,
22171  Pointer                        update_parent_data,
22172  ScrollParentProc               scroll_parent,
22173  Pointer                        scroll_parent_data)
22174 {
22175   return ColorizedDeflinePanel (parent, width, height,
22176                                 iatep_new,
22177                                 iatep_current,
22178                                 is_nuc,
22179                                 DrawInvalidNameExplanation,
22180                                 FALSE,
22181                                 ColorizeUnrecognizedNames,
22182                                 NULL,
22183                                 update_parent,
22184                                 update_parent_data,
22185                                 scroll_parent,
22186                                 scroll_parent_data);
22187 }
22188 
22189 static Int4
DrawBadValuesExplanation(Int4 x,Int4 y,Int4 char_width,Int4 descent,Int4 win_width)22190 DrawBadValuesExplanation
22191 (Int4 x,
22192  Int4 y,
22193  Int4 char_width,
22194  Int4 descent,
22195  Int4 win_width)
22196 {
22197   /* draw explanation rows */
22198   return DrawExplanation (x, y, char_width, descent, win_width,
22199                           "Some of your modifiers have inappropriate values.",
22200                           "Double-click on a modifier name to replace the modifier name, double-click on a ",
22201                           "value",
22202                           " to replace the value.");
22203 }
22204 
ColorizeInvalidValues(CharPtr title,Pointer userdata,Boolean is_nuc)22205 static ValNodePtr ColorizeInvalidValues (CharPtr title, Pointer userdata, Boolean is_nuc)
22206 {
22207   Int4            offset;
22208   CharPtr         start, stop, cp;
22209   ModifierInfoPtr mip;
22210   ValNodePtr      diff_list = NULL;
22211 
22212   if (StringHasNoText (title))
22213   {
22214     return NULL;
22215   }
22216 
22217   cp = StringChr (title, '[');
22218   mip = ParseOneBracketedModifier (cp, &start, &stop);
22219   while (mip != NULL && stop != NULL)
22220   {
22221   	if (ModifierHasInvalidValue (mip))
22222   	{
22223   	  cp = NextBracketToken (start + 1);
22224       offset = cp - title;
22225       while (*cp != 0 && cp != stop)
22226       {
22227         if (!isspace (*cp))
22228         {
22229           ValNodeAddInt (&diff_list, 1, offset);
22230         }
22231         cp++;
22232         offset++;
22233       }
22234   	}
22235   	mip = ModifierInfoFree (mip);
22236   	cp = StringChr (stop + 1, '[');
22237   	mip = ParseOneBracketedModifier (cp, &start, &stop);
22238   }
22239   mip = ModifierInfoFree (mip);
22240   return diff_list;
22241 }
22242 
22243 static PaneL
InvalidValuesPanel(GrouP parent,Int4 width,Int4 height,IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc,UpdateColorizedPanelParentProc update_parent,Pointer update_parent_data,ScrollParentProc scroll_parent,Pointer scroll_parent_data)22244 InvalidValuesPanel
22245 (GrouP                          parent,
22246  Int4                           width,
22247  Int4                           height,
22248  IDAndTitleEditPtr              iatep_new,
22249  IDAndTitleEditPtr              iatep_current,
22250  Boolean                        is_nuc,
22251  UpdateColorizedPanelParentProc update_parent,
22252  Pointer                        update_parent_data,
22253  ScrollParentProc               scroll_parent,
22254  Pointer                        scroll_parent_data)
22255 {
22256   return ColorizedDeflinePanel (parent, width, height,
22257                                 iatep_new,
22258                                 iatep_current,
22259                                 is_nuc,
22260                                 DrawBadValuesExplanation,
22261                                 TRUE,
22262                                 ColorizeInvalidValues,
22263                                 NULL,
22264                                 update_parent,
22265                                 update_parent_data,
22266                                 scroll_parent,
22267                                 scroll_parent_data);
22268 }
22269 
22270 static Boolean
ShowUnrecognizedModifiers(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc)22271 ShowUnrecognizedModifiers
22272 (IDAndTitleEditPtr iatep_new,
22273  IDAndTitleEditPtr iatep_current,
22274  Boolean           is_nuc)
22275 {
22276   ValNodePtr   unrecognized_list;
22277 
22278   unrecognized_list = ListUnrecognizedModifiers (iatep_new, iatep_current, is_nuc);
22279 
22280   if (unrecognized_list == NULL)
22281   {
22282     return FALSE;
22283   }
22284   else
22285   {
22286     unrecognized_list = ValNodeFreeData (unrecognized_list);
22287     return TRUE;
22288   }
22289 }
22290 
ShowBadValues(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current)22291 static Boolean ShowBadValues (IDAndTitleEditPtr iatep_new, IDAndTitleEditPtr iatep_current)
22292 {
22293   ValNodePtr badlist = NULL;
22294   Boolean    rval = FALSE;
22295 
22296   FindBadValuesInIDsAndTitles (iatep_new, &badlist);
22297   FindBadValuesInIDsAndTitles (iatep_current, &badlist);
22298 
22299   if (badlist != NULL)
22300   {
22301     rval = TRUE;
22302   }
22303 
22304   badlist = BadValueListFree (badlist);
22305   return rval;
22306 }
22307 
22308 static Boolean
IDAndTitleEditIDsNeedFix(IDAndTitleEditPtr iatep_new,IDAndTitleEditPtr iatep_current,Boolean is_nuc)22309 IDAndTitleEditIDsNeedFix
22310 (IDAndTitleEditPtr iatep_new,
22311  IDAndTitleEditPtr iatep_current,
22312  Boolean           is_nuc)
22313 {
22314   if (HasMissingIDs (iatep_new) || HasMissingIDs (iatep_current)
22315       || (EditHasDuplicateIDs (iatep_new, iatep_current) && is_nuc)
22316       || AnyBracketsInIDs (iatep_new) || AnyBracketsInIDs (iatep_current)
22317       || EditHasSpaceInIDs (iatep_new) || EditHasSpaceInIDs (iatep_current))
22318   {
22319     return TRUE;
22320   }
22321   else
22322   {
22323     return FALSE;
22324   }
22325 }
22326 
/* IDAndTitleEditTitlesNeedFix
 * Returns TRUE when the titles of one ID/title set need fixing.  The checks
 * run in order and stop at the first hit: bracketing problems, unrecognized
 * modifier names, then bad modifier values.
 */
static Boolean IDAndTitleEditTitlesNeedFix (IDAndTitleEditPtr iatep, Boolean is_nuc)
{
  ValNodePtr unrecognized = NULL;
  Boolean    titles_bad = FALSE;

  if (EditNeedsBracketingFixes (iatep))
  {
    titles_bad = TRUE;
  }
  else
  {
    unrecognized = ListUnrecognizedModifiers (iatep, NULL, is_nuc);
    if (unrecognized != NULL)
    {
      titles_bad = TRUE;
    }
    else if (ShowBadValues (iatep, NULL))
    {
      titles_bad = TRUE;
    }
  }

  unrecognized = ValNodeFreeData (unrecognized);
  return titles_bad;
}
22341 
EditIDsNeedFix(SeqIdEditPtr siep)22342 static Boolean EditIDsNeedFix (SeqIdEditPtr siep)
22343 {
22344   Boolean      need_fix = FALSE;
22345 
22346   if (siep == NULL || siep->iatep_new == NULL || siep->iatep_new->num_sequences < 1)
22347   {
22348     return FALSE;
22349   }
22350 
22351   if (IDAndTitleEditIDsNeedFix (siep->iatep_new, siep->iatep_current, siep->is_nuc)
22352       || IDAndTitleEditTitlesNeedFix (siep->iatep_new, siep->is_nuc)
22353       || IDAndTitleEditTitlesNeedFix (siep->iatep_current, siep->is_nuc))
22354   {
22355     need_fix = TRUE;
22356   }
22357   return need_fix;
22358 }
22359 
IsSeqNumHidden(DialoG d,Int4 seq_num)22360 static Boolean IsSeqNumHidden (DialoG d, Int4 seq_num)
22361 {
22362   TagListPtr tlp;
22363   Char       seq_str [15];
22364   ValNodePtr vnp;
22365   Int4       row_num;
22366   CharPtr    pos_str;
22367   Boolean    rval = TRUE;
22368 
22369   tlp = (TagListPtr) GetObjectExtra (d);
22370 
22371   if (tlp == NULL)
22372   {
22373     return FALSE;
22374   }
22375 
22376   sprintf (seq_str, "%d", seq_num + 1);
22377   for (vnp = tlp->vnp, row_num = 0;
22378        vnp != NULL && rval;
22379        vnp = vnp->next, row_num++)
22380   {
22381     pos_str = GetTagListValueEx (tlp, row_num, 1);
22382     if (StringCmp (pos_str, seq_str) == 0)
22383     {
22384       rval = FALSE;
22385     }
22386     pos_str = MemFree (pos_str);
22387   }
22388   return rval;
22389 }
22390 
SomeErrorMessagesAreHidden(SeqIdEditPtr siep)22391 static Boolean SomeErrorMessagesAreHidden (SeqIdEditPtr siep)
22392 {
22393   Boolean has_dups, has_missing, has_bracket, has_unrec_mods = FALSE;
22394   ValNodePtr unrec_mods = NULL;
22395   Int4    seq_num;
22396   CharPtr err_msg;
22397   Boolean some_hidden = FALSE;
22398 
22399   if (siep == NULL)
22400   {
22401     return FALSE;
22402   }
22403 
22404   has_dups = EditHasDuplicateIDs (siep->iatep_new, siep->iatep_current);
22405   has_missing = HasMissingIDs (siep->iatep_new) || HasMissingIDs (siep->iatep_current);
22406   has_bracket = EditNeedsBracketingFixes (siep->iatep_new)
22407                 || EditNeedsBracketingFixes (siep->iatep_current);
22408 
22409   if (!has_bracket)
22410   {
22411     unrec_mods = ListUnrecognizedModifiers (siep->iatep_new, siep->iatep_current, siep->is_nuc);
22412   }
22413   if (unrec_mods != NULL)
22414   {
22415     has_unrec_mods = TRUE;
22416     unrec_mods = ValNodeFreeData (unrec_mods);
22417   }
22418 
22419 
22420   if (siep->iatep_new != NULL)
22421   {
22422     for (seq_num = 0; seq_num < siep->iatep_new->num_sequences && !some_hidden; seq_num++)
22423     {
22424       if (IsSeqNumHidden (siep->new_dlg, seq_num))
22425       {
22426         err_msg = GetIDAndTitleErrorMessage (siep->iatep_new, siep->iatep_current,
22427                                          seq_num, has_dups,
22428                                          has_missing, siep->seqid_edit_phase,
22429                                          has_bracket,
22430                                          has_unrec_mods,
22431                                          siep->is_nuc);
22432         if (!StringHasNoText (err_msg))
22433         {
22434           some_hidden = TRUE;
22435         }
22436         err_msg = MemFree (err_msg);
22437       }
22438     }
22439   }
22440   if (siep->iatep_current != NULL)
22441   {
22442     for (seq_num = 0; seq_num < siep->iatep_current->num_sequences && !some_hidden; seq_num++)
22443     {
22444       if (IsSeqNumHidden (siep->current_dlg, seq_num))
22445       {
22446         err_msg = GetIDAndTitleErrorMessage (siep->iatep_current, siep->iatep_new,
22447                                          seq_num, has_dups,
22448                                          has_missing, siep->seqid_edit_phase,
22449                                          has_bracket,
22450                                          has_unrec_mods,
22451                                          siep->is_nuc);
22452         if (!StringHasNoText (err_msg))
22453         {
22454           some_hidden = TRUE;
22455         }
22456         err_msg = MemFree (err_msg);
22457       }
22458     }
22459   }
22460   return some_hidden;
22461 }
22462 
/* ShowErrorInstructions
 * Re-evaluates the state of the sequence ID / title editor and updates the
 * window title, the instruction document, the correction panels, the
 * auto-correct button and the accept button accordingly.
 *
 * userdata is the SeqIdEditPtr for the editor; NULL is ignored.
 *
 * The branches of the long if/else chain below are tried in order and only
 * the first one that applies is executed:
 *   1. missing IDs, duplicate IDs (when is_nuc) or spaces in IDs
 *   2. extended ID corrections are available
 *   3. IDs contain brackets that cannot be fixed automatically
 *   4. still in the ID phase, ID problems cleared, but fixes remain
 *   5. bracketing corrections are available
 *   6. unrecognized modifier names are present
 *   7. bad modifier values are present
 *   8. nothing left to correct
 */
static void ShowErrorInstructions (Pointer userdata)
{
  SeqIdEditPtr      siep;
  Boolean           has_missing, has_dups, has_bracket_ids, has_space;
  Boolean           old_seqid_edit_phase;
  Boolean           show_all;

  siep = (SeqIdEditPtr) userdata;
  if (siep == NULL)
  {
    return;
  }

  /* pull the dialog contents into the ID/title sets before examining them */
  UpdateIdAndTitleData (siep->new_dlg, siep->iatep_new);
  UpdateIdAndTitleData (siep->current_dlg, siep->iatep_current);

  has_missing = HasMissingIDs (siep->iatep_new) || HasMissingIDs (siep->iatep_current);
  has_dups = EditHasDuplicateIDs (siep->iatep_new, siep->iatep_current);
  has_bracket_ids = AnyBracketsInIDs (siep->iatep_new) || AnyBracketsInIDs (siep->iatep_current);
  has_space = EditHasSpaceInIDs (siep->iatep_new) || EditHasSpaceInIDs (siep->iatep_current);

  /* seqid_edit_phase is only ever switched on here (|=), never cleared.
   * NOTE(review): has_space does not switch the phase on, although it does
   * select the ID-error branch further down - confirm this is intended.
   */
  old_seqid_edit_phase = siep->seqid_edit_phase;
  siep->seqid_edit_phase |= has_missing | (has_dups && siep->is_nuc) | has_bracket_ids;

  if (siep->seqid_edit_phase)
  {
    SetTitle (siep->w, "Provide Sequence IDs For Your Sequences");
    Show (siep->refresh_err_btn);
  }
  else
  {
    SetTitle (siep->w, "Provide Correctly Formatted Titles For Your Sequences");
    Hide (siep->refresh_err_btn);
  }

  /* the phase was just switched on by this call: rebuild both dialogs */
  if (siep->seqid_edit_phase && !old_seqid_edit_phase)
  {
    if (GetValue (siep->show_all_grp) == 2)
    {
      show_all = TRUE;
    }
    else
    {
      show_all = FALSE;
    }

    UpdateIdAndTitleEditDialog (siep->new_dlg,
                                siep->iatep_new, siep->iatep_current,
                                siep->seqid_edit_phase,
                                show_all,
                                siep->is_nuc);
    UpdateIdAndTitleEditDialog (siep->current_dlg,
                                siep->iatep_current, siep->iatep_new,
                                siep->seqid_edit_phase,
                                show_all,
                                siep->is_nuc);

  }


  /* default presentation: empty instruction document shown, all correction
   * panels hidden; the branches below override this where needed
   */
  Reset (siep->auto_correct_doc);
  Show (siep->auto_correct_doc);
  Hide (siep->bracket_dlg);
  Hide (siep->badvalue_pnl);
  Hide (siep->unrec_mod_pnl);
  if (has_missing || (has_dups && siep->is_nuc) || has_space)
  {
    /* branch 1: basic ID problems */
    if (has_missing)
    {
      AppendText (siep->auto_correct_doc, "Some of your sequences lack sequence IDs.", NULL, NULL, programFont);
    }
    if (has_dups && siep->is_nuc)
    {
      AppendText (siep->auto_correct_doc, "Some of your sequence IDs are duplicated.", NULL, NULL, programFont);
    }
    /* this line is appended for every case of this branch, including spaces only */
    AppendText (siep->auto_correct_doc, "Please provide unique sequence IDs for every sequence.", NULL, NULL, programFont);

    if (has_space)
    {
      AppendText (siep->auto_correct_doc, "Some of your sequence IDs contain spaces.", NULL, NULL, programFont);
    }

    if (SomeErrorMessagesAreHidden (siep))
    {
      AppendText (siep->auto_correct_doc, "Press 'Refresh Error List' to see the complete list of sequences with errors.", NULL, NULL, programFont);
    }

    if (ShowExtendedIDCorrections (siep->iatep_new, siep->iatep_current, siep->auto_correct_doc))
    {
      siep->auto_correct_ids = TRUE;
      SetTitle (siep->auto_correct_btn, "Autocorrect Sequence IDs");
      Show (siep->auto_correct_btn);
    }
    else
    {
      siep->auto_correct_ids = FALSE;
      Hide (siep->auto_correct_btn);
    }
    siep->auto_correct_bracketing = FALSE;
    siep->auto_correct_modnames = FALSE;
  }
  else if (ShowExtendedIDCorrections (siep->iatep_new, siep->iatep_current, siep->auto_correct_doc))
  {
    /* branch 2: extended ID corrections can be applied automatically */
    siep->auto_correct_ids = TRUE;
    SetTitle (siep->auto_correct_btn, "Autocorrect Sequence IDs");
    Show (siep->auto_correct_btn);
    siep->auto_correct_bracketing = FALSE;
    siep->auto_correct_modnames = FALSE;
  }
  else if (SequenceIDsHaveNonFixableBrackets (siep->iatep_new, siep->iatep_current, siep->auto_correct_doc))
  {
    /* branch 3: nothing can be auto-corrected; the auto_correct_* flags are
     * left at whatever value they had before this call
     */
    Hide (siep->auto_correct_btn);
  }
  else if (siep->seqid_edit_phase && EditIDsNeedFix (siep))
  {
    /* branch 4: the button now moves the user on to title correction */
    siep->auto_correct_ids = FALSE;
    siep->auto_correct_bracketing = FALSE;
    siep->auto_correct_modnames = FALSE;
    Reset (siep->auto_correct_doc);
    AppendText (siep->auto_correct_doc, "Sequence ID errors have been corrected.\nErrors are present within your sequence titles.\nClick 'Proceed to Title Correction' to correct sequence title errors.\n", NULL, NULL, programFont);
    SetTitle (siep->auto_correct_btn, "Proceed to Title Correction");
    Show (siep->auto_correct_btn);
  }
  else if (ShowBracketingCorrections (siep->iatep_new, siep->iatep_current, siep->bracket_dlg))
  {
    /* branch 5: bracketing dialog replaces the instruction document */
    Show (siep->bracket_dlg);
    Hide (siep->auto_correct_doc);
    SetTitle (siep->auto_correct_btn, "Autocorrect Bracketing");
    Show (siep->auto_correct_btn);
    siep->auto_correct_ids = FALSE;
    siep->auto_correct_bracketing = TRUE;
    siep->auto_correct_modnames = FALSE;
    UpdateIDAndTitleEditDialogErrorColumns (siep->new_dlg, siep->current_dlg,
                                            siep->iatep_new, siep->iatep_current,
                                            siep->seqid_edit_phase, siep->is_nuc);

  }
  else if (ShowUnrecognizedModifiers (siep->iatep_new, siep->iatep_current, siep->is_nuc))
  {
    /* branch 6: unrecognized-modifier panel replaces the instruction document */
    Hide (siep->auto_correct_doc);
    Show (siep->unrec_mod_pnl);

    Show (siep->auto_correct_btn);
    SetTitle (siep->auto_correct_btn, "Correct Modifier Names");
    siep->auto_correct_ids = FALSE;
    siep->auto_correct_bracketing = FALSE;
    siep->auto_correct_modnames = TRUE;
    UpdateIDAndTitleEditDialogErrorColumns (siep->new_dlg, siep->current_dlg,
                                            siep->iatep_new, siep->iatep_current,
                                            siep->seqid_edit_phase, siep->is_nuc);
  }
  else if (ShowBadValues (siep->iatep_new, siep->iatep_current))
  {
    /* branch 7: bad-value panel shown; there is no auto-correct for values */
    Show (siep->badvalue_pnl);
    Hide (siep->auto_correct_doc);
    Hide (siep->auto_correct_btn);
  }
  else
  {
    /* branch 8: everything has been corrected */
    AppendText (siep->auto_correct_doc, "Sequence ID and title errors have been corrected.", NULL, NULL, programFont);

    Hide (siep->auto_correct_btn);
    siep->auto_correct_ids = FALSE;
    siep->auto_correct_bracketing = FALSE;
    /* NOTE(review): modnames is set TRUE here although the button is hidden
     * and nothing remains to correct - possibly meant to be FALSE; confirm.
     */
    siep->auto_correct_modnames = TRUE;
  }
  UpdateDocument (siep->auto_correct_doc, 0, 0);
  /* refreshed unconditionally, in addition to the calls inside branches 5 and 6 */
  UpdateIDAndTitleEditDialogErrorColumns (siep->new_dlg, siep->current_dlg,
                                          siep->iatep_new, siep->iatep_current,
                                          siep->seqid_edit_phase, siep->is_nuc);

  /* accepting is only allowed once the IDs themselves are acceptable;
   * title problems do not disable the accept button
   */
  if (IDAndTitleEditIDsNeedFix (siep->iatep_new, siep->iatep_current, siep->is_nuc))
  {
    Disable (siep->accept_btn);
  }
  else
  {
    Enable (siep->accept_btn);
  }
}
22643 
/* Callback table passed to CreateTagListDialogExEx by CreateIDsAndTitlesDialog.
 * All four entries call ShowErrorInstructions (presumably one entry per
 * tag-list column, since the dialog is created with four columns - TODO confirm).
 */
static TaglistCallback callback_list[4] =
 { ShowErrorInstructions, ShowErrorInstructions, ShowErrorInstructions, ShowErrorInstructions };
22646 
/* Builds one "IDs and titles" editing group inside parent: an optional caption,
 * a row of column headings (Error, Position, Sequence ID, Title) and a
 * four-column tag list dialog aligned under those headings.
 *
 * parent - group that receives the new controls
 * siep   - editing state; the created dialog is stored in siep->new_dlg
 *          (is_new) or siep->current_dlg (!is_new)
 * is_new - TRUE to build the list for siep->iatep_new, FALSE to build it
 *          for siep->iatep_current
 *
 * Returns the enclosing group, or NULL when siep is NULL, when the requested
 * ID/title list is NULL, or when the tag list dialog carries no TagList data.
 */
static GrouP CreateIDsAndTitlesDialog (GrouP parent, SeqIdEditPtr siep, Boolean is_new)
{
  GrouP             g, k, ppt;
  PrompT            p_desc = NULL, p1, p2, p3, p4;
  Int4              num_sequences;
  TagListPtr        tlp;
  DialoG            dlg;
  IDAndTitleEditPtr iatep;

  if (siep == NULL)
  {
    return NULL;
  }

  /* Select the list this dialog displays and refuse to continue without it.
   * This is checked before any control is created so that a missing list
   * neither crashes on the num_sequences read below nor leaves an empty
   * group behind in parent.
   */
  iatep = is_new ? siep->iatep_new : siep->iatep_current;
  if (iatep == NULL)
  {
    return NULL;
  }

  g = HiddenGroup (parent, -1, 0, NULL);

  if (is_new)
  {
    /* the "New Sequences" caption is only needed to tell the two lists apart */
    if (siep->iatep_current != NULL)
    {
      p_desc = StaticPrompt (g, "New Sequences", 0, 0, programFont, 'l');
    }
  }
  else
  {
    p_desc = StaticPrompt (g, "Existing Sequences", 0, 0, programFont, 'l');
  }

  ppt = HiddenGroup (g, 4, 0, NULL);
  p1 = StaticPrompt (ppt, "Error", 6 * stdCharWidth, 0, programFont, 'l');
  p2 = StaticPrompt (ppt, "Position", 5 * stdCharWidth, 0, programFont, 'l');
  p3 = StaticPrompt (ppt, "Sequence ID", 10 * stdCharWidth, 0, programFont, 'l');
  p4 = StaticPrompt (ppt, "Title", 40 * stdCharWidth, 0, programFont, 'l');

  num_sequences = iatep->num_sequences;

  k = NormalGroup (g, 1, 0, "", programFont, NULL);
  /* show at most four rows at a time */
  dlg = CreateTagListDialogExEx (k, MIN (num_sequences, 4), 4, 2,
                                 idedit_types, idedit_widths,
                                 NULL, TRUE, TRUE, NULL, NULL,
                                 callback_list, siep, TRUE);

  tlp = (TagListPtr) GetObjectExtra (dlg);
  if (tlp == NULL) return NULL;

  AlignObjects (ALIGN_CENTER, (HANDLE) dlg, (HANDLE) p_desc, NULL);

  /* line each heading up with the tag-list column it labels */
  AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [0], (HANDLE) p1, NULL);
  AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [1], (HANDLE) p2, NULL);
  AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [2], (HANDLE) p3, NULL);
  AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [3], (HANDLE) p4, NULL);

  if (is_new)
  {
    siep->new_dlg = dlg;
  }
  else
  {
    siep->current_dlg = dlg;
  }

  return g;
}
22717 
/* Scrolls the tag list dialog d so that the first row whose column-2 value
 * equals seq_id becomes the scroll position (clamped to the scroll bar
 * maximum), then asks the dialog to redraw.  Does nothing when the dialog has
 * no TagList data, seq_id has no text, or no row matches.
 */
static void ScrollTagListToSeqId (DialoG d, CharPtr seq_id)
{
  TagListPtr tlp;
  ValNodePtr row;
  Int4       row_idx = 0;
  Int4       found_row = -1;
  Int4       bar_max;
  CharPtr    row_id;

  tlp = (TagListPtr) GetObjectExtra (d);

  if (tlp == NULL || StringHasNoText (seq_id))
  {
    return;
  }

  /* walk the rows until the first one carrying the requested ID */
  row = tlp->vnp;
  while (row != NULL)
  {
    row_id = GetTagListValueEx (tlp, row_idx, 2);
    if (StringCmp (row_id, seq_id) == 0)
    {
      found_row = row_idx;
    }
    row_id = MemFree (row_id);
    if (found_row >= 0)
    {
      break;
    }
    row = row->next;
    row_idx++;
  }

  if (found_row < 0)
  {
    return;
  }

  /* never scroll past the end of the bar */
  bar_max = GetBarMax (tlp->bar);
  if (found_row < bar_max)
  {
    SetBarValue (tlp->bar, found_row);
  }
  else
  {
    SetBarValue (tlp->bar, bar_max);
  }
  SendMessageToDialog (d, VIB_MSG_REDRAW);
}
22762 
/* Callback for the "show only errors / show all" radio group.
 * When either list has missing IDs the group is forced to its second choice
 * and disabled; otherwise it is enabled.  Both tag list dialogs that exist are
 * then synchronized with their data and redisplayed, and the instruction area
 * is refreshed.
 */
static void ShowAllSequences (GrouP g)
{
  SeqIdEditPtr siep;
  Boolean      ids_missing;
  Boolean      list_everything;

  siep = (SeqIdEditPtr) GetObjectExtra (g);
  if (siep == NULL)
  {
    return;
  }

  ids_missing = HasMissingIDs (siep->iatep_new) || HasMissingIDs (siep->iatep_current);

  if (!ids_missing)
  {
    Enable (siep->show_all_grp);
  }
  else
  {
    SetValue (siep->show_all_grp, 2);
    Disable (siep->show_all_grp);
  }

  /* the second radio button means "show all sequences" */
  if (GetValue (siep->show_all_grp) == 2)
  {
    list_everything = TRUE;
  }
  else
  {
    list_everything = ids_missing;
  }

  if (siep->new_dlg != NULL)
  {
    UpdateIdAndTitleData (siep->new_dlg, siep->iatep_new);
    UpdateIdAndTitleEditDialog (siep->new_dlg, siep->iatep_new, siep->iatep_current,
                                siep->seqid_edit_phase, list_everything,
                                siep->is_nuc);
  }

  if (siep->current_dlg != NULL)
  {
    UpdateIdAndTitleData (siep->current_dlg, siep->iatep_current);
    UpdateIdAndTitleEditDialog (siep->current_dlg, siep->iatep_current, siep->iatep_new,
                                siep->seqid_edit_phase, list_everything,
                                siep->is_nuc);
  }

  ShowErrorInstructions (siep);
}
22809 
/* Button callback: re-runs ShowAllSequences on the radio group stored in the
 * SeqIdEdit data attached to button b.
 */
static void RefreshErrorList (ButtoN b)
{
  SeqIdEditPtr siep = (SeqIdEditPtr) GetObjectExtra (b);

  if (siep != NULL)
  {
    ShowAllSequences (siep->show_all_grp);
  }
}
22821 
/* Callback for the auto-correct button.  Exactly one action is taken, chosen
 * in this order from the flags in the attached SeqIdEdit data:
 *   auto_correct_ids        - replace both lists with suggested local-ID fixes
 *   seqid_edit_phase        - leave the sequence-ID editing phase
 *   auto_correct_bracketing - replace the lists with bracketing fixes
 *   auto_correct_modnames   - fix all unrecognized modifier names
 * Afterwards the radio group, both tag list dialogs and the instruction area
 * are refreshed.
 */
static void AutoCorrectIDsAndTitles (ButtoN b)
{
  SeqIdEditPtr      siep;
  IDAndTitleEditPtr fixed_new, fixed_current;
  Boolean           ids_missing;
  Boolean           list_everything;

  siep = (SeqIdEditPtr) GetObjectExtra (b);
  if (siep == NULL)
  {
    return;
  }

  if (siep->auto_correct_ids)
  {
    fixed_new = SuggestCorrectionForLocalIDs (siep->iatep_new, siep->iatep_current);
    fixed_current = SuggestCorrectionForLocalIDs (siep->iatep_current, siep->iatep_new);

    /* point the colorized panels at the replacement data before the old
     * lists are released */
    UpdateColorizedDeflinePanelData (fixed_new, fixed_current, siep->badvalue_pnl);
    UpdateColorizedDeflinePanelData (fixed_new, fixed_current, siep->unrec_mod_pnl);

    IDAndTitleEditFree (siep->iatep_new);
    siep->iatep_new = fixed_new;

    if (siep->iatep_current != NULL)
    {
      IDAndTitleEditFree (siep->iatep_current);
      siep->iatep_current = fixed_current;
    }
    else
    {
      /* NOTE(review): this frees siep->iatep_current (NULL here), not the
       * suggestion; kept as-is - confirm that no suggestion is produced
       * when there is no current list */
      fixed_current = IDAndTitleEditFree (siep->iatep_current);
    }
  }
  else if (siep->seqid_edit_phase)
  {
    siep->seqid_edit_phase = FALSE;
  }
  else if (siep->auto_correct_bracketing)
  {
    fixed_new = SuggestCorrectionForTitleBracketing (siep->iatep_new);
    UpdateColorizedDeflinePanelData (fixed_new, siep->iatep_current, siep->badvalue_pnl);
    UpdateColorizedDeflinePanelData (fixed_new, siep->iatep_current, siep->unrec_mod_pnl);

    IDAndTitleEditFree (siep->iatep_new);
    siep->iatep_new = fixed_new;

    if (siep->iatep_current != NULL)
    {
      fixed_current = SuggestCorrectionForTitleBracketing (siep->iatep_current);
      UpdateColorizedDeflinePanelData (siep->iatep_new, fixed_current, siep->badvalue_pnl);
      UpdateColorizedDeflinePanelData (siep->iatep_new, fixed_current, siep->unrec_mod_pnl);

      IDAndTitleEditFree (siep->iatep_current);
      siep->iatep_current = fixed_current;
    }
  }
  else if (siep->auto_correct_modnames)
  {
    FixAllUnrecognizedModifiers (siep->iatep_new, siep->iatep_current, siep);
  }

  /* missing IDs lock the radio group; otherwise the user may choose */
  ids_missing = HasMissingIDs (siep->iatep_new) || HasMissingIDs (siep->iatep_current);
  if (!ids_missing)
  {
    Enable (siep->show_all_grp);
  }
  else
  {
    Disable (siep->show_all_grp);
  }

  if (GetValue (siep->show_all_grp) == 2)
  {
    list_everything = TRUE;
  }
  else
  {
    list_everything = ids_missing;
  }

  UpdateIdAndTitleEditDialog (siep->new_dlg, siep->iatep_new, siep->iatep_current,
                              siep->seqid_edit_phase, list_everything, siep->is_nuc);
  UpdateIdAndTitleEditDialog (siep->current_dlg, siep->iatep_current, siep->iatep_new,
                              siep->seqid_edit_phase, list_everything, siep->is_nuc);

  ShowErrorInstructions (siep);
}
22903 
/* Button callback: copies the dialog contents back into both ID/title lists,
 * clears the error column of each dialog, and then recomputes the error
 * columns for both dialogs.
 */
static void RefreshErrorButton (ButtoN b)
{
  SeqIdEditPtr siep = (SeqIdEditPtr) GetObjectExtra (b);

  if (siep != NULL)
  {
    /* new sequences */
    UpdateIdAndTitleData (siep->new_dlg, siep->iatep_new);
    ClearIDAndTitleEditDialogErrorColumn (siep->new_dlg);

    /* existing sequences */
    UpdateIdAndTitleData (siep->current_dlg, siep->iatep_current);
    ClearIDAndTitleEditDialogErrorColumn (siep->current_dlg);

    UpdateIDAndTitleEditDialogErrorColumns (siep->new_dlg, siep->current_dlg,
                                            siep->iatep_new, siep->iatep_current,
                                            siep->seqid_edit_phase, siep->is_nuc);
  }
}
22922 
/* For every title in iatep: stores the result of
 * RemoveAllDuplicatePairsFromOneTitle back into the list, then passes that
 * title to RemoveMeaninglessEmptyPairsFromOneTitle.  A NULL iatep is ignored.
 */
static void CleanupDuplicateAndEmptyPairs (IDAndTitleEditPtr iatep)
{
  Int4    idx;
  CharPtr cleaned;

  if (iatep != NULL)
  {
    for (idx = 0; idx < iatep->num_sequences; idx++)
    {
      cleaned = RemoveAllDuplicatePairsFromOneTitle (iatep->title_list [idx]);
      iatep->title_list [idx] = cleaned;
      RemoveMeaninglessEmptyPairsFromOneTitle (cleaned);
    }
  }
}
22937 
22938 /* This function will insert the escape character before each double-quotation mark
22939  * in a string, starting at the position offset and ending at the portion of the
22940  * string that corresponds to the position stop_offset in the original string -
22941  * this offset is adjusted for the additional escape characters inserted.
22942  */
EscapeQuotesBetweenPositions(CharPtr orig_title,Int4 start_offset,Int4 stop_offset)22943 static CharPtr EscapeQuotesBetweenPositions (CharPtr orig_title, Int4 start_offset, Int4 stop_offset)
22944 {
22945   CharPtr next_quote;
22946 
22947   if (StringHasNoText (orig_title))
22948   {
22949     return orig_title;
22950   }
22951 
22952   next_quote = NextUnescapedQuote (orig_title + start_offset);
22953   while (next_quote != NULL && next_quote - orig_title < stop_offset)
22954   {
22955     orig_title = InsertStringAtOffset (orig_title, "\\", next_quote - orig_title);
22956     stop_offset ++;
22957     next_quote = NextUnescapedQuote (orig_title + start_offset);
22958   }
22959   return orig_title;
22960 
22961 }
22962 
22963 /* This function will insert the escape character before each double-quotation mark in a
22964  * string, starting at position offset.  This is useful when the first portion of a
22965  * title appears to be parseable, but the remainder is not.
22966  */
EscapeQuotesAfterOffset(CharPtr orig_title,Int4 offset)22967 static CharPtr EscapeQuotesAfterOffset (CharPtr orig_title, Int4 offset)
22968 {
22969   CharPtr next_quote;
22970 
22971   if (StringHasNoText (orig_title))
22972   {
22973     return orig_title;
22974   }
22975 
22976   next_quote = NextUnescapedQuote (orig_title + offset);
22977   while (next_quote != NULL)
22978   {
22979     orig_title = InsertStringAtOffset (orig_title, "\\", next_quote - orig_title);
22980     next_quote = NextUnescapedQuote (orig_title + offset);
22981   }
22982   return orig_title;
22983 }
22984 
22985 /* This function inserts escape characters before each double-quotation mark in
22986  * a string starting at position offset, and then puts the portion of the string
22987  * starting at position offset inside a pair of double-quotes.
22988  */
QuoteToEndFromOffset(CharPtr orig_title,Int4 offset)22989 static CharPtr QuoteToEndFromOffset (CharPtr orig_title, Int4 offset)
22990 {
22991   if (StringHasNoText (orig_title))
22992   {
22993     return orig_title;
22994   }
22995 
22996   orig_title = EscapeQuotesAfterOffset (orig_title, offset);
22997   orig_title = InsertStringAtOffset (orig_title, "\"", offset);
22998   orig_title = InsertStringAtOffset (orig_title, "\"", StringLen (orig_title));
22999   return orig_title;
23000 }
23001 
23002 /* This function finds the first position where bracketing is incorrect
23003  * and puts the remainder of the string inside quotation marks.
23004  */
FixUnparseableBracketing(CharPtr orig_title)23005 static CharPtr FixUnparseableBracketing (CharPtr orig_title)
23006 {
23007   CharPtr next_token, next_next_token, last_start = NULL;
23008   Char    ch_expected;
23009 
23010   next_token = NextBracketToken (orig_title);
23011   if (next_token == NULL)
23012   {
23013     return orig_title;
23014   }
23015 
23016   if (*next_token == '[')
23017   {
23018     last_start = next_token;
23019   }
23020   else
23021   {
23022     orig_title = QuoteToEndFromOffset (orig_title, next_token - orig_title);
23023     return orig_title;
23024   }
23025 
23026   while (next_token != NULL)
23027   {
23028     ch_expected = ExpectToken (next_token);
23029     next_next_token = NextBracketToken (next_token + 1);
23030     if ((next_next_token == NULL && ch_expected != '[')
23031          || (next_next_token != NULL && *next_next_token != ch_expected))
23032     {
23033       orig_title = QuoteToEndFromOffset (orig_title, last_start - orig_title);
23034       return orig_title;
23035     }
23036     else
23037     {
23038       next_token = next_next_token;
23039       if (next_token != NULL && *next_token == '[')
23040       {
23041         last_start = next_token;
23042       }
23043     }
23044   }
23045   return orig_title;
23046 }
23047 
23048 /* This function finds individual bracketed name-value pairs that either have
23049  * unrecognizable names or invalid values and puts them in quotation marks, so
23050  * that they will not be parsed.
23051  */
23052 static CharPtr
QuoteUnrecognizedModifierNamesAndValues(CharPtr orig_title,Boolean is_nuc)23053 QuoteUnrecognizedModifierNamesAndValues
23054 (CharPtr orig_title,
23055  Boolean is_nuc)
23056 {
23057   CharPtr         start, stop, cp;
23058   ModifierInfoPtr mip;
23059   Int4            stop_offset, start_offset;
23060 
23061   cp = orig_title;
23062   mip = ParseOneBracketedModifier (cp, &start, &stop);
23063   while (mip != NULL && stop != NULL && start != NULL)
23064   {
23065     if (IsUnrecognizedModifierName (mip, is_nuc))
23066     {
23067       /* note - put stop quote in first, because position of stop will change
23068        * after putting in quote for start */
23069       stop_offset = stop - orig_title + 1;
23070       start_offset = start - orig_title;
23071       if (is_nuc)
23072       {
23073         orig_title = InsertStringAtOffset (orig_title, "\"", stop_offset);
23074       }
23075       else
23076       {
23077         orig_title = InsertStringAtOffset (orig_title, "\"]", stop_offset);
23078       }
23079       orig_title = EscapeQuotesBetweenPositions (orig_title, start_offset, stop_offset);
23080       if (is_nuc)
23081       {
23082         orig_title = InsertStringAtOffset (orig_title, "\"", start_offset);
23083       }
23084       else
23085       {
23086         orig_title = InsertStringAtOffset (orig_title, "[comment=\"", start_offset);
23087       }
23088       cp = orig_title + start_offset;
23089     }
23090     else
23091     {
23092       cp = stop + 1;
23093     }
23094     mip = ModifierInfoFree (mip);
23095     mip = ParseOneBracketedModifier (cp, &start, &stop);
23096   }
23097   return orig_title;
23098 }
23099 
23100 /* This section will insert double-quotation marks around the sections of a title
23101  * that cannot be parsed.
23102  */
QuoteUnparseableSections(IDAndTitleEditPtr iatep,Boolean is_nuc)23103 static void QuoteUnparseableSections (IDAndTitleEditPtr iatep, Boolean is_nuc)
23104 {
23105   Int4 seq_num;
23106 
23107   if (iatep == NULL)
23108   {
23109     return;
23110   }
23111 
23112   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
23113   {
23114     iatep->title_list [seq_num] = FixUnparseableBracketing (iatep->title_list [seq_num]);
23115     iatep->title_list [seq_num] = QuoteUnrecognizedModifierNamesAndValues (iatep->title_list [seq_num], is_nuc);
23116   }
23117 }
23118 
/* Shifts the "refresh error list" button to the right by the width of the
 * "refresh errors" button plus 10.  Does nothing when siep or either button
 * is NULL.
 */
static void PositionRefreshErrorListBtn (SeqIdEditPtr siep)
{
  RecT err_rect, list_rect;
  Int4 shift;

  if (siep == NULL)
  {
    return;
  }
  if (siep->refresh_err_btn == NULL || siep->refresh_error_list_btn == NULL)
  {
    return;
  }

  ObjectRect (siep->refresh_err_btn, &err_rect);
  ObjectRect (siep->refresh_error_list_btn, &list_rect);

  /* same horizontal displacement for both edges keeps the button width */
  shift = err_rect.right - err_rect.left + 10;
  list_rect.left += shift;
  list_rect.right += shift;
  SetPosition (siep->refresh_error_list_btn, &list_rect);
}
23135 
/* This function checks to see if all of the sequences in new_list and current_list
 * have non-empty, unique sequence IDs and if any of their titles have bracketing
 * errors, invalid modifier names, or invalid modifier values.
 * Problems with sequence IDs must be fixed; problems with titles that are left
 * unfixed will be cordoned off with double-quotation marks.
 *
 * new_list     - sequences being added
 * current_list - sequences already present
 * is_nuc       - passed through to the ID and title checks
 *
 * Returns TRUE immediately, without showing a dialog, when EditIDsNeedFix
 * reports nothing to fix.  Otherwise a modal window is shown; the function
 * returns FALSE if the user cancels, or TRUE after the edited IDs and titles
 * have been applied to new_list and current_list.
 */
NLM_EXTERN Boolean FixIDsAndTitles (SeqEntryPtr new_list, SeqEntryPtr current_list, Boolean is_nuc)
{
  WindoW                w;
  GrouP                 h, instr_grp, k, j, c, new_grp, current_grp = NULL;
  ButtoN                b;
  Boolean               need_fix = FALSE;
  ModalAcceptCancelData acd;
  Boolean               show_all;
  SeqIdEditData         sied;
  Boolean               rval;

  /* work on editable copies of the IDs and titles; the SeqEntry lists are
   * only modified at the very end, and only if the user accepts */
  sied.iatep_new = SeqEntryListToIDAndTitleEdit (new_list);
  sied.iatep_current = SeqEntryListToIDAndTitleEdit (current_list);
  sied.is_nuc = is_nuc;

  /* check for unique IDs - don't need to present dialog if they
   * are all present and unique */
  need_fix = EditIDsNeedFix (&sied);

  /* if no fixes are needed, do not present dialog */
  if (!need_fix)
  {
    sied.iatep_new = IDAndTitleEditFree (sied.iatep_new);
    sied.iatep_current = IDAndTitleEditFree (sied.iatep_current);
    return TRUE;
  }

  /* missing IDs force every sequence to be listed */
  show_all = HasMissingIDs (sied.iatep_new) || HasMissingIDs (sied.iatep_current);

  /* the sequence-ID editing phase is skipped only when no IDs are missing,
   * there are no duplicate IDs (or the sequences are not nucleotide), and
   * no ID in either list contains brackets */
  if (!show_all
      && (! EditHasDuplicateIDs (sied.iatep_new, sied.iatep_current) || !sied.is_nuc)
      && !AnyBracketsInIDs (sied.iatep_new)
      && !AnyBracketsInIDs (sied.iatep_current))
  {
    sied.seqid_edit_phase = FALSE;
  }
  else
  {
    sied.seqid_edit_phase = TRUE;
  }


  w = MovableModalWindow (-20, -13, -10, -10, "Provide Sequence IDs for your Sequences", NULL);
  sied.w = w;
  h = HiddenGroup(w, -1, 0, NULL);
  SetGroupSpacing (h, 10, 10);

  /* instruction area: a text document plus three alternative error views,
   * all created in the same group; ShowErrorInstructions shows and hides them */
  instr_grp = HiddenGroup (h, -1, 0, NULL);
  k = HiddenGroup (instr_grp, 0, 0, NULL);
  sied.auto_correct_doc = DocumentPanel (k, stdCharWidth * 63, stdLineHeight * 12);
  SetDocAutoAdjust (sied.auto_correct_doc, TRUE);
  SetObjectExtra (sied.auto_correct_doc, &sied, NULL);
  sied.bracket_dlg = ShowDifferenceDialog (k, stdCharWidth * 63, stdLineHeight * 12);
  sied.badvalue_pnl = InvalidValuesPanel (k, stdCharWidth * 63, stdLineHeight * 12,
                                          sied.iatep_new, sied.iatep_current, is_nuc,
                                          UpdateSeqIdEditForColorizedPanel,
                                          &sied,
                                          ScrollSeqIdEditForColorizedPanel,
                                          &sied);
  sied.unrec_mod_pnl = UnrecognizedModifiersPanel (k, stdCharWidth * 63, stdLineHeight * 12,
                                                   sied.iatep_new, sied.iatep_current, is_nuc,
                                                   UpdateSeqIdEditForColorizedPanel,
                                                   &sied,
                                                   ScrollSeqIdEditForColorizedPanel,
                                                   &sied);

  AlignObjects (ALIGN_CENTER, (HANDLE) sied.auto_correct_doc,
                              (HANDLE) sied.bracket_dlg,
                              (HANDLE) sied.badvalue_pnl,
                              (HANDLE) sied.unrec_mod_pnl,
                              NULL);

  /* action buttons; each carries &sied so its callback can reach the state */
  j = HiddenGroup (h, 0, 0, NULL);
  sied.refresh_err_btn = PushButton (j, "Clear Fixed Errors", RefreshErrorButton);
  SetObjectExtra (sied.refresh_err_btn, &sied, NULL);

  sied.refresh_error_list_btn = PushButton (j, "Refresh Error List", RefreshErrorList);
  SetObjectExtra (sied.refresh_error_list_btn, &sied, NULL);

  sied.auto_correct_btn = PushButton (j, "Make automatic corrections", AutoCorrectIDsAndTitles);
  SetObjectExtra (sied.auto_correct_btn, &sied, NULL);
  AlignObjects (ALIGN_CENTER, (HANDLE) k, (HANDLE) sied.auto_correct_btn, NULL);

  /* CreateIDsAndTitlesDialog fills these in; current_dlg stays NULL when
   * there is no current list */
  sied.new_dlg = NULL;
  sied.current_dlg = NULL;

  new_grp = CreateIDsAndTitlesDialog (h, &sied, TRUE);
  if (sied.iatep_current != NULL)
  {
    current_grp = CreateIDsAndTitlesDialog (h, &sied, FALSE);
  }

  UpdateIdAndTitleEditDialog (sied.new_dlg, sied.iatep_new, sied.iatep_current,
                              sied.seqid_edit_phase, show_all, sied.is_nuc);
  UpdateIdAndTitleEditDialog (sied.current_dlg, sied.iatep_current, sied.iatep_new,
                              sied.seqid_edit_phase, show_all, sied.is_nuc);

  /* radio group choosing between "errors only" (1) and "all sequences" (2);
   * it is disabled while IDs are missing */
  k = HiddenGroup (h, 2, 0, NULL);
  SetGroupSpacing (k, 10, 10);
  sied.show_all_grp = HiddenGroup (k, 2, 0, ShowAllSequences);
  RadioButton (sied.show_all_grp, "Show only sequences with errors");
  RadioButton (sied.show_all_grp, "Show all sequences in set");
  SetValue (sied.show_all_grp, 1);
  SetObjectExtra (sied.show_all_grp, &sied, NULL);
  if (show_all)
  {
    Disable (sied.show_all_grp);
  }
  else
  {
    Enable (sied.show_all_grp);
  }


  /* Accept/Cancel buttons carry acd; presumably ModalAcceptButton and
   * ModalCancelButton set acd.accepted / acd.cancelled - TODO confirm */
  c = HiddenGroup (h, 2, 0, NULL);
  sied.accept_btn = PushButton (c, "Accept", ModalAcceptButton);
  SetObjectExtra (sied.accept_btn, &acd, NULL);
  b = PushButton (c, "Cancel", ModalCancelButton);
  SetObjectExtra (b, &acd, NULL);

  /* every control ShowErrorInstructions touches exists by now */
  ShowErrorInstructions (&sied);

  /* current_grp is NULL when there is no current list; it is deliberately
   * the last object before the terminating NULL */
  AlignObjects (ALIGN_CENTER, (HANDLE) instr_grp,
                              (HANDLE) k,
                              (HANDLE) c,
                              (HANDLE) new_grp,
                              (HANDLE) current_grp,
                              NULL);

  AlignObjects (ALIGN_LEFT, (HANDLE) sied.refresh_err_btn,
                            (HANDLE) sied.new_dlg,
                            NULL);

  PositionRefreshErrorListBtn (&sied);

  Show (w);
  Select (w);

  /* modal loop: repeat until the data is acceptable or the user cancels */
  acd.cancelled = FALSE;
  while (need_fix && ! acd.cancelled)
  {
    acd.accepted = FALSE;
    /* wait for one of the two flags in acd to be set */
    while (!acd.accepted && ! acd.cancelled)
    {
      ProcessExternalEvent ();
      Update ();
    }
    ProcessAnEvent ();
    if (! acd.cancelled)
    {
      /* pull the user's edits out of the dialogs before checking them */
      UpdateIdAndTitleData (sied.new_dlg, sied.iatep_new);
      UpdateIdAndTitleData (sied.current_dlg, sied.iatep_current);

      if (IDAndTitleEditTitlesNeedFix (sied.iatep_new, sied.is_nuc)
          || IDAndTitleEditTitlesNeedFix (sied.iatep_current, sied.is_nuc))
      {
        /* answering anything other than yes leaves need_fix set, so the
         * window stays open for further editing */
        if (ANS_YES == Message (MSG_YN, "Your titles contain unparseable elements.  If you choose to continue, the unparseable sections will be placed in quotes and no data will be parsed from these sections.  You will have to add information about the sequence manually.  Are you sure you want to continue?"))
        {
          QuoteUnparseableSections (sied.iatep_new, is_nuc);
          QuoteUnparseableSections (sied.iatep_current, is_nuc);
          need_fix = FALSE;
        }
      }
      else
      {
        need_fix = FALSE;
      }

      /* refresh the display to match the (possibly quoted) data */
      show_all = HasMissingIDs (sied.iatep_new) || HasMissingIDs (sied.iatep_current);
      if (show_all)
      {
        Disable (sied.show_all_grp);
      }
      else
      {
        Enable (sied.show_all_grp);
        if (GetValue (sied.show_all_grp) == 2)
        {
          show_all = TRUE;
        }
        else
        {
          show_all = FALSE;
        }
      }
      UpdateIdAndTitleEditDialog (sied.new_dlg, sied.iatep_new, sied.iatep_current,
                                  sied.seqid_edit_phase, show_all, sied.is_nuc);
      UpdateIdAndTitleEditDialog (sied.current_dlg, sied.iatep_current, sied.iatep_new,
                                  sied.seqid_edit_phase, show_all, sied.is_nuc);
      ShowErrorInstructions (&sied);
    }
  }

  if (acd.cancelled)
  {
    rval = FALSE;
  }
  else
  {
    /* tidy the titles, then write IDs and titles back to the SeqEntry lists */
    CleanupDuplicateAndEmptyPairs (sied.iatep_new);
    CleanupDuplicateAndEmptyPairs (sied.iatep_current);
    ApplyIDAndTitleEditToSeqEntryList (new_list, sied.iatep_new);
    ApplyIDAndTitleEditToSeqEntryList (current_list, sied.iatep_current);
    rval = TRUE;
  }
  Remove (w);

  sied.iatep_new = IDAndTitleEditFree (sied.iatep_new);
  sied.iatep_current = IDAndTitleEditFree (sied.iatep_current);

  return rval;
}
23354 
/* Switches to the arrow cursor and then runs FixIDsAndTitles on the two
 * lists, returning its result.
 */
static Boolean CollectIDsAndTitles (SeqEntryPtr new_list, SeqEntryPtr current_list, Boolean is_nuc)
{
  Boolean fixed;

  ArrowCursor ();
  fixed = FixIDsAndTitles (new_list, current_list, is_nuc);

  return fixed;
}
23362 
23363 /* The following section of code is used for editing titles with the Sequence Assistant.
23364  * A user may edit a single title or the list of titles from the Sequence Assistant.
23365  * This code provides the same bracketing, modifier name, and modifier value checks
23366  * that are used when sequences are imported or created.
23367  */
23368 
/* State shared by the title-editing window and its callbacks. */
typedef struct titleedit
{
  DialoG            bracket_dlg;    /* shown by ShowTitleEditErrors when bracketing corrections exist */
  PaneL             unrec_mod_pnl;  /* shown when unrecognized modifiers are found */
  PaneL             badvalue_pnl;   /* shown when bad modifier values are found */
  TexT              title_txt;      /* text control; OnTitleEditChange saves its contents as title 0 */
  DialoG            multi_title;    /* presumably used when several titles are edited at once - TODO confirm */
  IDAndTitleEditPtr iatep;          /* IDs and titles being edited */
  ButtoN            accept_btn;     /* enabled only while no error view is shown */
} TitleEditData, PNTR TitleEditPtr;
23379 
/* Hides all three error views, then shows the first one that reports a
 * problem - bracketing, then unrecognized modifiers, then bad values.
 * The accept button is disabled while any view is shown and enabled when
 * none of the checks reports a problem.  A NULL tep is ignored.
 */
static void ShowTitleEditErrors (TitleEditPtr tep)
{
  Boolean titles_ok = FALSE;

  if (tep == NULL)
  {
    return;
  }

  Hide (tep->bracket_dlg);
  Hide (tep->unrec_mod_pnl);
  Hide (tep->badvalue_pnl);

  /* the checks are ordered; later ones only run when earlier ones pass */
  if (ShowBracketingCorrections (tep->iatep, NULL, tep->bracket_dlg))
  {
    Show (tep->bracket_dlg);
  }
  else if (ShowUnrecognizedModifiers (tep->iatep, NULL, TRUE))
  {
    Show (tep->unrec_mod_pnl);
  }
  else if (ShowBadValues (tep->iatep, NULL))
  {
    Show (tep->badvalue_pnl);
  }
  else
  {
    titles_ok = TRUE;
  }

  if (titles_ok)
  {
    Enable (tep->accept_btn);
  }
  else
  {
    Disable (tep->accept_btn);
  }
}
23411 
/* Text-change callback: replaces the first title in tep->iatep with the
 * current contents of the title text control and re-evaluates the error
 * views.  Does nothing when no TitleEdit data is attached to t.
 */
static void OnTitleEditChange (TexT t)
{
  TitleEditPtr tep = (TitleEditPtr) GetObjectExtra (t);
  CharPtr PNTR title_slot;

  if (tep != NULL)
  {
    title_slot = &(tep->iatep->title_list [0]);

    /* release the old string before storing the fresh copy */
    *title_slot = MemFree (*title_slot);
    *title_slot = SaveStringFromText (tep->title_txt);

    ShowTitleEditErrors (tep);
  }
}
23427 
/* Callback given to the colorized panels: copies the first title in
 * tep->iatep into the title text control and re-evaluates the error views.
 * userdata is the TitleEditPtr; NULL is ignored.
 */
static void UpdateTitleEditForColorizedPanel (Pointer userdata)
{
  TitleEditPtr tep = (TitleEditPtr) userdata;

  if (tep != NULL)
  {
    SetTitle (tep->title_txt, tep->iatep->title_list [0]);
    ShowTitleEditErrors (tep);
  }
}
23441 
/* Runs a modal dialog for editing the title of a single sequence.
 *
 * sap      Sequence Assistant data; its seq_list is walked to find the entry.
 * seq_num  zero-based position of the entry in sap->seq_list.
 *
 * For a Bioseq entry the Bioseq itself is edited; for a Bioseq-set entry the
 * Bioseq of the entry returned by FindNucSeqEntry is edited.  The function
 * returns without doing anything when sap is NULL, when there is no entry at
 * seq_num, or when no Bioseq can be located.
 *
 * When the user presses Accept, the text of the title control is stored in
 * the Bioseq's title descriptor (a new descriptor is created if the Bioseq
 * has none).  Cancel leaves the Bioseq untouched.
 */
static void EditOneSequenceTitle (SequenceAssistantPtr sap, Int4 seq_num)
{
  Int4                  seq_pos;
  SeqEntryPtr           sep, nsep;
  WindoW                w;
  CharPtr               title = NULL;
  BioseqPtr             bsp = NULL;
  CharPtr               title_fmt = "Title for %s";
  GrouP                 h, g, c, err_grp;
  ButtoN                b;
  ModalAcceptCancelData acd;
  SeqDescrPtr           sdp;
  TitleEditData         ted;

  if (sap == NULL)
  {
    return;
  }

  /* advance to the requested position in the list */
  for (seq_pos = 0, sep = sap->seq_list;
       seq_pos != seq_num && sep != NULL;
       seq_pos ++, sep = sep->next)
  {
  }
  if (sep == NULL)
  {
    return;
  }

  if (IS_Bioseq (sep))
  {
    bsp = (BioseqPtr) sep->data.ptrvalue;
  }
  else if (IS_Bioseq_set (sep))
  {
    nsep = FindNucSeqEntry (sep);
    if (nsep != NULL && IS_Bioseq (nsep))
    {
      bsp = (BioseqPtr) nsep->data.ptrvalue;
    }
  }
  if (bsp == NULL)
  {
    return;
  }

  /* find the existing title descriptor, if any */
  sdp = bsp->descr;
  while (sdp != NULL && sdp->choice != Seq_descr_title)
  {
    sdp = sdp->next;
  }

  ted.iatep = IDAndTitleEditNew ();
  if (ted.iatep == NULL)
  {
    return;
  }

  /* set up IDAndTitleEdit to use for single sequence */
  ted.iatep->num_sequences = 1;
  ted.iatep->id_list = (CharPtr PNTR) MemNew (ted.iatep->num_sequences * sizeof (CharPtr));
  ted.iatep->title_list = (CharPtr PNTR) MemNew (ted.iatep->num_sequences * sizeof (CharPtr));
  ted.iatep->id_list [0] = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
  if (sdp != NULL && !StringHasNoText (sdp->data.ptrvalue))
  {
    ted.iatep->title_list [0] = StringSave (sdp->data.ptrvalue);
  }
  else
  {
    ted.iatep->title_list [0] = StringSave ("");
  }

  /* the "%s" in the format is replaced by the ID, so format length plus ID
   * length leaves room for the terminating NUL
   */
  title = (CharPtr) MemNew ((StringLen (title_fmt)
                               + StringLen (ted.iatep->id_list [0])) * sizeof (Char));
  sprintf (title, title_fmt, ted.iatep->id_list [0]);
  w = MovableModalWindow (-20, -13, -10, -10, title, NULL);
  title = MemFree (title);

  h = HiddenGroup(w, -1, 0, NULL);
  SetGroupSpacing (h, 10, 10);

  /* error displays, all created in the same group */
  err_grp = HiddenGroup (h, 0, 0, NULL);
  ted.bracket_dlg = ShowDifferenceDialog (err_grp, stdCharWidth * 63, stdLineHeight * 5);
  ted.badvalue_pnl = InvalidValuesPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
                                         ted.iatep, NULL, TRUE,
                                         UpdateTitleEditForColorizedPanel,
                                         &ted,
                                         NULL, NULL);
  ted.unrec_mod_pnl = UnrecognizedModifiersPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
                                                  ted.iatep, NULL, TRUE,
                                                  UpdateTitleEditForColorizedPanel,
                                                  &ted,
                                                  NULL, NULL);
  AlignObjects (ALIGN_CENTER, (HANDLE) ted.bracket_dlg,
                              (HANDLE) ted.badvalue_pnl,
                              (HANDLE) ted.unrec_mod_pnl,
                              NULL);

  g = HiddenGroup (h, 2, 0, NULL);
  StaticPrompt (g, "Title", 0, popupMenuHeight, programFont, 'l');
  ted.title_txt = DialogText (g, "", 40, OnTitleEditChange);
  SetObjectExtra (ted.title_txt, &ted, NULL);
  if (sdp != NULL && !StringHasNoText (sdp->data.ptrvalue))
  {
    SetTitle (ted.title_txt, sdp->data.ptrvalue);
  }

  c = HiddenGroup (h, 2, 0, NULL);
  ted.accept_btn = PushButton (c, "Accept", ModalAcceptButton);
  SetObjectExtra (ted.accept_btn, &acd, NULL);
  b = PushButton (c, "Cancel", ModalCancelButton);
  SetObjectExtra (b, &acd, NULL);

  AlignObjects (ALIGN_CENTER, (HANDLE) err_grp, (HANDLE) g, (HANDLE) c, NULL);

  Show(w);
  Select (w);

  ShowTitleEditErrors (&ted);

  acd.accepted = FALSE;
  acd.cancelled = FALSE;

  /* local event loop: runs until one of the two buttons sets its flag */
  while (!acd.cancelled && ! acd.accepted)
  {
    ProcessExternalEvent ();
    Update ();
  }
  ProcessAnEvent ();
  if (acd.accepted)
  {
    CleanupDuplicateAndEmptyPairs (ted.iatep);
    SetTitle (ted.title_txt, ted.iatep->title_list [0]);
    if (sdp == NULL)
    {
      /* no title descriptor yet: add one to the Bioseq's descriptor chain */
      sdp = SeqDescrNew (bsp->descr);
      if (bsp->descr == NULL)
      {
        bsp->descr = sdp;
      }
      sdp->choice = Seq_descr_title;
    }
    sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
    sdp->data.ptrvalue = SaveStringFromText (ted.title_txt);
  }

  Remove (w);

  /* the IDAndTitleEdit was created here and is only lent to the panels, so
   * release it once the window that used it is gone (it used to be leaked)
   */
  ted.iatep = IDAndTitleEditFree (ted.iatep);
}
23590 
/* Presumably column numbers for the molecule, topology and title columns of
 * the Sequence Assistant display -- the code that uses them is outside this
 * section; TODO confirm against the callers.
 */
#define SEQUENCE_ASSISTANT_MOLECULE_COLUMN 2
#define SEQUENCE_ASSISTANT_TOPOLOGY_COLUMN 3
#define SEQUENCE_ASSISTANT_TITLE_COLUMN    4
23594 
/* Column control types for the two-column tag list built in
 * EditSequenceTitleColumns: the first column (sequence ID) is a prompt,
 * the second column (title) is a text field.
 */
static Uint2 titleedit_types [] = {
  TAGLIST_PROMPT, TAGLIST_TEXT
};

/* Column widths passed alongside titleedit_types to the same tag list
 * (one entry per column).
 */
static Uint2 titleedit_widths [] = {
  10, 40,
};
23602 
/* "Pointer to dialog" function for the ID/title tag list dialog.
 *
 * d         the tag list dialog; its object extra is the TagListPtr.
 * userdata  an IDAndTitleEditPtr supplying the IDs and titles to display.
 *
 * Builds one "<id>\t<title>\n" row per sequence, installs the rows as the
 * tag list's data, redraws the dialog and adjusts the scroll bar so that it
 * is only shown when there are more sequences than visible rows.
 * Nothing is done when the dialog has no tag list or userdata is NULL.
 */
static void IDAndTitleEditToTagData (DialoG d, Pointer userdata)
{
  Int4              len;
  CharPtr           str;
  CharPtr           id, title;
  IDAndTitleEditPtr iatep;
  TagListPtr        tlp;
  Int4              seq_num;
  ValNodePtr        list = NULL;

  tlp =(TagListPtr) GetObjectExtra (d);
  if (tlp == NULL)
  {
    return;
  }

  iatep = (IDAndTitleEditPtr) userdata;
  if (iatep == NULL)
  {
    return;
  }

  for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
  {
    /* never hand a NULL to "%s": treat a missing ID or title as empty
     * (previously only the title was protected)
     */
    id = iatep->id_list [seq_num];
    if (id == NULL)
    {
      id = "";
    }
    title = iatep->title_list [seq_num];
    if (title == NULL)
    {
      title = "";
    }

    /* tab + newline + terminating NUL need 3 bytes; 4 are reserved */
    len = StringLen (id) + StringLen (title) + 4;
    str = (CharPtr) MemNew (len * sizeof (Char));
    if (str != NULL) {
      sprintf (str, "%s\t%s\n", id, title);
    }
    ValNodeAddPointer (&list, 0, str);
  }

  SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
  tlp->vnp = list;
  SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
  /* scroll range is the number of rows that do not fit in the display */
  tlp->max = MAX ((Int2) 0, (Int2) (iatep->num_sequences - tlp->rows));
  CorrectBarMax (tlp->bar, tlp->max);
  CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
  if (tlp->max > 0) {
    SafeShow (tlp->bar);
  } else {
    SafeHide (tlp->bar);
  }
}
23647 
TagDataToIDAndTitleEdit(DialoG d)23648 static Pointer TagDataToIDAndTitleEdit (DialoG d)
23649 {
23650   IDAndTitleEditPtr iatep;
23651   TagListPtr        tlp;
23652   Int4              seq_num;
23653 
23654   tlp =(TagListPtr) GetObjectExtra (d);
23655   if (tlp == NULL)
23656   {
23657     return NULL;
23658   }
23659 
23660   iatep = IDAndTitleEditNew ();
23661   if (iatep == NULL)
23662   {
23663     return NULL;
23664   }
23665 
23666   iatep->num_sequences = ValNodeLen (tlp->vnp);
23667   iatep->id_list = (CharPtr PNTR) MemNew (iatep->num_sequences * sizeof (CharPtr));
23668   iatep->title_list = (CharPtr PNTR) MemNew (iatep->num_sequences * sizeof (CharPtr));
23669 
23670   for (seq_num = 0; seq_num < iatep->num_sequences; seq_num++)
23671   {
23672     iatep->id_list [seq_num] = GetTagListValueEx (tlp, seq_num, 0);
23673     iatep->title_list [seq_num] = GetTagListValueEx (tlp, seq_num, 1);
23674   }
23675   return iatep;
23676 }
23677 
/* Change callback handed to InvalidValuesPanel and UnrecognizedModifiersPanel
 * by the multi-title editor.  userdata is the registered TitleEditPtr; NULL
 * is ignored.
 *
 * Pushes the TitleEdit's current IDAndTitleEdit into the multi-title dialog
 * and then refreshes the error display.
 */
static void UpdateMultiTitleEditForColorizedPanel (Pointer userdata)
{
  TitleEditPtr edit = (TitleEditPtr) userdata;

  if (edit != NULL) {
    PointerToDialog (edit->multi_title, edit->iatep);
    ShowTitleEditErrors (edit);
  }
}
23692 
/* Tag list callback for the multi-title editor (see title_callback_list);
 * also called directly while the dialog is being set up.
 * userdata is the TitleEditPtr; NULL is ignored.
 *
 * Replaces the TitleEdit's IDAndTitleEdit with a fresh copy read from the
 * multi-title dialog, passes the new data to both colorized panels and
 * refreshes the error display.
 */
static void ShowMultiTitleErrors (Pointer userdata)
{
  TitleEditPtr edit = (TitleEditPtr) userdata;

  if (edit != NULL) {
    /* discard the old snapshot before taking a new one from the dialog */
    edit->iatep = IDAndTitleEditFree (edit->iatep);
    edit->iatep = DialogToPointer (edit->multi_title);

    UpdateColorizedDeflinePanelData (edit->iatep, NULL, edit->badvalue_pnl);
    UpdateColorizedDeflinePanelData (edit->iatep, NULL, edit->unrec_mod_pnl);

    ShowTitleEditErrors (edit);
  }
}
23711 
/* Callbacks passed to CreateTagListDialogExEx for the two-column ID/title
 * tag list; the same function, ShowMultiTitleErrors, is supplied for both
 * entries.
 */
static TaglistCallback title_callback_list[2] =
 { ShowMultiTitleErrors, ShowMultiTitleErrors };
23714 
/* Runs a modal dialog in which the titles of all sequences in the Sequence
 * Assistant can be edited at once.
 *
 * sap  Sequence Assistant data; nothing happens if it is NULL or its
 *      seq_list is empty.
 *
 * The IDs and titles are taken from sap->seq_list and shown in a two-column
 * tag list (at most five rows visible).  When the user presses Accept the
 * edited values are applied to the list with
 * ApplyIDAndTitleEditToSeqEntryList and the Sequence Assistant is refreshed;
 * Cancel discards the edits.
 */
static void EditSequenceTitleColumns (SequenceAssistantPtr sap)
{
  WindoW                w;
  GrouP                 h, err_grp, c;
  PrompT                ppt;
  ButtoN                b;
  Int4                  rows_shown = 0;
  TagListPtr            tlp;
  ModalAcceptCancelData acd;
  TitleEditData         ted;

  if (sap == NULL || sap->seq_list == NULL)
  {
    return;
  }

  ted.iatep = SeqEntryListToIDAndTitleEdit (sap->seq_list);
  if (ted.iatep == NULL)
  {
    return;
  }

  rows_shown = MIN (ted.iatep->num_sequences, 5);

  w = MovableModalWindow (-20, -13, -10, -10, "Sequence Titles", NULL);

  h = HiddenGroup(w, -1, 0, NULL);
  SetGroupSpacing (h, 10, 10);

  /* error displays, all created in the same group */
  err_grp = HiddenGroup (h, 0, 0, NULL);
  ted.bracket_dlg = ShowDifferenceDialog (err_grp, stdCharWidth * 63, stdLineHeight * 5);
  ted.badvalue_pnl = InvalidValuesPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
                                         ted.iatep, NULL, TRUE,
                                         UpdateMultiTitleEditForColorizedPanel,
                                         &ted,
                                         NULL, NULL);
  ted.unrec_mod_pnl = UnrecognizedModifiersPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
                                                  ted.iatep, NULL, TRUE,
                                                  UpdateMultiTitleEditForColorizedPanel,
                                                  &ted,
                                                  NULL, NULL);
  AlignObjects (ALIGN_CENTER, (HANDLE) ted.bracket_dlg,
                              (HANDLE) ted.badvalue_pnl,
                              (HANDLE) ted.unrec_mod_pnl,
                              NULL);

  ppt = StaticPrompt (h, "Title", 18 * stdCharWidth, 0, programFont, 'l');

  ted.multi_title = CreateTagListDialogExEx (h, rows_shown, 2, 2,
                                           titleedit_types, titleedit_widths,
                                           NULL, TRUE, TRUE,
                                           IDAndTitleEditToTagData,
                                           TagDataToIDAndTitleEdit,
                                           title_callback_list, &ted, FALSE);

  PointerToDialog (ted.multi_title, ted.iatep);

  tlp = (TagListPtr) GetObjectExtra (ted.multi_title);
  if (tlp == NULL)
  {
    /* bail out cleanly: the window and the IDAndTitleEdit already exist and
     * would otherwise be leaked
     */
    ted.iatep = IDAndTitleEditFree (ted.iatep);
    Remove (w);
    return;
  }

  c = HiddenGroup (h, 2, 0, NULL);
  ted.accept_btn = PushButton (c, "Accept", ModalAcceptButton);
  SetObjectExtra (ted.accept_btn, &acd, NULL);
  b = PushButton (c, "Cancel", ModalCancelButton);
  SetObjectExtra (b, &acd, NULL);

  AlignObjects (ALIGN_CENTER, (HANDLE) ted.multi_title, (HANDLE) c, (HANDLE) NULL);

  /* line the "Title" prompt up with the second (title) column */
  AlignObjects (ALIGN_JUSTIFY, (HANDLE) tlp->control [1], (HANDLE) ppt, NULL);

  ShowMultiTitleErrors (&ted);

  Show (w);
  Select (w);
  acd.accepted = FALSE;
  acd.cancelled = FALSE;
  /* local event loop: runs until one of the two buttons sets its flag */
  while (!acd.accepted && ! acd.cancelled)
  {
    ProcessExternalEvent ();
    Update ();
  }
  ProcessAnEvent ();
  Hide (w);
  if (acd.accepted)
  {
    /* take the final values from the dialog and apply them */
    ted.iatep = IDAndTitleEditFree (ted.iatep);
    ted.iatep = DialogToPointer (ted.multi_title);
    ApplyIDAndTitleEditToSeqEntryList (sap->seq_list, ted.iatep);
    UpdateSequenceAssistant (sap);
  }
  ted.iatep = IDAndTitleEditFree (ted.iatep);
  Remove (w);
}
23809 
23810 
23811 /* This section of code is for importing sequences from a file or creating new sequences.
23812  */
23813 
23814 NLM_EXTERN SeqEntryPtr
GetSequencesFromFileEx(CharPtr path,SeqEntryPtr current_list,Nlm_ImportSeqCallbackProc callback,Pointer callback_data)23815 GetSequencesFromFileEx
23816 (CharPtr path,
23817  SeqEntryPtr current_list,
23818  Nlm_ImportSeqCallbackProc callback,
23819  Pointer callback_data)
23820 {
23821   FILE         *fp;
23822   SeqEntryPtr  new_sep_list, new_sep, test_sep;
23823   Boolean      cancelled = FALSE;
23824   Boolean      chars_stripped = FALSE;
23825 
23826   fp = FileOpen (path, "r");
23827   if (fp == NULL)
23828   {
23829     Message (MSG_ERROR, "Unable to open %s", path);
23830     return NULL;
23831   }
23832 
23833   new_sep_list = ImportSequencesFromFileExEx (fp, NULL, TRUE, TRUE, NULL, NULL, &chars_stripped, FALSE, callback, callback_data);
23834   if (chars_stripped && new_sep_list != NULL)
23835   {
23836     if (ANS_CANCEL == Message (MSG_OKC, "Illegal characters will be stripped from your sequence data.  Do you want to continue?"))
23837     {
23838       new_sep_list = SeqEntryFree (new_sep_list);
23839       return NULL;
23840     }
23841   }
23842 
23843   if (new_sep_list == NULL)
23844   {
23845     Message (MSG_ERROR, "Unable to read sequences");
23846     return NULL;
23847   }
23848   else if (! RejectZeroLengthSequences (&new_sep_list))
23849   {
23850     return NULL;
23851   }
23852   else if (!CollectIDsAndTitles (new_sep_list, current_list, TRUE) || BadSeqIdLengths (new_sep_list))
23853   {
23854     new_sep = new_sep_list;
23855     while (new_sep != NULL)
23856     {
23857       test_sep = new_sep->next;
23858       SeqEntryFree (new_sep);
23859       new_sep = test_sep;
23860     }
23861     FileClose (fp);
23862     return NULL;
23863   }
23864 
23865   if (cancelled)
23866   {
23867     new_sep = new_sep_list;
23868     while (new_sep != NULL)
23869     {
23870       test_sep = new_sep->next;
23871       SeqEntryFree (new_sep);
23872       new_sep = test_sep;
23873     }
23874     FileClose (fp);
23875     return NULL;
23876   }
23877 
23878   FileClose (fp);
23879 
23880   return new_sep_list;
23881 }
23882 
GetSequencesFromFile(CharPtr path,SeqEntryPtr current_list)23883 NLM_EXTERN SeqEntryPtr GetSequencesFromFile (CharPtr path, SeqEntryPtr current_list)
23884 {
23885   return GetSequencesFromFileEx (path, current_list, NULL, NULL) ;
23886 }
23887 
23888 
/* Parses the contents of a text control as sequence data.
 *
 * t             text control holding the sequence text.
 * current_list  sequences already present (forwarded to
 *               GetSequencesFromFile).
 *
 * The text is written to a temporary file, which is then read with
 * GetSequencesFromFile and removed again.  Returns the resulting list, or
 * NULL if the temporary file cannot be created or the import fails.
 */
static SeqEntryPtr GetSequencesFromText (TexT t, SeqEntryPtr current_list)
{
  CharPtr      seq_str;
  Char         path [PATH_MAX];
  SeqEntryPtr  sep_list;
  FILE         *fp;

  seq_str = SaveStringFromText (t);

  TmpNam (path);
  fp = FileOpen (path, "w");
  if (fp == NULL)
  {
    /* do not leak the saved text when the temp file cannot be opened */
    seq_str = MemFree (seq_str);
    return NULL;
  }
  /* a NULL string must not be passed to "%s"; write an empty file instead */
  if (seq_str != NULL)
  {
    fprintf (fp, "%s", seq_str);
  }
  FileClose (fp);

  seq_str = MemFree (seq_str);

  sep_list = GetSequencesFromFile (path, current_list);
  FileRemove (path);
  return sep_list;
}
23910 
ReadLinesOfFile(CharPtr path)23911 static ValNodePtr ReadLinesOfFile (CharPtr path)
23912 {
23913   ReadBufferData    rbd;
23914   CharPtr           line;
23915   FILE              *f;
23916   ValNodePtr        line_list = NULL;
23917 
23918   f = FileOpen (path, "r");
23919   if (f == NULL)
23920   {
23921     Message (MSG_ERROR, "Unable to open %s", path);
23922     return NULL;
23923   }
23924 
23925   rbd.fp = f;
23926   rbd.current_data = NULL;
23927   line = AbstractReadFunction (&rbd);
23928   while (line != NULL)
23929   {
23930     ValNodeAddPointer (&line_list, 0, line);
23931     line = AbstractReadFunction (&rbd);
23932   }
23933   FileClose (f);
23934   return line_list;
23935 }
23936 
/* Appends a list of lines to the contents of a text control.
 *
 * line_list  ValNode list whose data pointers are the lines to append.
 * t          text control to update.
 *
 * The new contents are the old text, a newline, and then every line followed
 * by a newline.  Nothing happens if either argument is NULL.
 *
 * The result is assembled with a write cursor rather than repeated
 * StringCat calls, so the cost is linear in the total text length instead of
 * rescanning the growing buffer for every line.
 */
static void AddLineListToText (ValNodePtr line_list, TexT t)
{
  CharPtr              old_seqstr, new_seqstr;
  CharPtr              cursor, line;
  Int4                 len;
  ValNodePtr           vnp;

  if (line_list == NULL || t == NULL)
  {
    return;
  }

  old_seqstr = SaveStringFromText (t);
  /* old text + separating newline, then each line + newline (with slack) */
  len = StringLen (old_seqstr) + 1;
  for (vnp = line_list; vnp != NULL; vnp = vnp->next)
  {
    len += StringLen (vnp->data.ptrvalue) + 3;
  }

  new_seqstr = (CharPtr) MemNew (len * sizeof (Char));
  if (new_seqstr != NULL)
  {
    StringCpy (new_seqstr, old_seqstr);
    cursor = new_seqstr + StringLen (new_seqstr);
    *cursor = '\n';
    cursor++;
    for (vnp = line_list; vnp != NULL; vnp = vnp->next)
    {
      line = (CharPtr) vnp->data.ptrvalue;
      if (line != NULL)
      {
        StringCpy (cursor, line);
        cursor += StringLen (line);
      }
      *cursor = '\n';
      cursor++;
    }
    *cursor = 0;
    SetTitle (t, new_seqstr);
    new_seqstr = MemFree (new_seqstr);
  }
  old_seqstr = MemFree (old_seqstr);
}
23970 
/* Button callback for "Import Nucleotide FASTA".
 *
 * The button's object extra is the sequence text control.  The user is asked
 * for a file (the extension comes from the "FastaNucExtension" application
 * property); the lines of that file are appended to the text control and the
 * line list is then freed.  Nothing happens if the button has no text
 * control attached or the user does not choose a file.
 */
static void AddSequenceImportFasta (ButtoN b)
{
  TexT        target;
  Char        file_name [PATH_MAX];
  CharPtr     fasta_ext;
  ValNodePtr  lines;

  target = (TexT) GetObjectExtra (b);
  if (target != NULL) {
    /* get filename from user */
    fasta_ext = GetAppProperty ("FastaNucExtension");
    if (GetInputFileName (file_name, sizeof (file_name), fasta_ext, "TEXT")) {
      lines = ReadLinesOfFile (file_name);
      AddLineListToText (lines, target);
      ValNodeFreeData (lines);
    }
  }
}
23992 
/* Text callback for the sequence text control of the sequence editor.
 *
 * t  the text control; its object extra is the SequenceAssistantPtr.
 *
 * Detects text that looks like pasted FASTA: it must start with '>', the
 * '>' must be followed by more characters, and the first line must be
 * followed by further text.  In that case the sequences are counted (each
 * '>' found after the end of the previous defline line counts as one) and
 * the user is asked to confirm.  On "yes" the text control is cleared, the
 * text is handed to ImportSequenceAssistantEditData and the Sequence
 * Assistant is refreshed.
 */
static void CheckSequenceAssistantCharInput (TexT t)
{
  SequenceAssistantPtr sap;
  CharPtr              seq_str;
  CharPtr              found_bracket, found_next_bracket;
  CharPtr              found_ret;
  Int4                 num_seq = 0;
  MsgAnswer            ans;

  sap = (SequenceAssistantPtr) GetObjectExtra (t);
  if (sap == NULL) return;

  seq_str = SaveStringFromText (t);
  found_bracket = StringChr (seq_str, '>');
  /* only text that begins with a non-empty defline is of interest */
  if (found_bracket == NULL || *(found_bracket + 1) == 0
      || found_bracket != seq_str)
  {
    MemFree (seq_str);
    return;
  }
  /* the defline must be complete and followed by something */
  found_ret = StringChr (found_bracket, '\n');
  if (found_ret == NULL)
  {
    MemFree (seq_str);
    return;
  }
  if (*(found_ret + 1) == 0)
  {
    MemFree (seq_str);
    return;
  }

  while (found_bracket != NULL)
  {
    found_ret = StringChr (found_bracket, '\n');
    if (found_ret == NULL)
    {
      /* last line is a defline */
      found_next_bracket = NULL;
    }
    else
    {
      found_next_bracket = StringChr (found_ret, '>');
    }
    num_seq++;
    found_bracket = found_next_bracket;
  }
  /* cast so the argument type matches "%d" whatever Int4 is defined as */
  ans = Message (MSG_YN, "You are pasting in %d sequences, correct?", (int) num_seq);
  if (ans == ANS_YES)
  {
    /* pass the literal directly: other callers in this file free their
     * string after SetTitle, so a StringSave'd copy here was leaked
     */
    SetTitle (t, "");

    ImportSequenceAssistantEditData (sap, seq_str);
    UpdateSequenceAssistant (sap);
  }
  MemFree (seq_str);
}
24050 
GetSequenceString(SeqEntryPtr sep)24051 static CharPtr GetSequenceString (SeqEntryPtr sep)
24052 {
24053   CharPtr     seqbuf;
24054   BioseqPtr   bsp;
24055   SeqPortPtr  spp;
24056   Int2        ctr;
24057   Int4        read_len;
24058 
24059   if (sep == NULL || ! IS_Bioseq (sep))
24060   {
24061     return NULL;
24062   }
24063 
24064   bsp = (BioseqPtr) sep->data.ptrvalue;
24065   if (bsp == NULL || bsp->length < 1)
24066   {
24067     return NULL;
24068   }
24069 
24070   spp = SeqPortNew (bsp, 0, bsp->length - 1, Seq_strand_plus, Seq_code_iupacna);
24071   seqbuf = (CharPtr) MemNew ((bsp->length + 1) * sizeof (Char));
24072   if (seqbuf != NULL)
24073   {
24074     SeqPortSeek (spp, 0, SEEK_SET);
24075     read_len = 0;
24076     while (read_len < bsp->length)
24077     {
24078       ctr = SeqPortRead (spp, (UcharPtr)(seqbuf + read_len), INT2_MAX);
24079       seqbuf[ctr + read_len] = 0;
24080       read_len += INT2_MAX;
24081     }
24082   }
24083   spp = SeqPortFree (spp);
24084 
24085   return seqbuf;
24086 }
24087 
/* Re-wraps sequence text into lines of 80 characters.
 *
 * seq_text  heap-allocated text; ownership passes to this function.
 *
 * All whitespace is removed and a newline is inserted after every 80
 * retained characters.  Returns a newly allocated string and frees
 * seq_text.  If seq_text has no text it is freed and NULL is returned; if
 * the new buffer cannot be allocated, seq_text itself is returned unchanged.
 */
static CharPtr ReformatSequenceText (CharPtr seq_text)
{
  CharPtr src, dst;
  CharPtr new_text;
  Int4    num_lines;
  Int4    len;
  Int4    line_len = 80;
  Int4    counter;

  if (StringHasNoText (seq_text))
  {
    MemFree (seq_text);
    return NULL;
  }
  /* room for every character, one newline per full line, and the NUL */
  len = StringLen (seq_text);
  num_lines = len / line_len;
  len += num_lines + 2;
  new_text = (CharPtr) MemNew (len * sizeof (Char));
  if (new_text == NULL)
  {
    return seq_text;
  }
  dst = new_text;
  counter = 0;
  for (src = seq_text; *src != 0; src++)
  {
    /* <ctype.h> functions require an unsigned char value; a plain char
     * with the high bit set would otherwise be passed as a negative int
     */
    if (!isspace ((unsigned char) (*src)))
    {
      *dst = *src;
      dst++;
      counter++;
      if (counter == line_len)
      {
        *dst = '\n';
        dst++;
        counter = 0;
      }
    }
  }
  *dst = 0;
  MemFree (seq_text);
  return new_text;
}
24131 
/* Prepares sequence text in place: every non-alphabetic character is
 * dropped and the remaining letters are converted with TO_UPPER.
 *
 * seq_str  NUL-terminated string to modify; NULL is ignored.  The result is
 *          never longer than the input.
 */
static void FixStringForByteStore (CharPtr seq_str)
{
  CharPtr cp_src, cp_dst;

  if (seq_str == NULL)
  {
    return;
  }

  cp_src = seq_str;
  cp_dst = seq_str;
  while (*cp_src != 0)
  {
    /* <ctype.h> functions require an unsigned char value; a plain char
     * with the high bit set would otherwise be passed as a negative int
     */
    if (isalpha ((unsigned char) (*cp_src)))
    {
      *cp_dst = TO_UPPER (*cp_src);
      cp_dst++;
    }
    cp_src++;
  }
  *cp_dst = 0;
}
24154 
24155 static Boolean
IsDuplicateID(SeqEntryPtr seq_list,BioseqPtr edit_bsp,SeqIdPtr sip)24156 IsDuplicateID (SeqEntryPtr seq_list, BioseqPtr edit_bsp, SeqIdPtr sip)
24157 {
24158   SeqEntryPtr  sep;
24159   BioseqPtr    bsp;
24160   BioseqSetPtr bssp;
24161   SeqIdPtr     tmp_sip;
24162   Boolean      is_dup = FALSE;
24163 
24164   if (seq_list == NULL || sip == NULL)
24165   {
24166     return FALSE;
24167   }
24168 
24169   for (sep = seq_list; sep != NULL && !is_dup; sep = sep->next)
24170   {
24171     if (sep->data.ptrvalue == NULL)
24172     {
24173       continue;
24174     }
24175     if (IS_Bioseq (sep))
24176     {
24177       bsp = (BioseqPtr) sep->data.ptrvalue;
24178       if (bsp != edit_bsp)
24179       {
24180         for (tmp_sip = sip; tmp_sip != NULL; tmp_sip = tmp_sip->next)
24181         {
24182           if (SeqIdIn (tmp_sip, bsp->id))
24183           {
24184             is_dup = TRUE;
24185           }
24186         }
24187       }
24188     }
24189     else if (IS_Bioseq_set (sep))
24190     {
24191       bssp = (BioseqSetPtr) sep->data.ptrvalue;
24192       is_dup |= IsDuplicateID (bssp->seq_set, edit_bsp, sip);
24193     }
24194   }
24195   return is_dup;
24196 }
24197 
/* Menu callback for Edit > Paste in the sequence dialogs.
 *
 * The menu item's object extra is the sequence text control; its contents
 * are replaced with the clipboard text, and the clipboard copy is freed
 * afterwards.  Nothing happens if no text control is attached to the item.
 */
static void PasteSequenceAssistant (IteM i)
{
  TexT    target = (TexT) GetObjectExtra (i);
  CharPtr clip;

  if (target != NULL) {
    clip = ClipboardToString ();
    SetTitle (target, clip);
    MemFree (clip);
  }
}
24213 
/* Runs the modal "Add New Sequence" dialog.
 *
 * sap  Sequence Assistant data; nothing happens if it is NULL.
 *
 * The dialog offers a scrolling text box for the sequence text, a button
 * that appends the contents of a FASTA file to that box, and an Edit > Paste
 * menu item.  When the user presses Save, the text is parsed with
 * GetSequencesFromText; if that yields sequences they are passed through
 * TrimAmbiguousBases, linked onto sap->seq_list, the Sequence Assistant is
 * refreshed and the dialog closes.  If parsing yields nothing the dialog
 * stays open.  Cancel closes the dialog without changes.
 */
static void SequenceAssistantAddSequence (SequenceAssistantPtr sap)
{
  ModalAcceptCancelData acd;
  WindoW                w;
  GrouP                 h, sequence_grp, c;
  ButtoN                import_fasta_btn;
  TexT                  sequence_txt;
  Char                  str [200];
  ButtoN                b;
  SeqEntryPtr           new_sep;
  Boolean               done = FALSE;
  MenU                  edit_menu;
  IteM                  local_item;

  if (sap == NULL)
  {
    return;
  }

  w = MovableModalWindow (-20, -13, -10, -10, "Add New Sequence", NULL);
  h = HiddenGroup(w, -1, 0, NULL);
  SetGroupSpacing (h, 10, 10);

  import_fasta_btn = PushButton (h, "Import Nucleotide FASTA", AddSequenceImportFasta);

  sequence_grp = NormalGroup (h, 1, 0, "Sequence Characters", programFont, NULL);
  SetGroupSpacing (sequence_grp, 10, 10);
  StaticPrompt (sequence_grp, "Paste or type the nucleotide sequence.", 0,
                popupMenuHeight, programFont, 'l');
  sequence_txt = ScrollText (sequence_grp, 60, 10, programFont, FALSE, NULL);
  SetObjectExtra (sequence_txt, sap, NULL);

  /* AddSequenceImportFasta reads the target text control from the button */
  SetObjectExtra (import_fasta_btn, sequence_txt, NULL);

  sprintf (str, "You may only use the valid IUPAC characters (%s).",
                valid_iupac_characters);
  MultiLinePrompt (sequence_grp, str, 60 * stdCharWidth, programFont);

  c = HiddenGroup (h, 2, 0, NULL);
  b = PushButton(c, "Save", ModalAcceptButton);
  SetObjectExtra (b, &acd, NULL);
  b = PushButton(c, "Cancel", ModalCancelButton);
  SetObjectExtra (b, &acd, NULL);

  AlignObjects (ALIGN_CENTER, (HANDLE) import_fasta_btn,
                              (HANDLE) sequence_grp,
                              (HANDLE) c,
                              NULL);

  /* Edit Menu */
  edit_menu = PulldownMenu (w, "Edit");
  local_item = CommandItem (edit_menu, "Paste", PasteSequenceAssistant);
  /* PasteSequenceAssistant reads the target text control from the item */
  SetObjectExtra (local_item, sequence_txt, NULL);

  Show(w);
  Select (w);

  /* keep the dialog up until sequences were imported or the user cancels */
  while (!done)
  {
    acd.accepted = FALSE;
    acd.cancelled = FALSE;
    /* local event loop: runs until one of the two buttons sets its flag */
    while (!acd.accepted && ! acd.cancelled)
    {
      ProcessExternalEvent ();
      Update ();
    }
    ProcessAnEvent ();

    if (acd.accepted)
    {
      new_sep = GetSequencesFromText (sequence_txt, sap->seq_list);
      if (new_sep != NULL)
      {
        TrimAmbiguousBases (&new_sep);
        ValNodeLink (&(sap->seq_list), new_sep);
        UpdateSequenceAssistant (sap);
        done = TRUE;
      }
    }
    else if (acd.cancelled)
    {
      done = TRUE;
    }
    /* both flags are reset again at the top of the next iteration */
    acd.accepted = FALSE;
    acd.cancelled = TRUE;
  }

  Remove (w);
}
24303 
SequenceAssistantEditSequence(SequenceAssistantPtr sap,Int4 seq_num)24304 static void SequenceAssistantEditSequence (SequenceAssistantPtr sap, Int4 seq_num)
24305 {
24306   ModalAcceptCancelData acd;
24307   WindoW                w;
24308   GrouP                 h, g1 = NULL, g2, sequence_grp, err_grp, c;
24309   SeqEntryPtr           sep;
24310   Int4                  i;
24311   Char                  window_title [150];
24312   BioseqPtr             bsp = NULL;
24313   SeqIdPtr              sip;
24314   Char                  id_txt [128];
24315   TexT                  sequence_id_txt, sequence_txt;
24316   CharPtr               ttl = NULL;
24317   Char                  str [200];
24318   ButtoN                b;
24319   CharPtr               seqbuf;
24320   SeqEntryPtr           last_sep = NULL, new_sep;
24321   CharPtr               id_str, title_str, seq_str;
24322   ValNodePtr            err_list = NULL;
24323   Int2                  seq_num_for_error = seq_num;
24324   Boolean               done = FALSE;
24325   ByteStorePtr          new_bs = NULL;
24326   SeqIdPtr              new_sip = NULL;
24327   SeqDescrPtr           sdp;
24328   MenU                  edit_menu;
24329   IteM                  local_item;
24330   TitleEditData         ted;
24331 
24332   if (sap == NULL)
24333   {
24334     return;
24335   }
24336 
24337   for (i = 0, sep = sap->seq_list; i != seq_num && sep != NULL; i++, sep = sep->next)
24338   {
24339   }
24340 
24341   if (sep != NULL)
24342   {
24343     if (sep->data.ptrvalue == NULL)
24344     {
24345       return;
24346     }
24347     else if (IS_Bioseq_set (sep))
24348     {
24349       Message (MSG_ERROR, "Can't edit segmented set!");
24350       return;
24351     }
24352     else if (! IS_Bioseq (sep))
24353     {
24354       return;
24355     }
24356     bsp = sep->data.ptrvalue;
24357     if (bsp->repr == Seq_repr_delta)
24358     {
24359       Message (MSG_ERROR, "Can't edit gapped sequence!");
24360       return;
24361     }
24362     sip = SeqIdFindWorst (bsp->id);
24363     SeqIdWrite (sip, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
24364     sprintf (window_title, "Edit %s", id_txt);
24365   }
24366   else
24367   {
24368     sprintf (window_title, "Add new sequence");
24369   }
24370 
24371   w = MovableModalWindow (-20, -13, -10, -10, window_title, NULL);
24372   h = HiddenGroup(w, -1, 0, NULL);
24373   SetGroupSpacing (h, 10, 10);
24374 
24375   /* set up IDAndTitleEdit to use for single sequence */
24376   ted.iatep = IDAndTitleEditNew ();
24377   if (ted.iatep == NULL)
24378   {
24379     return;
24380   }
24381 
24382   ted.iatep->num_sequences = 1;
24383   ted.iatep->id_list = (CharPtr PNTR) MemNew (ted.iatep->num_sequences * sizeof (CharPtr));
24384   ted.iatep->title_list = (CharPtr PNTR) MemNew (ted.iatep->num_sequences * sizeof (CharPtr));
24385   if (bsp != NULL) {
24386     ted.iatep->id_list[0] = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
24387   }
24388 
24389   ttl = NULL;
24390   SeqEntryExplore (sep, (Pointer) (&ttl), FindFirstTitle);
24391   if (StringHasNoText (ttl))
24392   {
24393     ted.iatep->title_list [0] = StringSave ("");
24394   }
24395   else
24396   {
24397     ted.iatep->title_list [0] = StringSave (ttl);
24398   }
24399 
24400   err_grp = HiddenGroup (h, 0, 0, NULL);
24401   ted.bracket_dlg = ShowDifferenceDialog (err_grp, stdCharWidth * 63, stdLineHeight * 5);
24402   ted.badvalue_pnl = InvalidValuesPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
24403                                          ted.iatep, NULL, TRUE,
24404                                          UpdateTitleEditForColorizedPanel,
24405                                          &ted,
24406                                          NULL, NULL);
24407   ted.unrec_mod_pnl = UnrecognizedModifiersPanel (err_grp, stdCharWidth * 63, stdLineHeight * 5,
24408                                                   ted.iatep, NULL, TRUE,
24409                                                   UpdateTitleEditForColorizedPanel,
24410                                                   &ted,
24411                                                   NULL, NULL);
24412 
24413   /* users can enter titles and IDs for individual sequences */
24414   g2 = HiddenGroup (h, 2, 0, NULL);
24415   StaticPrompt (g2, "Sequence ID", 0, popupMenuHeight, programFont, 'l');
24416   sequence_id_txt = DialogText (g2, "", 20, NULL);
24417   if (bsp != NULL)
24418   {
24419     SetTitle (sequence_id_txt, id_txt);
24420   }
24421   StaticPrompt (g2, "Sequence Title", 0, popupMenuHeight, programFont, 'l');
24422   ted.title_txt = DialogText (g2, "", 20, OnTitleEditChange);
24423   SetObjectExtra (ted.title_txt, &ted, NULL);
24424   if (!StringHasNoText (ttl))
24425   {
24426     SetTitle (ted.title_txt, ttl);
24427   }
24428 
24429   sequence_grp = NormalGroup (h, 1, 0, "Sequence Characters", programFont, NULL);
24430   SetGroupSpacing (sequence_grp, 10, 10);
24431   StaticPrompt (sequence_grp, "Paste or type the nucleotide sequence.", 0,
24432                 popupMenuHeight, programFont, 'l');
24433   sequence_txt = ScrollText (sequence_grp, 60, 10, programFont, FALSE, CheckSequenceAssistantCharInput);
24434   SetObjectExtra (sequence_txt, sap, NULL);
24435   seqbuf = GetSequenceString (sep);
24436   seqbuf = ReformatSequenceText (seqbuf);
24437 
24438   SetTitle (sequence_txt, seqbuf);
24439   MemFree (seqbuf);
24440 
24441   sprintf (str, "You may only use the valid IUPAC characters (%s).",
24442                 valid_iupac_characters);
24443   MultiLinePrompt (sequence_grp, str, 60 * stdCharWidth, programFont);
24444 
24445   c = HiddenGroup (h, 2, 0, NULL);
24446   ted.accept_btn = PushButton(c, "Save", ModalAcceptButton);
24447   SetObjectExtra (ted.accept_btn, &acd, NULL);
24448   b = PushButton(c, "Cancel", ModalCancelButton);
24449   SetObjectExtra (b, &acd, NULL);
24450 
24451   AlignObjects (ALIGN_CENTER, (HANDLE) err_grp, (HANDLE) g2, (HANDLE) sequence_grp,
24452                                 (HANDLE) c, (HANDLE) g1, NULL);
24453 
24454   /* Edit Menu */
24455   edit_menu = PulldownMenu (w, "Edit");
24456   local_item = CommandItem (edit_menu, "Paste", PasteSequenceAssistant);
24457   SetObjectExtra (local_item, sequence_txt, NULL);
24458 
24459   ShowTitleEditErrors (&ted);
24460 
24461   Show(w);
24462   Select (w);
24463 
24464   while (!done)
24465   {
24466     acd.accepted = FALSE;
24467     acd.cancelled = FALSE;
24468     while (!acd.accepted && ! acd.cancelled)
24469     {
24470       ProcessExternalEvent ();
24471       Update ();
24472     }
24473     ProcessAnEvent ();
24474 
24475     if (acd.accepted)
24476     {
24477       id_str = SaveStringFromText (sequence_id_txt);
24478       id_str = ReformatLocalId (id_str);
24479       title_str = SaveStringFromText (ted.title_txt);
24480       seq_str = SaveStringFromText (sequence_txt);
24481       if (seq_num_for_error < 0)
24482       {
24483         seq_num_for_error = 0;
24484       }
24485 
24486       if (StringHasNoText (seq_str))
24487       {
24488         Message (MSG_ERROR, "You must supply sequence characters!");
24489       }
24490       else if (StringHasNoText (id_str))
24491       {
24492         Message (MSG_ERROR, "You must supply a sequence ID!");
24493       }
24494       else
24495       {
24496         done = TRUE;
24497         if (!SeqCharsOk (seq_str, seq_num_for_error, id_str, &err_list))
24498         {
24499           if (!ContinueWithErrorList (err_list, TRUE))
24500           {
24501             done = FALSE;
24502           }
24503         }
24504 
24505 
24506         if (done)
24507         {
24508           new_sip = MakeSeqID (id_str);
24509           if (IsDuplicateID (sap->seq_list, bsp, new_sip))
24510           {
24511             Message (MSG_ERROR,
24512                    "Sequence IDs must be unique within the record!  %s is a duplicate ID",
24513                    id_str);
24514             done = FALSE;
24515             new_sip = SeqIdFree (new_sip);
24516           }
24517         }
24518 
24519         if (done)
24520         {
24521           FixStringForByteStore (seq_str);
24522           new_bs = BSNew (1000);
24523           if (new_bs != NULL)
24524           {
24525             BSWrite (new_bs, (VoidPtr) seq_str, (Int4) StringLen (seq_str));
24526           }
24527 
24528           if (bsp == NULL)
24529           {
24530             /* create new Bioseq and add to list */
24531             bsp = BioseqNew ();
24532 
24533             new_sep = SeqEntryNew ();
24534             new_sep->choice = 1;
24535             new_sep->data.ptrvalue = bsp;
24536             for (sep = sap->seq_list;
24537                  sep != NULL;
24538                  sep = sep->next)
24539             {
24540               last_sep = sep;
24541             }
24542             if (last_sep == NULL)
24543             {
24544               sap->seq_list = new_sep;
24545             }
24546             else
24547             {
24548               last_sep->next = new_sep;
24549             }
24550           }
24551 
24552           /* replace ID */
24553           bsp->id = SeqIdFree (bsp->id);
24554           bsp->id = new_sip;
24555 
24556           /* replace title */
24557           for (sdp = bsp->descr;
24558                sdp != NULL && sdp->choice != Seq_descr_title;
24559                sdp = sdp->next)
24560           {
24561           }
24562           if (sdp == NULL)
24563           {
24564             sdp = SeqDescrNew (bsp->descr);
24565             if (bsp->descr == NULL)
24566             {
24567               bsp->descr = sdp;
24568             }
24569             sdp->choice = Seq_descr_title;
24570           }
24571           sdp->data.ptrvalue = MemFree (sdp->data.ptrvalue);
24572           sdp->data.ptrvalue = StringSave (title_str);
24573 
24574           /* replace sequence data */
24575           bsp->seq_data = SeqDataFree (bsp->seq_data, bsp->seq_data_type);
24576           bsp->repr = Seq_repr_raw;
24577           bsp->mol = Seq_mol_na;
24578           bsp->seq_data_type = Seq_code_iupacna;
24579           bsp->seq_data = (SeqDataPtr) new_bs;
24580           bsp->length = BSLen (new_bs);
24581 
24582           BioseqPack (bsp);
24583 
24584           UpdateSequenceAssistant (sap);
24585         }
24586       }
24587       id_str = MemFree (id_str);
24588       title_str = MemFree (title_str);
24589       seq_str = MemFree (seq_str);
24590       err_list = ValNodeFreeData (err_list);
24591     }
24592     else if (acd.cancelled)
24593     {
24594       done = TRUE;
24595     }
24596     acd.accepted = FALSE;
24597     acd.cancelled = TRUE;
24598   }
24599 
24600   ted.iatep = IDAndTitleEditFree (ted.iatep);
24601   Remove (w);
24602 }
24603 
24604 
/* Flag the sequence assistant as finished and accepted
 * (done set, cancelled cleared).  A NULL sap is ignored.
 */
static void SequenceAssistantOk (SequenceAssistantPtr sap)
{
  if (sap != NULL)
  {
    sap->done = TRUE;
    sap->cancelled = FALSE;
  }
}
24611 
/* Button callback: accept using the assistant data stored as the
 * button's object extra.
 */
static void SequenceAssistantOkButton (ButtoN b)
{
  SequenceAssistantOk ((SequenceAssistantPtr) GetObjectExtra (b));
}
24619 
/* Menu item callback: accept using the assistant data stored as the
 * item's object extra.
 */
static void SequenceAssistantOkItem (IteM i)
{
  SequenceAssistantOk ((SequenceAssistantPtr) GetObjectExtra (i));
}
24627 
24628 
/* Ask the user to confirm cancelling; only on a "yes" answer is the
 * assistant flagged as finished and cancelled.  A NULL sap is ignored.
 */
static void SequenceAssistantCancel (SequenceAssistantPtr sap)
{
  MsgAnswer reply;

  if (sap == NULL) return;

  reply = Message (MSG_YN,
      "Are you sure you want to cancel (and lose all your editing)?");
  if (reply == ANS_YES)
  {
    sap->cancelled = TRUE;
    sap->done = TRUE;
  }
}
24641 
/* Button callback: request cancellation using the assistant data stored
 * as the button's object extra.
 */
static void SequenceAssistantCancelButton (ButtoN b)
{
  SequenceAssistantCancel ((SequenceAssistantPtr) GetObjectExtra (b));
}
24649 
/* Menu item callback: request cancellation using the assistant data
 * stored as the item's object extra.
 */
static void SequenceAssistantCancelItem (IteM i)
{
  SequenceAssistantCancel ((SequenceAssistantPtr) GetObjectExtra (i));
}
24657 
/* After a yes/no confirmation, free every entry in sap->seq_list,
 * leave the list empty and refresh the assistant display.
 * Does nothing when sap is NULL or the list is already empty.
 */
static void DeleteAllSequences (SequenceAssistantPtr sap)
{
  SeqEntryPtr doomed;

  if (sap == NULL || sap->seq_list == NULL)
  {
    return;
  }
  if (Message (MSG_YN, "Are you sure you want to delete all of your sequences?") != ANS_YES)
  {
    return;
  }

  /* pop entries off the head one at a time; each is detached before freeing */
  while (sap->seq_list != NULL)
  {
    doomed = sap->seq_list;
    sap->seq_list = doomed->next;
    doomed->next = NULL;
    SeqEntryFree (doomed);
  }
  UpdateSequenceAssistant (sap);
}
24681 
/* Button callback: delete all sequences held by the assistant data
 * stored as the button's object extra.
 */
static void DeleteAllSequencesButton (ButtoN b)
{
  DeleteAllSequences ((SequenceAssistantPtr) GetObjectExtra (b));
}
24689 
/* Menu item callback: delete all sequences held by the assistant data
 * stored as the item's object extra.
 */
static void DeleteAllSequencesItem (IteM i)
{
  DeleteAllSequences ((SequenceAssistantPtr) GetObjectExtra (i));
}
24697 
/* Click handler for the sequence selector document.
 * Maps the click point to a document item; when it lands on a valid
 * item/row, that item becomes sap->sequence_row, the previously and
 * newly selected items are invalidated so they redraw, and the Edit
 * and Delete buttons are enabled.
 */
static void SelectSequenceDoc (DoC d, PoinT pt)
{
  SequenceAssistantPtr sap;
  Int2                 item, row, old_item;

  sap = (SequenceAssistantPtr) GetObjectExtra (d);
  if (sap == NULL) return;

  MapDocPoint (d, pt, &item, &row, NULL, NULL);
  if (item <= 0 || row <= 0) return;

  old_item = sap->sequence_row;
  sap->sequence_row = item;

  if (old_item != item)
  {
    /* -1 means nothing was selected before, so there is nothing to redraw */
    if (old_item != -1)
    {
      InvalDocRows (d, old_item, 1, 1);
    }
    InvalDocRows (d, item, 1, 1);
  }

  Enable (sap->edit_btn);
  Enable (sap->delete_btn);
}
24722 
SequenceHighlight(DoC doc,Int2 item,Int2 row,Int2 col)24723 static Boolean SequenceHighlight (DoC doc, Int2 item, Int2 row, Int2 col)
24724 {
24725   SequenceAssistantPtr sap;
24726 
24727   sap = (SequenceAssistantPtr) GetObjectExtra (doc);
24728   if (sap == NULL) return FALSE;
24729 
24730   if (item == sap->sequence_row) return TRUE;
24731   return FALSE;
24732 }
24733 
/* NULL-safe wrapper around SequenceAssistantAddSequence. */
static void AddSequence (SequenceAssistantPtr sap)
{
  if (sap != NULL)
  {
    SequenceAssistantAddSequence (sap);
  }
}
24739 
/* Button callback: add a sequence using the assistant data stored as
 * the button's object extra.
 */
static void AddSequenceButton (ButtoN b)
{
  AddSequence ((SequenceAssistantPtr) GetObjectExtra (b));
}
24748 
/* Menu item callback: add a sequence using the assistant data stored as
 * the item's object extra.
 */
static void AddSequenceItem (IteM i)
{
  AddSequence ((SequenceAssistantPtr) GetObjectExtra (i));
}
24756 
ConfirmSequenceDelete(SeqEntryPtr sep)24757 static Boolean ConfirmSequenceDelete (SeqEntryPtr sep)
24758 {
24759   MsgAnswer            ans;
24760   BioseqPtr            bsp;
24761   SeqIdPtr             sip;
24762   Char                 tmp[128];
24763   SeqDescrPtr          sdp;
24764 
24765   if (sep == NULL)
24766   {
24767     return FALSE;
24768   }
24769 
24770   bsp = (BioseqPtr) sep->data.ptrvalue;
24771   sdp = bsp->descr;
24772   while (sdp != NULL && sdp->choice != Seq_descr_title)
24773   {
24774     sdp = sdp->next;
24775   }
24776   sip = SeqIdFindWorst (bsp->id);
24777   SeqIdWrite (sip, tmp, PRINTID_REPORT, sizeof (tmp));
24778   if (sdp != NULL && ! StringHasNoText (sdp->data.ptrvalue))
24779   {
24780     ans = Message (MSG_YN, "Are you sure you want to delete %s (%s)?", tmp, sdp->data.ptrvalue);
24781   }
24782   else
24783   {
24784     ans = Message (MSG_YN, "Are you sure you want to delete %s?", tmp);
24785   }
24786   if (ans == ANS_YES)
24787   {
24788     return TRUE;
24789   }
24790   else
24791   {
24792     return FALSE;
24793   }
24794 }
24795 
/* Delete the entry at position sap->sequence_row (1-based) from
 * sap->seq_list after the user confirms, then refresh the display.
 * If the user declines, or the row is past the end of the list
 * (ConfirmSequenceDelete returns FALSE for a NULL entry), nothing
 * changes and the display is not refreshed.
 */
static void DeleteSequence (SequenceAssistantPtr sap)
{
  SeqEntryPtr          target, before = NULL;
  Int2                 pos = 1;

  if (sap == NULL || sap->seq_list == NULL) return;

  /* walk to the selected row, remembering the entry just before it */
  target = sap->seq_list;
  while (target != NULL && pos < sap->sequence_row)
  {
    before = target;
    target = target->next;
    pos++;
  }

  if (!ConfirmSequenceDelete (target))
  {
    return;
  }

  if (target != NULL)
  {
    /* unlink from the head or from the middle of the list */
    if (before == NULL)
    {
      sap->seq_list = target->next;
    }
    else
    {
      before->next = target->next;
    }
    target->next = NULL;
    target = SeqEntryFree (target);
  }

  /* move the selection up one row unless it is already on the first */
  if (sap->sequence_row > 1)
  {
    sap->sequence_row --;
  }
  UpdateSequenceAssistant (sap);
}
24840 
/* Button callback: delete the selected sequence using the assistant
 * data stored as the button's object extra.
 */
static void DeleteSequenceButton (ButtoN b)
{
  DeleteSequence ((SequenceAssistantPtr) GetObjectExtra (b));
}
24848 
/* Menu item callback: delete the selected sequence using the assistant
 * data stored as the item's object extra.
 */
static void DeleteSequenceItem (IteM i)
{
  DeleteSequence ((SequenceAssistantPtr) GetObjectExtra (i));
}
24856 
/* Menu item callback: prompt for a nucleotide FASTA file, read its
 * sequences and append them to sap->seq_list when their type is
 * acceptable for the current package; then refresh the display.
 *
 * Accepted sequences are passed through TrimAmbiguousBases before being
 * linked in, so the menu item behaves the same as the
 * "Import ... FASTA" button (ImportFastaFileButton).
 *
 * A rejected list is released entry by entry, detaching each node
 * before freeing it (the same approach DeleteAllSequences uses), so
 * that no entry after the first is left allocated.
 */
static void ImportFastaFileItem (IteM i)
{
  SequenceAssistantPtr sap;
  CharPtr              extension;
  Char                 path [PATH_MAX];
  SeqEntryPtr          new_sep_list;
  SeqEntryPtr          next_sep;

  sap = (SequenceAssistantPtr) GetObjectExtra (i);
  if (sap == NULL)
  {
    return;
  }

  extension = GetAppProperty ("FastaNucExtension");
  if (! GetInputFileName (path, sizeof (path), extension, "TEXT")) return;

  new_sep_list = GetSequencesFromFile (path, sap->seq_list);
  if (new_sep_list != NULL
      && ImportedSequenceTypeOk (new_sep_list, sap->seqPackage))
  {
    TrimAmbiguousBases (&new_sep_list);
    ValNodeLink (&sap->seq_list, new_sep_list);
  }
  else
  {
    while (new_sep_list != NULL)
    {
      next_sep = new_sep_list->next;
      new_sep_list->next = NULL;
      SeqEntryFree (new_sep_list);
      new_sep_list = next_sep;
    }
  }

  UpdateSequenceAssistant (sap);
}
24886 
/* Button callback: prompt for a nucleotide FASTA file, read its
 * sequences and, when their type is acceptable for the current package,
 * trim ambiguous bases and append them to sap->seq_list; then refresh
 * the display.
 *
 * A rejected list is released entry by entry, detaching each node
 * before freeing it (the same approach DeleteAllSequences uses), so
 * that no entry after the first is left allocated.
 */
static void ImportFastaFileButton (ButtoN b)
{
  SequenceAssistantPtr sap;
  CharPtr              extension;
  Char                 path [PATH_MAX];
  SeqEntryPtr          new_sep_list;
  SeqEntryPtr          next_sep;

  sap = (SequenceAssistantPtr) GetObjectExtra (b);
  if (sap == NULL)
  {
    return;
  }

  extension = GetAppProperty ("FastaNucExtension");
  if (! GetInputFileName (path, sizeof (path), extension, "TEXT")) return;

  new_sep_list = GetSequencesFromFile (path, sap->seq_list);
  if (new_sep_list != NULL
      && ImportedSequenceTypeOk (new_sep_list, sap->seqPackage))
  {
    TrimAmbiguousBases(&new_sep_list);
    ValNodeLink (&sap->seq_list, new_sep_list);
  }
  else
  {
    while (new_sep_list != NULL)
    {
      next_sep = new_sep_list->next;
      new_sep_list->next = NULL;
      SeqEntryFree (new_sep_list);
      new_sep_list = next_sep;
    }
  }

  UpdateSequenceAssistant (sap);
}
24917 
24918 static Boolean
SequenceAssistantValidateSegments(SeqEntryPtr seq_list,ValNodePtr PNTR err_list)24919 SequenceAssistantValidateSegments
24920 (SeqEntryPtr seq_list,
24921  ValNodePtr PNTR err_list)
24922 {
24923   SeqEntryPtr sep;
24924   Boolean     all_one_char = TRUE;
24925   CharPtr     seqbuf;
24926   Char        first_seg_char = 0;
24927   Char        first_seg_char_this = 0;
24928   Boolean     rval = TRUE;
24929   Int4        total_len = 0;
24930   Boolean     non_N = FALSE;
24931 
24932   if (seq_list == NULL)
24933   {
24934     return TRUE;
24935   }
24936 
24937   for (sep = seq_list; sep != NULL; sep = sep->next)
24938   {
24939     seqbuf = GetSequenceString (sep);
24940     if (!StringHasNoText (seqbuf))
24941     {
24942       if (all_one_char)
24943       {
24944         if (IsSequenceAllOneCharacter(seqbuf))
24945         {
24946           if (first_seg_char == 0)
24947           {
24948             first_seg_char = seqbuf[StringSpn(seqbuf, " \t\n")];
24949           }
24950           else
24951           {
24952             first_seg_char_this = seqbuf[StringSpn(seqbuf, " \t\n")];
24953             if (first_seg_char_this != first_seg_char)
24954             {
24955               all_one_char = FALSE;
24956             }
24957           }
24958         }
24959         else
24960         {
24961           all_one_char = FALSE;
24962         }
24963       }
24964 
24965       total_len += CountSeqChars (seqbuf);
24966       non_N |= ! IsSequenceAllNs (seqbuf);
24967     }
24968     seqbuf = MemFree (seqbuf);
24969   }
24970   if (all_one_char && err_list != NULL)
24971   {
24972     ValNodeAddPointer (err_list, CREATE_FASTA_WARNING,
24973              StringSave ("Your segmented set sequences all consist entirely of the same character."));
24974   }
24975   if (!non_N)
24976   {
24977     if (err_list != NULL)
24978     {
24979       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED,
24980                          StringSave ("Your segmented set consists entirely of Ns. "
24981                          "This is not a valid sequence.  Please edit."));
24982 
24983     }
24984     rval = FALSE;
24985   }
24986   if (total_len < 50)
24987   {
24988     if (err_list != NULL)
24989     {
24990       /* Note - this is a required error because only small molecules can have
24991        * less than 50 base pairs, and the small molecules should never have
24992        * been sequenced in segments.
24993        */
24994       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED, StringSave (
24995                      "You have fewer than 50 total base pairs in this "
24996                      "segmented set. GenBank will not accept segmented sets with "
24997                      "fewer than 50 base pairs. Please edit your sequence."));
24998     }
24999     rval = FALSE;
25000   }
25001   return rval;
25002 }
25003 
25004 static Boolean
SequenceAssistantValidateOneBioseqContentAndLength(BioseqPtr bsp,ValNodePtr PNTR all_N_list,ValNodePtr PNTR all_one_char_list,ValNodePtr PNTR too_short_list)25005 SequenceAssistantValidateOneBioseqContentAndLength
25006 (BioseqPtr       bsp,
25007  ValNodePtr PNTR all_N_list,
25008  ValNodePtr PNTR all_one_char_list,
25009  ValNodePtr PNTR too_short_list)
25010 {
25011   CharPtr     seqbuf;
25012   Boolean     rval = TRUE;
25013   Int2        seq_num = 0;
25014   CharPtr     id_str;
25015   SeqEntryPtr sep;
25016 
25017   if (bsp == NULL || too_short_list == NULL || all_N_list == NULL || all_one_char_list == NULL)
25018   {
25019     return FALSE;
25020   }
25021 
25022   sep = SeqMgrGetSeqEntryForData (bsp);
25023 
25024   seqbuf = GetSequenceString (sep);
25025 
25026   id_str = SeqIdWholeLabel (SeqIdFindWorst (bsp->id), PRINTID_REPORT);
25027 
25028   if (CountSeqChars (seqbuf) < 50)
25029   {
25030     ValNodeAddPointer (too_short_list, seq_num, StringSave (id_str));
25031     rval = FALSE;
25032   }
25033   if (IsSequenceAllNs (seqbuf))
25034   {
25035     ValNodeAddPointer (all_N_list, seq_num, id_str);
25036     rval = FALSE;
25037   }
25038   else if (IsSequenceAllOneCharacter(seqbuf))
25039   {
25040     ValNodeAddPointer (all_one_char_list, seq_num, id_str);
25041     rval = FALSE;
25042   }
25043   else
25044   {
25045     id_str = MemFree (id_str);
25046   }
25047   seqbuf = MemFree (seqbuf);
25048   return rval;
25049 }
25050 
25051 static Boolean
AddBioseqErrors(ValNodePtr all_N_list,ValNodePtr all_one_char_list,ValNodePtr too_short_list,ValNodePtr PNTR err_list)25052 AddBioseqErrors
25053 (ValNodePtr all_N_list,
25054  ValNodePtr all_one_char_list,
25055  ValNodePtr too_short_list,
25056  ValNodePtr PNTR err_list)
25057 {
25058   CharPtr err_msg = NULL;
25059   Boolean rval = TRUE;
25060 
25061   if (all_N_list != NULL)
25062   {
25063     if (err_list != NULL)
25064     {
25065       if (all_N_list->next == NULL)
25066       {
25067         err_msg = CreateListMessage ("In sequence",
25068                      " there are only Ns. This not a valid sequence, please edit it.",
25069                      all_N_list);
25070       }
25071       else
25072       {
25073         err_msg = CreateListMessage ("In sequence",
25074                    " there are only Ns. These are not valid sequences, please edit them.",
25075                      all_N_list);
25076       }
25077       ValNodeAddPointer (err_list, CREATE_FASTA_REQUIRED, err_msg);
25078     }
25079     rval = FALSE;
25080   }
25081   if (all_one_char_list != NULL)
25082   {
25083     if (err_list != NULL)
25084     {
25085       if (all_one_char_list->next == NULL)
25086       {
25087         err_msg = CreateListMessage ("In sequence",
25088                    " one character is repeated for the entire sequence. This is not a valid sequence, please edit it.",
25089                    all_one_char_list);
25090       }
25091       else
25092       {
25093         err_msg = CreateListMessage ("In sequence",
25094                      " one character is repeated for the entire sequence. These are not valid sequences, please edit them.",
25095                      all_one_char_list);
25096       }
25097       ValNodeAddPointer (err_list, CREATE_FASTA_WARNING, err_msg);
25098     }
25099     rval = FALSE;
25100   }
25101   if (too_short_list != NULL && err_list != NULL)
25102   {
25103     if (too_short_list->next == NULL)
25104     {
25105       err_msg = CreateListMessage ("Sequence", " is shorter than 50 base pairs. "
25106                      "GenBank will not accept sequences with "
25107                      "fewer than 50 base pairs. Please edit your sequence or "
25108                      "make sure that your comment explains why your sequence "
25109                      "is so short.",
25110                      too_short_list);
25111     }
25112     else
25113     {
25114       err_msg = CreateListMessage ("Sequence", " are shorter than 50 base pairs. "
25115                      "GenBank will not accept sequences with "
25116                      "fewer than 50 base pairs. Please edit your sequences or "
25117                      "make sure that your comments explain why your sequences "
25118                      "are so short.",
25119                      too_short_list);
25120     }
25121     ValNodeAddPointer (err_list, CREATE_FASTA_WARNING, err_msg);
25122   }
25123   return rval;
25124 }
25125 
25126 /* This function will add to a list of errors.  Any errors in the list with a choice of 0
25127  * cause the sequence to be unacceptable.  Any errors in the list with a choice of 1 are
25128  * a yes-no question - yes means that the user wants to go back and correct the problems.
25129  * no means the user would like to continue anyway.
25130  */
25131 static Boolean
SequenceAssistantValidateContentAndLength(SeqEntryPtr seq_list,ValNodePtr PNTR all_N_list,ValNodePtr PNTR all_one_char_list,ValNodePtr PNTR too_short_list,ValNodePtr PNTR err_list)25132 SequenceAssistantValidateContentAndLength
25133 (SeqEntryPtr     seq_list,
25134  ValNodePtr PNTR all_N_list,
25135  ValNodePtr PNTR all_one_char_list,
25136  ValNodePtr PNTR too_short_list,
25137  ValNodePtr PNTR err_list)
25138 {
25139   Boolean           rval = TRUE;
25140   BioseqSetPtr      bssp;
25141 
25142   if (seq_list == NULL)
25143   {
25144     return TRUE;
25145   }
25146 
25147   if (IS_Bioseq_set (seq_list))
25148   {
25149     bssp = (BioseqSetPtr) seq_list->data.ptrvalue;
25150     if (bssp->_class == BioseqseqSet_class_parts)
25151     {
25152 /*      rval = SequenceAssistantValidateSegments (bssp->seq_set, err_list); */
25153     }
25154     else
25155     {
25156       rval = SequenceAssistantValidateContentAndLength (bssp->seq_set,
25157                                                         all_N_list,
25158                                                         all_one_char_list,
25159                                                         too_short_list,
25160                                                         err_list);
25161     }
25162   }
25163   else if (IS_Bioseq (seq_list))
25164   {
25165     rval = SequenceAssistantValidateOneBioseqContentAndLength (seq_list->data.ptrvalue,
25166                                                                all_N_list,
25167                                                                all_one_char_list,
25168                                                                too_short_list);
25169   }
25170   rval &= SequenceAssistantValidateContentAndLength (seq_list->next,
25171                                                             all_N_list,
25172                                                             all_one_char_list,
25173                                                             too_short_list,
25174                                                            err_list);
25175   return rval;
25176 }
25177 
SequenceAssistantValidate(SeqEntryPtr seq_list)25178 static Boolean SequenceAssistantValidate (SeqEntryPtr seq_list)
25179 {
25180   ValNodePtr err_list = NULL;
25181   Boolean    rval;
25182   ValNodePtr all_N_list = NULL;
25183   ValNodePtr all_one_char_list = NULL;
25184   ValNodePtr too_short_list = NULL;
25185 
25186   rval = SequenceAssistantValidateContentAndLength (seq_list,
25187                                                     &all_N_list,
25188                                                     &all_one_char_list,
25189                                                     &too_short_list,
25190                                                     &err_list);
25191 
25192   rval &= AddBioseqErrors (all_N_list, all_one_char_list, too_short_list, &err_list);
25193 
25194   all_N_list = ValNodeFreeData (all_N_list);
25195   too_short_list = ValNodeFreeData (too_short_list);
25196   all_one_char_list = ValNodeFreeData (all_one_char_list);
25197 
25198   if (err_list != NULL)
25199   {
25200     rval = ContinueWithErrorList (err_list, TRUE);
25201   }
25202   err_list = ValNodeFreeData (err_list);
25203   return rval;
25204 }
25205 
/* Double-click handler for the sequence table.
 *
 * Clicks in columns below 2 are ignored.  A click in row 0 edits the
 * whole column; a click in any other row edits just that sequence
 * (row y maps to sequence index y - 1).  The molecule and topology
 * columns edit the "moltype" / "topology" values; every other column
 * edits titles.  The assistant display is refreshed afterwards.
 * header_text is not used.
 */
static void SequenceDblClick (PoinT cell_coord, CharPtr header_text, CharPtr cell_text, Pointer userdata)
{
  SequenceAssistantPtr sap;
  CharPtr              mod_name = NULL;

  sap = (SequenceAssistantPtr) userdata;
  if (sap == NULL || cell_coord.x < 2)
  {
    return;
  }

  /* decide which modifier, if any, the clicked column stands for */
  if (cell_coord.x == SEQUENCE_ASSISTANT_MOLECULE_COLUMN)
  {
    mod_name = "moltype";
  }
  else if (cell_coord.x == SEQUENCE_ASSISTANT_TOPOLOGY_COLUMN)
  {
    mod_name = "topology";
  }

  if (cell_coord.y == 0)
  {
    /* header row: edit the value for all sequences */
    if (mod_name != NULL)
    {
      EditOrgModColumn (mod_name, NULL, sap->seq_list, sap->seqPackage);
    }
    else
    {
      EditSequenceTitleColumns (sap);
    }
  }
  else
  {
    /* data row: edit the value for one sequence */
    if (mod_name != NULL)
    {
      ApplyOrgModColumnOrCell (mod_name, cell_text, cell_coord.y - 1, NULL, sap->seq_list,
                               NULL, 0, sap->seqPackage);
    }
    else
    {
      EditOneSequenceTitle (sap, cell_coord.y - 1);
    }
  }

  UpdateSequenceAssistant (sap);
}
25260 
/* Button callback: open the editor for the currently selected sequence.
 * sap->sequence_row is 1-based, so one is subtracted before it is
 * passed to SequenceAssistantEditSequence.
 */
static void SequenceAssistantEditButton (ButtoN b)
{
  SequenceAssistantPtr sap = (SequenceAssistantPtr) GetObjectExtra (b);

  if (sap != NULL)
  {
    SequenceAssistantEditSequence (sap, sap->sequence_row - 1);
  }
}
25273 
SequenceAssistant(ButtoN b)25274 static void SequenceAssistant (ButtoN b)
25275 {
25276   SequencesFormPtr      sqfp;
25277   SequenceAssistantData sad;
25278   WindoW                w;
25279   MenU                  m;
25280   IteM                  i;
25281   GrouP                 h, k, edit_grp, selector_grp, c;
25282   FastaPagePtr          fpp;
25283   RecT                  r;
25284   SeqEntryPtr           sep, next_sep;
25285   ButtoN                add_btn;
25286   Int4                  doc_width;
25287 
25288   sqfp = (SequencesFormPtr) GetObjectExtra (b);
25289   if (sqfp == NULL)
25290   {
25291     return;
25292   }
25293 
25294   fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
25295   if (fpp == NULL)
25296   {
25297     return;
25298   }
25299 
25300   sad.done = FALSE;
25301   sad.cancelled = FALSE;
25302 
25303   sad.seq_list = CopySeqEntryList (fpp->list);
25304   sad.sequence_row = -1;
25305 
25306   sad.seqPackage = sqfp->seqPackage;
25307 
25308   w = MovableModalWindow (-20, -13, -10, -10, "Specify Sequences", NULL);
25309   /* add menus */
25310   m = PulldownMenu (w, "File");
25311   i = CommandItem (m, "Import FASTA file", ImportFastaFileItem);
25312   SetObjectExtra (i, &sad, NULL);
25313   i = CommandItem (m, "Done", SequenceAssistantOkItem);
25314   SetObjectExtra (i, &sad, NULL);
25315   i = CommandItem (m, "Cancel", SequenceAssistantCancelItem);
25316   SetObjectExtra (i, &sad, NULL);
25317 
25318   /* edit menu */
25319   m = PulldownMenu (w, "Edit");
25320   i = CommandItem (m, "Add Sequence", AddSequenceItem);
25321   SetObjectExtra (i, &sad, NULL);
25322   i = CommandItem (m, "Delete Sequence", DeleteSequenceItem);
25323   SetObjectExtra (i, &sad, NULL);
25324   i = CommandItem (m, "Delete All Sequences", DeleteAllSequencesItem);
25325   SetObjectExtra (i, &sad, NULL);
25326 
25327   h = HiddenGroup(w, -1, 0, NULL);
25328   SetGroupSpacing (h, 10, 10);
25329 
25330   k = HiddenGroup (h, 2, 0, NULL);
25331   sad.import_btn = PushButton (k, "Import Additional Nucleotide FASTA", ImportFastaFileButton);
25332   SetObjectExtra (sad.import_btn, &sad, NULL);
25333 
25334   add_btn = PushButton (k, "Add New Sequence", AddSequenceButton);
25335   SetObjectExtra (add_btn, &sad, NULL);
25336 
25337   edit_grp = HiddenGroup (h, 2, 0, NULL);
25338   sad.sequence_selector = DocumentPanel (edit_grp, stdCharWidth * 10, stdLineHeight * 6);
25339   SetObjectExtra (sad.sequence_selector, &sad, NULL);
25340   SetDocProcs (sad.sequence_selector, SelectSequenceDoc, NULL, NULL, NULL);
25341   SetDocShade (sad.sequence_selector, NULL, NULL, SequenceHighlight, NULL);
25342   selector_grp = HiddenGroup (edit_grp, 0, 4, NULL);
25343   sad.edit_btn = PushButton (selector_grp, "Edit Sequence", SequenceAssistantEditButton);
25344   SetObjectExtra (sad.edit_btn, &sad, NULL);
25345   StaticPrompt (selector_grp, "", 0, popupMenuHeight, programFont, 'l');
25346   sad.delete_btn = PushButton (selector_grp, "Delete Sequence", DeleteSequenceButton);
25347   SetObjectExtra (sad.delete_btn, &sad, NULL);
25348   sad.delete_all_btn = PushButton (selector_grp, "Delete All Sequences", DeleteAllSequencesButton);
25349   SetObjectExtra (sad.delete_all_btn, &sad, NULL);
25350 
25351   sad.summary_dlg = FastaSummaryDialog (h);
25352 
25353   doc_width = GetStandardTableDisplayDialogWidth (sqfp);
25354 
25355   sad.sequence_table = TableDisplayDialog (h, doc_width, stdLineHeight * 8, 1, 2,
25356                                        SequenceDblClick, &sad,
25357                                        NULL, NULL);
25358 
25359   c = HiddenGroup (h, 2, 0, NULL);
25360   b = PushButton(c, "Done", SequenceAssistantOkButton);
25361   SetObjectExtra (b, &sad, NULL);
25362   b = PushButton(c, "Cancel", SequenceAssistantCancelButton);
25363   SetObjectExtra (b, &sad, NULL);
25364 
25365   AlignObjects (ALIGN_CENTER, (HANDLE) k,
25366                               (HANDLE) edit_grp,
25367                               (HANDLE) sad.summary_dlg,
25368                               (HANDLE) sad.sequence_table,
25369                               (HANDLE) c, NULL);
25370 
25371   UpdateSequenceAssistant (&sad);
25372 
25373   Show(w);
25374   Select (w);
25375 
25376   while (!sad.done)
25377   {
25378     while (!sad.done)
25379     {
25380       ProcessExternalEvent ();
25381       Update ();
25382     }
25383     ProcessAnEvent ();
25384     if (!sad.cancelled)
25385     {
25386 
25387       if (SequenceAssistantValidate (sad.seq_list))
25388       {
25389         /* check for number of sequences */
25390         if (sad.seq_list != NULL && sad.seq_list->next != NULL
25391             && PackageTypeIsSingle (sqfp->seqPackage))
25392         {
25393           if (Message (MSG_YN, "You are importing multiple sequences - did you intend to create a batch submission?") == ANS_YES)
25394           {
25395             sqfp->seqPackage = SEQ_PKG_GENBANK;
25396             fpp->single = FALSE;
25397             SafeHide (fpp->singleIdGrp);
25398           }
25399           else
25400           {
25401             sad.done = FALSE;
25402             sad.cancelled = FALSE;
25403           }
25404         }
25405       }
25406       else
25407       {
25408         sad.done = FALSE;
25409         sad.cancelled = FALSE;
25410       }
25411 
25412       if (sad.done)
25413       {
25414         ResetFastaPage (fpp);
25415         Reset (fpp->doc);
25416         if (sad.seq_list == NULL)
25417         {
25418           SafeHide (fpp->have_seq_instr_grp);
25419           SafeShow (fpp->instructions);
25420           Update ();
25421           Enable (fpp->import_btn);
25422           SetTitle (fpp->import_btn, "Import Nucleotide FASTA");
25423           Disable (fpp->clear_btn);
25424           ClearOrganismModifiers (sqfp);
25425           Disable (sqfp->molecule_btn);
25426           Disable (sqfp->topology_btn);
25427           Disable (sqfp->vecscreen_btn);
25428         }
25429         else
25430         {
25431           /* these statements make sure the column width is large enough */
25432           ObjectRect (fpp->doc, &r);
25433           InsetRect (&r, 4, 4);
25434           faColFmt.pixWidth = r.right - r.left;
25435 
25436           fpp->list = sad.seq_list;
25437           SafeHide (fpp->instructions);
25438           Update ();
25439 
25440           if (PackageTypeIsSingle (sqfp->seqPackage) || sqfp->seqPackage == SEQ_PKG_GENOMICCDNA)
25441           {
25442             Disable (fpp->import_btn);
25443           }
25444           else
25445           {
25446             Enable (fpp->import_btn);
25447             SetTitle (fpp->import_btn, "Import Additional Nucleotide FASTA");
25448           }
25449           Enable (fpp->clear_btn);
25450           FormatFastaDoc (fpp);
25451           SafeShow (fpp->have_seq_instr_grp);
25452           NucleotideImportFinish (sqfp);
25453           if (sqfp->seqFormat == SEQ_FMT_FASTA
25454               && SuggestJumpingToWizard (fpp->list)) {
25455             fpp->list = NULL;
25456             Remove (sqfp->form);
25457           }
25458         }
25459       }
25460       else
25461       {
25462         sad.cancelled = FALSE;
25463         sad.done = FALSE;
25464       }
25465     }
25466     else
25467     {
25468       /* clean up list of sequences from form, since they will not be used */
25469       sep = sad.seq_list;
25470       while (sep != NULL)
25471       {
25472         next_sep = sep->next;
25473         sep->next = NULL;
25474         SeqEntryFree (sep);
25475         sep = next_sep;
25476       }
25477       sad.seq_list = NULL;
25478     }
25479   }
25480   Remove (w);
25481 }
25482 
/* Callback for the "Specify Molecule" button: forwards the button to
 * SpecifyModValueButton with the modifier name "moltype".
 */
static void SpecifyMolecule (ButtoN b)
{
  CharPtr mod_name = "moltype";

  SpecifyModValueButton (b, mod_name);
}
25487 
/* Callback for the "Specify Topology" button: forwards the button to
 * SpecifyModValueButton with the modifier name "topology".
 */
static void SpecifyTopology (ButtoN b)
{
  CharPtr mod_name = "topology";

  SpecifyModValueButton (b, mod_name);
}
25492 
/* Callback for the "Optional Alignment Settings" button.
 *
 * The button's extra data is the SequencesForm; the form's dnaseq dialog in
 * turn carries the PhylipPage whose aln_settings are edited.  The current
 * settings are handed to GetAlignmentOptions; when that returns a non-NULL
 * result the old settings are freed and replaced, otherwise they are left
 * untouched.
 */
static void CustomAlnSettings (ButtoN b)
{
  SequencesFormPtr form_data;
  PhylipPagePtr    aln_page;
  TSequenceInfoPtr updated;

  form_data = (SequencesFormPtr) GetObjectExtra (b);
  if (form_data != NULL) {
    aln_page = (PhylipPagePtr) GetObjectExtra (form_data->dnaseq);
    if (aln_page != NULL) {
      updated = GetAlignmentOptions (NULL, aln_page->aln_settings);
      if (updated != NULL) {
        /* only discard the old settings once a replacement exists */
        SequenceInfoFree (aln_page->aln_settings);
        aln_page->aln_settings = updated;
      }
    }
  }
}
25512 
25513 
/* Callback for the "Vector Trim Tool" button.
 *
 * Runs WizardVectorTool on the nucleotide FASTA page's sequence list (the
 * tool receives the address of the list, so it may replace or empty it) and
 * then refreshes the page controls to match whatever list remains:
 *   - list present: resize the FASTA column, hide the instructions, set up
 *     the import/clear buttons, redraw the summary document and call
 *     NucleotideImportFinish;
 *   - list empty:   show the instructions again, reset the import button
 *     title, clear the organism modifiers and disable the molecule,
 *     topology and vector-screen buttons.
 */
static void TrimVectorToolBtn (ButtoN b)
{
  SequencesFormPtr sqfp;
  FastaPagePtr     page;
  RecT             doc_rect;

  sqfp = (SequencesFormPtr) GetObjectExtra (b);
  if (sqfp == NULL) return;

  page = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);
  if (page == NULL) return;

  WizardVectorTool (&(page->list));

  Reset (page->doc);

  if (page->list != NULL) {
    /* make sure the column width is large enough for the document */
    ObjectRect (page->doc, &doc_rect);
    InsetRect (&doc_rect, 4, 4);
    faColFmt.pixWidth = doc_rect.right - doc_rect.left;

    SafeHide (page->instructions);
    Update ();

    if (!PackageTypeIsSingle (sqfp->seqPackage)
        && sqfp->seqPackage != SEQ_PKG_GENOMICCDNA) {
      /* multi-sequence packages may keep importing */
      Enable (page->import_btn);
      SetTitle (page->import_btn, "Import Additional Nucleotide FASTA");
    } else {
      Disable (page->import_btn);
    }
    Enable (page->clear_btn);
    FormatFastaDoc (page);
    SafeShow (page->have_seq_instr_grp);
    NucleotideImportFinish (sqfp);
    return;
  }

  /* nothing left after trimming: return the page to its empty state */
  SafeHide (page->have_seq_instr_grp);
  SafeShow (page->instructions);
  Update ();
  Enable (page->import_btn);
  SetTitle (page->import_btn, "Import Nucleotide FASTA");
  Disable (page->clear_btn);
  ClearOrganismModifiers (sqfp);
  Disable (sqfp->molecule_btn);
  Disable (sqfp->topology_btn);
  Disable (sqfp->vecscreen_btn);
}
25571 
25572 
/* Builds the nucleotide page of the sequences form inside parent group h and
 * returns the page's outermost group.
 *
 * Depending on sqfp->seqFormat the page hosts either a FASTA dialog
 * (SEQ_FMT_FASTA) or a PHYLIP/alignment dialog (SEQ_FMT_ALIGNMENT); the
 * dialog is stored in sqfp->dnaseq.  The function also creates:
 *   - sqfp->makeAlign ("Create Alignment") for FASTA set packages other than
 *     SEQ_PKG_GENBANK and SEQ_PKG_TSA;
 *   - the import / "Add/Modify Sequences" / "Clear Sequences" buttons;
 *   - sqfp->molecule_btn, sqfp->topology_btn and sqfp->vecscreen_btn, all
 *     initially disabled.
 *
 * The FASTA page data returned by GetObjectExtra is checked for NULL before
 * its fields are written, matching CreateProteinTab and the button handlers
 * in this file; the buttons themselves are still created in that case.
 */
static GrouP CreateNucleotideTab (GrouP h, SequencesFormPtr sqfp)
{
  GrouP              q, g, x, y, k;
  ButtoN             b = NULL;
  ButtoN             import_btn = NULL;
  ButtoN             clear_btn = NULL;
  Handle             h1 = NULL, h2 = NULL;
  Boolean            single;
  Boolean            is_genomic_cdna;
  GrouP              import_btn_grp = NULL;
  FastaPagePtr       fpp = NULL;

  q = HiddenGroup (h, -1, 0, NULL);
  SetGroupSpacing (q, 10, 10);
  g = HiddenGroup (q, -1, 0, NULL);
  SetGroupSpacing (g, 10, 10);

  y = HiddenGroup (g, -2, 0, NULL);
  SetGroupSpacing (y, 10, 2);

  if (PackageTypeIsSet (sqfp->seqPackage)
      && sqfp->seqPackage != SEQ_PKG_GENBANK /* exclude batch submissions */
      && sqfp->seqPackage != SEQ_PKG_TSA /* exclude TSA */
      && sqfp->seqFormat == SEQ_FMT_FASTA)
  {
    /* the box is created without setting its status (an earlier default
     * for packages below SEQ_PKG_GENBANK is no longer applied)
     */
    sqfp->makeAlign = CheckBox (g, "Create Alignment", NULL);
  }

  k = HiddenGroup (g, 0, 2, NULL);
  if (sqfp->seqFormat == SEQ_FMT_FASTA) {
    single = PackageTypeIsSingle (sqfp->seqPackage);
    is_genomic_cdna = (Boolean) (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA);

    if (is_genomic_cdna) {
      sqfp->dnaseq = CreateFastaDialog (k, "", TRUE, FALSE, fastaGenMsg, single, NULL);
    } else {
      sqfp->dnaseq = CreateFastaDialog (k, "", TRUE, FALSE, fastaNucMsg, single, &(sqfp->seqPackage));
    }
    fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq);

    import_btn_grp = HiddenGroup (g, 4, 0, NULL);
    if (is_genomic_cdna) {
      import_btn = PushButton (import_btn_grp, "Import Genomic FASTA", ImportBtnProc);
    } else {
      /* create with the longer title first, then switch to the initial
       * title (presumably so the button is sized for the longer text --
       * confirm)
       */
      import_btn = PushButton (import_btn_grp, "Import Additional Nucleotide FASTA", ImportBtnProc);
      SetTitle (import_btn, "Import Nucleotide FASTA");
      if (fpp != NULL) {
        if (sqfp->seqPackage == SEQ_PKG_GAPPED)
        {
          fpp->is_delta = TRUE;
        }
        else
        {
          fpp->is_delta = FALSE;
        }
      }
    }
    SetObjectExtra (import_btn, sqfp, NULL);

    if (sqfp->seqPackage != SEQ_PKG_GAPPED
        && sqfp->seqPackage != SEQ_PKG_GENOMICCDNA)
    {
      b = PushButton (import_btn_grp, "Add/Modify Sequences", SequenceAssistant);
      SetObjectExtra (b, sqfp, NULL);
    }
    clear_btn = PushButton (import_btn_grp, "Clear Sequences", ClearSequencesButton);
    SetObjectExtra (clear_btn, sqfp, NULL);
    Disable (clear_btn);

    /* publish the buttons on the page data only when the page data exists */
    if (fpp != NULL) {
      fpp->import_btn = import_btn;
      fpp->clear_btn = clear_btn;
    }
  } else if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
    sqfp->dnaseq = CreatePhylipDialog (k, "", phylipNucMsg, sqfp->seqFormat, "",
                                       sqfp->seqPackage);
    import_btn_grp = HiddenGroup (g, 4, 0, NULL);
    b = PushButton (import_btn_grp, "Import Nucleotide Alignment", ImportBtnProc);
    SetObjectExtra (b, sqfp, NULL);
    b = PushButton (import_btn_grp, "Optional Alignment Settings", CustomAlnSettings);
    SetObjectExtra (b, sqfp, NULL);
  }

  x = HiddenGroup (g, -4, 0, NULL);

  sqfp->molecule_btn = PushButton (x, "Specify Molecule", SpecifyMolecule);
  SetObjectExtra (sqfp->molecule_btn, sqfp, NULL);
  Disable (sqfp->molecule_btn);
  sqfp->topology_btn = PushButton (x, "Specify Topology", SpecifyTopology);
  SetObjectExtra (sqfp->topology_btn, sqfp, NULL);
  Disable (sqfp->topology_btn);
  sqfp->vecscreen_btn = PushButton (x, "Vector Trim Tool", TrimVectorToolBtn);
  SetObjectExtra (sqfp->vecscreen_btn, sqfp, NULL);
  Disable (sqfp->vecscreen_btn);

  /* the handle list passed to AlignObjects ends at the first NULL, so the
   * optional check box goes in front of the button group when it exists
   */
  if (sqfp->makeAlign != NULL) {
    h1 = (Handle) sqfp->makeAlign;
    h2 = (Handle) import_btn_grp;
  } else {
    h1 = (Handle) import_btn_grp;
    h2 = NULL;
  }
  AlignObjects (ALIGN_CENTER, (HANDLE) x, (HANDLE) y, (HANDLE) k,
                  (HANDLE) h1, (HANDLE) h2, NULL);
  return q;
}
25670 
/* Builds the sequencing-method page: creates the sequencing method dialog in
 * parent group h (in eWizardType_NormalDialogs mode), remembers it in
 * sqfp->sequencing_method_dlg and returns that same dialog cast to a GrouP
 * so it can be stored as a form page.
 */
static GrouP CreateSequencingMethodTab (GrouP h, SequencesFormPtr sqfp)
{
  return (GrouP) (sqfp->sequencing_method_dlg =
                    SequencingMethodDialog (h, eWizardType_NormalDialogs));
}
25676 
/* Builds the transcripts (mRNA) page inside parent group h and returns the
 * page's outermost group.
 *
 * Creates the two "incomplete" check boxes (sqfp->partialmRNA5 and
 * sqfp->partialmRNA3), the transcript FASTA dialog (sqfp->mrnaseq) and an
 * "Import Transcript FASTA" button wired to ImportBtnProc with the form as
 * its extra data.
 */
static GrouP CreateTranscriptsTab (GrouP h, SequencesFormPtr sqfp)
{
  GrouP   page_grp;
  GrouP   content_grp;
  GrouP   partial_grp;
  GrouP   fasta_grp;
  ButtoN  import_btn;

  page_grp = HiddenGroup (h, -1, 0, NULL);
  SetGroupSpacing (page_grp, 10, 20);

  content_grp = HiddenGroup (page_grp, -1, 0, NULL);
  SetGroupSpacing (content_grp, 10, 10);

  /* 5' / 3' partiality check boxes */
  partial_grp = HiddenGroup (content_grp, -2, 0, NULL);
  SetGroupSpacing (partial_grp, 10, 2);
  sqfp->partialmRNA5 = CheckBox (partial_grp, "Incomplete at 5' end", NULL);
  sqfp->partialmRNA3 = CheckBox (partial_grp, "Incomplete at 3' end", NULL);

  /* FASTA display for the transcripts */
  fasta_grp = HiddenGroup (content_grp, 0, 2, NULL);
  sqfp->mrnaseq = CreateFastaDialog (fasta_grp, "", TRUE, TRUE, fastaMrnaMsg, FALSE, NULL);

  import_btn = PushButton (content_grp, "Import Transcript FASTA", ImportBtnProc);
  SetObjectExtra (import_btn, sqfp, NULL);

  AlignObjects (ALIGN_CENTER, (HANDLE) partial_grp, (HANDLE) fasta_grp,
                (HANDLE) import_btn, NULL);
  return page_grp;
}
25699 
25700 
/* Builds the protein page inside parent group h and returns the page's
 * outermost group.
 *
 * Creates the NH2/CO2H "incomplete" check boxes (sqfp->partialN,
 * sqfp->partialC), the protein FASTA dialog (sqfp->protseq) and the import
 * and clear buttons.  Unless the package is SEQ_PKG_GENOMICCDNA it also adds
 * the "Create initial mRNA with CDS intervals" check box; sqfp->makeMRNA
 * starts FALSE and becomes TRUE only when that box is created and the
 * SEQUIN / PREFERENCES / CREATEMRNA application parameter equals "TRUE"
 * (compared case-insensitively).
 */
static GrouP CreateProteinTab (GrouP h, SequencesFormPtr sqfp)
{
  GrouP        page_grp;
  GrouP        content_grp;
  GrouP        partial_grp;
  GrouP        fasta_grp;
  GrouP        btn_grp;
  ButtoN       mrna_box = NULL;
  ButtoN       import_btn;
  Char         pref [32];
  FastaPagePtr prot_page;

  page_grp = HiddenGroup (h, -1, 0, NULL);
  SetGroupSpacing (page_grp, 10, 20);

  content_grp = HiddenGroup (page_grp, -1, 0, NULL);
  SetGroupSpacing (content_grp, 10, 10);

  partial_grp = HiddenGroup (content_grp, -2, 0, NULL);
  SetGroupSpacing (partial_grp, 10, 2);
  sqfp->partialN = CheckBox (partial_grp, "Incomplete at NH2 end", NULL);
  sqfp->partialC = CheckBox (partial_grp, "Incomplete at CO2H end", NULL);

  sqfp->makeMRNA = FALSE;
  if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA) {
    mrna_box = CheckBox (content_grp, "Create initial mRNA with CDS intervals", ChangeMrnaFlag);
    SetObjectExtra (mrna_box, sqfp, NULL);
    if (GetAppParam ("SEQUIN", "PREFERENCES", "CREATEMRNA", NULL, pref, sizeof (pref))
        && StringICmp (pref, "TRUE") == 0) {
      sqfp->makeMRNA = TRUE;
    }
  }
  /* mrna_box is still NULL for genomic/cDNA packages; the Safe variant is
   * used here for that reason
   */
  SafeSetStatus (mrna_box, sqfp->makeMRNA);

  fasta_grp = HiddenGroup (content_grp, 0, 2, NULL);
  sqfp->protseq = CreateFastaDialog (fasta_grp, "", FALSE, FALSE, fastaProtMsg, FALSE, NULL);

  btn_grp = HiddenGroup (content_grp, 2, 0, NULL);
  SetGroupSpacing (btn_grp, 10, 10);
  import_btn = PushButton (btn_grp, "Import Protein FASTA", ImportBtnProc);
  SetObjectExtra (import_btn, sqfp, NULL);

  prot_page = (FastaPagePtr) GetObjectExtra (sqfp->protseq);
  if (prot_page != NULL) {
    /* the clear button starts disabled */
    prot_page->clear_btn = PushButton (btn_grp, "Clear Protein Sequences", ClearSequencesButton);
    SetObjectExtra (prot_page->clear_btn, sqfp, NULL);
    Disable (prot_page->clear_btn);
  }

  /* mrna_box is listed last: when it is NULL it simply coincides with the
   * end of the NULL-terminated handle list
   */
  AlignObjects (ALIGN_CENTER, (HANDLE) partial_grp, (HANDLE) fasta_grp,
                (HANDLE) btn_grp, (HANDLE) mrna_box, NULL);

  return page_grp;
}
25749 
25750 
/* Builds the annotation page inside parent group h and returns the page's
 * outermost group.
 *
 * Creates the feature-info dialog (sqfp->feature_info), the multi-line title
 * box (sqfp->defline) and the "Prefix title with organism name" check box
 * (sqfp->orgPrefix).  After layout, the title box is moved so that its left
 * and right edges line up with the feature comment box reported by the
 * feature-info dialog.
 */
static GrouP CreateAnnotTab (GrouP h, SequencesFormPtr sqfp)
{
  GrouP  page_grp;
  GrouP  title_grp;
  PrompT feature_prompt;
  PrompT title_prompt;
  RecT   comment_box;
  RecT   title_box;

  page_grp = HiddenGroup (h, -1, 0, NULL);
  SetGroupSpacing (page_grp, 10, 10);

  feature_prompt = StaticPrompt (page_grp, "Add feature across full length of all sequences",
                                 0, 0, programFont, 'l');
  sqfp->feature_info = SubmissionFeatureInfoDialog (page_grp);

  title_prompt = StaticPrompt (page_grp, "Add title to all sequences if not in definition line",
                               0, 0, programFont, 'c');
  title_grp = HiddenGroup (page_grp, 2, 0, NULL);
  StaticPrompt (title_grp, "Title       ", 0, 3 * Nlm_stdLineHeight, programFont, 'c');
  sqfp->defline = ScrollText (title_grp, 20, 3, programFont, TRUE, NULL);

  sqfp->orgPrefix = CheckBox (page_grp, "Prefix title with organism name", NULL);

  AlignObjects (ALIGN_CENTER, (HANDLE) feature_prompt,
                (HANDLE) sqfp->feature_info,
                (HANDLE) title_prompt,
                (HANDLE) sqfp->orgPrefix, NULL);

  /* make the title scroll the same width as, and aligned with, the comment
   * scroll of the feature-info dialog
   */
  GetSubmissionFeatureInfoFeatCommentRect (sqfp->feature_info, &comment_box);
  ObjectRect (sqfp->defline, &title_box);

  /* The title box has a vertical scroll bar, and setting the position
   * subtracts the scroll bar width before placing the text, so the scroll
   * bar width must be added back onto the right edge.
   */
  title_box.left = comment_box.left;
  title_box.right = comment_box.right + Nlm_vScrollBarWidth;
  SetPosition (sqfp->defline, &title_box);

  return page_grp;
}
25786 
25787 
/* Menu callback for "Sequence Deletion Tool".
 *
 * Fetches the form's sequence list, passes it (by address) to
 * RemoveSequencesFromWizardList, and -- only when that call reports TRUE --
 * hands the resulting list back to the form on page 0 and repaints.
 * NOTE(review): the meaning of the literal 200 passed to
 * RemoveSequencesFromWizardList is not visible here -- confirm against that
 * function.
 */
static void RemoveSequencesFromSubmission (IteM i)
{
  SequencesFormPtr form_data;
  SeqEntryPtr      seq_list;

  form_data = (SequencesFormPtr) GetObjectExtra (i);
  if (form_data != NULL) {
    seq_list = GetSeqEntryFromSequencesForm (form_data);
    if (RemoveSequencesFromWizardList (&seq_list, 200)) {
      /* redraw the form with whatever sequences remain */
      SetSequencesForSubmissionForm ((WindoW) form_data->form, seq_list, 0);
      Update ();
    }
  }
}
25805 
25806 
25807 static void SetSequencesFormPage (SequencesFormPtr sqfp, Int4 page);
RemoveAllSequencesFromSubmission(IteM i)25808 static void RemoveAllSequencesFromSubmission (IteM i)
25809 {
25810   SequencesFormPtr sqfp;
25811 
25812   sqfp = (SequencesFormPtr) GetObjectExtra (i);
25813   if (sqfp == NULL) {
25814     return;
25815   }
25816   /* delete the sequences and organism modifiers */
25817   DeleteAllSequencesFromForm (sqfp);
25818   /* put the user back on the sequences page */
25819   SetSequencesFormPage (sqfp, 0);
25820   Update();
25821 }
25822 
25823 
/* Cleanup callback registered on the sequences form window.
 *
 * Frees the form's nucleotide/protein association list (when form data is
 * present) and then always delegates to StdCleanupFormProc with the same
 * arguments.
 */
static void CleanupSequencesForm (GraphiC g, Pointer data)
{
  SequencesFormPtr form_data = (SequencesFormPtr) data;

  if (form_data != NULL) {
    form_data->nuc_prot_assoc_list =
      FreeAssociationList (form_data->nuc_prot_assoc_list);
  }
  StdCleanupFormProc (g, data);
}
25835 
CreateInitOrgNucProtForm(Int2 left,Int2 top,CharPtr title,FormatBlockPtr format,BtnActnProc goToNext,BtnActnProc goBack,WndActnProc activateForm)25836 extern ForM CreateInitOrgNucProtForm (Int2 left, Int2 top, CharPtr title,
25837                                       FormatBlockPtr format,
25838                                       BtnActnProc goToNext,
25839                                       BtnActnProc goBack,
25840                                       WndActnProc activateForm)
25841 
25842 {
25843   GrouP              c;
25844   GrouP              h;
25845   GrouP              j;
25846   GrouP              tbs_grp;
25847   Int2               page;
25848   StdEditorProcsPtr  sepp;
25849   SequencesFormPtr   sqfp;
25850   WindoW             w;
25851   MenU               edit_menu = NULL;
25852   IteM               i;
25853 
25854   w = NULL;
25855   sqfp = MemNew (sizeof (SequencesForm));
25856   if (sqfp != NULL) {
25857 
25858     if (format != NULL) {
25859       sqfp->seqPackage = format->seqPackage;
25860       sqfp->seqFormat = format->seqFormat;
25861       sqfp->numSeqs = format->numSeqs;
25862       sqfp->submType = format->submType;
25863     } else {
25864       sqfp->seqPackage = SEQ_PKG_SINGLE;
25865       sqfp->seqFormat = SEQ_FMT_FASTA;
25866       sqfp->numSeqs = 0;
25867       sqfp->submType = SEQ_ORIG_SUBMISSION;
25868     }
25869     sqfp->nuc_prot_assoc_list = NULL;
25870 
25871     w = FixedWindow (left, top, -10, -10, title, NULL);
25872     SetObjectExtra (w, sqfp, CleanupSequencesForm);
25873     sqfp->form = (ForM) w;
25874     sqfp->toform = NULL;
25875     if (sqfp->seqFormat == SEQ_FMT_FASTA) {
25876       sqfp->fromform = FastaSequencesFormToSeqEntryPtr;
25877     } else if (sqfp->seqFormat == SEQ_FMT_ALIGNMENT) {
25878       sqfp->fromform = PhylipSequencesFormToSeqEntryPtr;
25879     }
25880     sqfp->testform = NULL;
25881     sqfp->importform = ImportSequencesForm;
25882     sqfp->exportform = ExportSequencesForm;
25883     sqfp->formmessage = SequencesFormMessage;
25884 
25885 #ifndef WIN_MAC
25886     edit_menu = CreateSqnInitialFormMenus (w);
25887     if (edit_menu && sqfp->seqFormat == SEQ_FMT_FASTA) {
25888       i = CommandItem (edit_menu, "Sequence Deletion Tool", RemoveSequencesFromSubmission);
25889       SetObjectExtra (i, sqfp, NULL);
25890       i = CommandItem (edit_menu, "Delete All Sequences", RemoveAllSequencesFromSubmission);
25891       SetObjectExtra (i, sqfp, NULL);
25892     }
25893 #endif
25894 
25895     sepp = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm");
25896     if (sepp != NULL) {
25897       sqfp->appmessage = sepp->handleMessages;
25898     }
25899 
25900     SetGroupSpacing (w, 10, 10);
25901 
25902     j = HiddenGroup (w, 10, 0, NULL);
25903 
25904     tbs_grp = HiddenGroup (j, 0, 0, NULL);
25905     if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
25906       sqfp->seq_tbs = CreateFolderTabs (tbs_grp, cdnaGenSeqFormTabs, NUCLEOTIDE_PAGE,
25907                                     0, 0, SYSTEM_FOLDER_TAB,
25908                                     ChangeSequencesPage, (Pointer) sqfp);
25909       sqfp->annot_tbs = CreateFolderTabs (tbs_grp, cdnaGenAnnotFormTabs, 1,
25910                                     0, 0, SYSTEM_FOLDER_TAB,
25911                                     ChangeSequencesPage, (Pointer) sqfp);
25912     } else if (sqfp->seqPackage == SEQ_PKG_GAPPED) {
25913       sqfp->seq_tbs = CreateFolderTabs (tbs_grp, seqSegSeqFormTabs, NUCLEOTIDE_PAGE,
25914                                     0, 0, SYSTEM_FOLDER_TAB,
25915                                     ChangeSequencesPage, (Pointer) sqfp);
25916       sqfp->annot_tbs = CreateFolderTabs (tbs_grp, seqSegAnnotFormTabs, 1,
25917                                     0, 0, SYSTEM_FOLDER_TAB,
25918                                     ChangeSequencesPage, (Pointer) sqfp);
25919     } else {
25920       sqfp->seq_tbs = CreateFolderTabs (tbs_grp, popPhyMutSeqFormTabs, NUCLEOTIDE_PAGE,
25921                                     0, 0, SYSTEM_FOLDER_TAB,
25922                                     ChangeSequencesPage, (Pointer) sqfp);
25923       sqfp->annot_tbs = CreateFolderTabs (tbs_grp, popPhyMutAnnotFormTabs, 1,
25924                                     0, 0, SYSTEM_FOLDER_TAB,
25925                                     ChangeSequencesPage, (Pointer) sqfp);
25926     }
25927     AlignObjects (ALIGN_CENTER, (HANDLE) sqfp->seq_tbs, (HANDLE) sqfp->annot_tbs, NULL);
25928     Hide (sqfp->annot_tbs);
25929     sqfp->currentPage = 0;
25930     page = 0;
25931 
25932     h = HiddenGroup (w, 0, 0, NULL);
25933 
25934     sqfp->pages [page] = CreateNucleotideTab (h, sqfp);
25935     Hide (sqfp->pages [page]);
25936     sqfp->tagFromPage [page] = NUCLEOTIDE_PAGE;
25937     page++;
25938 
25939     sqfp->pages[page] = CreateSequencingMethodTab (h, sqfp);
25940     Hide (sqfp->pages [page]);
25941     sqfp->tagFromPage [page] = SEQUENCING_METHOD_PAGE;
25942     page++;
25943 
25944     sqfp->pages [page] = CreateSourceTab (h, sqfp);
25945     Hide (sqfp->pages [page]);
25946     sqfp->tagFromPage [page] = ORGANISM_PAGE;
25947     page++;
25948 
25949 
25950     if (sqfp->seqPackage == SEQ_PKG_GENOMICCDNA) {
25951       sqfp->pages [page] = CreateTranscriptsTab (h, sqfp);
25952       Hide (sqfp->pages [page]);
25953       sqfp->tagFromPage [page] = MRNA_PAGE;
25954       page++;
25955     }
25956 
25957     sqfp->pages [page] = CreateProteinTab (h, sqfp);
25958     Hide (sqfp->pages [page]);
25959     sqfp->tagFromPage [page] = PROTEIN_PAGE;
25960     page++;
25961 
25962     if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA
25963         && sqfp->seqPackage != SEQ_PKG_GAPPED)
25964     {
25965       sqfp->pages [page] = CreateAnnotTab (h, sqfp);
25966       Hide (sqfp->pages [page]);
25967       sqfp->tagFromPage [page] = ANNOTATE_PAGE;
25968       page++;
25969     }
25970 
25971     sqfp->numPages = page;
25972 
25973     if (sqfp->seqPackage == SEQ_PKG_TSA) {
25974       ChooseFeatureTypeForSubmissionFeatureInfoDialog (sqfp->feature_info, eSubmitAnnotType_None);
25975     }
25976 
25977     c = HiddenGroup (w, 3, 0, NULL);
25978     SetGroupSpacing (c, 10, 2);
25979     sqfp->goToPrev = goBack;
25980     sqfp->prevBtn = PushButton (c, " << Prev Form ", PrevSequencesFormBtn);
25981     SetObjectExtra (sqfp->prevBtn, sqfp, NULL);
25982     sqfp->goToNext = goToNext;
25983     sqfp->nextBtn = PushButton (c, " Next Page >> ", NextSequencesFormBtn);
25984     SetObjectExtra (sqfp->nextBtn, sqfp, NULL);
25985 
25986     AlignObjects (ALIGN_CENTER, (HANDLE) j, (HANDLE) c,
25987                   (HANDLE) sqfp->pages [0], (HANDLE) sqfp->pages [1],
25988                   (HANDLE) sqfp->pages [2], (HANDLE) sqfp->pages [3], NULL);
25989 
25990     RealizeWindow (w);
25991 
25992     SafeSetTitle (sqfp->prevBtn, "<< Prev Form");
25993     SafeSetTitle (sqfp->nextBtn, "Next Page >>");
25994 
25995     sqfp->activate = activateForm;
25996     SetActivate (w, InitOrgNucProtFormActivate);
25997 
25998     SendMessageToDialog (sqfp->seq_tbs, VIB_MSG_INIT);
25999     SendMessageToDialog (sqfp->annot_tbs, VIB_MSG_INIT);
26000     SendMessageToDialog (sqfp->dnaseq, VIB_MSG_INIT);
26001     SendMessageToDialog (sqfp->protseq, VIB_MSG_INIT);
26002 
26003     Show (sqfp->pages [sqfp->currentPage]);
26004   }
26005   return (ForM) w;
26006 }
26007 
26008 
/* Switches the sequences form to the given zero-based page.
 *
 * Nothing happens when sqfp is NULL, when page is outside
 * 0 .. sqfp->numPages - 1, or when page is already the current page.
 * Otherwise the current page is hidden and the matching tab strip is shown
 * and set: pages 0 and 1 belong to sqfp->seq_tbs, pages 2 and above to
 * sqfp->annot_tbs (tab index page - 2).
 *
 * The upper bound is sqfp->numPages rather than a literal page count, so the
 * last page stays reachable on forms that have five pages.
 *
 * NOTE(review): this function does not itself show the new page or update
 * sqfp->currentPage; presumably SetValue on the tab strip triggers
 * ChangeSequencesPage, which does -- confirm.
 */
static void SetSequencesFormPage (SequencesFormPtr sqfp, Int4 page)
{
  if (sqfp == NULL) {
    return;
  }
  if (page < 0 || page >= sqfp->numPages || page == sqfp->currentPage) {
    return;
  }

  Hide (sqfp->pages[sqfp->currentPage]);
  if (page > 1) {
    sqfp->show_annot = TRUE;
    Hide (sqfp->seq_tbs);
    Show (sqfp->annot_tbs);
    SetValue (sqfp->annot_tbs, page - 2);
  } else {
    sqfp->show_annot = FALSE;
    Show (sqfp->seq_tbs);
    Hide (sqfp->annot_tbs);
    SetValue (sqfp->seq_tbs, page);
  }
}
26029 
SetSequencesForSubmissionForm(WindoW w,SeqEntryPtr sep_list,Int4 page)26030 NLM_EXTERN void SetSequencesForSubmissionForm (WindoW w, SeqEntryPtr sep_list, Int4 page)
26031 {
26032   SequencesFormPtr sqfp;
26033   FastaPagePtr     fpp;
26034 
26035   sqfp = GetObjectExtra (w);
26036   if (sqfp == NULL || sqfp->seqFormat != SEQ_FMT_FASTA || (fpp = (FastaPagePtr) GetObjectExtra (sqfp->dnaseq)) == NULL) {
26037     return;
26038   }
26039 
26040   fpp->list = sep_list;
26041   fpp->errmsgs = ValNodeFreeData (fpp->errmsgs);
26042   if (fpp->list == NULL) {
26043     fpp->path [0] = '\0';
26044     SafeHide (fpp->have_seq_instr_grp);
26045     Reset (fpp->doc);
26046     SafeShow (fpp->instructions);
26047     if (sqfp->seqPackage != SEQ_PKG_GENOMICCDNA)
26048     {
26049       SetTitle (fpp->import_btn, "Import Nucleotide FASTA");
26050     }
26051     Enable (fpp->import_btn);
26052     Disable (fpp->clear_btn);
26053   } else {
26054     SafeHide (fpp->instructions);
26055     Update ();
26056     SetTitle (fpp->import_btn, "Import Additional Nucleotide FASTA");
26057     Enable (fpp->clear_btn);
26058     Reset (fpp->doc);
26059     FormatFastaDoc (fpp);
26060     SafeShow (fpp->have_seq_instr_grp);
26061   }
26062   NucleotideImportFinish (sqfp);
26063   SetSequencesFormPage (sqfp, page);
26064 }
26065 
/* Adds an "unpublished" Cit-gen publication descriptor to sep, built from
 * the citation data in the Sequin block.
 *
 * Nothing is added when sep or sbp is NULL or when sbp has no
 * citsubauthors.  Otherwise a new Seq_descr_pub descriptor is created on sep
 * holding a Pubdesc with one PUB_Gen entry whose:
 *   - cit     is "unpublished";
 *   - authors is a copy of sbp->citsubauthors, with a copy of
 *             sbp->citsubaffil attached (phone, fax and email stripped from
 *             the copy);
 *   - title   is sbp->citsubtitle itself -- ownership moves to the
 *             publication and sbp->citsubtitle is set to NULL.
 *
 * Despite the name, no definition line is generated here (that part of the
 * function is disabled).
 */
static void MakePubAndDefLine (SequinBlockPtr sbp, SeqEntryPtr sep)

{
  AffilPtr     affil;
  AuthListPtr  alp;
  CitGenPtr    cgp;
  PubdescPtr   pdp;
  ValNodePtr   pep;
  ValNodePtr   vnp;

  if (sep == NULL) return;
  if (sbp == NULL || sbp->citsubauthors == NULL) return;

  pdp = PubdescNew ();
  if (pdp == NULL) return;

  vnp = CreateNewDescriptor (sep, Seq_descr_pub);
  if (vnp == NULL) {
    /* nothing will own the Pubdesc, so release it instead of leaking it */
    PubdescFree (pdp);
    return;
  }
  vnp->data.ptrvalue = (Pointer) pdp;
  pdp->reftype = 0;

  pep = ValNodeNew (NULL);
  pdp->pub = pep;
  if (pep == NULL) return;

  cgp = CitGenNew ();
  if (cgp == NULL) return;

  pep->choice = PUB_Gen;
  pep->data.ptrvalue = cgp;
  cgp->cit = StringSave ("unpublished");

  /* copy the authors so the Sequin block keeps its own list */
  alp = AsnIoMemCopy ((Pointer) sbp->citsubauthors,
                      (AsnReadFunc) AuthListAsnRead,
                      (AsnWriteFunc) AuthListAsnWrite);
  cgp->authors = alp;
  if (alp != NULL) {
    affil = AsnIoMemCopy ((Pointer) sbp->citsubaffil,
                          (AsnReadFunc) AffilAsnRead,
                          (AsnWriteFunc) AffilAsnWrite);
    alp->affil = affil;
    if (affil != NULL) {
      /* contact details are removed from the publication's affiliation */
      affil->phone = MemFree (affil->phone);
      affil->fax = MemFree (affil->fax);
      affil->email = MemFree (affil->email);
    }
  }

  /* the title string is handed over, not copied */
  cgp->title = sbp->citsubtitle;
  sbp->citsubtitle = NULL;
}
26137 
26138 extern SubmitBlockPtr ConvertSequinBlockToSubmitBlock (SequinBlockPtr sqp);
26139 
ConvertSequinBlockToSubmitBlock(SequinBlockPtr sqp)26140 extern SubmitBlockPtr ConvertSequinBlockToSubmitBlock (SequinBlockPtr sqp)
26141 
26142 {
26143   AffilPtr        affil;
26144   AuthorPtr       ap;
26145   AuthListPtr     authors;
26146   ContactInfoPtr  cip;
26147   CitSubPtr       csp;
26148   DatePtr         dp;
26149   CharPtr         os;
26150   SubmitBlockPtr  sbp;
26151   Char            str [64];
26152 
26153   sbp = NULL;
26154   if (sqp != NULL) {
26155     sbp = SubmitBlockNew ();
26156     if (sbp != NULL) {
26157       sbp->subtype = 1;
26158       os = GetOpSysString ();
26159       if (os != NULL) {
26160         sprintf (str, "Sequin %s - %s", SEQUIN_APPLICATION, os);
26161       } else {
26162         sprintf (str, "Sequin %s", SEQUIN_APPLICATION);
26163       }
26164       sbp->tool = StringSave (str);
26165       MemFree (os);
26166       sbp->reldate = sqp->releasedate;
26167       dp = sbp->reldate;
26168       if (dp != NULL && dp->data [0] == 1 && dp->data [1] > 0) {
26169         if (dp->data [2] == 0) {
26170           dp->data [2] = 1;
26171         }
26172         if (dp->data [3] == 0) {
26173           switch (dp->data [2]) {
26174             case 4 :
26175             case 6 :
26176             case 9 :
26177             case 11 :
26178               dp->data [3] = 30;
26179               break;
26180             case 2 :
26181               dp->data [3] = 28;
26182               break;
26183             default :
26184               dp->data [3] = 31;
26185               break;
26186           }
26187         }
26188       }
26189       cip = ContactInfoNew ();
26190       if (cip != NULL) {
26191         ap = sqp->contactperson;
26192         cip->contact = ap;
26193         if (ap != NULL) {
26194           affil = sqp->citsubaffil;
26195           if (affil != NULL) {
26196             if (ap->affil != NULL) {
26197               affil->phone = MemFree (affil->phone);
26198               affil->fax = MemFree (affil->fax);
26199               affil->email = MemFree (affil->email);
26200               affil->phone = StringSave (ap->affil->phone);
26201               affil->fax = StringSave (ap->affil->fax);
26202               affil->email = StringSave (ap->affil->email);
26203               ap->affil = AffilFree (ap->affil);
26204             }
26205             ap->affil = affil;
26206           }
26207         }
26208       }
26209       sbp->contact = cip;
26210       csp = CitSubFromContactInfo (cip);
26211       sbp->cit = csp;
26212       if (csp != NULL) {
26213         authors = csp->authors;
26214         if (authors != NULL) {
26215           affil = authors->affil;
26216           authors->affil = NULL;
26217           csp->authors = AuthListFree (csp->authors);
26218           csp->authors = sqp->citsubauthors;
26219           authors = csp->authors;
26220           if (authors != NULL) {
26221             authors->affil = affil;
26222             if (affil != NULL) {
26223               affil->phone = MemFree (affil->phone);
26224               affil->fax = MemFree (affil->fax);
26225               affil->email = MemFree (affil->email);
26226             }
26227           }
26228         }
26229       }
26230       sbp->hup = sqp->holduntilpublished;
26231     }
26232     MemFree (sqp);
26233   }
26234   return sbp;
26235 }
26236 
26237 
IsUnpubForCitSub(SeqDescPtr sdp)26238 static Boolean IsUnpubForCitSub (SeqDescPtr sdp)
26239 {
26240   ValNodePtr vnp;
26241   CitGenPtr  cgp;
26242   PubdescPtr pdp;
26243 
26244   if (sdp == NULL) {
26245     return FALSE;
26246   }
26247   if (sdp->choice == Seq_descr_pub && sdp->data.ptrvalue != NULL) {
26248     pdp = (PubdescPtr) sdp->data.ptrvalue;
26249     vnp = pdp->pub;
26250     while (vnp != NULL && vnp->choice != PUB_Gen) {
26251       vnp = vnp->next;
26252     }
26253     if (vnp != NULL && vnp->data.ptrvalue != NULL) {
26254       cgp = (CitGenPtr) vnp->data.ptrvalue;
26255       if (StringICmp (cgp->cit, "unpublished") == 0
26256           && !StringHasNoText (cgp->title)) {
26257         return TRUE;
26258       }
26259     }
26260   }
26261   return FALSE;
26262 }
26263 
26264 
/* Copies selected descriptors from sdp_list onto every entry of sep_list.
 *
 * A descriptor is selected when it is a user descriptor, or a pub
 * descriptor for which IsUnpubForCitSub returns FALSE.  Each selected
 * descriptor is duplicated with AsnIoMemCopy once per entry and the copy
 * is linked onto the descr chain of that entry's Bioseq or BioseqSet.
 * Entries whose data pointer is NULL are left alone.
 */
static void AddTemplateDescriptors (SeqEntryPtr sep_list, SeqDescrPtr sdp_list)
{
  SeqDescPtr       tmpl;
  SeqEntryPtr      entry;
  BioseqPtr        bioseq;
  BioseqSetPtr     set;
  ValNodePtr PNTR  target;

  if (sep_list == NULL || sdp_list == NULL) {
    return;
  }

  for (tmpl = sdp_list; tmpl != NULL; tmpl = tmpl->next) {
    /* filter: keep user descriptors and pubs that are not the unpublished cit-sub stub */
    if (tmpl->choice == Seq_descr_pub) {
      if (IsUnpubForCitSub (tmpl)) {
        continue;
      }
    } else if (tmpl->choice != Seq_descr_user) {
      continue;
    }

    for (entry = sep_list; entry != NULL; entry = entry->next) {
      /* work out which descriptor chain this entry owns */
      target = NULL;
      if (IS_Bioseq (entry)) {
        bioseq = (BioseqPtr) entry->data.ptrvalue;
        if (bioseq != NULL) {
          target = &(bioseq->descr);
        }
      } else if (IS_Bioseq_set (entry)) {
        set = (BioseqSetPtr) entry->data.ptrvalue;
        if (set != NULL) {
          target = &(set->descr);
        }
      }
      if (target == NULL) {
        continue;
      }
      ValNodeLink (target, (SeqDescPtr) AsnIoMemCopy (tmpl, (AsnReadFunc) SeqDescAsnRead, (AsnWriteFunc) SeqDescAsnWrite));
    }
  }
}
26287 
26288 
PackageFormResults(SequinBlockPtr sbp,SeqEntryPtr sep,Boolean makePubAndDefLine)26289 extern Uint2 PackageFormResults (SequinBlockPtr sbp, SeqEntryPtr sep, Boolean makePubAndDefLine)
26290 
26291 {
26292   Uint2         entityID;
26293   SeqSubmitPtr  ssp;
26294 
26295   entityID = 0;
26296   if (sep != NULL) {
26297     if (sbp != NULL) {
26298       ssp = SeqSubmitNew ();
26299       if (ssp != NULL) {
26300         ssp->datatype = 1;
26301         ssp->data = (Pointer) sep;
26302         if (makePubAndDefLine) {
26303           MakePubAndDefLine (sbp, sep);
26304         }
26305         AddTemplateDescriptors (sep, sbp->descriptors);
26306         sbp->citsubtitle = MemFree (sbp->citsubtitle);
26307         ssp->sub = ConvertSequinBlockToSubmitBlock (sbp);
26308         ObjMgrConnect (OBJ_SEQENTRY, sep->data.ptrvalue, OBJ_SEQSUB, (Pointer) ssp);
26309         if (! ObjMgrRegister (OBJ_SEQSUB, (Pointer) ssp)) {
26310           ErrPostEx (SEV_ERROR, 0, 0, "ObjMgrRegister failed.");
26311         }
26312       } else {
26313         if (! ObjMgrRegister (OBJ_SEQENTRY, (Pointer) sep)) {
26314           ErrPostEx (SEV_ERROR, 0, 0, "ObjMgrRegister failed.");
26315         }
26316       }
26317     } else {
26318       if (! ObjMgrRegister (OBJ_SEQENTRY, (Pointer) sep)) {
26319         ErrPostEx (SEV_ERROR, 0, 0, "ObjMgrRegister failed.");
26320       }
26321     }
26322     if (EntrezASN1Detected (sep)) {
26323       ErrPostEx (SEV_WARNING, 0, 0, "This record was retrieved from Entrez.");
26324     }
26325     entityID = ObjMgrGetEntityIDForChoice (sep);
26326   }
26327   return entityID;
26328 }
26329 
/* Bioseq callback: appends bsp to the ValNode list whose head pointer is
 * passed through userdata, but only when the Bioseq has raw
 * representation (Seq_repr_raw).  Other Bioseqs are ignored.
 */
static void GetRawBsps (BioseqPtr bsp, Pointer userdata)
{
  if (bsp->repr == Seq_repr_raw) {
    ValNodeAddPointer ((ValNodePtr PNTR) userdata, 0, (Pointer) bsp);
  }
}
26339 
/* Menu action: imports sequences from a user-selected file and associates
 * them with the record shown in the current form.
 *
 * Steps, in order: resolve the form and its top SeqEntry, ask for an
 * input file, ask whether definition lines start with a seqID, import the
 * sequences, confirm if characters were stripped, build the
 * nucleotide/protein association list, ask about default mRNAs, apply the
 * associations, then mark the entity dirty and broadcast an update.
 * Any failed or cancelled step returns without modifying the record.
 */
extern void ParseInMoreProteins (IteM i)
{
  BaseFormPtr      bfp;
  SeqEntryPtr      top_sep;
  SeqEntryPtr      proteins = NULL;
  NucProtAssocPtr  assoc_list;
  CharPtr          extension;
  Char             path [PATH_MAX];
  FILE             *fp;
  Boolean          parseSeqId;
  Boolean          makeMRNA;
  Boolean          chars_stripped = FALSE;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif
  if (bfp == NULL) return;

  top_sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (top_sep == NULL) return;

  extension = GetAppProperty ("FastaProtExtension");
  if (! GetInputFileName (path, sizeof (path), extension, "TEXT")) return;

  fp = FileOpen (path, "r");
  if (fp == NULL) return;

  parseSeqId = (Boolean) (Message (MSG_YN, "Do FASTA definition lines start with seqID?") == ANS_YES);

  /* read in proteins */
  WatchCursor ();
  Update ();
  proteins = ImportSequencesFromFile (fp, NULL, FALSE, parseSeqId, NULL, NULL, &chars_stripped);
  FileClose (fp);

  /* the confirmation is only shown when something was read and something was stripped */
  if (chars_stripped && proteins != NULL
      && Message (MSG_OKC, "Illegal characters will be stripped from your sequence data.  Do you want to continue?") == ANS_CANCEL) {
    proteins = SeqEntryFree (proteins);
    ArrowCursor ();
    Update ();
    return;
  }

  ArrowCursor ();
  Update ();

  /* the top entry of the record serves as the list of nucleotide sequences */
  assoc_list = AssignProteinsForSequenceSet (top_sep, proteins, TRUE);
  if (assoc_list == NULL) {
    proteins = SeqEntryFree (proteins);
    return;
  }

  makeMRNA = (Boolean) (Message (MSG_YN, "Do you wish to make default mRNAs?") == ANS_YES);

  ReplaceDuplicateProteinIDs (top_sep, proteins);
  AssignProteinsToSelectedNucleotides (assoc_list, top_sep, proteins, NULL, 1, makeMRNA);

  assoc_list = FreeAssociationList (assoc_list);

  ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
}
26413 
26414 
26415 /*#ifdef ALLOW_DOWNLOAD*/
26416 typedef struct fetchform {
26417   FORM_MESSAGE_BLOCK
26418   GrouP           accntype;
26419   TexT            accession;
26420   ButtoN          accept;
26421 } FetchForm, PNTR FetchFormPtr;
26422 
/* Form message handler for the fetch dialog.
 *
 * Cut, copy, paste and delete messages are routed to the standard text
 * procedures; every other message is forwarded to the form's appmessage
 * callback when one is installed.  Does nothing if the form carries no
 * FetchForm data.
 */
static void FetchFormMessage (ForM f, Int2 mssg)
{
  FetchFormPtr  form_data = (FetchFormPtr) GetObjectExtra (f);

  if (form_data == NULL) {
    return;
  }

  switch (mssg) {
    case VIB_MSG_CUT :
      StdCutTextProc (NULL);
      return;
    case VIB_MSG_COPY :
      StdCopyTextProc (NULL);
      return;
    case VIB_MSG_PASTE :
      StdPasteTextProc (NULL);
      return;
    case VIB_MSG_DELETE :
      StdDeleteTextProc (NULL);
      return;
    default :
      break;
  }

  /* anything else goes to the application-level handler, if present */
  if (form_data->appmessage != NULL) {
    form_data->appmessage (f, mssg);
  }
}
26451 
/* Explore callback: records which Seq-id types occur on a Bioseq.
 *
 * sep     - entry being visited; only Bioseq entries are inspected.
 * mydata  - array of NUM_SEQID Boolean flags, indexed by Seq-id choice.
 * index, indent - supplied by the explorer, unused here.
 *
 * For every id on the Bioseq whose choice is within range, the matching
 * flag is set to TRUE.  The flag is assigned rather than incremented:
 * the array holds Booleans that callers only test for truth, and
 * incrementing a narrow Boolean could wrap back to 0 after enough
 * matching ids, making a present id type look absent.
 */
static void ExamineIdProc (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
  BioseqPtr  bsp;
  Int2       i;
  BoolPtr    idTypes;
  SeqIdPtr   sip;

  if (sep == NULL || sep->data.ptrvalue == NULL || mydata == NULL) return;
  if (! IS_Bioseq (sep)) return;

  idTypes = (BoolPtr) mydata;
  bsp = (BioseqPtr) sep->data.ptrvalue;
  for (sip = bsp->id; sip != NULL; sip = sip->next) {
    i = (Int2) sip->choice;
    if (i >= 0 && i < NUM_SEQID) {
      idTypes [i] = TRUE;
    }
  }
}
26474 
OwnedByOtherDatabase(SeqEntryPtr sep,BoolPtr idTypes)26475 static Boolean OwnedByOtherDatabase (SeqEntryPtr sep, BoolPtr idTypes)
26476 
26477 {
26478   Int2  i;
26479 
26480   if (sep == NULL || idTypes == NULL) return FALSE;
26481   for (i = 0; i < NUM_SEQID; i++) {
26482     idTypes [i] = FALSE;
26483   }
26484   BioseqExplore (sep, (Pointer) idTypes, ExamineIdProc);
26485   if (! (idTypes [SEQID_GENBANK])) return TRUE;
26486   if (idTypes [SEQID_EMBL] || idTypes [SEQID_DDBJ]) return TRUE;
26487   if (! FindNucSeqEntry (sep)) return TRUE;
26488   return FALSE;
26489 }
26490 
AccessionToGi(CharPtr string)26491 static Int4 AccessionToGi (CharPtr string)
26492 {
26493    /*
26494    CharPtr str;
26495    LinkSetPtr lsp;
26496    Int4 gi;
26497 
26498    str = MemNew (StringLen (string) + 10);
26499    sprintf (str, "\"%s\" [ACCN]", string);
26500    lsp = EntrezTLEvalString (str, TYP_NT, -1, NULL, NULL);
26501    MemFree (str);
26502    if (lsp == NULL) return 0;
26503    if (lsp->num <= 0) {
26504        LinkSetFree (lsp);
26505        return 0;
26506    }
26507    gi = lsp->uids [0];
26508    LinkSetFree (lsp);
26509    return gi;
26510    */
26511    Int4      gi;
26512    SeqIdPtr  sip;
26513 
26514    sip = SeqIdFromAccessionDotVersion (string);
26515    if (sip == NULL) return 0;
26516    gi = GetGIForSeqId (sip);
26517    SeqIdFree (sip);
26518    return gi;
26519 }
26520 
/* Explore callback: sets the Boolean pointed to by mydata to TRUE when
 * the visited Bioseq has a history with a non-empty replaced_by_ids
 * list.  Non-Bioseq entries, Bioseqs without history, and a NULL mydata
 * leave everything unchanged.  index and indent are unused.
 */
static void LookForReplacedByCallback (SeqEntryPtr sep, Pointer mydata, Int4 index, Int2 indent)
{
  BioseqPtr  bioseq;
  BoolPtr    found = (BoolPtr) mydata;

  if (! IS_Bioseq (sep)) return;

  bioseq = (BioseqPtr) sep->data.ptrvalue;
  if (bioseq == NULL || bioseq->hist == NULL) return;

  if (bioseq->hist->replaced_by_ids != NULL && found != NULL) {
    *found = TRUE;
  }
}
26539 
26540 #ifdef USE_SMARTNET
26541 extern Pointer ReadFromDirSub (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
26542 extern Pointer ReadFromSmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
26543 extern Pointer ReadFromTPASmart (CharPtr accn, Uint2Ptr datatype, Uint2Ptr entityID);
26544 #endif
26545 
26546 
/* Opens a separate viewer on an already-registered entity.
 *
 * entityID - object manager entity to display.
 *
 * seqviewprocs.filepath is pointed at a local buffer for the duration of
 * the GatherProcLaunch call and reset to NULL afterwards.  The buffer is
 * initialized to an empty string so the viewer never reads indeterminate
 * bytes through that pointer.  On success the help window is scrolled to
 * "Editing the Record", the entity is set to be freed when its last view
 * closes, and it is marked dirty.  On failure a fatal message is posted.
 */
extern void LaunchDisplay (Uint2 entityID)
{
  Int2          handled;
  Char          str [32];

  str [0] = '\0';
  seqviewprocs.filepath = str;
  seqviewprocs.forceSeparateViewer = TRUE;
  handled = GatherProcLaunch (OMPROC_VIEW, FALSE, entityID, 1,
                              OBJ_BIOSEQ, 0, 0, OBJ_BIOSEQ, 0);
  seqviewprocs.filepath = NULL;
  ArrowCursor ();
  if (handled != OM_MSG_RET_DONE || handled == OM_MSG_RET_NOPROC) {
    Message (MSG_FATAL, "Unable to launch viewer.");
    return;
  } else {
    SendHelpScrollMessage (helpForm, "Editing the Record", NULL);
  }
  ObjMgrSetOptions (OM_OPT_FREE_IF_NO_VIEW, entityID);
  ObjMgrSetDirtyFlag (entityID, TRUE);
}
26567 
/* Fetches the record identified by sip and opens it in a separate viewer.
 *
 * sip - Seq-id passed to PubSeqSynchronousQueryId.
 *
 * Flow: fetch the entry; reject it if it is neither a Bioseq nor a
 * BioseqSet; if any Bioseq in it has been replaced, ask for confirmation;
 * register the data with the object manager; launch the viewer.
 *
 * Changes relative to the earlier version of this routine:
 *  - the buffer handed to seqviewprocs.filepath is initialized to an
 *    empty string instead of being left indeterminate;
 *  - when the fetched entry has an unrecognized type it is now freed and
 *    the cursor is restored before returning, matching the other
 *    early-exit paths.
 */
extern void DownloadAndDisplay (SeqIdPtr sip)
{
  MsgAnswer     ans;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Pointer       dataptr = NULL;
  Uint2         datatype;
  CharPtr       dbname;
  Uint2         entityID;
  Int2          handled;
  Boolean       idTypes [NUM_SEQID];
  Boolean       isReplaced = FALSE;
  SeqEntryPtr   sep;
  Char          str [32];

  str [0] = '\0';

  sep = PubSeqSynchronousQueryId (sip, 0, 0);
  if (sep == NULL) {
    ArrowCursor ();
    Message (MSG_OK, "Unable to find this record in the database.");
    return;
  }
  if (IS_Bioseq (sep)) {
    datatype = OBJ_BIOSEQ;
  } else if (IS_Bioseq_set (sep)) {
    datatype = OBJ_BIOSEQSET;
  } else {
    /* unusable entry: release it rather than dropping the pointer */
    SeqEntryFree (sep);
    ArrowCursor ();
    Message (MSG_OK, "Unable to find this record in the database.");
    return;
  }
  SeqEntryExplore (sep, (Pointer) (&isReplaced), LookForReplacedByCallback);
  if (isReplaced) {
    ans = Message (MSG_YN, "This record has been replaced.  Are you sure you want to edit it?");
    if (ans == ANS_NO) {
      SeqEntryFree (sep);
      ArrowCursor ();
      return;
    }
  }
  dataptr = (Pointer) sep->data.ptrvalue;

  if (dataptr != NULL) {
    entityID = ObjMgrRegister (datatype, dataptr);
    if (entityID > 0) {
      if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
          datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
        WatchCursor ();
        sep = GetTopSeqEntryForEntityID (entityID);
        if (sep == NULL) {
          /* no entry is associated with the data yet: create one, attach it, look it up again */
          sep = SeqEntryNew ();
          if (sep != NULL) {
            if (datatype == OBJ_BIOSEQ) {
              bsp = (BioseqPtr) dataptr;
              sep->choice = 1;
              sep->data.ptrvalue = bsp;
              SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
            } else if (datatype == OBJ_BIOSEQSET) {
              bssp = (BioseqSetPtr) dataptr;
              sep->choice = 2;
              sep->data.ptrvalue = bssp;
              SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
            } else {
              sep = SeqEntryFree (sep);
            }
          }
          sep = GetTopSeqEntryForEntityID (entityID);
        }
        /* NOTE(review): dbname is computed here but not used afterwards in this routine */
        if (sep != NULL && OwnedByOtherDatabase (sep, idTypes)) {
          dbname = NULL;
          if (idTypes [SEQID_EMBL]) {
            dbname = "EMBL";
          } else if (idTypes [SEQID_DDBJ]) {
            dbname = "DDBJ";
          }
        }
        seqviewprocs.filepath = str;
        seqviewprocs.forceSeparateViewer = TRUE;
        handled = GatherProcLaunch (OMPROC_VIEW, FALSE, entityID, 1,
                                    OBJ_BIOSEQ, 0, 0, OBJ_BIOSEQ, 0);
        seqviewprocs.filepath = NULL;
        ArrowCursor ();
        if (handled != OM_MSG_RET_DONE || handled == OM_MSG_RET_NOPROC) {
          Message (MSG_FATAL, "Unable to launch viewer.");
          SeqEntryFree (sep);
          return;
        } else {
          SendHelpScrollMessage (helpForm, "Editing the Record", NULL);
        }
        ObjMgrSetOptions (OM_OPT_FREE_IF_NO_VIEW, entityID);
        ObjMgrSetDirtyFlag (entityID, TRUE);
      } else {
        Message (MSG_ERROR, "Unable to process object type %d.", (int) datatype);
        ObjMgrDelete (datatype, dataptr);
        ArrowCursor ();
      }
    } else {
      ArrowCursor ();
    }
  } else {
    ArrowCursor ();
    Message (MSG_OK, "Unable to find this record in the database.");
  }
}
26672 
26673 
StrToULong(CharPtr str,Uint4Ptr longval)26674 static Boolean StrToULong (CharPtr str, Uint4Ptr longval)
26675 
26676 {
26677   Char           ch;
26678   Int2           i;
26679   Int2           len;
26680   Char           local [64];
26681   Boolean        nodigits;
26682   Boolean        rsult;
26683   unsigned long val;
26684 
26685   rsult = FALSE;
26686   if (longval != NULL) {
26687     *longval = (Uint4) 0;
26688   }
26689   len = (Int2) StringLen (str);
26690   if (len != 0) {
26691     rsult = TRUE;
26692     nodigits = TRUE;
26693     for (i = 0; i < len; i++) {
26694       ch = str [i];
26695       if (ch == ' ' || ch == '+' || ch == '-') {
26696       } else if (ch < '0' || ch > '9') {
26697         rsult = FALSE;
26698       } else {
26699         nodigits = FALSE;
26700       }
26701     }
26702     if (nodigits) {
26703       rsult = FALSE;
26704     }
26705     if (rsult && longval != NULL) {
26706       StringNCpy_0 (local, str, sizeof (local));
26707       if (sscanf (local, "%lu", &val) == 1) {
26708         *longval = val;
26709       }
26710     }
26711   }
26712   return rsult;
26713 }
26714 
26715 
/* "Retrieve" button handler for the fetch dialog.
 *
 * The text field is read as  identifier[,retcode[,flags]] : text after
 * the first comma is scanned as the retcode, text after the second comma
 * as the flags; both default to 0.  Depending on the radio group the
 * identifier is treated as an accession (value 1; a "ti|<digits>" form is
 * sent to PubSeqSynchronousQueryTI) or as a gi number.
 *
 * On success the record is registered with the object manager and either
 * handed to the submit-block form (non-indexer version, after the user
 * agrees to repackage) or opened in a separate viewer.  On failure the
 * fetch dialog is shown again, or the startup form if the dialog has
 * already been removed.
 *
 * Changes relative to the earlier version of this routine:
 *  - a result obtained from the "ti|" query is no longer overwritten
 *    (and leaked) by the general query that follows; the general query
 *    now runs only when nothing has been fetched yet;
 *  - an entry of unrecognized type is freed before returning;
 *  - once the dialog has been removed, the final "not found" path no
 *    longer touches the dialog or its data and returns to the startup
 *    form instead;
 *  - the cursor is restored on the empty-input path.
 */
static void DownloadProc (ButtoN b)
{
  CharPtr       accn = NULL;
  MsgAnswer     ans;
  BioseqPtr     bsp;
  BioseqSetPtr  bssp;
  Pointer       dataptr = NULL;
  Uint2         datatype = 0;
  CharPtr       dbname;
  Uint2         entityID;
  FetchFormPtr  ffp;
  Int4          flags = 0;
  Boolean       formRemoved = FALSE;
  Int2          handled;
  Boolean       idTypes [NUM_SEQID];
  Boolean       isReplaced = FALSE;
  Int2          retcode = 0;
  SeqEntryPtr   sep;
  Char          str [64];
  Uint4         tid;
  CharPtr       tmp1 = NULL;
  CharPtr       tmp2 = NULL;
  Int4          uid;
  long int      val;
  ForM          w;

  ffp = (FetchFormPtr) GetObjectExtra (b);
  if (ffp == NULL) return;
  w = ffp->form;
  Hide (w);
  WatchCursor ();
  Update ();
  GetTitle (ffp->accession, str, sizeof (str));
  TrimSpacesAroundString (str);
  if (StringHasNoText (str)) {
    ArrowCursor ();
    Message (MSG_OK, "Please enter an accession number or gi");
    Show (w);
    Select (w);
    Select (ffp->accession);
    return;
  }

  /* split off the optional ",retcode,flags" suffix */
  tmp1 = StringChr (str, ',');
  if (tmp1 != NULL) {
    *tmp1 = '\0';
    tmp1++;
    tmp2 = StringChr (tmp1, ',');
    if (tmp2 != NULL) {
      *tmp2 = '\0';
      tmp2++;
      if (StringDoesHaveText (tmp2) && sscanf (tmp2, "%ld", &val) == 1) {
        flags = (Int4) val;
      }
    }
    if (StringDoesHaveText (tmp1) && sscanf (tmp1, "%ld", &val) == 1) {
      retcode = (Int2) val;
    }
  }

  sep = NULL;
  uid = 0;
  tid = 0;
  if (GetValue (ffp->accntype) == 1) {
    if (StringNICmp (str, "ti|", 3) == 0 && StringIsAllDigits (str + 3)) {
      if (! StrToULong (str + 3, &tid)) {
        tid = 0;
      }
      if (tid > 0) {
        sep = PubSeqSynchronousQueryTI (tid, 0, 0);
      }
    } else {
      uid = AccessionToGi (str);
      accn = str;
    }
  } else {
    if (! StrToLong (str, &uid)) {
     uid = 0;
    }
  }
  /* keep a record already fetched by the ti query; otherwise query by gi or by text */
  if (sep == NULL) {
    if (uid > 0) {
      sep = PubSeqSynchronousQuery (uid, retcode, flags);
    } else {
      sep = PubSeqSynchronousQueryEx (uid, retcode, flags, str);
    }
  }
  if (uid > 0 || tid > 0 || sep != NULL) {
    if (sep == NULL) {
      ArrowCursor ();
      Message (MSG_OK, "Unable to find this record in the database.");
      Show (w);
      Select (w);
      Select (ffp->accession);
      return;
    }
    if (IS_Bioseq (sep)) {
      datatype = OBJ_BIOSEQ;
    } else if (IS_Bioseq_set (sep)) {
      datatype = OBJ_BIOSEQSET;
    } else {
      /* unusable entry: release it rather than dropping the pointer */
      SeqEntryFree (sep);
      ArrowCursor ();
      Message (MSG_OK, "Unable to find this record in the database.");
      Show (w);
      Select (w);
      Select (ffp->accession);
      return;
    }
    /* The dialog's data is registered with a cleanup procedure on the
       window, so w and ffp must be treated as gone from here on. */
    Remove (w);
    formRemoved = TRUE;
    SeqEntryExplore (sep, (Pointer) (&isReplaced), LookForReplacedByCallback);
    if (isReplaced) {
      ans = Message (MSG_YN, "This record has been replaced.  Are you sure you want to edit it?");
      if (ans == ANS_NO) {
        SeqEntryFree (sep);
        Show (startupForm);
        Select (startupForm);
        ArrowCursor ();
        return;
      }
    }
    dataptr = (Pointer) sep->data.ptrvalue;
  } else if (! StringHasNoText (accn)) {
#ifdef USE_SMARTNET
    if (accn != NULL) {
      dataptr = ReadFromTPASmart (accn, &datatype, NULL);
      if (dataptr == NULL) {
        dataptr = ReadFromSmart (accn, &datatype, NULL);
        if (dataptr == NULL) {
          dataptr = ReadFromDirSub (accn, &datatype, NULL);
        }
      }
    }
#endif
  }
  if (dataptr != NULL) {
    entityID = ObjMgrRegister (datatype, dataptr);
    SeqEntrySetScope (NULL);
    if (entityID > 0) {
      if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
          datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
        WatchCursor ();
        sep = GetTopSeqEntryForEntityID (entityID);
        if (sep == NULL) {
          /* no entry is associated with the data yet: create one, attach it, look it up again */
          sep = SeqEntryNew ();
          if (sep != NULL) {
            if (datatype == OBJ_BIOSEQ) {
              bsp = (BioseqPtr) dataptr;
              sep->choice = 1;
              sep->data.ptrvalue = bsp;
              SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
            } else if (datatype == OBJ_BIOSEQSET) {
              bssp = (BioseqSetPtr) dataptr;
              sep->choice = 2;
              sep->data.ptrvalue = bssp;
              SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
            } else {
              sep = SeqEntryFree (sep);
            }
          }
          sep = GetTopSeqEntryForEntityID (entityID);
        }
        /* NOTE(review): dbname is computed here but not used afterwards in this routine */
        if (sep != NULL && OwnedByOtherDatabase (sep, idTypes)) {
          dbname = NULL;
          if (idTypes [SEQID_EMBL]) {
            dbname = "EMBL";
          } else if (idTypes [SEQID_DDBJ]) {
            dbname = "DDBJ";
          }
        }
        if (datatype != OBJ_SEQSUB && uid > 0) {
          ArrowCursor ();
          Update ();
          if (!indexerVersion && Message (MSG_YN, repackageMsg) == ANS_YES) {
            globalEntityID = entityID;
            globalsep = sep;
            StringNCpy_0 (globalPath, str, sizeof (globalPath));
            WatchCursor ();
            Update ();
            w = CreateSubmitBlockForm (-50, -33, "Submitting Authors",
                                       FALSE, TRUE, NULL, JustRegisterSeqEntryBtn,
                                       AddSubmitBlockToSeqEntry);
            ArrowCursor ();
            if (w != NULL) {
              Show (w);
              Select (w);
              SendHelpScrollMessage (helpForm, "Submitting Authors Form", NULL);
              return;
            } else {
              Message (MSG_FATAL, "Unable to create window.");
              SeqEntryFree (sep);
              Show (startupForm);
              Select (startupForm);
              return;
            }
          }
        }
        seqviewprocs.filepath = str;
        seqviewprocs.forceSeparateViewer = TRUE;
        handled = GatherProcLaunch (OMPROC_VIEW, FALSE, entityID, 1,
                                    OBJ_BIOSEQ, 0, 0, OBJ_BIOSEQ, 0);
        seqviewprocs.filepath = NULL;
        ArrowCursor ();
        if (handled != OM_MSG_RET_DONE || handled == OM_MSG_RET_NOPROC) {
          Message (MSG_FATAL, "Unable to launch viewer.");
          SeqEntryFree (sep);
          Show (startupForm);
          Select (startupForm);
          return;
        } else {
          SendHelpScrollMessage (helpForm, "Editing the Record", NULL);
        }
        ObjMgrSetOptions (OM_OPT_FREE_IF_NO_VIEW, entityID);
        ObjMgrSetDirtyFlag (entityID, TRUE);
      } else {
        Message (MSG_ERROR, "Unable to process object type %d.", (int) datatype);
        ObjMgrDelete (datatype, dataptr);
        Show (startupForm);
        Select (startupForm);
        ArrowCursor ();
      }
    } else {
      Show (startupForm);
      Select (startupForm);
      ArrowCursor ();
    }
  } else {
    ArrowCursor ();
    Message (MSG_OK, "Unable to find this record in the database.");
    if (formRemoved) {
      /* the dialog is gone: drop the empty entry and fall back to the startup form */
      SeqEntryFree (sep);
      Show (startupForm);
      Select (startupForm);
    } else {
      Show (w);
      Select (w);
      Select (ffp->accession);
    }
  }
}
26980 
26981 
/* "Cancel" button handler for the fetch dialog: runs the standard cancel
 * procedure on the button, then shows and selects the startup form.
 */
static void CancelFetchProc (ButtoN b)
{
  StdCancelButtonProc (b);

  Show (startupForm);
  Select (startupForm);
}
26989 
26990 
/* Text-change handler for the identifier field of the fetch dialog.
 *
 * A blank field disables the accept button.  Otherwise the button is
 * enabled and the radio group is set from the text before the first
 * comma (trimmed): 2 when StringIsAllDigits accepts it, 1 when it does
 * not.
 */
static void FetchTextProc (TexT t)
{
  FetchFormPtr  form_data;
  CharPtr       comma;
  Char          text [64];

  form_data = (FetchFormPtr) GetObjectExtra (t);
  if (form_data == NULL) return;

  GetTitle (t, text, sizeof (text));
  if (StringHasNoText (text)) {
    SafeDisable (form_data->accept);
    return;
  }

  SafeEnable (form_data->accept);

  /* only the part before the first comma decides the identifier kind */
  comma = StringChr (text, ',');
  if (comma != NULL) {
    *comma = '\0';
  }
  TrimSpacesAroundString (text);

  if (StringIsAllDigits (text)) {
    SafeSetValue (form_data->accntype, 2);
  } else {
    SafeSetValue (form_data->accntype, 1);
  }
}
27019 
/* Builds and shows the "Download From Entrez" dialog.
 *
 * actn    - action for the default "Retrieve" button.
 * cancel  - action for the "Cancel" button.
 *
 * The startup form is hidden first.  If the dialog state cannot be
 * allocated, the startup form is shown again and nothing else happens.
 * Otherwise the window is created with a Type prompt, an Accession/GI
 * radio group (Accession preselected), a text field wired to
 * FetchTextProc, and the two buttons; "Retrieve" starts out disabled.
 */
extern void CommonFetchFromNet (BtnActnProc actn, BtnActnProc cancel)
{
  FetchFormPtr       form_data;
  WindoW             win;
  GrouP              fields;
  GrouP              buttons;
  StdEditorProcsPtr  editor_procs;

  Hide (startupForm);
  Update ();

  form_data = MemNew (sizeof (FetchForm));
  if (form_data == NULL) {
    Show (startupForm);
    Select (startupForm);
    return;
  }

  /* window and its attached state */
  win = FixedWindow (-50, -33, -10, -10, "Download From Entrez", NULL);
  SetObjectExtra (win, form_data, StdCleanupFormProc);
  form_data->form = (ForM) win;
  form_data->formmessage = FetchFormMessage;

#ifndef WIN_MAC
  CreateSqnInitialFormMenus (win);
#endif

  editor_procs = (StdEditorProcsPtr) GetAppProperty ("StdEditorForm");
  if (editor_procs != NULL) {
    form_data->appmessage = editor_procs->handleMessages;
  }
  SetGroupSpacing (win, 10, 10);

  /* identifier type selector and text field */
  fields = HiddenGroup (win, -3, 0, NULL);
  StaticPrompt (fields, "Type", 0, stdLineHeight, programFont, 'l');
  form_data->accntype = HiddenGroup (fields, 4, 0, NULL);
  RadioButton (form_data->accntype, "Accession");
  RadioButton (form_data->accntype, "GI");
  SetValue (form_data->accntype, 1);
  form_data->accession = DialogText (fields, "", 6, FetchTextProc);
  SetObjectExtra (form_data->accession, form_data, NULL);

  /* action buttons */
  buttons = HiddenGroup (win, 4, 0, NULL);
  SetGroupSpacing (buttons, 10, 2);
  form_data->accept = DefaultButton (buttons, "Retrieve", actn);
  SetObjectExtra (form_data->accept, form_data, NULL);
  Disable (form_data->accept);
  PushButton (buttons, "Cancel", cancel);

  AlignObjects (ALIGN_CENTER, (HANDLE) fields, (HANDLE) buttons, NULL);
  RealizeWindow (win);

  if (editor_procs != NULL) {
    SetActivate (win, editor_procs->activateForm);
  }
  Select (form_data->accession);
  Show (win);
  Select (win);
  Update ();
}
27080 
/* Button handler that opens the fetch dialog with the standard download
 * and cancel actions.  The button argument itself is not used.
 */
extern void FetchFromNet (ButtoN b)
{
  CommonFetchFromNet (DownloadProc, CancelFetchProc);
}
27086 
27087 /*#else
27088 #define FetchFromNet NULL
27089 #endif*/
27090 
27091 /*
27092 static Boolean FindPerfectSubMatch (CharPtr prot, CharPtr trans, Int4 start,
27093                                     Int4 len, Uint1 frame, Int2 strand,
27094                                     Int4Ptr fromPtr, Int4Ptr toPtr)
27095 
27096 {
27097   int      ch;
27098   Int2     d [256];
27099   Int4     from;
27100   int      i;
27101   int      j;
27102   int      k;
27103   size_t   protLen;
27104   Boolean  rsult;
27105   Int4     to;
27106   size_t   transLen;
27107 
27108   rsult = FALSE;
27109   from = 0;
27110   to = 0;
27111   if (prot != NULL && trans != NULL) {
27112     protLen = StringLen (prot);
27113     transLen = StringLen (trans);
27114     if (protLen <= transLen) {
27115       for (ch = 0; ch < 256; ch++) {
27116         d [ch] = protLen;
27117       }
27118       for (j = 0; j < protLen - 1; j++) {
27119         d [(int) prot [j]] = protLen - j - 1;
27120       }
27121       i = protLen;
27122       do {
27123         j = protLen;
27124         k = i;
27125         do {
27126           k--;
27127           j--;
27128         } while (j >= 0 && prot [j] == trans [k]);
27129         if (j >= 0) {
27130           i += d [(int) trans [i - 1]];
27131         }
27132       } while (j >= 0 && i <= transLen);
27133       if (j < 0) {
27134         i -= protLen;
27135         from = (long) (i * 3 + (frame - 1));
27136         to = from + 3 * protLen;
27137         if (trans [i + protLen] == '*') {
27138           to += 3;
27139         }
27140         if (strand == Seq_strand_plus) {
27141           from += 1;
27142         } else if (strand == Seq_strand_minus) {
27143           from = len - from;
27144           to = len - to + 1;
27145         }
27146         rsult = TRUE;
27147       }
27148     }
27149   }
27150   if (fromPtr != NULL) {
27151     *fromPtr = from + start;
27152   }
27153   if (toPtr != NULL) {
27154     *toPtr = to + start;
27155   }
27156   return rsult;
27157 }
27158 
27159 static Boolean CheckOneFrame (BioseqPtr bsp, Int4 start, Int4 len,
27160                               CharPtr prot, Int2 gencode,
27161                               Uint1 frame, Int2 strand,
27162                               Int4Ptr fromPtr, Int4Ptr toPtr)
27163 
27164 {
27165   ByteStorePtr  bs;
27166   Char          ch;
27167   ValNodePtr    code;
27168   CdRegionPtr   crp;
27169   CharPtr       ptr;
27170   Boolean       rsult;
27171   SeqFeatPtr    sfp;
27172   CharPtr       trans;
27173   ValNodePtr    vnp;
27174 
27175   rsult = FALSE;
27176   if (bsp != NULL && gencode > 0) {
27177     sfp = SeqFeatNew ();
27178     if (sfp != NULL) {
27179       sfp->data.choice = SEQFEAT_CDREGION;
27180       crp = CdRegionNew ();
27181       sfp->data.value.ptrvalue = (Pointer) crp;
27182       if (crp != NULL) {
27183         crp->orf = FALSE;
27184         crp->conflict = FALSE;
27185         crp->frame = frame;
27186         crp->gaps = 0;
27187         crp->mismatch = 0;
27188         crp->stops = 0;
27189         code = ValNodeNew (NULL);
27190         if (code != NULL) {
27191           code->choice = 254;
27192           vnp = ValNodeNew (NULL);
27193           code->data.ptrvalue = vnp;
27194           if (vnp != NULL) {
27195             vnp->choice = 2;
27196             vnp->data.intvalue = (Int4) gencode;
27197           }
27198         }
27199         crp->genetic_code = code;
27200         crp->code_break = NULL;
27201         AddIntToSeqFeat (sfp, start, start + len - 1, bsp, -1, -1, strand);
27202         trans = NULL;
27203         bs = ProteinFromCdRegion (sfp, TRUE);
27204         if (bs != NULL) {
27205           trans = BSMerge (bs, NULL);
27206           BSFree (bs);
27207         }
27208         if (trans != NULL) {
27209           ptr = trans;
27210           ch = *ptr;
27211           while (ch != '\0') {
27212             *ptr = TO_UPPER (ch);
27213             ptr++;
27214             ch = *ptr;
27215           }
27216           if (trans [0] == '-') {
27217             trans [0] = prot [0];
27218           }
27219           rsult = FindPerfectSubMatch (prot, trans, start, len,
27220                                        frame, strand, fromPtr, toPtr);
27221           MemFree (trans);
27222         }
27223       }
27224       SeqFeatFree (sfp);
27225     }
27226   }
27227   return rsult;
27228 }
27229 
27230 #define PREDICT_BLOCK_SIZE 30000L
27231 
27232 static SeqLocPtr FindSingleCodingInterval (BioseqPtr nuc, BioseqPtr prot, Int2 genCode)
27233 
27234 {
27235   Int4        cdsFrom;
27236   Int4        cdsTo;
27237   Char        ch;
27238   Int4        cntr;
27239   Uint1       frame;
27240   Int4        from;
27241   Int4        incr;
27242   Int4        len;
27243   Boolean     matched;
27244   size_t      protLen;
27245   CharPtr     protstr;
27246   CharPtr     ptr;
27247   SeqFeatPtr  sfp;
27248   SeqLocPtr   slp;
27249   Int4        start;
27250   Int2        strand;
27251   Int4        tmp;
27252   Int4        to;
27253 
27254   slp = NULL;
27255   if (nuc != NULL && prot != NULL) {
27256     cdsFrom = 0;
27257     cdsTo = 0;
27258     strand = Seq_strand_unknown;
27259     protstr = NULL;
27260     if (prot->length > 0) {
27261       protstr = BSMerge (prot->seq_data, NULL);
27262       if (protstr != NULL) {
27263         ptr = protstr;
27264         ch = *ptr;
27265         while (ch != '\0') {
27266           *ptr = TO_UPPER (ch);
27267           ptr++;
27268           ch = *ptr;
27269         }
27270         protLen = StringLen (protstr);
27271         matched = FALSE;
27272         for (frame = 1; frame <= 3 && (! matched); frame++) {
27273           strand = Seq_strand_plus;
27274           start = 0;
27275           cntr = nuc->length;
27276           len = MIN (cntr, (Int4) (PREDICT_BLOCK_SIZE + (Int4) protLen * 3L));
27277           while (len > 0 && (! matched)) {
27278             incr = MIN (cntr, PREDICT_BLOCK_SIZE);
27279             matched = CheckOneFrame (nuc, start, len, protstr, genCode, frame,
27280                                      strand, &cdsFrom, &cdsTo);
27281             start += incr;
27282             cntr -= incr;
27283             len = MIN (cntr, (Int4) (PREDICT_BLOCK_SIZE + (Int4) protLen * 3L));
27284           }
27285         }
27286         for (frame = 1; frame <= 3 && (! matched); frame++) {
27287           strand = Seq_strand_minus;
27288           start = 0;
27289           cntr = nuc->length;
27290           len = MIN (cntr, (Int4) (PREDICT_BLOCK_SIZE + (Int4) protLen * 3L));
27291           while (len > 0 && (! matched)) {
27292             incr = MIN (cntr, PREDICT_BLOCK_SIZE);
27293             matched = CheckOneFrame (nuc, start, len, protstr, genCode, frame,
27294                                      strand, &cdsFrom, &cdsTo);
27295             start += incr;
27296             cntr -= incr;
27297             len = MIN (cntr, (Int4) (PREDICT_BLOCK_SIZE + (Int4) protLen * 3L));
27298           }
27299         }
27300         if (matched) {
27301           sfp = SeqFeatNew ();
27302           if (sfp != NULL) {
27303             from = cdsFrom - 1;
27304             to = cdsTo - 1;
27305             if (from > to) {
27306               tmp = from;
27307               from = to;
27308               to = tmp;
27309             }
27310             AddIntToSeqFeat (sfp, from, to, nuc, -1, -1, strand);
27311             slp = sfp->location;
27312             sfp->location = NULL;
27313           }
27314           SeqFeatFree (sfp);
27315         }
27316       }
27317       MemFree (protstr);
27318     }
27319   }
27320   return slp;
27321 }
27322 */
27323 
FindBioseqByIDString(CharPtr seqid,SeqEntryPtr sep)27324 static BioseqPtr FindBioseqByIDString (
27325   CharPtr    seqid,
27326   SeqEntryPtr sep
27327 )
27328 {
27329   BioseqSetPtr      bssp;
27330   BioseqPtr         bsp;
27331   CharPtr           str;
27332 
27333   if (sep == NULL || sep->data.ptrvalue == NULL) return FALSE;
27334   if (IS_Bioseq_set (sep)) {
27335     bssp = (BioseqSetPtr) sep->data.ptrvalue;
27336     /* this also delves into nuc-prot sets */
27337     if (bssp != NULL && (bssp->_class == 7 ||
27338                          (IsPopPhyEtcSet (bssp->_class)) ||
27339                          bssp->_class == 1)) {
27340       for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
27341       {
27342         bsp = FindBioseqByIDString (seqid, sep);
27343         if (bsp != NULL) return bsp;
27344       }
27345       return NULL;
27346     }
27347   }
27348   if (!IS_Bioseq (sep)) return FALSE;
27349 
27350   bsp = (BioseqPtr) sep->data.ptrvalue;
27351   if (bsp == NULL) return NULL;
27352   str = SeqIdWholeLabel (bsp->id, PRINTID_REPORT);
27353   if (StringCmp (str, seqid) != 0) {
27354     bsp = NULL;
27355   }
27356   str = MemFree (str);
27357   return bsp;
27358 }
27359 
27360 
/* Pairs a Bioseq with the list of accession strings read for it from a file.
 * TPAFromFileFree releases acc_list and its data, but leaves bsp alone.
 */
typedef struct tpafromfile {
  BioseqPtr bsp;        /* Bioseq the accessions are to be applied to */
  ValNodePtr acc_list;  /* list of accession strings */
} TPAFromFileData, PNTR TPAFromFilePtr;
27365 
27366 
TPAFromFileNew(BioseqPtr bsp,ValNodePtr acc_list)27367 static TPAFromFilePtr TPAFromFileNew (BioseqPtr bsp, ValNodePtr acc_list)
27368 {
27369   TPAFromFilePtr tpfp;
27370 
27371   tpfp = (TPAFromFilePtr) MemNew (sizeof (TPAFromFileData));
27372   tpfp->bsp = bsp;
27373   tpfp->acc_list = acc_list;
27374   return tpfp;
27375 }
27376 
/* Releases a TPAFromFileData record together with its accession list
 * (nodes and data).  The Bioseq it refers to is not freed.
 * Returns the result of MemFree for a non-NULL record, NULL for NULL input.
 */
static TPAFromFilePtr TPAFromFileFree (TPAFromFilePtr tpfp)
{
  if (tpfp == NULL) return tpfp;

  tpfp->acc_list = ValNodeFreeData (tpfp->acc_list);
  return MemFree (tpfp);
}
27386 
27387 
/* Frees a ValNode list whose data pointers are TPAFromFileData records:
 * each record is released with TPAFromFileFree, then its node is freed.
 * Always returns NULL.
 */
static ValNodePtr TPAFromFileListFree (ValNodePtr vnp)
{
  ValNodePtr following;

  for (; vnp != NULL; vnp = following)
  {
    /* detach the node first so that only this one node is freed */
    following = vnp->next;
    vnp->next = NULL;
    vnp->data.ptrvalue = TPAFromFileFree (vnp->data.ptrvalue);
    ValNodeFree (vnp);
  }
  return vnp;
}
27402 
27403 
GetTPAAssembly(BioseqPtr bsp)27404 NLM_EXTERN UserObjectPtr GetTPAAssembly (BioseqPtr bsp)
27405 {
27406   SeqDescrPtr       sdp;
27407   SeqMgrDescContext context;
27408   UserObjectPtr     uop = NULL;
27409 
27410   sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &context);
27411   while (sdp != NULL && uop == NULL)
27412   {
27413     uop = (UserObjectPtr)sdp->data.ptrvalue;
27414     if (uop != NULL && uop->type != NULL && StringICmp (uop->type->str, "TpaAssembly") == 0)
27415     {
27416       return uop;
27417     }
27418     else
27419     {
27420       uop = NULL;
27421     }
27422     sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &context);
27423   }
27424   return uop;
27425 }
27426 
27427 
HasTPAAccessions(BioseqPtr bsp)27428 static Boolean HasTPAAccessions (BioseqPtr bsp)
27429 {
27430   UserObjectPtr uop;
27431   Boolean       found = FALSE;
27432   UserFieldPtr  ufp;
27433 
27434   uop = GetTPAAssembly (bsp);
27435   if (uop != NULL)
27436   {
27437     ufp = uop->data;
27438     while (ufp != NULL && ufp->choice != 11)
27439     {
27440       ufp = ufp->next;
27441     }
27442     if (ufp != NULL)
27443     {
27444       found = TRUE;
27445     }
27446   }
27447   return found;
27448 }
27449 
27450 
ReadTPAAccessionNumbersFile(FILE * fp,SeqEntryPtr sep,BoolPtr replace)27451 static ValNodePtr ReadTPAAccessionNumbersFile (FILE *fp, SeqEntryPtr sep, BoolPtr replace)
27452 {
27453   LogInfoPtr notfound_lip, alreadyhas_lip;
27454   Boolean    need_seqid;
27455   ValNodePtr tpalist = NULL;
27456   BioseqPtr      bsp;
27457   Char          str [8192];
27458   size_t        len = 8192;
27459   Char          seqid[100];
27460   Int4          seqid_len;
27461   CharPtr       cp;
27462   CharPtr       acc_end;
27463   Boolean       found_end;
27464   ValNodePtr    acc_list;
27465   Boolean       found_other_acc = FALSE;
27466   MsgAnswer     ans;
27467   SeqIdPtr      sip;
27468 
27469   notfound_lip = OpenLog ("Accessions in Table Not Found in Record");
27470   alreadyhas_lip = OpenLog ("Accessions that Already Have TPA Acccesion Numbers");
27471 
27472   need_seqid = TRUE;
27473   acc_list = NULL;
27474   ReadLine (fp, str, len);
27475   while (Nlm_fileDone)
27476   {
27477     cp = str;
27478     if (strlen (str) == 0)
27479     {
27480       ReadLine (fp, str, len);
27481       continue;
27482     }
27483     if (need_seqid)
27484     {
27485       seqid_len = StringCSpn (str, " \t");
27486       if (seqid_len > 0)
27487       {
27488         StringNCpy (seqid, str, seqid_len);
27489         seqid [seqid_len] = 0;
27490         need_seqid = FALSE;
27491       }
27492       cp = str + seqid_len + 1;
27493     }
27494     if (need_seqid)
27495     {
27496       ReadLine (fp, str, len);
27497       continue;
27498     }
27499     if (str [strlen (str) - 1] != ',')
27500     {
27501       need_seqid = TRUE;
27502     }
27503 
27504     found_end = FALSE;
27505     while (*cp != 0)
27506     {
27507       if (*cp == ' ' || *cp == ',' || *cp == '\t')
27508       {
27509         cp++;
27510       }
27511       else
27512       {
27513         acc_end = cp + 1;
27514         while (*acc_end != 0 && *acc_end != ',')
27515         {
27516           acc_end++;
27517         }
27518         if (*acc_end == 0)
27519         {
27520           found_end = TRUE;
27521         }
27522         else
27523         {
27524           *acc_end = 0;
27525         }
27526         ValNodeAddStr (&acc_list, 0, StringSave (cp));
27527         if (found_end)
27528         {
27529           cp = acc_end;
27530         }
27531         else
27532         {
27533           cp = acc_end + 1;
27534         }
27535       }
27536     }
27537 
27538     if (need_seqid == TRUE)
27539     {
27540       sip = CreateSeqIdFromText (seqid, sep);
27541       bsp = BioseqFind (sip);
27542       sip = SeqIdFree (sip);
27543       if (bsp == NULL) {
27544         bsp = FindBioseqByIDString (seqid, sep);
27545       }
27546       if (bsp == NULL)
27547       {
27548         fprintf (notfound_lip->fp,
27549                  "Unable to update accession numbers for %s (not found)\n",
27550                  seqid);
27551         notfound_lip->data_in_log = TRUE;
27552         acc_list = ValNodeFreeData (acc_list);
27553       }
27554       else
27555       {
27556         ValNodeAddPointer (&tpalist, 0, TPAFromFileNew (bsp, acc_list));
27557         if (HasTPAAccessions (bsp))
27558         {
27559           fprintf (alreadyhas_lip->fp,
27560                    "Already have accessions for %s\n",
27561                    seqid);
27562           alreadyhas_lip->data_in_log = TRUE;
27563         }
27564       }
27565       acc_list = NULL;
27566     }
27567 
27568     ReadLine (fp, str, len);
27569   }
27570   if (acc_list != NULL)
27571   {
27572     bsp = FindBioseqByIDString (seqid, sep);
27573     if (bsp == NULL)
27574     {
27575       fprintf (notfound_lip->fp,
27576                "Unable to update accession numbers for %s (not found)\n",
27577                seqid);
27578       notfound_lip->data_in_log = TRUE;
27579       acc_list = ValNodeFreeData (acc_list);
27580     }
27581     else
27582     {
27583       ValNodeAddPointer (&tpalist, 0, TPAFromFileNew (bsp, acc_list));
27584       if (GetTPAAssembly (bsp) != NULL)
27585       {
27586         fprintf (alreadyhas_lip->fp,
27587                  "Already have accessions for %s\n",
27588                  seqid);
27589         alreadyhas_lip->data_in_log = TRUE;
27590       }
27591     }
27592     acc_list = NULL;
27593   }
27594 
27595   CloseLog (notfound_lip);
27596   notfound_lip = FreeLog (notfound_lip);
27597   found_other_acc = alreadyhas_lip->data_in_log;
27598   CloseLog (alreadyhas_lip);
27599   alreadyhas_lip = FreeLog (alreadyhas_lip);
27600 
27601   if (tpalist == NULL)
27602   {
27603     Message (MSG_ERROR, "Unable to read accession lists from file!");
27604   }
27605   else if (replace != NULL)
27606   {
27607     if (found_other_acc)
27608     {
27609       ans = Message (MSG_YNC, "Some accessions already have TPA accession numbers.  Remove these first?");
27610       if (ans == ANS_CANCEL)
27611       {
27612         tpalist = TPAFromFileListFree (tpalist);
27613       }
27614       else if (ans == ANS_YES)
27615       {
27616         *replace = TRUE;
27617       }
27618       else
27619       {
27620         *replace = FALSE;
27621       }
27622     }
27623     else
27624     {
27625       *replace = FALSE;
27626     }
27627   }
27628 
27629   return tpalist;
27630 }
27631 
27632 
RemoveOldTPAAccessions(UserObjectPtr uop)27633 NLM_EXTERN void RemoveOldTPAAccessions (UserObjectPtr uop)
27634 {
27635   UserFieldPtr  ufp, ufp_next, ufp_prev = NULL;
27636   ObjectIdPtr   oip;
27637 
27638   if (uop == NULL) return;
27639   oip = uop->type;
27640   if (oip == NULL || StringICmp (oip->str, "TpaAssembly") != 0) return;
27641 
27642   ufp = uop->data;
27643   while (ufp != NULL)
27644   {
27645     ufp_next = ufp->next;
27646     if (ufp->choice == 11)
27647     {
27648       if (ufp_prev == NULL)
27649       {
27650         uop->data = ufp_next;
27651       }
27652       else
27653       {
27654         ufp_prev->next = ufp_next;
27655       }
27656       ufp->next = NULL;
27657       ufp = UserFieldFree (ufp);
27658     }
27659     else
27660     {
27661       ufp_prev = ufp;
27662     }
27663     ufp = ufp_next;
27664   }
27665 }
27666 
27667 /* Need to get list of accessions from file, determine which ones are in the table,
27668  * and determine if any already have TPA accessions.
27669  * If any already have TPA accessions, ask if this should add or replace.
27670  */
LoadTPAAccessionNumbersFromFile(IteM i)27671 extern void LoadTPAAccessionNumbersFromFile (
27672   IteM i
27673 )
27674 {
27675   BaseFormPtr   bfp;
27676   SeqEntryPtr   sep;
27677   Char          path [PATH_MAX];
27678   FILE          *fp;
27679 
27680   ValNodePtr    tpalist, vnp, vnp_acc;
27681   Boolean       replace = FALSE;
27682   UserObjectPtr uop;
27683   SeqDescrPtr   sdp;
27684   TPAFromFilePtr tpfp;
27685 
27686 #ifdef WIN_MAC
27687   bfp = currentFormDataPtr;
27688 #else
27689   bfp = GetObjectExtra (i);
27690 #endif
27691   if (bfp == NULL) return;
27692   sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
27693   if (sep == NULL) return;
27694 
27695   path [0] = '\0';
27696   if (! GetInputFileName (path, sizeof (path), NULL, "TEXT")) return;
27697 
27698   fp = FileOpen (path, "r");
27699   if (fp == NULL) return;
27700 
27701   tpalist = ReadTPAAccessionNumbersFile (fp, sep, &replace);
27702   FileClose (fp);
27703 
27704   if (tpalist != NULL)
27705   {
27706     for (vnp = tpalist; vnp != NULL; vnp = vnp->next)
27707     {
27708       tpfp = (TPAFromFilePtr) vnp->data.ptrvalue;
27709       uop = GetTPAAssembly(tpfp->bsp);
27710       if (uop == NULL)
27711       {
27712         sdp = CreateNewDescriptor (SeqMgrGetSeqEntryForData (tpfp->bsp), Seq_descr_user);
27713         uop = CreateTpaAssemblyUserObject ();
27714         sdp->data.ptrvalue = uop;
27715       }
27716 
27717       if (replace)
27718       {
27719         RemoveOldTPAAccessions (uop);
27720       }
27721       for (vnp_acc = tpfp->acc_list; vnp_acc != NULL; vnp_acc = vnp_acc->next)
27722       {
27723         AddAccessionToTpaAssemblyUserObject (uop, vnp_acc->data.ptrvalue, 0, 0);
27724       }
27725     }
27726     tpalist = TPAFromFileListFree (tpalist);
27727 
27728     ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
27729     ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
27730     ArrowCursor ();
27731     Update ();
27732   }
27733 }
27734 
/* Appends one Seq-id per accession in acc_list to the replace_ids list of
 * bsp's history, creating the history when bsp has none.
 *
 * Each accession string is copied into a new TextSeqId.  The Seq-id choice
 * is SEQID_EMBL or SEQID_DDBJ when WHICH_db_accession classifies the
 * accession that way, and SEQID_GENBANK otherwise.
 *
 * If an allocation fails, the IDs added so far are kept and processing
 * stops.  A history that ends up completely empty is freed again.
 */
static void AddHistory (
  BioseqPtr  bsp,
  ValNodePtr acc_list
)
{
  SeqHistPtr      hist;
  ValNodePtr      vnp;
  SeqIdPtr        sip;
  TextSeqIdPtr    tsip;
  Uint4           whichdb;
  Char            prefix [20];

  if (bsp == NULL || acc_list == NULL) return;
  hist = bsp->hist;
  if (hist == NULL)
  {
    hist = SeqHistNew ();
    if (hist == NULL) return;
    bsp->hist = hist;
  }
  for (vnp = acc_list; vnp != NULL; vnp = vnp->next) {
    /* on allocation failure leave the loop rather than the function, so
     * that the empty-history cleanup below still runs */
    tsip = TextSeqIdNew ();
    if (tsip == NULL) break;
    tsip->accession = StringSave (vnp->data.ptrvalue);

    sip = ValNodeNew (hist->replace_ids);
    if (sip == NULL) {
      /* nothing owns tsip yet, so release it here */
      tsip = TextSeqIdFree (tsip);
      break;
    }
    if (hist->replace_ids == NULL) {
      hist->replace_ids = sip;
    }

    sip->data.ptrvalue = (Pointer) tsip;

    StringNCpy_0 (prefix, (CharPtr) vnp->data.ptrvalue, sizeof (prefix));
    whichdb = WHICH_db_accession (prefix);
    if (ACCN_IS_EMBL (whichdb)) {
      sip->choice = SEQID_EMBL;
    } else if (ACCN_IS_DDBJ (whichdb)) {
      sip->choice = SEQID_DDBJ;
    } else {
      sip->choice = SEQID_GENBANK;
    }
  }
  /* do not leave behind a history that carries no information */
  if (hist != NULL
    && hist->assembly == NULL
    && hist->replace_date == NULL
    && hist->replace_ids == NULL
    && hist->replaced_by_date == NULL
    && hist->replaced_by_ids == NULL
    && hist->deleted_date == NULL
    && ! hist->deleted)
  {
      bsp->hist = SeqHistFree (bsp->hist);
  }
}
27790 
/* Reports whether seqid names bsp.
 *
 * Every ID of bsp is examined.  Unless it is a local ID and AllowLocal is
 * FALSE, its SeqIdWholeLabel text (cut off at the first '.' when that is
 * not the first character) is compared with seqid.  For general IDs the
 * tag string of the Dbtag is compared as well.  Any exact match gives TRUE.
 */
static Boolean DoIDsMatch (CharPtr seqid, BioseqPtr bsp, Boolean AllowLocal)
{
  SeqIdPtr     id;
  CharPtr      label;
  Int4         dot_pos;
  DbtagPtr     general;
  Boolean      matched = FALSE;

  if (bsp == NULL) return FALSE;

  for (id = bsp->id; id != NULL; id = id->next)
  {
    if (AllowLocal || id->choice != SEQID_LOCAL)
    {
      label = SeqIdWholeLabel (id, PRINTID_REPORT);
      dot_pos = StringCSpn (label, ".");
      if (dot_pos > 0)
      {
        label [dot_pos] = 0;
      }
      if (StringCmp (label, seqid) == 0)
      {
        matched = TRUE;
      }
      label = MemFree (label);
    }
    if (id->choice == SEQID_GENERAL)
    {
      general = (DbtagPtr) id->data.ptrvalue;
      if (general != NULL && general->tag != NULL
          && StringCmp (seqid, general->tag->str) == 0)
      {
        matched = TRUE;
      }
    }
  }
  return matched;
}
27823 
/* Finds the Bioseq in sep that seqid names (see DoIDsMatch, local IDs
 * allowed) and appends acc_list to the extra_accessions of its GenBank
 * block, creating the descriptor and the block when needed.  When add_hist
 * is set the accessions are also passed to AddHistory.
 *
 * Sets of class 7 or 1, or accepted by IsPopPhyEtcSet, are searched member
 * by member; the search stops at the first member that succeeds.
 *
 * Returns TRUE when acc_list was attached (the GenBank block then holds the
 * nodes), FALSE otherwise (acc_list is left untouched).
 */
static Boolean AddAccessionToGenbankBlock (
  CharPtr     seqid,
  ValNodePtr  acc_list,
  SeqEntryPtr sep,
  Boolean     add_hist
)
{
  BioseqPtr    bsp;
  BioseqSetPtr bssp;
  GBBlockPtr   gbp;
  SeqEntryPtr  member;
  SeqDescrPtr  sdp;
  ValNodePtr   tail;

  if (seqid == NULL || acc_list == NULL) return FALSE;
  if (sep == NULL || sep->data.ptrvalue == NULL) return FALSE;

  if (IS_Bioseq_set (sep)) {
    bssp = (BioseqSetPtr) sep->data.ptrvalue;
    /* this also delves into nuc-prot sets */
    if (bssp != NULL && (bssp->_class == 7 ||
                         (IsPopPhyEtcSet (bssp->_class)) ||
                         bssp->_class == 1)) {
      for (member = bssp->seq_set; member != NULL; member = member->next)
      {
        if (AddAccessionToGenbankBlock (seqid, acc_list, member, add_hist))
        {
          return TRUE;
        }
      }
      return FALSE;
    }
  }
  if (!IS_Bioseq (sep)) return FALSE;

  bsp = (BioseqPtr) sep->data.ptrvalue;
  if (bsp == NULL || ! DoIDsMatch (seqid, bsp, TRUE)) return FALSE;

  /* locate the GenBank descriptor, or make one */
  sdp = BioseqGetSeqDescr (bsp, Seq_descr_genbank, NULL);
  if (sdp == NULL)
  {
    sdp = CreateNewDescriptor (sep, Seq_descr_genbank);
    if (sdp == NULL) return FALSE;
  }

  gbp = (GBBlockPtr) sdp->data.ptrvalue;
  if (gbp == NULL)
  {
    gbp = GBBlockNew ();
    sdp->data.ptrvalue = gbp;
    if (gbp == NULL) return FALSE;
  }

  /* hook the new accessions onto the end of the existing ones */
  tail = gbp->extra_accessions;
  if (tail == NULL)
  {
    gbp->extra_accessions = acc_list;
  }
  else
  {
    while (tail->next != NULL)
    {
      tail = tail->next;
    }
    tail->next = acc_list;
  }

  if (add_hist)
  {
    AddHistory (bsp, acc_list);
  }
  return TRUE;
}
27895 
27896 
/* Asks the user for a text file of secondary accession numbers and applies
 * it to the record shown in the form associated with i.
 *
 * Every non-empty line holds a sequence ID (the text up to the first space
 * or tab) followed by accessions separated by spaces or tabs.  The
 * accessions of a line are handed to AddAccessionToGenbankBlock together
 * with add_hist; an error message is shown for each line that could not be
 * applied.  Lines that start with a space or tab are skipped.
 *
 * Once the file has been opened, the record is marked dirty and an update
 * message is sent regardless of how many lines were applied.
 */
static void LoadSecondaryAccessionNumbersPlusHistFromFile (
  IteM    i,
  Boolean add_hist
)
{
  BaseFormPtr   bfp;
  SeqEntryPtr   sep;
  Char          path [PATH_MAX];
  FILE          *fp;
  Char          str [8192];
  size_t        len = 8192;
  Char          seqid[100];
  Int4          seqid_len;
  Int4          copy_len;
  CharPtr       cp;
  CharPtr       acc_end;
  Boolean       found_end;
  ValNodePtr    acc_list;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif
  if (bfp == NULL) return;
  sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (sep == NULL) return;

  path [0] = '\0';
  if (! GetInputFileName (path, sizeof (path), NULL, "TEXT")) return;

  fp = FileOpen (path, "r");
  if (fp == NULL) return;

  acc_list = NULL;
  ReadLine (fp, str, len);
  while (Nlm_fileDone || str[0] != 0)
  {
    if (strlen (str) == 0)
    {
      ReadLine (fp, str, len);
      continue;
    }
    seqid_len = StringCSpn (str, " \t");
    if (seqid_len <= 0)
    {
      ReadLine (fp, str, len);
      continue;
    }

    /* an over-long ID is truncated instead of overrunning seqid; the
     * truncated text will simply fail to match and be reported */
    copy_len = seqid_len;
    if (copy_len > (Int4) sizeof (seqid) - 1)
    {
      copy_len = (Int4) sizeof (seqid) - 1;
    }
    StringNCpy (seqid, str, copy_len);
    seqid [copy_len] = 0;

    /* step over the separator, but never past the end of the line */
    cp = str + seqid_len;
    if (*cp != 0)
    {
      cp++;
    }

    found_end = FALSE;
    while (*cp != 0)
    {
      /* the ID may be separated by a tab, so tabs count as blanks too */
      if (*cp == ' ' || *cp == '\t')
      {
        cp++;
      }
      else
      {
        acc_end = cp + 1;
        while (*acc_end != 0 && *acc_end != ' ' && *acc_end != '\t')
        {
          acc_end++;
        }
        if (*acc_end == 0)
        {
          found_end = TRUE;
        }
        else
        {
          *acc_end = 0;
        }
        ValNodeAddStr (&acc_list, 0, StringSave (cp));
        if (found_end)
        {
          cp = acc_end;
        }
        else
        {
          cp = acc_end + 1;
        }
      }
    }

    /* on success the GenBank block takes over the list; on failure it is
     * still ours and has to be released */
    if ( ! AddAccessionToGenbankBlock (seqid, acc_list, sep, add_hist))
    {
      Message (MSG_ERROR,
               "Unable to update accession numbers for %s (not found)",
               seqid);
      acc_list = ValNodeFreeData (acc_list);
    }
    acc_list = NULL;

    ReadLine (fp, str, len);
  }

  FileClose (fp);
  ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
  return;
}
28015 
/* Loads secondary accession numbers from a file chosen by the user,
 * without adding history (add_hist is FALSE).
 */
extern void LoadSecondaryAccessionNumbersFromFile (IteM i)
{
  LoadSecondaryAccessionNumbersPlusHistFromFile (i, FALSE);
}
28022 
/* Loads secondary accession numbers from a file chosen by the user and
 * also records them in the sequence history (add_hist is TRUE).
 */
extern void LoadHistoryAccessionNumbersFromFile (IteM i)
{
  LoadSecondaryAccessionNumbersPlusHistFromFile (i, TRUE);
}
28029 
28030 
/* Descriptor callback: marks an empty "GenomeProjectsDB" user object for
 * deletion.
 *
 * The object counts as empty when every labelled field is a "ProjectID" or
 * "ParentID" field that is not a non-zero integer (choice 2).  A labelled
 * field of any other name, or a non-zero ID, keeps the descriptor.
 * Fields without a label are ignored.  userdata is not used.
 */
static void RemoveEmptyGenomeProjectIDCallback (SeqDescrPtr sdp, Pointer userdata)
{
  UserObjectPtr uop;
  UserFieldPtr  field;
  ObjectIdPtr   label;
  Boolean       is_id_field;

  if (sdp == NULL || sdp->extended == 0 || sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL) return;
  if (uop->type == NULL || StringCmp (uop->type->str, "GenomeProjectsDB") != 0) return;

  for (field = uop->data; field != NULL; field = field->next) {
    label = field->label;
    if (label == NULL) continue;

    is_id_field = (Boolean) (StringCmp (label->str, "ProjectID") == 0
                             || StringCmp (label->str, "ParentID") == 0);
    if (! is_id_field) {
      /* found nonempty field of other type */
      return;
    }
    if (field->choice == 2 && field->data.intvalue != 0) {
      /* found nonempty ID */
      return;
    }
  }

  /* found no nonempty fields */
  ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
}
28065 
28066 
/* Deletes every empty GenomeProjectsDB descriptor (as judged by
 * RemoveEmptyGenomeProjectIDCallback) from the record shown in the form
 * associated with i, then marks the record dirty and sends an update.
 */
extern void RemoveEmptyGenomeProjectIDs (IteM i)
{
  BaseFormPtr   form;
  SeqEntryPtr   top_sep;

#ifdef WIN_MAC
  form = currentFormDataPtr;
#else
  form = GetObjectExtra (i);
#endif
  if (form == NULL) return;

  top_sep = GetTopSeqEntryForEntityID (form->input_entityID);
  if (top_sep == NULL) return;

  /* first mark, then delete everything that was marked */
  VisitDescriptorsInSep (top_sep, NULL, RemoveEmptyGenomeProjectIDCallback);
  DeleteMarkedObjects (form->input_entityID, 0, NULL);

  ObjMgrSetDirtyFlag (form->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, form->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
}
28090 
28091 
/* Descriptor callback: marks every user descriptor whose object type is
 * "GenomeProjectsDB" for deletion.  userdata is not used.
 */
static void RemoveAllGenomeProjectIDCallback (SeqDescrPtr sdp, Pointer userdata)
{
  UserObjectPtr uop;

  if (sdp == NULL || sdp->extended == 0 || sdp->choice != Seq_descr_user) return;

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL || uop->type == NULL) return;

  if (StringCmp (uop->type->str, "GenomeProjectsDB") == 0) {
    ((ObjValNodePtr) sdp)->idx.deleteme = TRUE;
  }
}
28109 
28110 
/* Deletes all GenomeProjectsDB descriptors from the record that bfp refers
 * to, then marks the record dirty and sends an update.  Does nothing when
 * bfp is NULL or no top-level SeqEntry is found for its entity.
 */
extern void RemoveGenomeProjectIdsBaseForm (BaseFormPtr bfp)
{
  SeqEntryPtr   top_sep;

  if (bfp == NULL) return;

  top_sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (top_sep == NULL) return;

  /* first mark, then delete everything that was marked */
  VisitDescriptorsInSep (top_sep, NULL, RemoveAllGenomeProjectIDCallback);
  DeleteMarkedObjects (bfp->input_entityID, 0, NULL);

  ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
}
28128 
/* Resolves the form associated with i and removes all GenomeProjectsDB
 * descriptors from its record via RemoveGenomeProjectIdsBaseForm.
 */
extern void RemoveGenomeProjectIDs  (IteM i)
{
  BaseFormPtr   form;

#ifdef WIN_MAC
  form = currentFormDataPtr;
#else
  form = GetObjectExtra (i);
#endif

  /* a NULL form is handled by the callee */
  RemoveGenomeProjectIdsBaseForm (form);
}
28141 
28142 
/* Feature names that InsertMostUsedFeatureValNodes and
 * InsertMostUsedFeatureEnumFieldAssoc move to the front of a feature list,
 * in the order given here.  Names are matched exactly (case-sensitive).
 */
CharPtr MostUsedFeatureList[] = {
  "CDS",
  "exon",
  "Gene",
  "intron",
  "mRNA",
  "rRNA",
  "RNA"
};
28152 
/* Puts copies of the most used feature names in front of a feature list.
 *
 * For every name in MostUsedFeatureList that occurs in old_list (exact
 * match), a new node with the same choice and a copy of the name is
 * created.  The new nodes are linked in front of old_list, or directly
 * after its first node when that node is "All" or "[ALL FEATURES]".
 * The original nodes stay in place, so the names appear twice.
 *
 * Returns the head of the combined list.  When none of the names is
 * present, or an allocation fails, old_list is returned unchanged.
 */
extern ValNodePtr InsertMostUsedFeatureValNodes (ValNodePtr old_list)
{
  ValNodePtr new_list, new_item, old_item;
  Int4       index;

  new_list = NULL;
  /* must start out NULL: it is tested after the loop even when no name
   * matched and the loop never assigned it */
  new_item = NULL;
  for (index = 0;
       index < (Int4) (sizeof (MostUsedFeatureList) / sizeof (CharPtr));
       index ++)
  {
    old_item = FindExactStringListMatch ( old_list, MostUsedFeatureList [index]);
    if (old_item == NULL) continue;
    new_item = ValNodeNew ( new_list);
    if (new_item == NULL)
    {
      /* give up, releasing the nodes and name copies made so far */
      new_list = ValNodeFreeData (new_list);
      return old_list;
    }
    new_item->choice = old_item->choice;
    new_item->data.ptrvalue = StringSave (MostUsedFeatureList [index]);
    if (new_list == NULL) new_list = new_item;
  }
  /* new_item is now the last node of new_list, or NULL if it is empty */
  if (new_item != NULL)
  {
    if (old_list != NULL &&
      ( StringCmp (old_list->data.ptrvalue, "All") == 0
       || StringCmp (old_list->data.ptrvalue, "[ALL FEATURES]") == 0))
    {
      /* keep the wildcard entry first and splice the new nodes after it */
      new_item->next = old_list->next;
      old_list->next = new_list;
      new_list = old_list;
    }
    else
    {
      new_item->next = old_list;
    }
  }
  else
  {
    new_list = old_list;
  }
  return new_list;
}
28193 
/* Searches an EnumFieldAssoc array, terminated by an entry with a NULL
 * name, for the entry whose name equals findStr (StringCmp).
 * Returns that entry, or NULL when alist is NULL or holds no such name.
 */
static EnumFieldAssocPtr FindEnumFieldAssoc (
  EnumFieldAssocPtr alist,
  CharPtr findStr
)
{
  EnumFieldAssocPtr entry;

  if (alist == NULL) return NULL;

  entry = alist;
  while (entry->name != NULL)
  {
    if (StringCmp (entry->name, findStr) == 0)
    {
      return entry;
    }
    entry++;
  }
  return NULL;
}
28207 
/* Copies entry ap2 into ap1: the value is copied as is and the name is
 * duplicated with StringSave.  Does nothing if either pointer is NULL.
 */
static void CopyEnumFieldAssoc (EnumFieldAssocPtr ap1, EnumFieldAssocPtr ap2)
{
  if (ap1 != NULL && ap2 != NULL)
  {
    ap1->value = ap2->value;
    ap1->name = StringSave (ap2->name);
  }
}
28215 
/* Builds a new EnumFieldAssoc array that lists the most used features
 * ahead of the entries of alist.
 *
 * Layout of the result:
 *   1. a copy of the first entry of alist, if that is "[ALL FEATURES]";
 *   2. a copy of each MostUsedFeatureList name that occurs in alist;
 *   3. a copy of every entry of alist, in its original order;
 *   4. the terminating entry (NULL name).
 * All names are duplicated, and alist itself is neither changed nor freed.
 *
 * NOTE(review): step 3 copies the "[ALL FEATURES]" entry a second time.
 * That matches the previous output and is kept as is; confirm whether the
 * duplicate is wanted.
 *
 * Returns the new array, NULL when alist is NULL, or alist itself when the
 * allocation fails.
 */
extern EnumFieldAssocPtr InsertMostUsedFeatureEnumFieldAssoc (
  EnumFieldAssocPtr alist
)
{
  Int4              num_total_fields, index, new_index;
  EnumFieldAssocPtr ap, new_alist, old_ap;
  Boolean           has_wildcard;

  if (alist == NULL) return NULL;

  has_wildcard = (Boolean) (StringCmp (alist->name, "[ALL FEATURES]") == 0);

  num_total_fields = sizeof (MostUsedFeatureList) / sizeof (CharPtr);

  for (ap = alist; ap != NULL && ap->name != NULL; ap++)
  {
    num_total_fields ++;
  }
  /* need the last null field */
  num_total_fields ++;
  /* the wildcard is written twice (steps 1 and 3), so it needs a second
   * slot; without it the terminator lands one entry past the buffer when
   * all most used features are present */
  if (has_wildcard)
  {
    num_total_fields ++;
  }

  new_alist = MemNew (num_total_fields * sizeof (EnumFieldAssoc));
  if (new_alist == NULL) return alist;

  /* copy the first item if wildcard */
  if (has_wildcard)
  {
    CopyEnumFieldAssoc (new_alist, alist);
    new_index = 1;
  }
  else
  {
    new_index = 0;
  }

  for (index = 0;
       index < (Int4) (sizeof (MostUsedFeatureList) / sizeof (CharPtr));
       index ++)
  {
    old_ap = FindEnumFieldAssoc (alist, MostUsedFeatureList [index]);
    if (old_ap == NULL) continue;
    CopyEnumFieldAssoc (new_alist + new_index++, old_ap);
  }

  for (ap = alist; ap != NULL && ap->name != NULL; ap++)
  {
    CopyEnumFieldAssoc (new_alist + new_index ++, ap);
  }
  /* copy over the last null field */
  if (ap != NULL)
  {
    CopyEnumFieldAssoc (new_alist + new_index ++, ap);
  }
  return new_alist;

}
28269 
/* Feature definition keys that BuildFeatureValNodeList leaves out of the
 * list it builds when it is called with skip_unusual set.
 */
static Uint2 UnusualFeatureTypes [] = {
  FEATDEF_ORG,
  FEATDEF_mutation,
  FEATDEF_site_ref,
  FEATDEF_gap,
  FEATDEF_NON_STD_RESIDUE,
  FEATDEF_NUM
};
28278 
BuildFeatureValNodeList(Boolean prefer_most_used,CharPtr wild_card_name,Int4 wild_card_value,Boolean skip_unusual,Boolean skip_import)28279 extern ValNodePtr BuildFeatureValNodeList (
28280   Boolean prefer_most_used,
28281   CharPtr wild_card_name,
28282   Int4    wild_card_value,
28283   Boolean skip_unusual,
28284   Boolean skip_import
28285 )
28286 {
28287   FeatDefPtr  curr;
28288   ValNodePtr  head, vnp;
28289   Uint1       key;
28290   CharPtr     label = NULL;
28291   Uint1       subtype;
28292   Int4        index;
28293   Boolean     skip;
28294   Char        str [256];
28295 
28296   head = NULL;
28297   curr = FeatDefFindNext (NULL, &key, &label, FEATDEF_ANY, TRUE);
28298   while (curr != NULL) {
28299     skip = FALSE;
28300     if (skip_unusual)
28301     {
28302       for (index = 0;
28303            ! skip && index < sizeof ( UnusualFeatureTypes ) / sizeof (Uint2);
28304            index ++)
28305       {
28306         if (curr->featdef_key == UnusualFeatureTypes [ index ]) skip = TRUE;
28307       }
28308     }
28309     if (key != FEATDEF_BAD && ! skip && !IsUnwantedFeatureType(key)) {
28310 
28311       subtype = curr->featdef_key;
28312 	  if (subtype == FEATDEF_PUB)
28313 	  {
28314         StringNCpy_0 (str, curr->typelabel, sizeof (str) - 15);
28315         StringCat (str, " (Publication)");
28316 	  }
28317 	  else if (subtype != FEATDEF_misc_RNA &&
28318           subtype != FEATDEF_precursor_RNA &&
28319           subtype != FEATDEF_mat_peptide &&
28320           subtype != FEATDEF_sig_peptide &&
28321           subtype != FEATDEF_transit_peptide &&
28322           subtype != FEATDEF_Imp_CDS)
28323       {
28324         StringNCpy_0 (str, curr->typelabel, sizeof (str) - 1);
28325       }
28326       else if (! skip_import)
28327       {
28328         StringNCpy_0 (str, curr->typelabel, sizeof (str) - 10);
28329         StringCat (str, "_imp");
28330       }
28331       else
28332       {
28333         skip = TRUE;
28334       }
28335       if (! skip)
28336       {
28337         vnp = ValNodeNew (head);
28338         if (head == NULL) {
28339           head = vnp;
28340         }
28341         if (vnp != NULL) {
28342           vnp->choice = subtype;
28343           vnp->data.ptrvalue = StringSave (str);
28344         }
28345       }
28346     }
28347     curr = FeatDefFindNext (curr, &key, &label, FEATDEF_ANY, TRUE);
28348   }
28349   if (head != NULL) {
28350     head = SortValNode (head, CompareFeatureValNodeStrings);
28351     head = InsertMostUsedFeatureValNodes (head);
28352     if (wild_card_name != NULL)
28353     {
28354       vnp = ValNodeNew (NULL);
28355       if (vnp != NULL) {
28356         vnp->choice = wild_card_value;
28357         vnp->data.ptrvalue = StringSave (wild_card_name);
28358         vnp->next = head;
28359         head = vnp;
28360       }
28361     }
28362   }
28363   return head;
28364 }
28365 
/* Unlinks and frees every OrgMod with subtype ORGMOD_old_name from
 * orp->orgname->mod.  Does nothing if orp or orp->orgname is NULL.
 */
extern void RemoveOldName (OrgRefPtr orp)
{
  OrgModPtr PNTR link;
  OrgModPtr      mod;

  if (orp == NULL || orp->orgname == NULL) return;

  /* link always points at the pointer that refers to the current node */
  link = &(orp->orgname->mod);
  while ((mod = *link) != NULL)
  {
    if (mod->subtype == ORGMOD_old_name)
    {
      *link = mod->next;
      /* detach before freeing so only this node is released */
      mod->next = NULL;
      OrgModFree (mod);
    }
    else
    {
      link = &(mod->next);
    }
  }
}
28398 
/* Replaces orp->taxname with taxname.  The pointer is stored directly, not
 * copied, and the previous string is released with MemFree.
 *
 * If the new name is NULL, the old name was NULL, or the two names differ,
 * the common name is also freed and RemoveTaxRef and RemoveOldName are
 * called on orp.  If the names compare equal, only the pointer swap happens.
 *
 * Does nothing if orp is NULL.
 */
extern void SetTaxNameAndRemoveTaxRef (OrgRefPtr orp, CharPtr taxname)
{
  Boolean         remove_taxrefs = FALSE;

  if (orp == NULL) return;

  if ( taxname == NULL || orp->taxname == NULL
    || StringCmp (taxname, orp->taxname) != 0)
  {
    remove_taxrefs = TRUE;
  }

  /* If the caller hands back the pointer already stored in orp, freeing it
   * would leave orp->taxname dangling; in that case keep it as is.
   */
  if (orp->taxname != taxname)
  {
    MemFree (orp->taxname);
    orp->taxname = taxname;
  }

  if (! remove_taxrefs) return;

  orp->common = MemFree (orp->common);

  RemoveTaxRef (orp);
  RemoveOldName (orp);
}
28420 
28421 static Boolean
FindMatchingProprotein(SeqFeatPtr sfp,SeqMgrFeatContextPtr fcontext,BioseqPtr prot_bsp)28422 FindMatchingProprotein
28423 (SeqFeatPtr sfp,
28424  SeqMgrFeatContextPtr fcontext,
28425  BioseqPtr prot_bsp)
28426 {
28427   SeqFeatPtr        prot_sfp;
28428   SeqMgrFeatContext pcontext;
28429   CharPtr           start;
28430 
28431   if (prot_bsp == NULL || fcontext == NULL) return FALSE;
28432   if (StringNICmp (fcontext->label, "encodes ", 8) == 0) {
28433     start = fcontext->label + 8;
28434   } else {
28435     start = fcontext->label;
28436   }
28437   prot_sfp = NULL;
28438   while ((prot_sfp = SeqMgrGetNextFeature (prot_bsp, prot_sfp,
28439                                            0, 0, &pcontext)) != NULL) {
28440     if (StringCmp (pcontext.label, start) == 0) {
28441       return TRUE;
28442     }
28443   }
28444   return FALSE;
28445 }
28446 
28447 
28448 static void
RemoveRedundantProproteinMiscFeatsOnBioseq(BioseqPtr bsp,Pointer userdata)28449 RemoveRedundantProproteinMiscFeatsOnBioseq
28450 (BioseqPtr bsp,
28451  Pointer userdata)
28452 {
28453   SeqFeatPtr        sfp, cds;
28454   SeqMgrFeatContext fcontext, cds_context;
28455   BioseqPtr         bsp_prot;
28456 
28457   sfp = NULL;
28458 
28459   /* list misc feats */
28460   while ((sfp = SeqMgrGetNextFeature (bsp, sfp, 0, 0, &fcontext)) != NULL) {
28461     if (fcontext.featdeftype == FEATDEF_misc_feature
28462         &&  StringStr(fcontext.label, "proprotein") != NULL) {
28463       cds = NULL;
28464       while ((cds = SeqMgrGetNextFeature (bsp, cds, SEQFEAT_CDREGION, 0, &cds_context)) != NULL) {
28465         if (cds_context.left <= fcontext.left
28466             &&  cds_context.right >= fcontext.right) {
28467           /* Get Protein sequence, look for matching proprotein feat */
28468           bsp_prot = BioseqFind (SeqLocId(cds->product));
28469           if (FindMatchingProprotein (sfp, &fcontext, bsp_prot)) {
28470             sfp->idx.deleteme = TRUE;
28471           }
28472         }
28473       }
28474     }
28475   }
28476 
28477 }
28478 
28479 
/* Menu handler: marks redundant proprotein misc_features on every Bioseq of
 * the current record, deletes the marked objects, flags the entity as dirty
 * and sends an update message.  Returns silently when no form data or no top
 * SeqEntry is available.
 */
extern void RemoveRedundantProproteinMiscFeats (IteM i)
{
  BaseFormPtr  bfp;
  SeqEntryPtr  top_sep;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif
  if (bfp == NULL)
  {
    return;
  }

  top_sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (top_sep == NULL)
  {
    return;
  }

  /* mark the redundant features on each bioseq ... */
  VisitBioseqsInSep (top_sep, NULL, RemoveRedundantProproteinMiscFeatsOnBioseq);

  /* ... then delete everything that was marked and refresh the views */
  DeleteMarkedObjects (bfp->input_entityID, 0, NULL);
  ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
}
28503 
/* Form data for the "Add Type Strain Comments" dialog.  The three controls
 * are read by AddTypeStrainCommentsWithConstraintProc into the three option
 * fields, which AddTypeStrainCommentsProc then consults.
 */
typedef struct typestraindata
{
  FORM_MESSAGE_BLOCK

  GrouP  strain_or_comment_grp;   /* radio group: "Strain" / "Comment" */
  DialoG string_constraint_dlg;   /* string constraint editor */
  ButtoN remove_found_btn;        /* "Remove found text" check box */

  Boolean             search_strain;      /* TRUE when the first radio button ("Strain") is selected */
  StringConstraintPtr string_constraint;  /* constraint taken from string_constraint_dlg */
  Boolean             remove_found_text;  /* status of remove_found_btn */
} TypeStrainData, PNTR TypeStrainPtr;
28516 
28517 
/* BioSource visitor that appends "type strain of <taxname>" to the orgmod
 * note of biop.
 *
 * biop     - source to update; ignored if it has no org or no taxname.
 * userdata - NULL, or a TypeStrainPtr carrying dialog options.
 *
 * Nothing is done if the orgmod note already contains "type strain of".
 *
 * When userdata supplies a non-empty string constraint, the strain (if
 * search_strain is set) or else the orgmod note, falling back to the
 * subsource note, must match the constraint; otherwise the source is left
 * untouched.  If remove_found_text is set, the matched portion is removed
 * from that qualifier and the same replacement is applied to the taxname
 * before the comment is built.
 */
static void AddTypeStrainCommentsProc (BioSourcePtr biop, Pointer userdata)
{
  TypeStrainPtr      tsp;
  CharPtr            orig_note;
  CharPtr            format = "type strain of %s";
  CharPtr            match = NULL, orig_match, tmp, taxname;
  ValNode            vn, vn_comment, vn_taxname;

  if (biop == NULL || biop->org == NULL || biop->org->taxname == NULL) return;

  tsp = (TypeStrainPtr) userdata;

  vn_comment.choice = SourceQualChoice_textqual;
  vn_comment.data.intvalue = Source_qual_orgmod_note;
  vn_comment.next = NULL;
  vn_taxname.choice = SourceQualChoice_textqual;
  vn_taxname.data.intvalue = Source_qual_taxname;
  vn_taxname.next = NULL;
  /* vn selects the qualifier searched by the constraint; start it out as a
   * fully initialized text qualifier.
   */
  vn.choice = SourceQualChoice_textqual;
  vn.data.intvalue = Source_qual_strain;
  vn.next = NULL;

  /* Skip sources that already carry the comment.  The comment is written to
   * the orgmod note below, so that is the qualifier to inspect here (the
   * lookup previously went through the still-uninitialized vn).
   */
  orig_note = GetSourceQualFromBioSource (biop, &vn_comment, NULL);
  if (orig_note != NULL && StringStr (orig_note, "type strain of") != NULL) {
    orig_note = MemFree (orig_note);
    return;
  }
  orig_note = MemFree (orig_note);

  taxname = GetSourceQualFromBioSource (biop, &vn_taxname, NULL);

  if (tsp != NULL && !IsStringConstraintEmpty (tsp->string_constraint)) {
    if (tsp->search_strain) {
      vn.data.intvalue = Source_qual_strain;
      match = GetSourceQualFromBioSource (biop, &vn, NULL);
    } else {
      vn.data.intvalue = Source_qual_orgmod_note;
      match = GetSourceQualFromBioSource (biop, &vn, NULL);
      if (match == NULL) {
        /* fall back to the subsource note; the qualifier goes in
         * data.intvalue, choice must stay SourceQualChoice_textqual
         */
        vn.data.intvalue = Source_qual_subsource_note;
        match = GetSourceQualFromBioSource (biop, &vn, NULL);
      }
    }
    if (!DoesStringMatchConstraint (match, tsp->string_constraint)) {
      match = MemFree (match);
      taxname = MemFree (taxname);
      return;
    }

    orig_match = StringSave (match);
    if (tsp->remove_found_text && RemoveStringConstraintPortionFromString (&match, tsp->string_constraint)) {
      if (StringHasNoText (match)) {
        RemoveSourceQualFromBioSource (biop, &vn, tsp->string_constraint);
      } else {
        TrimSpacesAroundString (match);
        SetSourceQualInBioSource (biop, &vn, tsp->string_constraint, match, ExistingTextOption_replace_old);
      }
      /* apply the same edit to the taxname */
      FindReplaceString (&taxname, orig_match, match, TRUE, TRUE);
      SetSourceQualInBioSource (biop, &vn_taxname, NULL, taxname, ExistingTextOption_replace_old);
    }
    orig_match = MemFree (orig_match);
    match = MemFree (match);
  }

  if (!StringHasNoText (taxname)) {
    /* format minus "%s" plus taxname plus terminator fits comfortably */
    tmp = (CharPtr) MemNew (sizeof (Char) * (StringLen (format) + StringLen (taxname) + 1));
    if (tmp != NULL) {
      sprintf (tmp, format, taxname);
      SetSourceQualInBioSource (biop, &vn_comment, NULL, tmp, ExistingTextOption_append_semi);
      tmp = MemFree (tmp);
    }
  }
  taxname = MemFree (taxname);
}
28595 
28596 
/* "Accept" handler of the Add Type Strain Comments dialog.  Reads the dialog
 * controls into the TypeStrainData attached to the button, applies
 * AddTypeStrainCommentsProc to every BioSource of the record, releases the
 * string constraint, notifies the object manager and removes the form.
 */
static void AddTypeStrainCommentsWithConstraintProc (ButtoN b)
{
  TypeStrainPtr dlg;
  SeqEntryPtr   top_sep;

  dlg = (TypeStrainPtr) GetObjectExtra (b);
  if (dlg == NULL)
  {
    return;
  }
  top_sep = GetTopSeqEntryForEntityID (dlg->input_entityID);
  if (top_sep == NULL)
  {
    return;
  }

  /* collect the options chosen in the dialog */
  dlg->string_constraint = DialogToPointer (dlg->string_constraint_dlg);
  dlg->remove_found_text = GetStatus (dlg->remove_found_btn);
  dlg->search_strain = (Boolean) (GetValue (dlg->strain_or_comment_grp) == 1);

  /* visit each BioSource and add the type strain comment where it applies */
  VisitBioSourcesInSep (top_sep, dlg, AddTypeStrainCommentsProc);

  dlg->string_constraint = StringConstraintFree (dlg->string_constraint);

  ObjMgrSetDirtyFlag (dlg->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, dlg->input_entityID, 0, 0);
  Remove (dlg->form);
  ArrowCursor ();
  Update ();
}
28626 
/* Menu handler: builds and shows the "Add Type Strain Comments" dialog.
 * The dialog offers a Strain/Comment radio group, a string constraint
 * editor and a "Remove found text" check box; "Accept" runs
 * AddTypeStrainCommentsWithConstraintProc.  Returns silently if no form
 * data is available or the dialog data / window cannot be created.
 */
extern void AddTypeStrainCommentsWithConstraint (IteM i)
{
  BaseFormPtr    bfp;
  TypeStrainPtr  tsp;
  WindoW         w;
  GrouP          h, c;
  PrompT         p;
  ButtoN         b;
  StringConstraintPtr scp;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif
  if (bfp == NULL) return;

  tsp = (TypeStrainPtr) MemNew (sizeof (TypeStrainData));
  if (tsp == NULL) return;
  tsp->input_entityID = bfp->input_entityID;

  w = FixedWindow (-50, -33, -10, -10, "Add Type Strain Comments", StdCloseWindowProc);
  if (w == NULL) {
	MemFree (tsp);
	return;
  }
  tsp->form = (ForM) w;
  /* tsp is attached to the window; presumably StdCleanupFormProc releases it
   * when the window goes away - TODO confirm */
  SetObjectExtra (w, tsp, StdCleanupFormProc);

  h = HiddenGroup (w, -1, 0, NULL);

  /* choice of the qualifier the constraint is applied to; "Strain" is preselected */
  p = StaticPrompt (h, "When", 0, dialogTextHeight, systemFont, 'c');
  tsp->strain_or_comment_grp = HiddenGroup (h, 2, 0, NULL);
  RadioButton (tsp->strain_or_comment_grp, "Strain");
  RadioButton (tsp->strain_or_comment_grp, "Comment");
  SetValue (tsp->strain_or_comment_grp, 1);

  /* seed the constraint editor with a case-sensitive "ends with" constraint;
   * the temporary constraint is freed once it has been handed to the dialog */
  tsp->string_constraint_dlg = StringConstraintDialog (h, NULL, FALSE, NULL, NULL);
  scp = StringConstraintNew ();
  scp->match_location = String_location_ends;
  scp->case_sensitive = TRUE;
  PointerToDialog (tsp->string_constraint_dlg, scp);
  scp = StringConstraintFree (scp);

  tsp->remove_found_btn = CheckBox (h, "Remove found text", NULL);

  /* both buttons carry tsp as their object extra */
  c = HiddenGroup (h, 4, 0, NULL);
  b = DefaultButton (c, "Accept", AddTypeStrainCommentsWithConstraintProc);
  SetObjectExtra (b, tsp, NULL);
  b = PushButton (c, "Cancel", StdCancelButtonProc);
  SetObjectExtra (b, tsp, NULL);
  AlignObjects (ALIGN_CENTER, (HANDLE) p, (HANDLE) tsp->strain_or_comment_grp, (HANDLE) tsp->string_constraint_dlg,
                (HANDLE) tsp->remove_found_btn, (HANDLE) c, NULL);
  RealizeWindow (w);
  Show (w);
  Update ();
}
28684 
/* Menu handler: applies AddTypeStrainCommentsProc, without any constraint,
 * to every BioSource of the current record, then flags the entity as dirty
 * and sends an update message.  Returns silently when no form data or no top
 * SeqEntry is available.
 */
extern void AddTypeStrainCommentsToAll (IteM i)
{
  BaseFormPtr  bfp;
  SeqEntryPtr  top_sep;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif
  if (bfp == NULL)
  {
    return;
  }

  top_sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (top_sep == NULL)
  {
    return;
  }

  /* visit each BioSource and add the type strain comment */
  VisitBioSourcesInSep (top_sep, NULL, AddTypeStrainCommentsProc);

  ObjMgrSetDirtyFlag (bfp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, bfp->input_entityID, 0, 0);
  ArrowCursor ();
  Update ();
}
28707 
/* Aligns bsp1 against bsp2 with BLAST_TwoSequencesSearch and stores the
 * result in *salp.
 *
 * *salp is set to NULL first and stays NULL when one sequence is nucleotide
 * and the other is not, or when the BLAST options cannot be created.
 * Nothing happens if any argument is NULL.
 *
 * While the search runs, bsp2->mol is temporarily set to bsp1->mol and is
 * restored afterwards.  When either sequence is longer than 10000, a filter
 * string, word size 20, an e-value cutoff from act_get_eval (60), an
 * explicit program (blastn or blastp) and eBlastHint_None are used;
 * otherwise only the e-value cutoff of 0.001 is set on top of the defaults.
 */
extern void SqnNewAlign (BioseqPtr bsp1, BioseqPtr bsp2, SeqAlignPtr PNTR salp)
{
  BLAST_SummaryOptions *options = NULL;
  Uint1 mol_was;

  if (bsp1 == NULL || bsp2 == NULL || salp == NULL) return;

  *salp = NULL;
  if (ISA_na (bsp1->mol) != ISA_na (bsp2->mol)) return;

  /* create the options before touching bsp2 so that a failure here leaves
   * the sequences unmodified instead of dereferencing a NULL options block
   */
  BLAST_SummaryOptionsInit(&options);
  if (options == NULL) return;

  mol_was = bsp2->mol;
  bsp2->mol = bsp1->mol;

  options->cutoff_evalue = 0.001;
  if (bsp1->length > 10000 || bsp2->length > 10000)
  {
    options->filter_string = StringSave ("m L");
    options->word_size = 20;
    options->cutoff_evalue = act_get_eval (60);
    if (ISA_na (bsp1->mol))
    {
      options->program = eBlastn;
    }
    else
    {
      options->program = eBlastp;
    }
    options->hint = eBlastHint_None;
  }

  BLAST_TwoSequencesSearch(options, bsp1, bsp2, salp);
  bsp2->mol = mol_was;
  BLAST_SummaryOptionsFree(options);

}
28744 
28745 /* This section of code is for the Remove Sequences From Alignments function. */
28746 
/* One entry of a sequence list control: a sequence ID together with the
 * text displayed for it.
 */
typedef struct alignmentsequencelist {
  SeqIdPtr sip;         /* ID of the sequence; points at an existing ID, not a copy made here */
  Char     descr[255];  /* display text, built from the sequence's IDs with SeqIdWrite */
} AlignmentSequenceListData, PNTR AlignmentSequenceListPtr;
28751 
28752 static void
ListSequencesInSeqEntry(SeqEntryPtr sep,ValNodePtr PNTR list,Boolean show_nucs,Boolean show_prots)28753 ListSequencesInSeqEntry
28754 (SeqEntryPtr sep,
28755  ValNodePtr PNTR list,
28756  Boolean show_nucs,
28757  Boolean show_prots)
28758 {
28759   BioseqPtr                bsp;
28760   BioseqSetPtr             bssp;
28761   ValNodePtr               vnp;
28762   AlignmentSequenceListPtr aslp;
28763   Int4                     offset;
28764   SeqIdPtr                 bsp_sip;
28765 
28766   if (sep == NULL) return;
28767 
28768   if (IS_Bioseq (sep))
28769   {
28770     bsp = (BioseqPtr) sep->data.ptrvalue;
28771     if (bsp == NULL) return;
28772     if (!show_nucs && ISA_na (bsp->mol))
28773     {
28774       return;
28775     }
28776     if (!show_prots && ISA_aa (bsp->mol))
28777     {
28778       return;
28779     }
28780     aslp = (AlignmentSequenceListPtr) MemNew (sizeof (AlignmentSequenceListData));
28781     if (aslp == NULL) return;
28782     aslp->sip = bsp->id;
28783     aslp->descr[0] = 0;
28784 	  aslp->descr[253] = 0;
28785     offset = 0;
28786     for (bsp_sip = bsp->id; bsp_sip != NULL && offset < 250; bsp_sip = bsp_sip->next) {
28787 	  if (aslp->descr[0] != 0) {
28788 	    aslp->descr[offset] = ':';
28789 	    offset ++;
28790 	  }
28791       SeqIdWrite (bsp_sip, aslp->descr + offset, PRINTID_TEXTID_ACCESSION, 254 - offset);
28792       offset = StringLen (aslp->descr);
28793 	}
28794     vnp = ValNodeNew (*list);
28795     if (vnp != NULL)
28796     {
28797       vnp->data.ptrvalue = aslp;
28798     }
28799     if (*list == NULL)
28800     {
28801       *list = vnp;
28802     }
28803   }
28804   else
28805   {
28806   	bssp = (BioseqSetPtr) sep->data.ptrvalue;
28807     for (sep = bssp->seq_set; sep != NULL; sep = sep->next)
28808     {
28809       ListSequencesInSeqEntry (sep, list, show_nucs, show_prots);
28810     }
28811   }
28812 }
28813 
/* Data attached to a sequence list control (see MakeSequenceListControl and
 * MakeAlignmentSequenceListControl); released by CleanupSequenceListCtrl.
 */
typedef struct sequencelistctrl
{
  ValNodePtr      sequence_list;  /* nodes whose data.ptrvalue is an AlignmentSequenceListPtr */
  Nlm_LstActnProc actn;           /* caller's action callback, may be NULL */
  Pointer         userdata;       /* installed as the list's object extra while actn runs */

} SequenceListCtrlData, PNTR SequenceListCtrlPtr;
28821 
/* Cleanup callback for a sequence list control: frees the sequence list
 * (nodes and their data) and then the SequenceListCtrlData itself.
 * g is not used; a NULL data pointer is tolerated.
 */
static void CleanupSequenceListCtrl (
  GraphiC g,
  VoidPtr data
)

{
  SequenceListCtrlPtr ctrl = (SequenceListCtrlPtr) data;

  if (ctrl == NULL) return;

  ctrl->sequence_list = ValNodeFreeData (ctrl->sequence_list);
  MemFree (ctrl);
}
28836 
28837 
/* Action callback installed on sequence list controls.  If the creator
 * supplied a callback, the list's object extra is temporarily replaced by
 * the creator's userdata, the callback is invoked, and the control data is
 * then reattached together with its cleanup procedure.
 */
static void SequenceListCtrlAction (LisT l)
{
  SequenceListCtrlPtr ctrl;
  Nlm_LstActnProc     callback;

  ctrl = (SequenceListCtrlPtr) GetObjectExtra (l);
  if (ctrl == NULL) return;

  callback = ctrl->actn;
  if (callback == NULL) return;

  /* the callback expects to find its own data on the list */
  SetObjectExtra (l, ctrl->userdata, NULL);
  callback (l);
  SetObjectExtra (l, ctrl, CleanupSequenceListCtrl);
}
28852 
28853 extern LisT
MakeSequenceListControl(GrouP g,SeqEntryPtr sep,Nlm_LstActnProc actn,Pointer userdata,Boolean show_nucs,Boolean show_prots)28854 MakeSequenceListControl
28855 (GrouP g,
28856  SeqEntryPtr sep,
28857  Nlm_LstActnProc actn,
28858  Pointer userdata,
28859  Boolean show_nucs,
28860  Boolean show_prots)
28861 {
28862   LisT                     list_ctrl;
28863   SequenceListCtrlPtr      slcp;
28864   ValNodePtr               vnp;
28865   AlignmentSequenceListPtr aslp;
28866 
28867   slcp = (SequenceListCtrlPtr) MemNew (sizeof (SequenceListCtrlData));
28868   slcp->actn = actn;
28869   slcp->userdata = userdata;
28870   ListSequencesInSeqEntry (sep, &slcp->sequence_list, show_nucs, show_prots);
28871 
28872   list_ctrl = MultiList (g, 20, 8, SequenceListCtrlAction);
28873   SetObjectExtra (list_ctrl, slcp, CleanupSequenceListCtrl);
28874 
28875   for (vnp = slcp->sequence_list; vnp != NULL; vnp = vnp->next) {
28876     aslp = vnp->data.ptrvalue;
28877 	  if (aslp != NULL)
28878 	  {
28879       ListItem (list_ctrl, aslp->descr);
28880 	  }
28881   }
28882 
28883   return list_ctrl;
28884 
28885 }
28886 
28887 
/* Turns on the selection status of items 1..N of list control l, where N is
 * the number of nodes in the control's sequence list.  Does nothing if the
 * control has no data attached.
 */
extern void SelectAllSequencesInListCtrl (LisT l)
{
  SequenceListCtrlPtr   ctrl;
  ValNodePtr            node;
  Int2                  item = 0;

  ctrl = (SequenceListCtrlPtr) GetObjectExtra (l);
  if (ctrl == NULL) return;

  node = ctrl->sequence_list;
  while (node != NULL)
  {
    item++;
    SetItemStatus (l, item, TRUE);
    node = node->next;
  }
}
28903 
28904 
/* Turns off the selection status of items 1..N of list control l, where N
 * is the number of nodes in the control's sequence list.  Does nothing if
 * the control has no data attached.
 */
extern void UnSelectAllSequencesInListCtrl (LisT l)
{
  SequenceListCtrlPtr   ctrl;
  ValNodePtr            node;
  Int2                  item = 0;

  ctrl = (SequenceListCtrlPtr) GetObjectExtra (l);
  if (ctrl == NULL) return;

  node = ctrl->sequence_list;
  while (node != NULL)
  {
    item++;
    SetItemStatus (l, item, FALSE);
    node = node->next;
  }
}
28920 
28921 
GetSelectedSequenceList(LisT l)28922 extern ValNodePtr GetSelectedSequenceList (LisT l)
28923 {
28924   SequenceListCtrlPtr      slcp;
28925   ValNodePtr               sip_list = NULL, vnp;
28926   Int2                     val;
28927   AlignmentSequenceListPtr aslp;
28928 
28929   slcp = (SequenceListCtrlPtr) GetObjectExtra (l);
28930   if (slcp == NULL) return NULL;
28931 
28932   val = 1;
28933   for (vnp = slcp->sequence_list; vnp != NULL; vnp = vnp->next)
28934   {
28935     aslp = vnp->data.ptrvalue;
28936 	  if (aslp == NULL) continue;
28937 	  if (GetItemStatus (l, val))
28938 	  {
28939 	    ValNodeAddPointer (&sip_list, 0, aslp->sip);
28940 	  }
28941 	  val++;
28942   }
28943 
28944   return sip_list;
28945 }
28946 
28947 /* This function is used so that a sequence ID will only appear once in the list,
28948  * even if it appears in more than one alignment or subalignment.
28949  */
IsIDAlreadyInList(SeqIdPtr sip,ValNodePtr list)28950 static Boolean IsIDAlreadyInList (SeqIdPtr sip, ValNodePtr list)
28951 {
28952   ValNodePtr vnp;
28953   AlignmentSequenceListPtr aslp;
28954 
28955   if (sip == NULL) return FALSE;
28956 
28957   for (vnp = list; vnp != NULL; vnp = vnp->next)
28958   {
28959     aslp = (AlignmentSequenceListPtr) vnp->data.ptrvalue;
28960     if (aslp != NULL && SeqIdComp (aslp->sip, sip) == SIC_YES)
28961     {
28962       return TRUE;
28963     }
28964   }
28965   return FALSE;
28966 }
28967 
28968 /* This function creates the list of sequence IDs and descriptions to use in
28969  * the Remove Sequences From Alignments dialog.
28970  */
ListSequencesInAlignmentsCallback(SeqAnnotPtr sap,Pointer userdata)28971 static void ListSequencesInAlignmentsCallback (SeqAnnotPtr sap, Pointer userdata)
28972 {
28973   SeqAlignPtr salp;
28974   SeqIdPtr    sip_list, sip, bsp_sip;
28975   ValNodePtr PNTR list;
28976   ValNodePtr  vnp;
28977   AlignmentSequenceListPtr aslp;
28978   BioseqPtr                bsp;
28979   Int4                     offset;
28980 
28981   if (sap == NULL || sap->type != 2 || userdata == NULL) return;
28982   salp = (SeqAlignPtr) sap->data;
28983   while (salp != NULL)
28984   {
28985     list = (ValNodePtr PNTR)userdata;
28986     sip_list = SeqAlignIDList (salp);
28987     if (sip_list == NULL) return;
28988     for (sip = sip_list; sip != NULL; sip = sip->next) {
28989       if (IsIDAlreadyInList (sip, *list)) continue;
28990       aslp = (AlignmentSequenceListPtr) MemNew (sizeof (AlignmentSequenceListData));
28991 	  if (aslp == NULL) return;
28992 	  aslp->sip = sip;
28993 	  bsp = BioseqFindCore (sip);
28994 	  if (bsp != NULL) {
28995 		  aslp->descr[0] = 0;
28996 		  aslp->descr[253] = 0;
28997 		  offset = 0;
28998 		  for (bsp_sip = bsp->id; bsp_sip != NULL && offset < 250; bsp_sip = bsp_sip->next) {
28999 			if (aslp->descr[0] != 0) {
29000 			  aslp->descr[offset] = '\t';
29001 			  offset ++;
29002 			}
29003 		    SeqIdWrite (bsp_sip, aslp->descr + offset, PRINTID_TEXTID_ACCESSION, 254 - offset);
29004 			offset = StringLen (aslp->descr);
29005 		  }
29006 	  } else {
29007         SeqIdWrite (sip, aslp->descr, PRINTID_TEXTID_ACCESSION, 254);
29008 	  }
29009 	  vnp = ValNodeNew (*list);
29010 	  vnp->data.ptrvalue = aslp;
29011 	  if (*list == NULL) {
29012 		  *list = vnp;
29013 	  }
29014     }
29015     salp = salp->next;
29016   }
29017 }
29018 
ListSequencesInAlignments(SeqEntryPtr sep)29019 static ValNodePtr ListSequencesInAlignments (SeqEntryPtr sep)
29020 {
29021 	ValNodePtr list = NULL;
29022     VisitAnnotsInSep (sep, (Pointer) &list, ListSequencesInAlignmentsCallback);
29023     return list;
29024 }
29025 
MakeAlignmentSequenceListControl(GrouP g,SeqEntryPtr sep,Nlm_LstActnProc actn,Pointer userdata)29026 static LisT MakeAlignmentSequenceListControl (GrouP g, SeqEntryPtr sep, Nlm_LstActnProc actn, Pointer userdata)
29027 {
29028   LisT                     list_ctrl;
29029   SequenceListCtrlPtr      slcp;
29030   ValNodePtr               vnp;
29031   AlignmentSequenceListPtr aslp;
29032 
29033   slcp = (SequenceListCtrlPtr) MemNew (sizeof (SequenceListCtrlData));
29034   slcp->actn = actn;
29035   slcp->userdata = userdata;
29036   slcp->sequence_list = ListSequencesInAlignments (sep);
29037 
29038   list_ctrl = MultiList (g, 16, 16, SequenceListCtrlAction);
29039   SetObjectExtra (list_ctrl, slcp, CleanupSequenceListCtrl);
29040 
29041   for (vnp = slcp->sequence_list; vnp != NULL; vnp = vnp->next) {
29042     aslp = vnp->data.ptrvalue;
29043 	  if (aslp != NULL)
29044 	  {
29045       ListItem (list_ctrl, aslp->descr);
29046 	  }
29047   }
29048 
29049   return list_ctrl;
29050 
29051 }
29052 
/* Form data for the dialogs that remove sequences from alignments or from
 * the record.  The Remove Sequences From Alignment dialog uses the form
 * block, sequence_list_ctrl and sep; the remaining fields are not referenced
 * by that dialog - presumably they serve the remove-from-record code that
 * shares this structure (TODO confirm).
 */
typedef struct removeseqfromaligndata {
  FORM_MESSAGE_BLOCK
  DialoG      clickable_list_dlg;
  DialoG      constraint_dlg;
  ValNodePtr  seq_list;
  LisT        sequence_list_ctrl;  /* list the user selects sequences in */
  TexT        mark_seqid_list;
  TexT        mark_seqlen;

  SeqEntryPtr sep;                 /* top SeqEntry of the record being edited */
  Boolean     remove_all_from_alignments;
  Boolean     no_remove_all_from_alignments;
  Boolean     remove_all_products;
  Boolean     no_remove_all_products;
} RemoveSeqFromAlignData, PNTR RemoveSeqFromAlignPtr;
29068 
/* "Accept" handler of the Remove Sequences From Alignment dialog.  Hides
 * the dialog and collects the selected sequence IDs.  If any selected
 * sequence is the first one of a pairwise alignment, an error is shown and
 * nothing is removed.  Otherwise each selected sequence is removed from the
 * alignments, marked objects are deleted and the object manager is notified.
 * The form is removed in either case.
 */
static void DoRemoveSequencesFromAlignment (ButtoN b)
{
  RemoveSeqFromAlignPtr    rp;
  ValNodePtr               selected, node;

  if (b == NULL) return;
  rp = (RemoveSeqFromAlignPtr) GetObjectExtra (b);
  if (rp == NULL) return;

  Hide ((WindoW) rp->form);

  selected = GetSelectedSequenceList (rp->sequence_list_ctrl);

  /* first, check for pairwise alignments */
  for (node = selected; node != NULL; node = node->next)
  {
    if (!IsSequenceFirstInPairwise (rp->sep, (SeqIdPtr) node->data.ptrvalue))
    {
      continue;
    }
    Message (MSG_ERROR, "One of the selected sequences is the first in a pairwise alignment."
             "  You must convert the alignment to a multiple alignment before trying to remove this sequence.");
    Remove (rp->form);
    ValNodeFree (selected);
    return;
  }

  node = selected;
  while (node != NULL)
  {
    RemoveSequenceFromAlignments (rp->sep, (SeqIdPtr) node->data.ptrvalue);
    node = node->next;
  }

  ValNodeFree (selected);
  DeleteMarkedObjects (rp->input_entityID, 0, NULL);
  ObjMgrSetDirtyFlag (rp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, rp->input_entityID, 0, 0);
  Remove (rp->form);
}
29106 
29107 
/* Menu handler: builds and shows the "Remove Sequences From Alignment"
 * dialog, which lists the sequences found in the record's alignments.
 * "Accept" runs DoRemoveSequencesFromAlignment.  Returns silently if no
 * form data or top SeqEntry is available, or if the dialog data / window
 * cannot be created.
 */
extern void RemoveSequencesFromAlignment (IteM i)
{
  BaseFormPtr              bfp;
  WindoW                   w;
  RemoveSeqFromAlignPtr    rp;
  GrouP                    h, k, c;
  ButtoN                   b;

#ifdef WIN_MAC
  bfp = currentFormDataPtr;
#else
  bfp = GetObjectExtra (i);
#endif

  if (bfp == NULL) return;

  rp = (RemoveSeqFromAlignPtr) MemNew (sizeof (RemoveSeqFromAlignData));
  if (rp == NULL) return;
  rp->input_entityID = bfp->input_entityID;
  rp->sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (rp->sep == NULL)
  {
	  MemFree (rp);
	  return;
  }

  w = FixedWindow (-50, -33, -10, -10, "Remove Sequences From Alignment", StdCloseWindowProc);
  if (w == NULL) {
	MemFree (rp);
	return;
  }
  rp->form = (ForM) w;
  /* rp is attached to the window; presumably StdCleanupFormProc releases it
   * when the window goes away - TODO confirm */
  SetObjectExtra (w, rp, StdCleanupFormProc);

  h = HiddenGroup (w, -1, 0, NULL);
  k = HiddenGroup (h, 2, 0, NULL);

  /* no action callback or user data is needed for this list */
  rp->sequence_list_ctrl = MakeAlignmentSequenceListControl (k, rp->sep, NULL, NULL);

  /* both buttons carry rp as their object extra */
  c = HiddenGroup (h, 4, 0, NULL);
  b = DefaultButton (c, "Accept", DoRemoveSequencesFromAlignment);
  SetObjectExtra (b, rp, NULL);
  b = PushButton (c, "Cancel", StdCancelButtonProc);
  SetObjectExtra (b, rp, NULL);
  AlignObjects (ALIGN_CENTER, (HANDLE) k, (HANDLE) c, NULL);
  RealizeWindow (w);
  Show (w);
  Update ();
}
29157 
29158 /* End of Remove Sequences From Alignments function code. */
29159 
29160 /* This section of code is used for removing sequences from the record. */
29161 
/* Feature visitor: appends sfp to the ValNode list whose head is addressed
 * by userdata when the feature has a product location.  Does nothing if sfp
 * or userdata is NULL.
 */
static void DoesBioseqHaveFeaturesWithProductsCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ValNodePtr PNTR head;
  ValNodePtr      node;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->product == NULL) return;

  head = (ValNodePtr PNTR) userdata;

  node = ValNodeNew (*head);
  if (node != NULL)
  {
    node->data.ptrvalue = sfp;
  }
  if (*head == NULL)
  {
    *head = node;
  }
}
29183 
29184 static void RemoveBioseq (BioseqPtr bsp, RemoveSeqFromAlignPtr rp);
29185 
RemoveBioseqProducts(ValNodePtr product_feature_list,RemoveSeqFromAlignPtr rp)29186 static void RemoveBioseqProducts (ValNodePtr product_feature_list, RemoveSeqFromAlignPtr rp)
29187 {
29188   ValNodePtr vnp;
29189   SeqFeatPtr sfp;
29190   BioseqPtr  bsp;
29191 
29192   for (vnp = product_feature_list; vnp != NULL; vnp = vnp->next)
29193   {
29194     sfp = (SeqFeatPtr) vnp->data.ptrvalue;
29195     if (sfp != NULL)
29196     {
29197   	  bsp = BioseqFindFromSeqLoc (sfp->product);
29198   	  sfp->product = SeqLocFree (sfp->product);
29199   	  RemoveBioseq (bsp, rp);
29200     }
29201   }
29202 }
29203 
/* Flags the Bioseq-set held by sep for deletion when it is a nuc-prot set.
 *
 * Nothing happens when sep is NULL, when sep is not a Bioseq-set, when the
 * entry carries no set pointer, or when the set is of any other class.
 */
static void RemoveNucProtSet (SeqEntryPtr sep)
{
  BioseqSetPtr bssp;

  if (sep == NULL || !IS_Bioseq_set (sep)) return;

  bssp = (BioseqSetPtr) sep->data.ptrvalue;
  /* an entry typed as a set may still carry no data; do not dereference it */
  if (bssp == NULL) return;
  if (bssp->_class != BioseqseqSet_class_nuc_prot) return;

  bssp->idx.deleteme = TRUE;
}
29214 
29215 typedef struct removealnorproductans
29216 {
29217   WindoW  w;
29218   Boolean ans;
29219   Boolean do_all;
29220   Boolean done;
29221 } RemoveAlnOrProductAnsData, PNTR RemoveAlnOrProductAnsPtr;
29222 
/* Button handler: records a one-time "yes" (ans TRUE, do_all FALSE) in the
 * answer record attached to the button, removes the prompt window and then
 * marks the record as done.
 */
static void RemoveAlnOrProductYes (ButtoN b)
{
  RemoveAlnOrProductAnsPtr answer = (RemoveAlnOrProductAnsPtr) GetObjectExtra (b);

  if (answer != NULL)
  {
    answer->ans = TRUE;
    answer->do_all = FALSE;
    Remove (answer->w);
    answer->done = TRUE;
  }
}
29234 
/* Button handler: records "yes, for everything" (ans TRUE, do_all TRUE) in
 * the answer record attached to the button, removes the prompt window and
 * then marks the record as done.
 */
static void RemoveAlnOrProductYesAll (ButtoN b)
{
  RemoveAlnOrProductAnsPtr answer = (RemoveAlnOrProductAnsPtr) GetObjectExtra (b);

  if (answer != NULL)
  {
    answer->ans = TRUE;
    answer->do_all = TRUE;
    Remove (answer->w);
    answer->done = TRUE;
  }
}
29246 
/* Button handler: records a one-time "no" (ans FALSE, do_all FALSE) in the
 * answer record attached to the button, removes the prompt window and then
 * marks the record as done.
 */
static void RemoveAlnOrProductNo (ButtoN b)
{
  RemoveAlnOrProductAnsPtr answer = (RemoveAlnOrProductAnsPtr) GetObjectExtra (b);

  if (answer != NULL)
  {
    answer->ans = FALSE;
    answer->do_all = FALSE;
    Remove (answer->w);
    answer->done = TRUE;
  }
}
29258 
/* Button handler: records "no, for everything" (ans FALSE, do_all TRUE) in
 * the answer record attached to the button, removes the prompt window and
 * then marks the record as done.
 */
static void RemoveAlnOrProductNoAll (ButtoN b)
{
  RemoveAlnOrProductAnsPtr answer = (RemoveAlnOrProductAnsPtr) GetObjectExtra (b);

  if (answer != NULL)
  {
    answer->ans = FALSE;
    answer->do_all = TRUE;
    Remove (answer->w);
    answer->done = TRUE;
  }
}
29270 
GetRemoveProducts(RemoveSeqFromAlignPtr rp,CharPtr idstr)29271 static Boolean GetRemoveProducts (RemoveSeqFromAlignPtr rp, CharPtr idstr)
29272 {
29273   RemoveAlnOrProductAnsData rd;
29274 
29275   GrouP                    g, h, c;
29276   ButtoN                   b;
29277   CharPtr                  prompt_fmt = "%s contains features that have products (proteins, etc.).  Would you like to remove the product sequences?";
29278   CharPtr                  prompt_str = NULL;
29279 
29280   if (rp == NULL || idstr == NULL) return FALSE;
29281   if (rp->remove_all_products) return TRUE;
29282   if (rp->no_remove_all_products) return FALSE;
29283 
29284   prompt_str = (CharPtr) MemNew (sizeof (Char) * (StringLen (prompt_fmt) + StringLen (idstr)));
29285   if (prompt_str == NULL) return FALSE;
29286   sprintf (prompt_str, prompt_fmt, idstr);
29287   rd.w = ModalWindow(-20, -13, -10, -10, NULL);
29288   h = HiddenGroup(rd.w, -1, 0, NULL);
29289   SetGroupSpacing (h, 10, 10);
29290   rd.done = FALSE;
29291   g = HiddenGroup (h, 1, 0, NULL);
29292   StaticPrompt (g, prompt_str, 0, popupMenuHeight, programFont, 'l');
29293   c = HiddenGroup (h, 4, 0, NULL);
29294   b = PushButton(c, "Yes", RemoveAlnOrProductYes);
29295   SetObjectExtra (b, &rd, NULL);
29296   b = PushButton(c, "Remove All", RemoveAlnOrProductYesAll);
29297   SetObjectExtra (b, &rd, NULL);
29298   b = DefaultButton(c, "No", RemoveAlnOrProductNo);
29299   SetObjectExtra (b, &rd, NULL);
29300   b = DefaultButton(c, "Remove None", RemoveAlnOrProductNoAll);
29301   SetObjectExtra (b, &rd, NULL);
29302   AlignObjects (ALIGN_CENTER, (HANDLE) g, (HANDLE) c, NULL);
29303   prompt_str = MemFree (prompt_str);
29304 
29305   Show(rd.w);
29306   Select (rd.w);
29307   rd.done = FALSE;
29308   while (!rd.done)
29309   {
29310     ProcessExternalEvent ();
29311     Update ();
29312   }
29313   ProcessAnEvent ();
29314   if (rd.do_all)
29315   {
29316     if (rd.ans)
29317     {
29318   	  rp->remove_all_products = TRUE;
29319   	  rp->no_remove_all_products = FALSE;
29320     }
29321     else
29322     {
29323   	  rp->remove_all_products = FALSE;
29324   	  rp->no_remove_all_products = TRUE;
29325     }
29326   }
29327   return rd.ans;
29328 }
29329 
29330 
/* Marks bsp for deletion and handles what hangs off it.
 *
 * - If bsp is in an alignment and rp holds no decision yet, the user is asked
 *   once (yes/no); the answer is stored in rp and reused for later calls.
 *   With a "remove" decision the sequence is taken out of the alignments.
 * - If features on bsp have products, GetRemoveProducts decides whether the
 *   product sequences are removed too.
 * - When bsp is a nucleotide, its best top parent is passed to
 *   RemoveNucProtSet.
 *
 * Nothing happens when bsp or rp is NULL.
 */
static void RemoveBioseq (BioseqPtr bsp, RemoveSeqFromAlignPtr rp)
{
  ValNodePtr   feats_with_products = NULL;
  CharPtr      label = NULL;
  SeqEntryPtr  parent_sep;
  Boolean      undecided;

  if (bsp == NULL || rp == NULL) return;

  if (IsBioseqInAnyAlignment (bsp, rp->input_entityID))
  {
    undecided = (Boolean) (!rp->remove_all_from_alignments
                           && !rp->no_remove_all_from_alignments);
    if (undecided)
    {
      if (Message (MSG_YN, "This sequence is part of an alignment.  Would you like to remove it from the alignment?") == ANS_YES)
      {
        rp->remove_all_from_alignments = TRUE;
      }
      else
      {
        rp->no_remove_all_from_alignments = TRUE;
      }
    }

    if (rp->remove_all_from_alignments)
    {
      RemoveSequenceFromAlignments (rp->sep, bsp->id);
    }
  }

  /* collect the features on this sequence that point at a product */
  VisitFeaturesOnBsp (bsp, &feats_with_products, DoesBioseqHaveFeaturesWithProductsCallback);
  if (feats_with_products != NULL)
  {
    label = SeqIdWholeLabel (bsp->id, PRINTID_REPORT);
    if (GetRemoveProducts (rp, label))
    {
      RemoveBioseqProducts (feats_with_products, rp);
    }
    label = MemFree (label);
  }

  bsp->idx.deleteme = TRUE;

  if (ISA_na (bsp->mol))
  {
    /* remove nuc-prot set if we are deleting the nucleotide */
    parent_sep = GetBestTopParentForData (rp->input_entityID, bsp);
    RemoveNucProtSet (parent_sep);
  }

  ValNodeFree (feats_with_products);
}
29378 
29379 
IsRemovalFromAlignmentRequired(BioseqPtr bsp)29380 static Boolean IsRemovalFromAlignmentRequired (BioseqPtr bsp)
29381 {
29382   SeqIdPtr sip;
29383   Boolean  rval = TRUE;
29384   ObjectIdPtr oip;
29385 
29386   if (bsp == NULL) {
29387     return FALSE;
29388   }
29389 
29390   for (sip = bsp->id; sip != NULL && rval; sip = sip->next) {
29391     if (sip->choice == SEQID_LOCAL
29392         && (oip = (ObjectIdPtr) sip->data.ptrvalue) != NULL
29393         && StringNICmp (oip->str, "acc", 3) == 0) {
29394       rval = FALSE;
29395     } else if (sip->choice == SEQID_GENBANK) {
29396       rval = FALSE;
29397     }
29398   }
29399   return rval;
29400 }
29401 
29402 
/* "Accept" handler of the remove-sequences forms.
 *
 * b carries the RemoveSeqFromAlignPtr form data as its object extra.
 *
 * Steps:
 *  1. Hide the form and count the chosen Bioseqs, how many of them are in an
 *     alignment, and how many of those must leave the alignment
 *     (IsRemovalFromAlignmentRequired).
 *  2. With nothing chosen, re-show the form and report an error.
 *  3. Ask for confirmation about sequences in alignments; a Cancel re-shows
 *     the form and returns without changing the record.
 *  4. Call RemoveBioseq for every chosen Bioseq, delete the marked objects,
 *     flag the entity dirty, send an update message and remove the form.
 *
 * List nodes whose ClickableItem pointer is NULL are skipped.
 */
static void DoRemoveSequencesFromRecord (ButtoN b)
{
  RemoveSeqFromAlignPtr    rp;
  WindoW                   w;
  ValNodePtr               vnp, item_vnp;
  BioseqPtr                bsp;
  ClickableItemPtr         cip;
  Int4                     num_in_alignment = 0;
  Int4                     num_must_remove_in_alignment = 0;
  Int4                     num_to_remove = 0;
  MsgAnswer                ans;

  if (b == NULL) return;
  rp = (RemoveSeqFromAlignPtr) GetObjectExtra (b);
  if (rp == NULL) return;

  w = (WindoW) rp->form;
  Hide (w);

  /* pre-check for sequences in alignments */
  for (vnp = rp->seq_list; vnp != NULL; vnp = vnp->next) {
    cip = (ClickableItemPtr) vnp->data.ptrvalue;
    /* test for NULL before looking at the item's fields */
    if (cip == NULL || !cip->chosen) continue;
    for (item_vnp = cip->item_list; item_vnp != NULL; item_vnp = item_vnp->next) {
      if (item_vnp->choice != OBJ_BIOSEQ) continue;
      bsp = item_vnp->data.ptrvalue;
      if (bsp == NULL) continue;
      num_to_remove++;
      if (IsBioseqInAnyAlignment(bsp, bsp->idx.entityID)) {
        num_in_alignment++;
        if (IsRemovalFromAlignmentRequired(bsp)) {
          num_must_remove_in_alignment++;
        }
      }
    }
  }
  if (num_to_remove == 0) {
    Show (w);
    Message (MSG_ERROR, "You have not selected any sequences for removal!\n(Mark the sequences you want to remove by checking the box next to the sequence or by using the Mark button next to the constraint.)");
    return;
  }

  if (num_in_alignment > 0) {
    if (num_must_remove_in_alignment > 0) {
      /* counts are cast so the argument type matches the %d conversion */
      ans = Message (MSG_OKC, "There are %d sequences that are in alignments and are not currently in the database.  These sequences must be removed from the alignment as well as the record.  Do you want to continue?", (int) num_must_remove_in_alignment);
      if (ans == ANS_CANCEL) {
        Show (rp->form);
        return;
      }
    }
    if (num_in_alignment == num_must_remove_in_alignment) {
      rp->remove_all_from_alignments = TRUE;
    } else {
      ans = Message (MSG_OKC, "There are %d sequences that are in alignments and may represent sequences already present in the database.  These sequences will not be processed as new submissions when sent to GenBank.  Do you wish to remove these sequences from the alignment?",
                     (int) (num_in_alignment - num_must_remove_in_alignment));
      if (ans == ANS_CANCEL) {
        Show (rp->form);
        return;
      } else {
        rp->remove_all_from_alignments = TRUE;
      }
    }
  }

  /* second pass: actually process every chosen Bioseq */
  for (vnp = rp->seq_list; vnp != NULL; vnp = vnp->next) {
    cip = (ClickableItemPtr) vnp->data.ptrvalue;
    if (cip == NULL || !cip->chosen) continue;
    for (item_vnp = cip->item_list; item_vnp != NULL; item_vnp = item_vnp->next) {
      if (item_vnp->choice != OBJ_BIOSEQ) continue;
      bsp = item_vnp->data.ptrvalue;
      if (bsp != NULL) {
        RemoveBioseq (bsp, rp);
      }
    }
  }

  DeleteMarkedObjects (rp->input_entityID, 0, NULL);
  ObjMgrSetDirtyFlag (rp->input_entityID, TRUE);
  ObjMgrSendMsg (OM_MSG_UPDATE, rp->input_entityID, 0, 0);
  Remove (rp->form);
}
29491 
29492 
/* Walks a list of ClickableItems and sets chosen = do_choose on every item
 * whose first list entry is a non-NULL Bioseq matching scp.  Items that do
 * not match that way have their subcategories searched recursively instead.
 * Nodes with a NULL item pointer are skipped.
 */
static void ChooseCategoriesByMacroSequenceConstraint (ValNodePtr value_list, SequenceConstraintPtr scp, Boolean do_choose)
{
  ValNodePtr       node;
  ClickableItemPtr cip;
  BioseqPtr        bsp;
  Boolean          matches;

  for (node = value_list; node != NULL; node = node->next) {
    cip = (ClickableItemPtr) node->data.ptrvalue;
    if (cip == NULL) continue;

    matches = FALSE;
    if (cip->item_list != NULL && cip->item_list->choice == OBJ_BIOSEQ) {
      bsp = (BioseqPtr) cip->item_list->data.ptrvalue;
      if (bsp != NULL) {
        matches = DoesSequenceMatchSequenceConstraint (bsp, scp);
      }
    }

    if (matches) {
      cip->chosen = do_choose;
    } else {
      ChooseCategoriesByMacroSequenceConstraint (cip->subcategories, scp, do_choose);
    }
  }
}
29512 
29513 
ChooseCategoriesByIdList(ValNodePtr seq_list,CharPtr id_list)29514 NLM_EXTERN void ChooseCategoriesByIdList (ValNodePtr seq_list, CharPtr id_list)
29515 {
29516   SequenceConstraintPtr      scp;
29517 
29518   scp = SequenceConstraintNew ();
29519   scp->id = StringConstraintNew ();
29520   scp->id->match_location = String_location_inlist;
29521   scp->id->case_sensitive = FALSE;
29522   scp->id->match_text = StringSave (id_list);
29523   ChooseCategoriesByMacroSequenceConstraint (seq_list, scp, TRUE);
29524   scp = SequenceConstraintFree (scp);
29525 }
29526 
29527 
/* "Mark" button handler: reads the constraint currently shown in the form's
 * constraint dialog, marks every matching sequence as chosen, pushes the
 * list back into the clickable-list dialog and frees the constraint.
 */
static void SelectSequenceIDsForRemoval (ButtoN b)
{
  RemoveSeqFromAlignPtr  form_data;
  SequenceConstraintPtr  constraint;

  form_data = (RemoveSeqFromAlignPtr) GetObjectExtra (b);
  if (form_data != NULL) {
    constraint = DialogToPointer (form_data->constraint_dlg);
    ChooseCategoriesByMacroSequenceConstraint (form_data->seq_list, constraint, TRUE);
    PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);
    constraint = SequenceConstraintFree (constraint);
  }
}
29541 
29542 
/* Button handler: clears the chosen state of the form's sequence list via
 * ChooseCategories (..., FALSE) and pushes the list back into the dialog.
 */
static void UnselectAllSequences (ButtoN b)
{
  RemoveSeqFromAlignPtr form_data = (RemoveSeqFromAlignPtr) GetObjectExtra (b);

  if (form_data == NULL) return;

  ChooseCategories (form_data->seq_list, FALSE);
  PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);
}
29553 
29554 
/* "Resort Marked" button handler: sorts the form's sequence list with
 * SortVnpByClickableItemChosen and pushes the sorted list into the dialog.
 */
static void ResortMarkedSegCategories (ButtoN b)
{
  RemoveSeqFromAlignPtr form_data = (RemoveSeqFromAlignPtr) GetObjectExtra (b);

  if (form_data == NULL) return;

  form_data->seq_list = ValNodeSort (form_data->seq_list, SortVnpByClickableItemChosen);
  PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);
}
29565 
29566 
/* Builds and shows the "Remove Sequences From Record" window for the entity
 * that bfp refers to.
 *
 * The window holds a clickable list of all Bioseqs in the top SeqEntry, a
 * sequence-constraint dialog (preset to an "in list" id constraint) with a
 * "Mark" button, a warning prompt, and Accept / Unmark All Sequences /
 * Resort Marked / Cancel buttons.  The form data is attached to the window
 * and to every button.
 *
 * Returns without showing anything when bfp is NULL, when the form data or
 * the window cannot be created, or when the entity has no top SeqEntry.
 */
static void RemoveSequencesFromRecordBaseForm (BaseFormPtr bfp)
{
  WindoW                   win;
  RemoveSeqFromAlignPtr    rp;
  GrouP                    main_grp, btn_grp;
  ButtoN                   mark_btn, btn;
  SeqEntryPtr              top_sep;
  SequenceConstraint       default_constraint;
  StringConstraint         default_id_constraint;
  PrompT                   warning_ppt, mark_ppt;

  if (bfp == NULL) return;

  rp = (RemoveSeqFromAlignPtr) MemNew (sizeof (RemoveSeqFromAlignData));
  if (rp == NULL) return;

  rp->input_entityID = bfp->input_entityID;
  rp->sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  if (rp->sep == NULL)
  {
    MemFree (rp);
    return;
  }

  /* no blanket decisions have been made yet */
  rp->remove_all_from_alignments = FALSE;
  rp->no_remove_all_from_alignments = FALSE;
  rp->remove_all_products = FALSE;
  rp->no_remove_all_products = FALSE;

  win = FixedWindow (-50, -33, -10, -10, "Remove Sequences From Record", StdCloseWindowProc);
  if (win == NULL)
  {
    MemFree (rp);
    return;
  }
  rp->form = (ForM) win;
  SetObjectExtra (win, rp, StdCleanupFormProc);

  main_grp = HiddenGroup (win, -1, 0, NULL);

  /* list of sequences with checkboxes */
  rp->clickable_list_dlg = CreateClickableListDialogExEx (main_grp, "Sequences to Remove", "",
                                                      "Use checkbox to mark sequences to remove",
                                                      "Single click to navigate to sequence in record",
                                                      ScrollToDiscrepancyItem, EditDiscrepancyItem, NULL,
                                                      GetDiscrepancyItemText,
                                                      stdCharWidth * 30,
                                                      stdCharWidth * 30 + 5,
                                                      TRUE, FALSE, TRUE);

  /* constraint used by the "Mark" button */
  mark_ppt = StaticPrompt (main_grp, "Mark sequences where:", 0, dialogTextHeight, programFont, 'c');
  rp->constraint_dlg = SequenceConstraintDialog (main_grp, NULL, NULL);

  /* set up default constraint */
  MemSet (&default_constraint, 0, sizeof (SequenceConstraint));
  MemSet (&default_id_constraint, 0, sizeof (StringConstraint));
  default_id_constraint.match_location = String_location_inlist;
  default_constraint.id = &default_id_constraint;
  PointerToDialog (rp->constraint_dlg, &default_constraint);

  mark_btn = PushButton (main_grp, "Mark", SelectSequenceIDsForRemoval);
  SetObjectExtra (mark_btn, rp, NULL);

  /* fill the list with every Bioseq in the record */
  top_sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
  VisitBioseqsInSep (top_sep, &(rp->seq_list), ListAllSequences);

  PointerToDialog (rp->clickable_list_dlg, rp->seq_list);

  warning_ppt = StaticPrompt (main_grp, "Warning - this will remove sequence from Sequin and SMART", 0, dialogTextHeight, programFont, 'c');

  /* command buttons */
  btn_grp = HiddenGroup (main_grp, 4, 0, NULL);
  btn = DefaultButton (btn_grp, "Accept", DoRemoveSequencesFromRecord);
  SetObjectExtra (btn, rp, NULL);
  btn = PushButton (btn_grp, "Unmark All Sequences", UnselectAllSequences);
  SetObjectExtra (btn, rp, NULL);
  btn = PushButton (btn_grp, "Resort Marked", ResortMarkedSegCategories);
  SetObjectExtra (btn, rp, NULL);
  btn = PushButton (btn_grp, "Cancel", StdCancelButtonProc);
  SetObjectExtra (btn, rp, NULL);

  AlignObjects (ALIGN_CENTER,
                (HANDLE) rp->clickable_list_dlg,
                (HANDLE) mark_ppt,
                (HANDLE) rp->constraint_dlg,
                (HANDLE) mark_btn,
                (HANDLE) warning_ppt,
                (HANDLE) btn_grp,
                NULL);

  RealizeWindow (win);
  Show (win);
  Update ();
}
29657 
29658 
/* "Select sequences in this list:" handler of the submitter delete form.
 * Builds an "in list", case-insensitive id constraint from the text in
 * mark_seqid_list, marks every matching sequence as chosen and refreshes
 * the clickable list.  Does nothing when the text field has no text.
 */
static void MarkBFPRemoveListBtn (ButtoN b)
{
  RemoveSeqFromAlignPtr      form_data;
  SequenceConstraintPtr      constraint;

  form_data = (RemoveSeqFromAlignPtr)GetObjectExtra (b);
  if (form_data == NULL) return;
  if (TextHasNoText (form_data->mark_seqid_list)) return;

  constraint = SequenceConstraintNew ();
  constraint->id = StringConstraintNew ();
  constraint->id->match_location = String_location_inlist;
  constraint->id->case_sensitive = FALSE;
  constraint->id->match_text = SaveStringFromText (form_data->mark_seqid_list);

  ChooseCategoriesByMacroSequenceConstraint (form_data->seq_list, constraint, TRUE);
  PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);

  constraint = SequenceConstraintFree (constraint);
}
29678 
29679 
/* "Select sequences less than:" handler of the submitter delete form.
 * Reads a number from the mark_seqlen text field, builds a "less than"
 * length constraint from it, marks every matching sequence as chosen and
 * refreshes the clickable list.
 *
 * Does nothing when no form data is attached, when no text could be read
 * from the field, or when the number is smaller than 1.
 */
static void MarkBFPRemoveLenBtn (ButtoN b)
{
  RemoveSeqFromAlignPtr    rp;
  SequenceConstraintPtr      scp;
  CharPtr txt;
  Int4    num;

  rp = (RemoveSeqFromAlignPtr)GetObjectExtra (b);
  if (rp == NULL) {
    return;
  }

  txt = SaveStringFromText (rp->mark_seqlen);
  /* never hand a NULL string to atoi; the wizard counterpart
   * MarkRemoveLenBtn likewise screens out an empty field before converting */
  if (txt == NULL) {
    return;
  }
  num = atoi (txt);
  txt = MemFree (txt);
  if (num < 1) {
    return;
  }

  scp = SequenceConstraintNew ();
  scp->length = ValNodeNew (NULL);
  scp->length->choice = QuantityConstraint_less_than;
  scp->length->data.intvalue = num;
  ChooseCategoriesByMacroSequenceConstraint (rp->seq_list, scp, TRUE);
  PointerToDialog (rp->clickable_list_dlg, rp->seq_list);
  scp = SequenceConstraintFree (scp);
}
29706 
29707 
SubmitterRemoveSequencesFromRecordBaseForm(BaseFormPtr bfp)29708 NLM_EXTERN void SubmitterRemoveSequencesFromRecordBaseForm (BaseFormPtr bfp)
29709 {
29710   WindoW                   w;
29711   RemoveSeqFromAlignPtr    rp;
29712   GrouP                    h, mark_grp, sg, c;
29713   ButtoN                   b, unselect;
29714   SeqEntryPtr              sep;
29715 
29716   if (bfp == NULL) return;
29717 
29718   rp = (RemoveSeqFromAlignPtr) MemNew (sizeof (RemoveSeqFromAlignData));
29719   if (rp == NULL) return;
29720   rp->input_entityID = bfp->input_entityID;
29721   rp->sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
29722   if (rp->sep == NULL)
29723   {
29724 	  MemFree (rp);
29725 	  return;
29726   }
29727 
29728   rp->remove_all_from_alignments = FALSE;
29729   rp->remove_all_products = FALSE;
29730   rp->no_remove_all_from_alignments = FALSE;
29731   rp->no_remove_all_products = FALSE;
29732 
29733   w = FixedWindow (-50, -33, -10, -10, "Delete Sequences", StdCloseWindowProc);
29734   if (w == NULL)
29735   {
29736 	  MemFree (rp);
29737 	  return;
29738   }
29739   rp->form = (ForM) w;
29740   SetObjectExtra (w, rp, StdCleanupFormProc);
29741 
29742   h = HiddenGroup (w, -1, 0, NULL);
29743 
29744   rp->clickable_list_dlg = CreateClickableListDialogExEx (h, "Sequences in your file", "Sequences selected for deletion",
29745                                                       "Use checkbox to select sequences for deletion",
29746                                                       "Single click to navigate to sequence in record",
29747                                                       ScrollToDiscrepancyItem, EditDiscrepancyItem, NULL,
29748                                                       GetDiscrepancyItemText,
29749                                                       stdCharWidth * 30,
29750                                                       stdCharWidth * 30 + 5,
29751                                                       TRUE, FALSE, TRUE);
29752 
29753   mark_grp = HiddenGroup (h, 2, 0, NULL);
29754   SetGroupSpacing (mark_grp, 10, 10);
29755   b = PushButton (mark_grp, "Select sequences in this list:", MarkBFPRemoveListBtn);
29756   SetObjectExtra (b, rp, NULL);
29757   rp->mark_seqid_list = DialogText (mark_grp, "", 30, NULL);
29758   b = PushButton (mark_grp, "Select sequences less than:", MarkBFPRemoveLenBtn);
29759   SetObjectExtra (b, rp, NULL);
29760   sg = HiddenGroup (mark_grp, 2, 0, NULL);
29761   SetGroupSpacing (sg, 10, 10);
29762   rp->mark_seqlen = DialogText (sg, "200", 10, NULL);
29763   StaticPrompt (sg, "nucleotides in length", 0, 0, programFont, 'l');
29764 
29765   sep = GetTopSeqEntryForEntityID (bfp->input_entityID);
29766   VisitBioseqsInSep (sep, &(rp->seq_list), ListAllSequences);
29767 
29768   PointerToDialog (rp->clickable_list_dlg, rp->seq_list);
29769 
29770   unselect = PushButton (h, "Unselect All Sequences", UnselectAllSequences);
29771   SetObjectExtra (unselect, rp, NULL);
29772 
29773   c = HiddenGroup (h, 4, 0, NULL);
29774   b = DefaultButton (c, "Accept", DoRemoveSequencesFromRecord);
29775   SetObjectExtra (b, rp, NULL);
29776   b = PushButton (c, "Cancel", StdCancelButtonProc);
29777   SetObjectExtra (b, rp, NULL);
29778   AlignObjects (ALIGN_CENTER,
29779                 (HANDLE) rp->clickable_list_dlg,
29780                 (HANDLE) mark_grp,
29781                 (HANDLE) unselect,
29782                 (HANDLE) c,
29783                 NULL);
29784 
29785   RealizeWindow (w);
29786   Show (w);
29787   Update ();
29788 }
29789 
29790 
29791 
ButtonOrMenuItemTemplate(IteM i,MenuOrButtonFunc func)29792 NLM_EXTERN void ButtonOrMenuItemTemplate (IteM i, MenuOrButtonFunc func)
29793 {
29794   BaseFormPtr              bfp;
29795 
29796 #ifdef WIN_MAC
29797   bfp = currentFormDataPtr;
29798 #else
29799   bfp = GetObjectExtra (i);
29800 #endif
29801 
29802   if (bfp == NULL || func == NULL) return;
29803   func(bfp);
29804 }
29805 
29806 
ButtonOrMenuButtonTemplate(ButtoN b,MenuOrButtonFunc func)29807 NLM_EXTERN void ButtonOrMenuButtonTemplate (ButtoN b, MenuOrButtonFunc func)
29808 {
29809   BaseFormPtr              bfp;
29810 
29811   bfp = GetObjectExtra (b);
29812 
29813   if (bfp == NULL || func == NULL) return;
29814   func(bfp);
29815 }
29816 
29817 
/* Menu-item entry point: opens the "Remove Sequences From Record" form for
 * the base form resolved from menu item i.
 */
extern void RemoveSequencesFromRecord (IteM i)
{
  ButtonOrMenuItemTemplate (i, RemoveSequencesFromRecordBaseForm);
}
29822 
29823 
/* Button entry point: opens the "Remove Sequences From Record" form for the
 * base form stored on button b.
 */
extern void RemoveSequencesFromRecordBtn (ButtoN b)
{
  ButtonOrMenuButtonTemplate (b, RemoveSequencesFromRecordBaseForm);
}
29828 
29829 
29830 typedef struct removeseqfromwizardform {
29831   FORM_MESSAGE_BLOCK
29832   DialoG clickable_list_dlg;
29833   TexT   mark_seqid_list;
29834   TexT   mark_seqlen;
29835   ValNodePtr seq_list;
29836 } RemoveSeqFromWizardFormData, PNTR RemoveSeqFromWizardFormPtr;
29837 
29838 
/* Button handler of the wizard dialog: clears the chosen state of the
 * form's sequence list via ChooseCategories (..., FALSE) and pushes the
 * list back into the dialog.
 */
static void UnselectAllSequencesForRemove (ButtoN b)
{
  RemoveSeqFromWizardFormPtr form_data = (RemoveSeqFromWizardFormPtr) GetObjectExtra (b);

  if (form_data == NULL) return;

  ChooseCategories (form_data->seq_list, FALSE);
  PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);
}
29849 
29850 
/* "Select sequences in this list:" handler of the wizard dialog.
 * Builds an "in list", case-insensitive id constraint from the text in
 * mark_seqid_list, marks every matching sequence as chosen and refreshes
 * the clickable list.  Does nothing when the text field has no text.
 */
static void MarkRemoveListBtn (ButtoN b)
{
  RemoveSeqFromWizardFormPtr form_data;
  SequenceConstraintPtr      constraint;

  form_data = (RemoveSeqFromWizardFormPtr) GetObjectExtra (b);
  if (form_data == NULL) return;
  if (TextHasNoText (form_data->mark_seqid_list)) return;

  constraint = SequenceConstraintNew ();
  constraint->id = StringConstraintNew ();
  constraint->id->match_location = String_location_inlist;
  constraint->id->case_sensitive = FALSE;
  constraint->id->match_text = SaveStringFromText (form_data->mark_seqid_list);

  ChooseCategoriesByMacroSequenceConstraint (form_data->seq_list, constraint, TRUE);
  PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);

  constraint = SequenceConstraintFree (constraint);
}
29870 
29871 
/* "Select sequences less than:" handler of the wizard dialog.
 * Reads a number from the mark_seqlen text field, builds a "less than"
 * length constraint from it, marks every matching sequence as chosen and
 * refreshes the clickable list.  Does nothing when the field has no text
 * or the number is smaller than 1.
 */
static void MarkRemoveLenBtn (ButtoN b)
{
  RemoveSeqFromWizardFormPtr form_data;
  SequenceConstraintPtr      constraint;
  CharPtr                    len_text;
  Int4                       max_len;

  form_data = (RemoveSeqFromWizardFormPtr) GetObjectExtra (b);
  if (form_data == NULL) return;
  if (TextHasNoText (form_data->mark_seqlen)) return;

  len_text = SaveStringFromText (form_data->mark_seqlen);
  max_len = atoi (len_text);
  len_text = MemFree (len_text);

  if (max_len >= 1) {
    constraint = SequenceConstraintNew ();
    constraint->length = ValNodeNew (NULL);
    constraint->length->choice = QuantityConstraint_less_than;
    constraint->length->data.intvalue = max_len;
    ChooseCategoriesByMacroSequenceConstraint (form_data->seq_list, constraint, TRUE);
    PointerToDialog (form_data->clickable_list_dlg, form_data->seq_list);
    constraint = SequenceConstraintFree (constraint);
  }
}
29897 
29898 
RemoveSequencesFromWizardList(ValNodePtr PNTR sequences,Int4 suggested_min)29899 NLM_EXTERN Boolean RemoveSequencesFromWizardList (ValNodePtr PNTR sequences, Int4 suggested_min)
29900 {
29901   WindoW w;
29902   RemoveSeqFromWizardFormPtr frm;
29903   GrouP h, mark_grp, sg, c;
29904   ButtoN b, unselect;
29905   ModalAcceptCancelData acd;
29906   ValNodeBlock removal_list;
29907   ValNodePtr   vnp;
29908   SeqEntryPtr  sep, sep_prev = NULL, sep_next;
29909   ClickableItemPtr cip;
29910   Boolean          rval = FALSE;
29911   Char             buf[20];
29912 
29913   if (sequences == NULL || *sequences == NULL) {
29914     return FALSE;
29915   }
29916 
29917   InitValNodeBlock (&removal_list, NULL);
29918   for (sep = *sequences; sep != NULL; sep = sep->next) {
29919     if (IS_Bioseq (sep)) {
29920       cip = ClickableItemForBioseq (sep->data.ptrvalue);
29921       ValNodeAddPointerToEnd (&removal_list, 0, cip);
29922     }
29923   }
29924 
29925   acd.accepted = FALSE;
29926   acd.cancelled = FALSE;
29927   acd.third_option = FALSE;
29928 
29929   frm = (RemoveSeqFromWizardFormPtr) MemNew (sizeof (RemoveSeqFromWizardFormData));
29930   w = ModalWindow(-20, -13, -10, -10, NULL);
29931   frm->form = (ForM) w;
29932   SetObjectExtra (w, frm, StdCleanupFormProc);
29933   frm->seq_list = removal_list.head;
29934 
29935   h = HiddenGroup (w, -1, 0, NULL);
29936 
29937 
29938   frm->clickable_list_dlg = CreateClickableListDialogExEx (h, "Sequences in your file", "Sequences selected for deletion",
29939                                                       "Use checkbox to select sequences to delete",
29940                                                       "",
29941                                                       NULL, NULL, NULL,
29942                                                       GetDiscrepancyItemText,
29943                                                       stdCharWidth * 30,
29944                                                       stdCharWidth * 30 + 5,
29945                                                       TRUE, FALSE, TRUE);
29946   PointerToDialog (frm->clickable_list_dlg, frm->seq_list);
29947 
29948   mark_grp = HiddenGroup (h, 2, 0, NULL);
29949   SetGroupSpacing (mark_grp, 10, 10);
29950   b = PushButton (mark_grp, "Select sequences in this list:", MarkRemoveListBtn);
29951   SetObjectExtra (b, frm, NULL);
29952   frm->mark_seqid_list = DialogText (mark_grp, "", 30, NULL);
29953   b = PushButton (mark_grp, "Select sequences less than:", MarkRemoveLenBtn);
29954   SetObjectExtra (b, frm, NULL);
29955   sg = HiddenGroup (mark_grp, 2, 0, NULL);
29956   SetGroupSpacing (sg, 10, 10);
29957   sprintf (buf, "%d", suggested_min);
29958   frm->mark_seqlen = DialogText (sg, buf, 10, NULL);
29959   StaticPrompt (sg, "nucleotides in length", 0, 0, programFont, 'l');
29960 
29961   unselect = PushButton (h, "Unselect All Sequences", UnselectAllSequencesForRemove);
29962   SetObjectExtra (unselect, frm, NULL);
29963 
29964   c = HiddenGroup (h, 4, 0, NULL);
29965   SetGroupSpacing (c, 10, 10);
29966   b = PushButton (c, "Delete and Accept", ModalAcceptButton);
29967   SetObjectExtra (b, &acd, NULL);
29968   b = PushButton (c, "Cancel", ModalCancelButton);
29969   SetObjectExtra (b, &acd, NULL);
29970 
29971   AlignObjects (ALIGN_CENTER,
29972                 (HANDLE) frm->clickable_list_dlg,
29973                 (HANDLE) mark_grp,
29974                 (HANDLE) unselect,
29975                 (HANDLE) c,
29976                 NULL);
29977 
29978   RealizeWindow (w);
29979   Show (w);
29980   Update ();
29981   Select (w);
29982   while (!acd.accepted && ! acd.cancelled)
29983   {
29984     ProcessExternalEvent ();
29985     Update ();
29986   }
29987   ProcessAnEvent ();
29988   if (acd.accepted)
29989   {
29990     /* actually remove the sequences */
29991     for (sep = *sequences, vnp = frm->seq_list;
29992          sep != NULL && vnp != NULL;
29993          sep = sep_next, vnp = vnp->next)
29994     {
29995       sep_next = sep->next;
29996       if (IS_Bioseq (sep)
29997           && (cip = (ClickableItemPtr) (vnp->data.ptrvalue)) != NULL
29998           && cip->chosen)
29999       {
30000         if (sep_prev == NULL)
30001         {
30002           *sequences = sep_next;
30003         }
30004         else
30005         {
30006           sep_prev->next = sep_next;
30007         }
30008         sep->next = NULL;
30009         sep = SeqEntryFree (sep);
30010         rval = TRUE;
30011       }
30012       else
30013       {
30014         sep_prev = sep;
30015       }
30016     }
30017   }
30018   frm->seq_list = FreeClickableList (frm->seq_list);
30019   Remove (w);
30020   return rval;
30021 }
30022 
30023 
30024